From b63a822a1b51a99116d6aea6f3b65574bd83fa22 Mon Sep 17 00:00:00 2001 From: Nan Wang Date: Mon, 10 Feb 2025 14:20:57 -0800 Subject: [PATCH] pva: deploy V3 KMD Jira PVAAS-17785 Change-Id: I8ebc4c49aec209c5f82c6725605b62742402500a Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3299880 Tested-by: Nan Wang Reviewed-by: Vishwas M Reviewed-by: Mohnish Jain GVS: buildbot_gerritrpt Reviewed-by: Nan Wang --- drivers/video/tegra/host/pva/Makefile | 104 ++ .../pva/src/fw/baremetal/include/pva-bit.h | 196 ++ .../src/fw/baremetal/include/pva-checkpoint.h | 316 ++++ .../pva/src/fw/baremetal/include/pva-config.h | 231 +++ .../pva/src/fw/baremetal/include/pva-errors.h | 428 +++++ .../src/fw/baremetal/include/pva-fw-version.h | 189 ++ .../pva/src/fw/baremetal/include/pva-packed.h | 30 + .../src/fw/baremetal/include/pva-sys-dma.h | 486 +++++ .../src/fw/baremetal/include/pva-sys-params.h | 150 ++ .../pva/src/fw/baremetal/include/pva-types.h | 44 + .../src/fw/baremetal/include/pva-version.h | 69 + .../include/pva-vpu-syscall-interface.h | 309 ++++ .../tegra/host/pva/src/fw/include/pva_fw.h | 295 +++ .../pva/src/fw/include/pva_fw_address_map.h | 178 ++ .../host/pva/src/fw/include/pva_fw_hyp.h | 120 ++ .../host/pva/src/fw/include/pva_resource.h | 340 ++++ .../tegra/host/pva/src/include/pva_api.h | 349 ++++ .../host/pva/src/include/pva_api_cmdbuf.h | 627 +++++++ .../tegra/host/pva/src/include/pva_api_cuda.h | 222 +++ .../tegra/host/pva/src/include/pva_api_dma.h | 343 ++++ .../host/pva/src/include/pva_api_nvsci.h | 202 +++ .../host/pva/src/include/pva_api_types.h | 396 ++++ .../tegra/host/pva/src/include/pva_api_vpu.h | 33 + .../src/kmd/common/pva_kmd_block_allocator.c | 125 ++ .../src/kmd/common/pva_kmd_block_allocator.h | 50 + .../host/pva/src/kmd/common/pva_kmd_cmdbuf.c | 280 +++ .../host/pva/src/kmd/common/pva_kmd_cmdbuf.h | 265 +++ .../pva/src/kmd/common/pva_kmd_constants.h | 62 + .../host/pva/src/kmd/common/pva_kmd_context.c | 363 ++++ .../host/pva/src/kmd/common/pva_kmd_context.h | 104 ++ .../host/pva/src/kmd/common/pva_kmd_debugfs.c | 142 ++ .../host/pva/src/kmd/common/pva_kmd_debugfs.h | 56 + .../host/pva/src/kmd/common/pva_kmd_device.c | 338 ++++ .../host/pva/src/kmd/common/pva_kmd_device.h | 158 ++ .../host/pva/src/kmd/common/pva_kmd_dma_cfg.c | 148 ++ .../host/pva/src/kmd/common/pva_kmd_dma_cfg.h | 139 ++ .../src/kmd/common/pva_kmd_dma_cfg_binding.c | 369 ++++ .../src/kmd/common/pva_kmd_dma_cfg_validate.c | 821 +++++++++ .../src/kmd/common/pva_kmd_dma_cfg_write.c | 294 +++ .../pva/src/kmd/common/pva_kmd_executable.h | 74 + .../pva/src/kmd/common/pva_kmd_fw_debug.c | 52 + .../pva/src/kmd/common/pva_kmd_fw_debug.h | 26 + .../pva/src/kmd/common/pva_kmd_fw_profiler.c | 338 ++++ .../pva/src/kmd/common/pva_kmd_fw_profiler.h | 41 + .../src/kmd/common/pva_kmd_hwseq_validate.c | 1608 +++++++++++++++++ .../src/kmd/common/pva_kmd_hwseq_validate.h | 336 ++++ .../host/pva/src/kmd/common/pva_kmd_msg.c | 98 + .../host/pva/src/kmd/common/pva_kmd_msg.h | 26 + .../host/pva/src/kmd/common/pva_kmd_mutex.h | 35 + .../pva/src/kmd/common/pva_kmd_op_handler.c | 814 +++++++++ .../pva/src/kmd/common/pva_kmd_op_handler.h | 44 + .../host/pva/src/kmd/common/pva_kmd_pm.c | 210 +++ .../host/pva/src/kmd/common/pva_kmd_pm.h | 19 + .../host/pva/src/kmd/common/pva_kmd_queue.c | 252 +++ .../host/pva/src/kmd/common/pva_kmd_queue.h | 48 + .../host/pva/src/kmd/common/pva_kmd_regs.h | 135 ++ .../src/kmd/common/pva_kmd_resource_table.c | 477 +++++ .../src/kmd/common/pva_kmd_resource_table.h | 153 ++ 
.../host/pva/src/kmd/common/pva_kmd_sha256.c | 185 ++ .../host/pva/src/kmd/common/pva_kmd_sha256.h | 76 + .../pva/src/kmd/common/pva_kmd_silicon_boot.c | 317 ++++ .../pva/src/kmd/common/pva_kmd_silicon_boot.h | 44 + .../kmd/common/pva_kmd_silicon_elf_parser.c | 414 +++++ .../kmd/common/pva_kmd_silicon_elf_parser.h | 363 ++++ .../kmd/common/pva_kmd_silicon_executable.c | 920 ++++++++++ .../pva/src/kmd/common/pva_kmd_silicon_hwpm.c | 63 + .../pva/src/kmd/common/pva_kmd_silicon_hwpm.h | 50 + .../pva/src/kmd/common/pva_kmd_silicon_isr.c | 135 ++ .../pva/src/kmd/common/pva_kmd_silicon_isr.h | 20 + .../pva/src/kmd/common/pva_kmd_silicon_misc.c | 33 + .../src/kmd/common/pva_kmd_silicon_utils.h | 52 + .../pva/src/kmd/common/pva_kmd_submitter.c | 156 ++ .../pva/src/kmd/common/pva_kmd_submitter.h | 68 + .../host/pva/src/kmd/common/pva_kmd_t23x.c | 88 + .../host/pva/src/kmd/common/pva_kmd_t23x.h | 39 + .../host/pva/src/kmd/common/pva_kmd_t26x.c | 84 + .../host/pva/src/kmd/common/pva_kmd_t26x.h | 46 + .../pva/src/kmd/common/pva_kmd_tegra_stats.c | 141 ++ .../pva/src/kmd/common/pva_kmd_tegra_stats.h | 34 + .../src/kmd/common/pva_kmd_userspace_misc.c | 148 ++ .../host/pva/src/kmd/common/pva_kmd_utils.c | 29 + .../host/pva/src/kmd/common/pva_kmd_utils.h | 28 + .../pva/src/kmd/common/pva_kmd_vpu_app_auth.c | 368 ++++ .../pva/src/kmd/common/pva_kmd_vpu_app_auth.h | 77 + .../host/pva/src/kmd/common/pva_kmd_vpu_ocd.c | 128 ++ .../host/pva/src/kmd/common/pva_kmd_vpu_ocd.h | 36 + .../host/pva/src/kmd/common/pva_plat_faults.h | 40 + .../kmd/common/shim/pva_kmd_device_memory.h | 112 ++ .../src/kmd/common/shim/pva_kmd_shim_ccq.h | 34 + .../kmd/common/shim/pva_kmd_shim_debugfs.h | 29 + .../src/kmd/common/shim/pva_kmd_shim_init.h | 64 + .../kmd/common/shim/pva_kmd_shim_silicon.h | 142 ++ .../src/kmd/common/shim/pva_kmd_shim_utils.h | 72 + .../common/shim/pva_kmd_shim_vpu_app_auth.h | 17 + .../src/kmd/common/shim/pva_kmd_thread_sema.h | 69 + .../tegra/host/pva/src/kmd/include/pva_kmd.h | 183 ++ .../video/tegra/host/pva/src/kmd/linux/Kbuild | 19 + .../pva/src/kmd/linux/include/pva_kmd_linux.h | 39 + .../pva/src/kmd/linux/pva_kmd_linux_debugfs.c | 145 ++ .../pva/src/kmd/linux/pva_kmd_linux_device.c | 390 ++++ .../pva/src/kmd/linux/pva_kmd_linux_device.h | 52 + .../kmd/linux/pva_kmd_linux_device_memory.c | 224 +++ .../pva/src/kmd/linux/pva_kmd_linux_driver.c | 610 +++++++ .../pva/src/kmd/linux/pva_kmd_linux_ioctl.c | 194 ++ .../pva/src/kmd/linux/pva_kmd_linux_isr.c | 81 + .../pva/src/kmd/linux/pva_kmd_linux_isr.h | 31 + .../pva/src/kmd/linux/pva_kmd_linux_misc.c | 96 + .../pva/src/kmd/linux/pva_kmd_linux_smmu.c | 175 ++ .../kmd/linux/pva_kmd_linux_vpu_app_auth.c | 84 + .../host/pva/src/libs/pva/include/pva_bit.h | 205 +++ .../pva/src/libs/pva/include/pva_constants.h | 149 ++ .../pva/src/libs/pva/include/pva_math_utils.h | 851 +++++++++ .../host/pva/src/libs/pva/include/pva_utils.h | 102 ++ 113 files changed, 22508 insertions(+) create mode 100644 drivers/video/tegra/host/pva/Makefile create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-bit.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-config.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-fw-version.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-packed.h create mode 100644 
drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-dma.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-params.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-types.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-version.h create mode 100644 drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h create mode 100644 drivers/video/tegra/host/pva/src/fw/include/pva_fw.h create mode 100644 drivers/video/tegra/host/pva/src/fw/include/pva_fw_address_map.h create mode 100644 drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h create mode 100644 drivers/video/tegra/host/pva/src/fw/include/pva_resource.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api_cuda.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api_dma.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api_nvsci.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api_types.h create mode 100644 drivers/video/tegra/host/pva/src/include/pva_api_vpu.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_mutex.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c create mode 100644 
drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_regs.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_utils.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/pva_plat_faults.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_ccq.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h create mode 100644 
drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_thread_sema.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/Kbuild create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.h create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c create mode 100644 drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c create mode 100644 drivers/video/tegra/host/pva/src/libs/pva/include/pva_bit.h create mode 100644 drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h create mode 100644 drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h create mode 100644 drivers/video/tegra/host/pva/src/libs/pva/include/pva_utils.h diff --git a/drivers/video/tegra/host/pva/Makefile b/drivers/video/tegra/host/pva/Makefile new file mode 100644 index 00000000..95a90af8 --- /dev/null +++ b/drivers/video/tegra/host/pva/Makefile @@ -0,0 +1,104 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: GPL-2.0-only +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + + +ifndef CONFIG_TEGRA_SYSTEM_TYPE_ACK +ifeq ($(CONFIG_TEGRA_OOT_MODULE),m) + +ifeq ($(findstring ack_src,$(NV_BUILD_KERNEL_OPTIONS)),) + +obj-m := nvhost-pva.o + + +PVA_SYS_DIR := . 
+PVA_SYS_ABSDIR := $(srctree.nvidia-oot)/drivers/video/tegra/host/pva + + +###### Begin generated section ###### +pva_objs += \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_block_allocator.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_cmdbuf.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_context.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_debugfs.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_device.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_binding.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_validate.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_dma_cfg_write.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_fw_debug.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_fw_profiler.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_hwseq_validate.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_msg.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_op_handler.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_pm.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_queue.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_resource_table.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_sha256.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_boot.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_elf_parser.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_executable.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_hwpm.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_isr.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_silicon_misc.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_submitter.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_t23x.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_t26x.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_tegra_stats.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_utils.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_vpu_app_auth.o \ + $(PVA_SYS_DIR)/src/kmd/common/pva_kmd_vpu_ocd.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_debugfs.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device_memory.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_driver.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_ioctl.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_isr.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_misc.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_smmu.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_vpu_app_auth.o \ + +pva_inc_flags += \ + -I$(PVA_SYS_ABSDIR)/src/fw/baremetal/include \ + -I$(PVA_SYS_ABSDIR)/src/fw/include \ + -I$(PVA_SYS_ABSDIR)/src/include \ + -I$(PVA_SYS_ABSDIR)/src/kmd/common \ + -I$(PVA_SYS_ABSDIR)/src/kmd/common/shim \ + -I$(PVA_SYS_ABSDIR)/src/kmd/include \ + -I$(PVA_SYS_ABSDIR)/src/kmd/linux/include \ + -I$(PVA_SYS_ABSDIR)/src/libs/pva/include \ + +pva_def_flags += \ + -DPVA_BUILD_MODE=PVA_BUILD_MODE_L4T \ + -DPVA_BUILD_MODE_BAREMETAL=5 \ + -DPVA_BUILD_MODE_L4T=3 \ + -DPVA_BUILD_MODE_NATIVE=1 \ + -DPVA_BUILD_MODE_QNX=2 \ + -DPVA_BUILD_MODE_SIM=4 \ + -DPVA_DEV_MAIN_COMPATIBLE=1 \ + -DPVA_ENABLE_CUDA=1 \ + -DPVA_IS_DEBUG=0 \ + -DPVA_SAFETY=0 \ + -DPVA_SKIP_SYMBOL_TYPE_CHECK \ + -DPVA_SUPPORT_XBAR_RAW=1 \ + -Dpva_kmd_linux_dummy_EXPORTS \ + +###### End generated section ###### + + +nvhost-pva-objs += $(pva_objs) +ccflags-y += $(pva_inc_flags) +ccflags-y += $(pva_def_flags) +ccflags-y += -std=gnu11 + +endif +endif +endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-bit.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-bit.h new file mode 100644 index 00000000..a99b6e38 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-bit.h @@ 
-0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Utility Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_BIT_H +#define PVA_BIT_H + +/* + * Bit manipulation macros + */ + +/** + * @brief Number of bits per byte. + */ +#define PVA_BITS_PER_BYTE (8UL) + +/** + * @defgroup PVA_BIT8_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint8_t. + * Parameter that convey the bit position should be in the range + * of 0 to 7 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 7 inclusive with MSB greater than LSB. + * @{ + */ +/** + * @brief Macro to set a given bit position in a variable of type uint8_t. + */ +#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu)) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Macro used to generate a bit-mask from MSB to LSB in a uint8_t variable. + * This macro sets all the bits from MSB to LSB. + */ +#define PVA_MASK8(_msb_, _lsb_) \ + ((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \ + ~(PVA_BIT8(_lsb_) - 1U)) & \ + 0xffu)) +//! @endcond +/** @} */ + +/** + * @defgroup PVA_BIT16_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint16_t. + * Parameter that convey the bit position should be in the range + * of 0 to 15 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 15 inclusive with MSB greater than LSB. + * @{ + */ +/** + * @brief Macro to set a given bit position in a 16 bit number. + */ +#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu)) + +/** + * @brief Macro to mask a range(MSB to LSB) of bit positions in a 16 bit number. + * This will set all the bit positions in specified range. + */ +#define PVA_MASK16(_msb_, _lsb_) \ + ((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \ + ~(PVA_BIT16(_lsb_) - 1U)) & \ + 0xffffu)) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Macro to extract bits from a 16 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_))) +//! @endcond + +/** + * @brief Macro used to generate a bit-mask from MSB to LSB in a uint16_t variable. + * This macro sets all the bits from MSB to LSB. + */ +#define PVA_INSERT16(_x_, _msb_, _lsb_) \ + ((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_))) +/** @} */ + +/** + * @defgroup PVA_BIT32_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint32_t. + * Parameter that convey the bit position should be in the range + * of 0 to 31 inclusive. 
+ * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 31 inclusive with MSB greater than LSB. + * @{ + */ + +/** + * @brief Macro to set a given bit position in a 32 bit number. + */ +#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffu)) + +/** + * @brief Macro to mask a range(MSB to LSB) of bit positions in a 32 bit number. + * This will set all the bit positions in specified range. + */ +#define PVA_MASK(_msb_, _lsb_) \ + (((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U)) + +/** + * @brief Macro to extract bits from a 32 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_))) + +/** + * @brief Macro to insert a range of bits from a given 32 bit number. + * Range of bits are derived from the number passed as argument. + */ +#define PVA_INSERT(_x_, _msb_, _lsb_) \ + ((((uint32_t)(_x_)) << (_lsb_)) & PVA_MASK((_msb_), (_lsb_))) +/** @} */ + +/** + * @defgroup PVA_BIT64_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint64_t. + * Parameter that convey the bit position should be in the range + * of 0 to 63 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 63 inclusive with MSB greater than LSB. + * @{ + */ +/** + * @brief Macro to set a given bit position in a 64 bit number. + */ +#define PVA_BIT64(_b_) \ + ((uint64_t)(((uint64_t)1UL << (_b_)) & 0xffffffffffffffffu)) + +/** + * @brief Macro used to generate a bit-mask from (MSB to LSB) in a uint64_t variable. + * This macro sets all the bits from MSB to LSB. + */ +#define PVA_MASK64(_msb_, _lsb_) \ + (((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \ + ~(PVA_BIT64(_lsb_) - (uint64_t)1U)) + +/** + * @brief Macro to extract bits from a 64 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_))) + +/** + * @brief Macro to insert a range of bits into a 64 bit number. + * The bits are derived from the number passed as argument. + */ +#define PVA_INSERT64(_x_, _msb_, _lsb_) \ + ((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_))) + +/** + * @brief Macro to pack a 64 bit number. + * A 64 bit number is generated that has first 32 MSB derived from + * upper 32 bits of passed argument and has lower 32MSB derived from + * lower 32 bits of another passed argument. + */ +#define PVA_PACK64(_l_, _h_) \ + (PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U)) + +/** + * @brief Macro to extract the higher 32 bits from a 64 bit number. + */ +#define PVA_HI32(_x_) ((uint32_t)(((_x_) >> 32U) & 0xFFFFFFFFU)) + +/** + * @brief Macro to extract the lower 32 bits from a 64 bit number. 
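
For readers skimming the header above, a minimal usage sketch of these helpers follows. It is illustrative only and not part of the patch: the 32-bit register layout, the field values, and the example program are invented here; only the PVA_* macros come from the pva-bit.h content added above.

#include <stdint.h>
#include <stdio.h>
#include "pva-bit.h" /* PVA_INSERT, PVA_EXTRACT, PVA_PACK64, PVA_HI32 */

int main(void)
{
        /* Hypothetical 32-bit register with an 8-bit field in bits [15:8]. */
        uint32_t reg = 0U;

        /* Write 0x5A into bits [15:8]; bits outside the field are untouched. */
        reg |= PVA_INSERT(0x5AU, 15U, 8U);

        /* Read the field back; the macro casts the result to uint8_t. */
        uint8_t field = PVA_EXTRACT(reg, 15U, 8U, uint8_t);

        /* Combine two 32-bit words (e.g. the halves of a 64-bit IOVA). */
        uint64_t iova = PVA_PACK64(0xDEADBEEFU /* low */, 0x00000001U /* high */);

        printf("field=0x%02x hi=0x%08x lo=0x%08x\n", (unsigned)field,
               (unsigned)PVA_HI32(iova), (unsigned)(iova & 0xFFFFFFFFU));
        return 0;
}
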
+ */ +#define PVA_LOW32(_x_) ((uint32_t)((_x_)&0xFFFFFFFFU)) +/** @} */ + +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h new file mode 100644 index 00000000..c2c6c07f --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-checkpoint.h @@ -0,0 +1,316 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Utility Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_CHECKPOINT_H +#define PVA_CHECKPOINT_H + +/** + * @file pva-checkpoint.h + * @brief Defines macros to create a checkpoint + */ + +/** + * @defgroup PVA_CHECKPOINT_MACROS Macros to define a checkpoint + * + * @brief Checkpoints are the 32-bit status values that can be written to status + * register during R5's execution. The 32-bit value is divided into four 8-bit values. + * These are: + * - major code: major aspect (usually a unit) of the uCode. Bit Position: [31:24] + * Valid values are defined at @ref PVA_CHECKPOINT_MAJOR_CODES. + * - minor code: minor aspect (usually a function) of the uCode.The interpretation of the + * minor value is determined by the major value. Bit Position: [23:16] + * - flags: flags indicating type of the checkpoint such as error checkpoint, + * performance checkpoint, checkpoint indicating start of an operation, + * checkpoint indicating end of an operation etc. Bit Position: [15:8] + * Valid values are defined at @ref PVA_CHECKPOINT_FLAGS. + * - sequence: disambiguate multiple checkpoints within a minor code or to convey additional + * information. The interpretation of the sequence value is determined by both the + * major and minor values. Bit Position: [7:0] + * Valid values are any values from 0 to UINT8_MAX + * @{ + */ + +/** + * @defgroup PVA_CHECKPOINT_MAJOR_CODES + * @brief Macros to define the major code field of the checkpoint @ingroup PVA_CHECKPOINT_MACROS + * @{ + */ + +/* + * Operational major codes + */ + +/** + * @brief Major code for PVA during Boot. + */ +#define PVA_CHK_MAIN (0x01U) + +//! @endcond + +/** + * @brief Error related major codes + */ +#define PVA_CHK_ABORT (0xFFU) + +/** @} */ + +/** + * @defgroup PVA_CHECKPOINT_HW_STATE_MINOR_CODES + * @brief Macros to define the minor code field of the checkpoints with major code PVA_CHK_HW_STATE + * @ingroup PVA_CHECKPOINT_MACROS + * + * @{ + */ +/** + * @brief Minor code while doing a MMIO HW state check. + */ +#define PVA_CHK_HW_STATE_MMIO (0x01U) + +/** + * @brief Minor code while doing a VIC HW state check. + */ +#define PVA_CHK_HW_STATE_VIC (0x02U) + +/** + * @brief Minor code while doing a ARM register HW state check. + */ +#define PVA_CHK_HW_STATE_ARM (0x03U) + +/** + * @brief Minor code while doing a MPU HW state check. + */ +#define PVA_CHK_HW_STATE_MPU (0x04U) + +/** + * @brief Minor code while doing a DMA HW state check. 
+ */ +#define PVA_CHK_HW_STATE_DMA (0x05U) + +/** + * @brief Minor code while doing a VIC HW state check. + */ +#define PVA_CHK_HW_STATE_GOLDEN (0x06U) +/** @} */ + +/** @} */ + +/** + * @defgroup PVA_ABORT_REASONS + * + * @brief Macros to define the abort reasons + * @{ + */ +/** + * @brief Minor code for abort due to assert. + */ +#define PVA_ABORT_ASSERT (0x01U) + +/** + * @brief Minor code for abort in case pva main call fails. + */ +#define PVA_ABORT_FALLTHRU (0x02U) + +/** + * @brief Minor code for abort in case of fatal IRQ. + */ +#define PVA_ABORT_IRQ (0x05U) + +/** + * @brief Minor code for abort in case of MPU failure. + */ +#define PVA_ABORT_MPU (0x06U) + +/** + * @brief Minor code for abort in case of ARM exception. + */ +#define PVA_ABORT_EXCEPTION (0x07U) + +/** + * @brief Minor code for abort in case of un-supported SID read. + */ +#define PVA_ABORT_UNSUPPORTED (0x09U) + +/** + * @brief Minor code for abort in case of DMA failures. + */ +#define PVA_ABORT_DMA_TASK (0x0cU) + +/** + * @brief Minor code for abort in case of WDT failures. + * Note: This code is not reported to HSM. + */ +#define PVA_ABORT_WATCHDOG (0x0eU) + +//! @endcond + +/** + * @brief Minor code for abort in case of VPU init failures. + */ +#define PVA_ABORT_VPU (0x0fU) + +/** + * @brief Minor code for abort in case of DMA MISR setup failures. + */ +#define PVA_ABORT_DMA (0x10U) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Minor code for abort in case of Mbox errors. + * Note: This is used only in T19x + */ +#define PVA_ABORT_MBOX_WAR (0x12U) +//! @endcond + +/** + * @brief Minor code for abort in case of AISR errors. + */ +#define PVA_ABORT_AISR_QUEUE (0x14U) + +/** + * @brief Minor code for abort in case of bad task. + */ +#define PVA_ABORT_BAD_TASK (0x15U) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Minor code for abort in case of PPE init failures. + * Note: This is only used in T26x + */ +#define PVA_ABORT_PPE (0x16U) +//! @endcond + +/** + * @brief Minor code for abort in case of RAMIC failures. + */ +#define PVA_ABORT_RAMIC (0x20U) + +/** + * @brief Minor Code for SEC for safety errors. + * Note: This code is not reported to HSM. + */ +#define PVA_ABORT_SEC_SERR (0x21U) + +/** + * @brief Minor Code for SEC for functional errors. + * Note: This code is not reported to HSM. + */ +#define PVA_ABORT_SEC_FERR (0x22U) + +/** + * @brief Minor code for abort in case of firewall decode error. + */ +#define PVA_ABORT_L2SRAM_FWDEC (0x23U) + +/** + * @brief Minor code for abort in case of kernel panic. + */ +#define PVA_ABORT_KERNEL_PANIC (0x30U) + +/** + * @brief Minor code for abort in case of Batch Timeout. + */ +#define PVA_ABORT_BATCH_TIMEOUT (0x40U) + +/** + * @brief Minor code for abort in case of DMA Transfer Timeout. + * while in launch phase for the VPU) + */ +#define PVA_ABORT_DMA_SETUP_TIMEOUT (0x41U) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Minor code used when NOC BIST is run. + * Note: This is only used in T19x + */ +#define PVA_ABORT_NOC_BIST (0xfcU) +//! @endcond + +/** @} */ + +/** + * @defgroup PVA_ABORT_ARGUMENTS Macros to define the argument for pva_abort operation + * + * @brief Argument of pva_abort operation is updated in status register + * + */ + +/** + * @defgroup PVA_ABORT_ARGUMENTS_MPU + * @brief Argument to pva_abort() from MPU operations + * @ingroup PVA_ABORT_ARGUMENTS + * @{ + */ +/** + * @brief Minor code when there is an error while configuring MPU. 
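
To make the checkpoint layout documented above concrete, the sketch below packs the four fields (major [31:24], minor [23:16], flags [15:8], sequence [7:0]) with PVA_INSERT from pva-bit.h. It is illustrative only: the helper name pva_make_checkpoint is invented here, and the flags argument is left at zero because the PVA_CHECKPOINT_FLAGS values are defined elsewhere in the firmware tree.

#include <stdint.h>
#include "pva-bit.h"        /* PVA_INSERT */
#include "pva-checkpoint.h" /* PVA_CHK_ABORT, PVA_ABORT_WATCHDOG, ... */

/* Hypothetical helper: assemble major/minor/flags/sequence into one 32-bit
 * checkpoint word using the bit positions documented in this header. */
static inline uint32_t pva_make_checkpoint(uint8_t major, uint8_t minor,
                                           uint8_t flags, uint8_t sequence)
{
        return PVA_INSERT(major, 31U, 24U) | PVA_INSERT(minor, 23U, 16U) |
               PVA_INSERT(flags, 15U, 8U) | PVA_INSERT(sequence, 7U, 0U);
}

/* Example: the status word for an abort caused by a watchdog expiry. */
uint32_t example_watchdog_checkpoint(void)
{
        return pva_make_checkpoint(PVA_CHK_ABORT, PVA_ABORT_WATCHDOG, 0U, 0U);
}
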
+ */ +#define PVA_ABORT_MPU_CONFIG (0xE001U) + +/** + * @brief Minor code when there is an error while initializing MPU. + */ +#define PVA_ABORT_MPU_INIT (0xE002U) +/** @} */ + +/** + * @defgroup PVA_ABORT_ARGUMENTS_VPU + * @brief Argument to pva_abort() from VPU operations + * @ingroup PVA_ABORT_ARGUMENTS + * @{ + */ +/** + * @brief Minor code when VPU is in debug state. + */ +#define PVA_ABORT_VPU_DEBUG (0xE001U) +/** @} */ + +/** + * @defgroup PVA_ABORT_ARGUMENTS_PPE + * @brief Argument to pva_abort() from PPE operations + * @ingroup PVA_ABORT_ARGUMENTS + * @{ + */ +/** + * @brief Minor code when PPE is in debug state. + */ +#define PVA_ABORT_PPE_DEBUG (0xE002U) +/** @} */ + +/** + * @brief Minor Code when DMA state is not idle to perform + * DMA MISR setup. + */ +#define PVA_ABORT_DMA_MISR_BUSY (0xE001U) +/** + * @brief Minor Code in DMA when MISR has timed out + */ +#define PVA_ABORT_DMA_MISR_TIMEOUT (0xE002U) + +/** + * @defgroup PVA_ABORT_ARGUMENTS_IRQ Argument to pva_abort() from IRQs + * @ingroup PVA_ABORT_ARGUMENTS + * @{ + */ + +/** + * @brief Minor Code for Command FIFO used by Interrupt Handler. + */ +#define PVA_ABORT_IRQ_CMD_FIFO (0xE001U) + +#if (0 == DOXYGEN_DOCUMENTATION) +#define PVA_ABORT_IRQ_TEST_HOST (0xE002U) +#endif +/** @} */ +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-config.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-config.h new file mode 100644 index 00000000..44609a8e --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-config.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Utility Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_CONFIG_H +#define PVA_CONFIG_H + +#include +#include "pva_fw_constants.h" + +/** + * @defgroup PVA_CONFIG_PARAMS + * + * @brief PVA Configuration parameters. + * @{ + */ +/** + * @brief Queue id for queue0. + */ +#define PVA_FW_QUEUE_0 (0U) + +/** + * @brief Total number of queues that are present + * for communication between KMD and FW. + */ +#define PVA_NUM_QUEUES (8U) + +/** + * @brief Maximum queue id value in PVA System. + */ +#define PVA_MAX_QUEUE_ID (PVA_NUM_QUEUES - 1U) + +/** + * @brief Maximum number of tasks that is supported by a queue. + */ +#define MAX_QUEUE_DEPTH (256U) + +/** + * @brief Number of Hardware Semaphore registers in PVA System. + */ +#define PVA_NUM_SEMA_REGS (4U) + +/** + * @brief Number of Hardware Mailbox registers in PVA System. + */ +#define PVA_NUM_MBOX_REGS (8U) + +/** + * @brief Maximum number of Pre-Actions for a task. + */ +#define PVA_MAX_PREACTIONS (26U) + +/** + * @brief Maximum number of Post-Actions for a task. + */ +#define PVA_MAX_POSTACTIONS (28U) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Maximum number of DMA channels for T26x. + */ +#define PVA_NUM_DMA_CHANNELS_T26X (8U) + +/** + * @brief Total number of AXI data buffers for T26x. 
+ */ +#define PVA_NUM_DMA_ADB_BUFFS_T26X (304U) + +/** + * @brief Number of reserved AXI data buffers for T26x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T26X (16U) + +/** + * @brief Number of dynamic AXI data buffers for T26x. + * These exclude the reserved AXI data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T26X \ + (PVA_NUM_DMA_ADB_BUFFS_T26X - PVA_NUM_RESERVED_ADB_BUFFERS_T26X) + +/** + * @brief Maximum number of DMA channels for T23x. + */ +#define PVA_NUM_DMA_CHANNELS_T23X (16U) +//! @endcond + +/** + * @brief Number of DMA descriptors for T19x. + */ +#define PVA_NUM_DMA_DESCS_T19X (64U) +/** + * @brief Number of DMA descriptors for T23x. + */ +#define PVA_NUM_DMA_DESCS_T23X (64U) +/** + * @brief Number of DMA descriptors for T26x. + */ +#define PVA_NUM_DMA_DESCS_T26X (96U) + +/** + * @brief Number of reserved DMA channels. These channels + * are reserved per DMA for R5 transfers. These channels + * will be used by R5 to transfer data which it needs. + */ +#define PVA_NUM_RESERVED_CHANNELS (1U) + +/** + * @brief Number of dynamic DMA descriptors for T19x. These descriptors can be + * used by the VPU application transfer data. These exclude + * the reserved descriptors from total available ones. + */ +#define PVA_NUM_DYNAMIC_DESCS_T19X \ + (PVA_NUM_DMA_DESCS_T19X - PVA_NUM_RESERVED_DESCRIPTORS) +/** + * @brief Number of dynamic DMA descriptors for T23x. These descriptors can be + * used by the VPU application transfer data. These exclude + * the reserved descriptors from total available ones. + */ +#define PVA_NUM_DYNAMIC_DESCS_T23X \ + (PVA_NUM_DMA_DESCS_T23X - PVA_NUM_RESERVED_DESCRIPTORS) +/** + * @brief Number of dynamic DMA descriptors for T26x. These descriptors can be + * used by the VPU application transfer data. These exclude + * the reserved descriptors from total available ones. + */ +#define PVA_NUM_DYNAMIC_DESCS_T26X \ + (PVA_NUM_DMA_DESCS_T26X - PVA_NUM_RESERVED_DESCRIPTORS) +/** + * Note: T26x will be brought up first on Linux, and then on QNX. To support this, + * the following macro is needed so that the QNX driver can build without requiring + * any changes. + */ +#define PVA_NUM_DYNAMIC_DESCS (PVA_NUM_DYNAMIC_DESCS_T23X) + +/** + * @brief Number of reserved AXI data buffers for T23x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X (16U) + +/** + * @brief Number of reserved VMEM data buffers. + */ +#define PVA_NUM_RESERVED_VDB_BUFFERS (0U) + +/** + * @brief Total number of VMEM data buffers. + */ +#define PVA_NUM_DMA_VDB_BUFFS (128U) + +/** + * @brief Total number of AXI data buffers for T23x. + */ +#define PVA_NUM_DMA_ADB_BUFFS_T23X (272U) + +/** + * @brief Number of dynamic AXI data buffers for T23x. + * These exclude the reserved AXI data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X \ + (PVA_NUM_DMA_ADB_BUFFS_T23X - PVA_NUM_RESERVED_ADB_BUFFERS_T23X) + +/** + * @brief Number of dynamic VMEM data buffers for T23x. + * These exclude the reserved VMEM data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_VDB_BUFFS \ + (PVA_NUM_DMA_VDB_BUFFS - PVA_NUM_RESERVED_VDB_BUFFERS) + +/** + * @brief The first Reserved DMA descriptor. This is used as a + * starting point to iterate over reserved DMA descriptors. + */ +#define PVA_RESERVED_DESC_START (60U) + +/** + * @brief The first Reserved AXI data buffers. This is used as a + * starting point to iterate over reserved AXI data buffers. 
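
The reserved-versus-dynamic arithmetic above can be spelled out numerically: T23x exposes 272 - 16 = 256 dynamic ADB buffers and T26x exposes 304 - 16 = 288. The compile-time checks below are an editorial sketch, not code from the patch; they only restate relationships between macros fully defined in this header, and assume the header's own includes resolve.

#include "pva-config.h"

/* Sanity checks on the dynamic ADB pools, derived purely from the values in
 * pva-config.h above (272/304 total, 16 reserved on both chips). */
_Static_assert(PVA_NUM_DYNAMIC_ADB_BUFFS_T23X ==
               PVA_NUM_DMA_ADB_BUFFS_T23X - PVA_NUM_RESERVED_ADB_BUFFERS_T23X,
               "T23x dynamic ADB buffers are total minus reserved");
_Static_assert(PVA_NUM_DYNAMIC_ADB_BUFFS_T23X == 256U,
               "T23x leaves 256 ADB buffers for user tasks");
_Static_assert(PVA_NUM_DYNAMIC_ADB_BUFFS_T26X == 288U,
               "T26x leaves 288 ADB buffers for user tasks");
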
+ */ +#define PVA_RESERVED_ADB_BUFF_START PVA_NUM_DYNAMIC_ADB_BUFFS + +/** + * @brief This macro has the value to be set by KMD in the shared semaphores + * @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM + * if the syncpoint reserved region must not be configured as uncached + * in R5 MPU. + */ +#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU) +/** @} */ + +/** + * @defgroup PVA_CONFIG_PARAMS_T19X + * + * @brief PVA Configuration parameters exclusively for T19X. + * @{ + */ +/** + * @brief Number of DMA channels for T19x or Xavier. + */ +#define PVA_NUM_DMA_CHANNELS_T19X (14U) + +/** + * @brief Number of reserved AXI data buffers for T19x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T19X (8U) + +/** + * @brief Total number of AXI data buffers for T19x. + */ +#define PVA_NUM_DMA_ADB_BUFFS_T19X (256U) + +/** + * @brief Number of dynamic AXI data buffers for T19x. + * These exclude the reserved AXI data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T19X \ + (PVA_NUM_DMA_ADB_BUFFS_T19X - PVA_NUM_RESERVED_ADB_BUFFERS_T19X) + +/** @} */ +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h new file mode 100644 index 00000000..f3a9e129 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-errors.h @@ -0,0 +1,428 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Utility Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_ERRORS_H +#define PVA_ERRORS_H + +#include +#include + +/** + * @brief PVA Error codes + */ +typedef uint16_t pva_errors_t; + +/** + * @defgroup PVA_ERRORS + * + * @brief General and interface errors of PVA. + * @{ + */ +/** + * @brief In case of no Error. + */ +#define PVA_ERR_NO_ERROR (0x0U) + +/** + * @brief Error in case of an illegal command + * PVA FW executes commands that are found + * in the command look up table. If a command + * is not part of supported commands, this + * error will be returned. Valid commands can be + * referred at @ref pva_cmd_lookup_t. + * + */ +#define PVA_ERR_BAD_CMD (0x1U) + +/** + * @brief Error in case of bad queue id, ie + * queue id that was requested is not available. + */ +#define PVA_ERR_BAD_QUEUE_ID (0x3U) + +/** + * @brief Error in case of invalid pve-id. This + * error is generated if PVE id is greater + * than @ref PVA_NUM_PVE. + */ +#define PVA_ERR_BAD_PVE_ID (0x4U) + +/** + * @brief Error in case when number of pre-actions + * are more than what can be accommodated. + */ +#define PVA_ERR_BUFF_TOO_SMALL (0x5U) + +/** + * @brief Error in case when requested feature can not be satisfied. + * This error arises in scenarios where certain actions are + * not supported during execution of pre-actions or post-actions. + * For instance, @ref TASK_ACT_WRITE_STATUS is not supported in + * executing pre-actions of task. 
+ */ +#define PVA_ERR_FEATURE_NOT_SUPPORTED (0x6U) + +/** + * @brief Error in case when the address generated or translated does not + * meet the constraints like alignment or non-null. + */ +#define PVA_ERR_BAD_ADDRESS (0x9U) + +/** + * @brief Error in case when timestamp is requested on un-supported action. + */ +#define PVA_ERR_BAD_TIME_VALUE (0xdU) +#if PVA_SAFETY == 0 +/** + * @brief Error in case when the register provided to update + * the status is invalid. + */ +#define PVA_ERR_BAD_STATUS_REG (0x10U) +#endif +//! @endcond +/** + * @brief Error in case of bad task. + * In scenarios where task does not meet the + * necessary criteria like non-zero or 64 byte alignment. + * This error will be returned. + */ +#define PVA_ERR_BAD_TASK (0x15U) + +/** + * @brief Error in case of invalid task action list. Invalid + * action list arises in scenarios like number of + * pre and post actions not being zero but actual + * pre or post action to be performed being NULL. + */ +#define PVA_ERR_BAD_TASK_ACTION_LIST (0x16U) + +/** + * @brief Error when internal state of task is not as expected. + * A task goes through transition of various state while + * executing. In case when a state is not coherent with + * action being performed this error is returned. + * For example, task can not be in a running state + * while tear-down is being performed. + */ +#define PVA_ERR_BAD_TASK_STATE (0x17U) + +/** + * @brief Error when there is a mis-match in input status and the actual status. + * This error occurs when there is a mis-match in status from @ref pva_gen_task_status_t + * and actual status that is populated by FW during task execution. + */ +#define PVA_ERR_TASK_INPUT_STATUS_MISMATCH (0x18U) + +/** + * @brief Error in case of invalid parameters. These errors occur when + * parameters passed are invalid and is applicable for task parameters + * and DMA parameters. + */ +#define PVA_ERR_BAD_PARAMETERS (0x1aU) + +/** + * @brief Error in case of when timed out occurred for batch of task. + */ +#define PVA_ERR_PVE_TIMEOUT (0x23U) + +/** + * @brief Error when VPU has halted or turned off. + */ +#define PVA_ERR_VPU_ERROR_HALT (0x25U) + +/** + * @brief Error after FW sends an abort signal to KMD. KMD will write into status buffers for + * pending tasks after FW sends an abort signal to KMD. + */ +#define PVA_ERR_VPU_BAD_STATE (0x28U) + +/** + * @brief Error in case of exiting VPU. + */ +#define PVA_ERR_VPU_EXIT_ERROR (0x2aU) +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Error in case of exiting PPE. + */ +#define PVA_ERR_PPE_EXIT_ERROR (0x2bU) +//! @endcond +/** + * @brief Error when a task running on PVE caused abort on PVE. + */ +#define PVA_ERR_PVE_ABORT (0x2dU) +/** + * @brief Error in case of Floating point NAN. + */ + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Error in case of Floating point NAN. + */ +#define PVA_ERR_PPE_DIVIDE_BY_0 (0x34U) +/** + * @brief Error in case of Floating point NAN. + */ +#define PVA_ERR_PPE_ILLEGAL_DEBUG (0x36U) + +#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN (0x37U) + +/** + * @brief Error in case of Bad cached DRAM segment. + */ +#define PVA_ERR_BAD_CACHED_DRAM_SEG (0x3aU) + +/** + * @brief Error in case of Bad DRAM IOVA. + */ +#define PVA_ERR_BAD_DRAM_IOVA (0x3cU) +//! @endcond + +/** + * @brief Error in case of Register mis-match. + */ +#define PVA_ERR_REG_MISMATCH (0x3dU) + +/** + * @brief Error in case of AISR queue empty. + */ +#define PVA_ERR_AISR_INPUT_QUEUE_EMPTY (0x3fU) + +/** + * @brief Error in case of AISR queue full. 
+ */ +#define PVA_ERR_AISR_OUTPUT_QUEUE_FULL (0x40U) +#if (PVA_HAS_L2SRAM == 1) +/** + * @brief Error in case of L2SRAM allocation failed due to invalid parameters. + */ +#define PVA_ERR_BAD_L2SRAM_PARAMS (0x41U) +#endif +/** + * @brief Error in case of bad or invalid task parameters. + */ +#define PVA_ERR_BAD_TASK_PARAMS (0x42U) +/** + * @brief Error in case of invalid VPU system call. + */ +#define PVA_ERR_VPU_SYS_ERROR (0x43U) +/** + * @brief Error in case of HW Watchdog timer timeout + */ +#define PVA_ERR_WDT_TIMEOUT_ERROR (0x44U) +/** + * @brief Error in case Golden register check value mismatch. + */ +#define PVA_ERR_GR_REG_MISMATCH (0x45U) +/** + * @brief Error in case Critical register check value mismatch. + */ +#define PVA_ERR_CRIT_REG_MISMATCH (0x46U) +/** @} */ + +/** + * @defgroup PVA_DMA_ERRORS + * + * @brief DMA ERROR codes used across PVA. + * @{ + */ +/** + * @brief Error when DMA transfer mode in DMA descriptor is invalid. + */ +#define PVA_ERR_DMA_TRANSFER_TYPE_INVALID (0x204U) + +/** + * @brief Error when DMA transfer was not successful. + */ +#define PVA_ERR_DMA_CHANNEL_TRANSFER (0x207U) + +/** + * @brief Error in case of BAD DMA descriptor. + */ +#define PVA_ERR_BAD_DMA_DESC_ID (0x208U) + +/** + * @brief Error in case of BAD DMA channel ID. + */ +#define PVA_ERR_BAD_DMA_CHANNEL_ID (0x209U) + +/** + * @brief Error in case of DMA timeout. + */ +#define PVA_ERR_DMA_TIMEOUT (0x20bU) + +/** + * @brief Error when program trying to use channel is already active. + */ +#define PVA_ERR_DMA_INVALID_CONFIG (0x220U) + +/** + * @brief Error in case DMA transfer was not successful. + */ +#define PVA_ERR_DMA_ERROR (0x221U) + +/** + * @brief Error when number of bytes of HW Seq data copy is + * not a multiple of 4. + */ +#define PVA_ERR_DMA_HWSEQ_BAD_PROGRAM (0x216U) + +/** + * @brief Error when number of bytes of HW Seq data copy is + * more than HW Seq RAM size. + */ +#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG (0x217U) +/** @} */ + +/** + * @defgroup PVA_MISR_ERRORS + * + * @brief MISR error codes used across PVA. + * @{ + */ +/** + * @brief Error status when DMA MISR test is not run. + */ +#define PVA_ERR_MISR_NOT_RUN (0x280U) +/** + * @brief Error status when DMA MISR test did not complete. + */ +#define PVA_ERR_MISR_NOT_DONE (0x281U) +/** + * @brief Error status when DMA MISR test timed out. + */ +#define PVA_ERR_MISR_TIMEOUT (0x282U) +/** + * @brief Error status in case of DMA MISR test address failure. + */ +#define PVA_ERR_MISR_ADDR (0x283U) +/** + * @brief Error status in case of DMA MISR test data failure. + */ +#define PVA_ERR_MISR_DATA (0x284U) +/** + * @brief Error status in case of DMA MISR test data and address failure. + */ +#define PVA_ERR_MISR_ADDR_DATA (0x285U) +/** @} */ + +/** + * @defgroup PVA_VPU_ISR_ERRORS + * + * @brief VPU ISR error codes used across PVA. + * @{ + */ +/** + * @defgroup PVA_FAST_RESET_ERRORS + * + * @brief Fast reset error codes used across PVA. + * @{ + */ +/** + * @brief Error when VPU is not in idle state for a reset to be done. + */ +#define PVA_ERR_FAST_RESET_TIMEOUT_VPU (0x401U) +/** + * @brief Error if VPU I-Cache is busy before checking DMA engine for idle state. + */ +#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE1 (0x402U) +/** + * @brief Error if DMA channel is busy for a reset to be done. + */ +#define PVA_ERR_FAST_RESET_TIMEOUT_CH0 (0x403U) +/** + * @brief Error if VPU I-Cache is busy after checking DMA engine for idle state. 
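
Because the error space is partitioned by numeric range (general and interface errors below 0x200, DMA errors in 0x20x, MISR results in 0x28x, fast-reset timeouts in 0x4xx, with L2SRAM and informational codes further below), a caller can classify a code coarsely before any detailed handling. The helper below is an illustrative sketch: the function, the enum, and the exact range boundaries are inferred from the values listed in this header rather than defined by it.

#include <stdint.h>
#include "pva-errors.h"

/* Hypothetical coarse classification of pva_errors_t values by numeric range. */
enum pva_err_class {
        PVA_ERR_CLASS_GENERAL,  /* 0x000..0x1FF: task and interface errors  */
        PVA_ERR_CLASS_DMA,      /* 0x200..0x27F: DMA configuration/transfer */
        PVA_ERR_CLASS_MISR,     /* 0x280..0x2FF: DMA MISR self-test results */
        PVA_ERR_CLASS_RESET,    /* 0x400..0x4FF: fast-reset timeouts        */
        PVA_ERR_CLASS_OTHER     /* L2SRAM, informational, everything else   */
};

static inline enum pva_err_class pva_err_classify(pva_errors_t err)
{
        if (err < 0x200U)
                return PVA_ERR_CLASS_GENERAL;
        if (err < 0x280U)
                return PVA_ERR_CLASS_DMA;
        if (err < 0x300U)
                return PVA_ERR_CLASS_MISR;
        if (err >= 0x400U && err < 0x500U)
                return PVA_ERR_CLASS_RESET;
        return PVA_ERR_CLASS_OTHER;
}
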
+ */ +#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE2 (0x419U) + +#if (PVA_CHIP_ID == CHIP_ID_T26X) +/** + * @brief Error when PPE is not in idle state for a reset to be done. + */ +#define PVA_ERR_FAST_RESET_TIMEOUT_PPE (0x420U) +#endif +/** @} */ + +/** + * @defgroup PVA_L2SRAM_ERRORS + * + * @brief L2SRAM memory error codes used across PVA. + * @{ + */ +/** + * @brief Error if l2sram memory allocation failed because of insufficient l2sram memory or + * if 2 chunks of memory are already allocated. + */ +#define PVA_ERR_ALLOC_FAILED (0x812U) +/** + * @brief Error if If l2sram address given for clearing/freeing is not a valid L2SRAM address + */ +#define PVA_ERR_FREE_FAILED (0x813U) +/** @} */ + +/** + * @defgroup PVA_INFO_ERRORS + * + * @brief Informational error codes. + * @{ + */ +/** + * @brief Error when there is no task. + */ +#define PVA_ERR_NO_TASK (0x997U) +/** + * @brief Error when CCQ IRQ line enable on VIC fails + */ +#define PVA_ERR_CCQ_IRQ_ENABLE_FAILED (0x998U) +/** + * @brief Error when Mailbox IRQ line enable on VIC fails + */ +#define PVA_ERR_MBOX_IRQ_ENABLE_FAILED (0x999U) +/** + * @brief Error when L2SRAM IRQ line enable on VIC fails + */ +#define PVA_ERR_L2SRAM_IRQ_ENABLE_FAILED (0x99AU) +/** + * @brief Error when DMA0 IRQ line enable on VIC fails + */ +#define PVA_ERR_DMA0_IRQ_ENABLE_FAILED (0x99BU) +/** + * @brief Error when DMA1 IRQ line enable on VIC fails + */ +#define PVA_ERR_DMA1_IRQ_ENABLE_FAILED (0x99CU) +/** + * @brief Error when VPU IRQ line enable on VIC fails + */ +#define PVA_ERR_VPU_IRQ_ENABLE_FAILED (0x99DU) +/** + * @brief Error when SEC IRQ line enable on VIC fails + */ +#define PVA_ERR_SEC_IRQ_ENABLE_FAILED (0x99EU) +/** + * @brief Error when RAMIC IRQ line enable on VIC fails + */ +#define PVA_ERR_RAMIC_IRQ_ENABLE_FAILED (0x99FU) + +/** + * @brief Error in case to try again. + * @note This error is internal to FW only. + */ +#define PVA_ERR_TRY_AGAIN (0x9A0U) +/** @} */ + +/* Never used */ +#define PVA_ERR_MAX_ERR (0xFFFFU) + +#endif /* _PVA_ERRORS_H_ */ diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-fw-version.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-fw-version.h new file mode 100644 index 00000000..141a66ac --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-fw-version.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2022 NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +/* + * Unit: Host Interface Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_FW_VERSION_H +#define PVA_FW_VERSION_H + +#include + +/* + * Note: Below are doxygen comments with the @def command. + * This allows the comment to be physically distant from the define + * being documented. And allows for a single general comment that is + * regardless of the being assigned to the macro. + */ + +/** + * @defgroup PVA_VERSION_TYPE_FLAGS VERSION_TYPE Bit Flags + * + * @brief The bit flags that indicate the qualities of the Built Firmware. 
+ * e.g., Debug, Safety, Test Features, etc.
+ *
+ * @see VERSION_TYPE
+ * @{
+ */
+
+/**
+ * @def VERSION_CODE_DEBUG
+ * @brief Set or Clear the 'debug' bit for the FW version type value. For a safety
+ * build the value of this define will be zero.
+ *
+ * @details This bit is set if the macro @ref PVA_DEBUG is defined as 1.
+ * @see PVA_DEBUG
+ */
+#if PVA_DEBUG == 1
+#define VERSION_CODE_DEBUG PVA_BIT(0)
+#else
+#define VERSION_CODE_DEBUG (0U)
+#endif
+
+/**
+ * @def VERSION_CODE_SAFETY
+ * @brief Set or Clear the 'safety' bit for the FW version type value. For a safety
+ * build the value of this define will be non-zero.
+ *
+ * @details This bit is set if the macro @ref PVA_SAFETY is defined as 1.
+ * Building for Safety disables certain functions that are used for debug, testing,
+ * or would otherwise pose a risk to a system conforming to safety protocols such as ISO-26262 or
+ * ASPICE.
+ *
+ * @see PVA_SAFETY
+ */
+#if PVA_SAFETY == 1
+#define VERSION_CODE_SAFETY PVA_BIT(1)
+#else
+#define VERSION_CODE_SAFETY (0U)
+#endif
+
+/**
+ * @def VERSION_CODE_PVA_TEST_SUPPORT
+ * @brief Set or Clear the 'test support' bit for the FW version type value.
+ *
+ * @details This bit is set if the macro @ref TEST_TASK is defined as 1.
+ * This bit is expected to be unset during a safety build.
+ *
+ * Building with test support enabled may add additional commands that can be
+ * processed by the FW to aid in testing of the system code. Often code of this
+ * nature can change the processing, memory, or timing characteristics of the system,
+ * and should only be enabled when explicitly needed.
+ *
+ * @see TEST_TASK
+ */
+#if TEST_TASK == 1
+#define VERSION_CODE_PVA_TEST_SUPPORT PVA_BIT(2)
+#else
+#define VERSION_CODE_PVA_TEST_SUPPORT (0U)
+#endif
+
+/**
+ * @def VERSION_CODE_STANDALONE_TESTS
+ * @brief Set or Clear the 'standalone tests' bit for the FW version type value.
+ *
+ * @details This bit is set if the macro @ref TEST_TASK is defined as 1.
+ * This bit is expected to be unset during a safety build.
+ *
+ * @see TEST_TASK
+ */
+#if TEST_TASK == 1
+#define VERSION_CODE_STANDALONE_TESTS PVA_BIT(3)
+#else
+#define VERSION_CODE_STANDALONE_TESTS (0U)
+#endif
+/** @} */
+
+/**
+ * @defgroup PVA_VERSION_MACROS PVA version macros used to calculate the PVA
+ * FW binary version.
+ * @{
+ */
+
+/**
+ * @brief An 8-bit bit field that describes which conditionally compiled facets of the Firmware
+ * have been enabled.
+ *
+ * @details The value of this macro is used when constructing a 32-bit Firmware Version identifier.
+ *
+ @verbatim
+ | Bit | Structure Field Name | Condition for Enabling |
+ |:-----:|:----------------------:|:------------------------:|
+ | 0 | VERSION_CODE_DEBUG | This bit is set when the Firmware is built with @ref PVA_DEBUG defined as 1. |
+ | 1 | VERSION_CODE_SAFETY | This bit is set when the Firmware is built with @ref PVA_SAFETY defined as 1. |
+ | 2 | VERSION_CODE_PVA_TEST_SUPPORT | This bit is set when the Firmware is built with @ref TEST_TASK defined as 1. |
+ | 3 | VERSION_CODE_STANDALONE_TESTS | This bit is set when the Firmware is built with @ref TEST_TASK defined as 1. |
+ | 4-7 | Reserved | The remaining bits of the bitfield are undefined.
| + @endverbatim + * @see PVA_VERSION_TYPE_FLAGS + */ +#define VERSION_TYPE \ + (uint32_t) VERSION_CODE_DEBUG | (uint32_t)VERSION_CODE_SAFETY | \ + (uint32_t)VERSION_CODE_PVA_TEST_SUPPORT | \ + (uint32_t)VERSION_CODE_STANDALONE_TESTS +/** @} */ + +/** + * @defgroup PVA_VERSION_VALUES PVA Major, Minor, and Subminor Version Values + * + * @brief The values listed below are applied to the corresponding fields when + * the PVA_VERSION macro is used. + * + * @see PVA_VERSION, PVA_MAKE_VERSION + * @{ + */ + +/** + * @brief The Major version of the Firmware + */ +#define PVA_VERSION_MAJOR 0x08 + +/** + * @brief The Minor version of the Firmware + */ +#define PVA_VERSION_MINOR 0x02 + +/** + * @brief The sub-minor version of the Firmware. + */ +#define PVA_VERSION_SUBMINOR 0x03 +/** @} */ + +/** + * @def PVA_VERSION_GCID_REVISION + * @brief The GCID Revision of the Firmware. + * + * @details If this version is not otherwise defined during build time, this fallback value is used. + */ +#ifndef PVA_VERSION_GCID_REVISION +/** + * @brief GCID revision of PVA FW binary. + */ +#define PVA_VERSION_GCID_REVISION 0x00000000 +#endif + +/** + * @def PVA_VERSION_BUILT_ON + * @brief The date and time the version of software was built, expressed as the number + * of seconds since the Epoch (00:00:00 UTC, January 1, 1970). + * + * @details If this version is not otherwise defined during build time, this fallback value is used. + */ +#ifndef PVA_VERSION_BUILT_ON +#define PVA_VERSION_BUILT_ON 0x00000000 +#endif +/** @} */ + +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-packed.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-packed.h new file mode 100644 index 00000000..ec9398dc --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-packed.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +/* + * Unit: Utility Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_PACKED_H +#define PVA_PACKED_H +/** + * @brief Packed attribute that avoids compiler to add any paddings. + * Compiler implicitly adds padding between the structure members + * to make it aligned. To avoid this packed attribute is used. + * Packed is for shared structures between KMD and FW. + * If packed is not used, then we depend on what padding the compiler adds. + * Since KMD and FW are compiled by two different compilers, we need to + * ensure that the offsets of each member of the structure is the same in + * both KMD and FW. To ensure this we pack the structure. 
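+ *
+ * A minimal illustration follows; the struct below is hypothetical and not
+ * part of this interface, it only shows the effect of the attribute:
+ *
+ *   typedef struct PVA_PACKED {
+ *       uint8_t  id;
+ *       uint32_t value;
+ *   } example_shared_t;
+ *
+ * With PVA_PACKED, sizeof(example_shared_t) is 5 on both the KMD and FW builds.
+ * Without it, a compiler would typically insert 3 bytes of padding after
+ * 'id', and the two toolchains would not be guaranteed to agree on the layout.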
+ */ +#define PVA_PACKED __attribute__((packed)) +#endif // PVA_PACKED_H diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-dma.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-dma.h new file mode 100644 index 00000000..69ec1862 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-dma.h @@ -0,0 +1,486 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2023 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Direct Memory Access Driver Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +/** + * @file pva-sys-dma.h + * + * @brief Types and constants related to PVA DMA setup and DMA + * descriptors. + */ + +#ifndef PVA_SYS_DMA_H +#define PVA_SYS_DMA_H + +#include +#include +#include + +#include "pva_fw_dma_hw_interface.h" + +/** + * @brief The version number of the current DMA info structure. + * This is used for detecting the DMA info updates for future + * HW releases. + */ +#define PVA_DMA_INFO_VERSION_ID (1U) + +/** + * @brief Number of DMA done masks in DMA info structure, + * corresponding to the number of DMA_COMMON_DMA_OUTPUT_ENABLEx + * registers in the HW. + */ +#define PVA_SYS_DMA_NUM_TRIGGERS (9U) + +/* NOTE : This must be kept as 15 for build to be + * successful, because in pva_fw_test we configure + * 15 channel, but internally we check if the + * number of channels requested is less than the + * maximum number of available channels */ +/** + * @brief Maximum Number of DMA channel configurations + * in DMA info structure. + */ +#define PVA_SYS_DMA_NUM_CHANNELS (15U) + +/** + * @brief Maximum number of DMA descriptors allowed + * for use for VPU for T23x + */ +#define PVA_SYS_DMA_MAX_DESCRIPTORS_T23X (60U) +/** + * @brief Maximum number of DMA descriptors allowed + * for use for VPU for T26x + */ +#define PVA_SYS_DMA_MAX_DESCRIPTORS_T26X (92U) + +/** + * @brief DMA registers for VPU0 and VPU1 which are primarily + * used by DMA config and R5 initialization. + * + * For more information refer to section 3.4 in PVA Cluster IAS + * document (Document 11 in Supporting Documentation and References) + */ +/** + * @brief DMA channel base register for VPU0. + */ +#define PVA_DMA0_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_REG_CH_0_BASE) +/** + * @brief DMA common base register for VPU0. + */ +#define PVA_DMA0_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_COMMON_BASE) +/** + * @brief DMA DESCRAM base register for VPU0. + */ +#define PVA_DMA0_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA0_DESCRAM_BASE) +/** + * @brief DMA channel base register for VPU1. + */ +#define PVA_DMA1_REG_CH_0 PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_REG_CH_0_BASE) +/** + * @brief DMA common base register for VPU1. + */ +#define PVA_DMA1_COMMON PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_COMMON_BASE) +/** + * @brief DMA DESCRAM base register for VPU1. + */ +#define PVA_DMA1_DESCRAM PVA_OFFSET(NV_ADDRESS_MAP_PVA0_DMA1_DESCRAM_BASE) +/** @} */ + +/** + * + * @brief DMA channel configuration for a user task. 
+ * + * The DMA channel structure contains the set-up of a + * PVA DMA channel used for the VPU app. + * + * This VPU app should configure the channel information + * in this format + * + * @note : For more information on channel configuration, refer section 4.1.2 and 6.4 in + * the DMA IAS document (Document 6 in Supporting Documentation and References) + */ +typedef struct PVA_PACKED { + /** + * @brief HW DMA channel number from 1 to @ref PVA_NUM_DMA_CHANNELS. + */ + uint8_t ch_number; + /** + * @brief Padding bytes of 3 added to align the next + * field of 4 bytes + */ + uint8_t pad_dma_channel1[3]; + /** + * @brief The value to be written to DMA channel + * control 0 register + */ + uint32_t cntl0; + /** + * @brief The value to be written to DMA channel + * control 1 register + */ + uint32_t cntl1; + /** + * @brief The value to be written to DMA channel + * boundary pad register + */ + uint32_t boundary_pad; + /** + * @brief This value to be written to DMA HW sequence + * control register. + */ + uint32_t hwseqcntl; + /** + * @brief This field is unused in t19x and T23x. + * It contains the value to be written to DMA + * channel HWSEQFSCNTL register. + */ + uint32_t hwseqfscntl; + /** + * @brief Output enable mask + */ + uint32_t outputEnableMask; + /** + * @brief Padding 8 bytes to align the whole structure + * to 32 byte boundary + */ + uint32_t pad_dma_channel0[1]; +} pva_dma_ch_config_t; + +/** + * + * @brief DMA info for an application. The app maybe a VPU app which + * runs an algorithm on VPU or a DMA app which just has DMA configuration + * to move certain data. In both cases the application should + * configure the DMA information in this structure format + * + */ +typedef struct PVA_PACKED { + /** + * @brief The size of the dma_info structure. + * Should be populated with value sizeof(pva_dma_info_t) + * This is used to validate that the DRAM location populated + * by KMD is valid + */ + uint16_t dma_info_size; + /** + * @brief This field is used to populate the DMA Info version + * In case we need to create a new + * DMA version structure then the FW can distinguish the DMA + * info structure. Currently it should be populated with value + * @ref PVA_DMA_INFO_VERSION_ID + */ + uint16_t dma_info_version; + + /** + * @brief The number of used channels. This field can + * be populated with values from 0 to + * @ref PVA_NUM_DMA_CHANNELS both inclusive. + */ + uint8_t num_channels; + /** + * @brief Number of used descriptors. + * + * Note: In generations of PVA where the reserved descriptor range lies + * in the middle of the entire descriptor range, when the range of + * descriptors requested by the user crosses over the reserved descriptor + * range, 'num_descriptors' will include the number of the reserved + * descriptors as well. + * E.g., if reserved descriptors are at indices 60-63 and user application + * needs 70 descriptors, 'num_descriptor' will equal 74. However, + * if user application needs 30 descriptors, 'num_descriptors' will be 30. + * + * On T19x and T23x, the field can be populated + * with values from 0 inclusive to less than + * @ref PVA_SYS_DMA_MAX_DESCRIPTORS + * + * On T26x, the field can be populated with values from 0 inclusive to + * @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS + */ + uint8_t num_descriptors; + /** + * @brief The number of bytes used in HW sequencer + */ + uint16_t num_hwseq; + + /** + * @brief The First HW descriptor ID used. 
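+	 * (Illustrative pairing, not taken from this header: a task whose
+	 * descriptors occupy indices 8..37 would set descriptor_id = 8 and
+	 * num_descriptors = 30, assuming the range does not cross a reserved
+	 * descriptor window.)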
+ * + * On T19x and T23x, the field can be populated + * with values from 0 inclusive to less than + * @ref PVA_SYS_DMA_MAX_DESCRIPTORS + * + * On T26x, the field can be populated with values from 0 inclusive to + * @ref PVA_SYS_DMA_MAX_DESCRIPTORS + @ref PVA_NUM_RESERVED_DESCRIPTORS + */ + uint8_t descriptor_id; + /** + * @brief Padding for alignment of next element + */ + uint8_t pva_dma_info_pad_0[3]; + + /** + * @brief DMA done triggers used by the VPU app. + * Correspond to COMMON_DMA_OUTPUT_ENABLE registers. + */ + uint32_t dma_triggers[PVA_SYS_DMA_NUM_TRIGGERS]; + /** + * @brief DMA channel config used by the VPU app. + * One app can have upto @ref PVA_NUM_DMA_CHANNELS + * DMA channel configurations. The size of the array + * is @ref PVA_SYS_DMA_NUM_CHANNELS for additional + * configuration required for future products. + */ + pva_dma_ch_config_t dma_channels[PVA_SYS_DMA_NUM_CHANNELS]; + /** + * @brief Value to be set in DMA common configuration register. + */ + uint32_t dma_common_config; + /** + * @brief IOVA to an array of @ref pva_dtd_t, aligned at 64 bytes + * which holds the DMA descriptors used by the VPU app + */ + pva_iova dma_descriptor_base; + /** + * @brief HW sequencer configuration base address. + */ + pva_iova dma_hwseq_base; + /** + * @brief IOVA to a structure of @ref pva_dma_misr_config_t, + * location where DMA MISR configuration information is stored. + */ + pva_iova dma_misr_base; +} pva_dma_info_t; + +/** + * @brief DMA descriptor. + * + * PVA DMA Descriptor in packed HW format. + * The individual fields can be found from + * the DMA IAS document (Document 6 in Supporting Documentation and References) + * section 4.1.3.2 + */ +typedef struct PVA_PACKED { + /** @brief TRANSFER_CONTROL0 byte has DSTM in lower 2 bits, SRC_TF in 3rd bit, + * DDTM in 4th to 6th bit,DST_TF in 7th bit */ + uint8_t transfer_control0; + /** @brief Next descriptor ID to be executed*/ + uint8_t link_did; + /** @brief Highest 8 bits of the 40 bit source address*/ + uint8_t src_adr1; + /** @brief Highest 8 bits of the 40 bit destination address*/ + uint8_t dst_adr1; + /** @brief Lower 32 bits of the 40 bit source address*/ + uint32_t src_adr0; + /** @brief Lower 32 bits of the 40 bit destination address*/ + uint32_t dst_adr0; + /** @brief Length of tile line*/ + uint16_t tx; + /** @brief Number of tile lines*/ + uint16_t ty; + /** @brief Source Line pitch to advance to every line of 2D tile.*/ + uint16_t slp_adv; + /** @brief Destination Line Pitch to advance to every line of 2D tile.*/ + uint16_t dlp_adv; + /** @brief SRC PT1 CNTL has st1_adv in low 24 bits and ns1_adv in high 8 bits. */ + uint32_t srcpt1_cntl; + /** @brief DST PT1 CNTL has dt1_adv in low 24 bits and nd1_adv in high 8 bits. */ + uint32_t dstpt1_cntl; + /** @brief SRC PT2 CNTL has st2_adv in low 24 bits and ns2_adv in high 8 bits. */ + uint32_t srcpt2_cntl; + /** @brief DST PT2 CNTL has dt2_adv in low 24 bits and nd2_adv in high 8 bits. */ + uint32_t dstpt2_cntl; + /** @brief SRC PT3 CNTL has st3_adv in low 24 bits and ns3_adv in high 8 bits. */ + uint32_t srcpt3_cntl; + /** @brief DST PT3 CNTL has dt3_adv in low 24 bits and nd3_adv in high 8 bits. 
*/ + uint32_t dstpt3_cntl; + /** @brief Source circular buffer Start address offset */ + uint16_t sb_start; + /** @brief Destination circular buffer Start address offset*/ + uint16_t db_start; + /** @brief Source buffer size in bytes for circular buffer mode from Source address.*/ + uint16_t sb_size; + /** @brief Destination buffer size in bytes for circular buffer mode from destination address.*/ + uint16_t db_size; + /** @brief currently reserved*/ + uint16_t trig_ch_events; + /** @brief SW or HW events used for triggering the channel*/ + uint16_t hw_sw_trig_events; + /** @brief Tile x coordinates, for boundary padding in pixels*/ + uint8_t px; + /** @brief Tile y coordinates, for boundary padding in pixels*/ + uint8_t py; + /** @brief Transfer control byte has lower 2 bits as BPP data, bit 2 with PXDIR, bit 3 as PYDIR, + * bit 4 as BPE, bit 5 as TTS, bit 6 RSVD, Bit 7 ITC. + */ + uint8_t transfer_control1; + /** @brief Transfer control 2 gas bit 0 as PREFEN, bit 1 as DCBM, bit 2 as SCBM, Bit 3 to 3 as SBADR.*/ + uint8_t transfer_control2; + /** @brief Circular buffer upper bits for start address and size*/ + uint8_t cb_ext; + /** @brief Reserved*/ + uint8_t rsvd; + /** @brief Full replicated destination base address in VMEM aligned to 64 byte atom*/ + uint16_t frda; +} pva_dtd_t; + +/** + * + * @brief DMA MISR configuration information. This information is used by R5 + * to program MISR registers if a task requests MISR computation on its + * output DMA channels. + * + */ +typedef struct PVA_PACKED { + /** @brief Reference value for CRC computed on write addresses, i.e., MISR 1 */ + uint32_t ref_addr; + /** @brief Seed value for address CRC*/ + uint32_t seed_crc0; + /** @brief Reference value for CRC computed on first 256-bits of AXI write data */ + uint32_t ref_data_1; + /** @brief Seed value for write data CRC*/ + uint32_t seed_crc1; + /** @brief Reference value for CRC computed on second 256-bits of AXI write data */ + uint32_t ref_data_2; + /** + * @brief MISR timeout value configured in DMA common register + * @ref PVA_DMA_COMMON_MISR_ENABLE. Timeout is calculated as + * number of AXI clock cycles. + */ + uint32_t misr_timeout; +} pva_dma_misr_config_t; + +/** + * @defgroup PVA_DMA_TC0_BITS + * + * @brief PVA Transfer Control 0 Bitfields + * + * @{ + */ +/** + * @brief The shift value for extracting DSTM field + */ +#define PVA_DMA_TC0_DSTM_SHIFT (0U) +/** + * @brief The mask to be used to extract DSTM field + */ +#define PVA_DMA_TC0_DSTM_MASK (7U) + +/** + * @brief The shift value for extracting DDTM field + */ +#define PVA_DMA_TC0_DDTM_SHIFT (4U) +/** + * @brief The mask to be used to extract DDTM field + */ +#define PVA_DMA_TC0_DDTM_MASK (7U) +/** @} */ + +/** + * @defgroup PVA_DMA_TM + * + * @brief DMA Transfer Modes. 
These can be used for both + * Source (DSTM) and Destination (DDTM) transfer modes + * + * @note : For more information on transfer modes, refer section 4.1.3.1 in + * the DMA IAS document (Document 6 in Supporting Documentation and References) + * + * @{ + */ +/** + * @brief To indicate invalid transfer mode + */ +#define PVA_DMA_TM_INVALID (0U) +/** + * @brief To indicate MC transfer mode + */ +#define PVA_DMA_TM_MC (1U) +/** + * @brief To indicate VMEM transfer mode + */ +#define PVA_DMA_TM_VMEM (2U) +#if ENABLE_UNUSED == 1U +#define PVA_DMA_TM_CVNAS (3U) +#endif +/** + * @brief To indicate L2SRAM transfer mode + */ +#define PVA_DMA_TM_L2RAM (3U) +/** + * @brief To indicate TCM transfer mode + */ +#define PVA_DMA_TM_TCM (4U) +/** + * @brief To indicate MMIO transfer mode + */ +#define PVA_DMA_TM_MMIO (5U) +/** + * @brief To indicate Reserved transfer mode + */ +#define PVA_DMA_TM_RSVD (6U) +/** + * @brief To indicate VPU configuration transfer mode. + * This is only available in Source transfer mode or + * (DSTM). In Destination transfer mode, this value is + * reserved. + */ +#define PVA_DMA_TM_VPU (7U) +/** @} */ + +#if (ENABLE_UNUSED == 1U) +/** + * @brief The macro defines the number of + * bits to shift right to get the PXDIR field + * in Transfer Control 1 register in DMA + * Descriptor + */ +#define PVA_DMA_TC1_PXDIR_SHIFT (2U) + +/** + * @brief The macro defines the number of + * bits to shift right to get the PYDIR field + * in Transfer Control 1 register in DMA + * Descriptor + */ +#define PVA_DMA_TC1_PYDIR_SHIFT (3U) +#endif +/** + * @defgroup PVA_DMA_BPP + * + * @brief PVA DMA Bits per Pixel + * + * @{ + */ +/** + * @brief To indicate that the size of pixel data + * is 1 byte + */ +#define PVA_DMA_BPP_INT8 (0U) +#if ENABLE_UNUSED == 1U +#define PVA_DMA_BPP_INT16 (1U) +#endif +/** @} */ + +/** + * @brief PVA DMA Pad X direction set to right + */ +#define PVA_DMA_PXDIR_RIGHT (1U) + +/** + * @brief PVA DMA Pad Y direction set to bottom + */ +#define PVA_DMA_PYDIR_BOT (1U) + +#endif /* PVA_SYS_DMA_H */ diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-params.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-params.h new file mode 100644 index 00000000..32eb7cae --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-sys-params.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2023 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Task Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +/** + * @file pva-sys-params.h + * + * @brief Types and constants related to VPU application parameters. + */ + +#ifndef PVA_SYS_PARAMS_H +#define PVA_SYS_PARAMS_H + +#include +#include +#include + +/** @brief VPU app parameters provided by kernel-user which is to be copied to + * VMEM during runtime + * + * The VPU App parameters contains kernel-user-provided data to be + * copied into the VMEM before executing the VPU app. 
The parameter + * headers are stored in the IOVA address stored in the param_base + * member of this structure. + * + * The FW can also initialize complex datatypes, which are marked by + * special param_base outside the normal IOVA space. See the structure + * pva_vpu_instance_data_t for an example. + */ +typedef struct PVA_PACKED { + /** @brief IOVA address of the parameter data */ + pva_iova param_base; + /** @brief VMEM offset where parameter data is to be copied */ + uint32_t addr; + /** @brief Size of the parameter data in bytes */ + uint32_t size; +} pva_vpu_parameter_list_t; + +/** + * @brief The structure holds information of various + * VMEM parameters that is submitted in the task. + */ +typedef struct PVA_PACKED { + /** + * @brief The IOVA address of the parameter data. + * This should point to an array of type @ref pva_vpu_parameter_list_t . + * If no parameters are present this should be set to 0 + */ + pva_iova parameter_data_iova; + + /** + * @brief The starting IOVA address of the parameter data whose size + * is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This data is copied + * from DRAM to TCM using DMA, and then memcopied to VMEM. + * If no small parameters are present this should be set to 0. + */ + pva_iova small_vpu_param_data_iova; + + /** + * @brief The number of bytes of small VPU parameter data, i.e the + * data whose size is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . If no small + * parameters are present, this should be set to 0 + */ + uint32_t small_vpu_parameter_data_size; + + /** + * @brief The index of the array of type @ref pva_vpu_parameter_list_t from which + * the VPU large parameters are present, i.e the vpu parameters whose size is greater + * than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This value will always point to the index + * immediately after the small parameters. If no large parameter is present, then + * this field value will be same as the value of + * @ref pva_vpu_parameter_info_t.vpu_instance_parameter_list_start_index field + */ + uint32_t large_vpu_parameter_list_start_index; + + /** + * @brief The index of the array of type @ref pva_vpu_parameter_list_t from which + * the VPU instance parameters are present. This value will always point to the index + * immediately after the large parameters if large parameters are present, else it + * will be the same value as @ref pva_vpu_parameter_info_t.large_vpu_parameter_list_start_index + * field. + */ + uint32_t vpu_instance_parameter_list_start_index; +} pva_vpu_parameter_info_t; + +/** @brief Special marker for IOVA address of parameter data of a task to differentiate + * if the parameter data specified in task should be used or if FW should create a supported + * parameter data instance. If the IOVA address of parameter data is lesser than this + * special marker, then use the parameter data specified in the task, else FW + * creates the parameter data. 
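+ *
+ * A sketch of the comparison described above (illustrative pseudo-code, not
+ * firmware source):
+ *
+ *   if (param_base < PVA_COMPLEX_IOVA)
+ *       copy the user-supplied parameter data from param_base;
+ *   else
+ *       FW generates the data itself, e.g. param_base ==
+ *       PVA_SYS_INSTANCE_DATA_V1_IOVA selects a pva_vpu_instance_data_t
+ *       instance filled in by FW;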
+ */ +#define PVA_COMPLEX_IOVA (0xDA7AULL << 48ULL) + +/** @brief Macro used to create new parameter base markers + * from the special marker address @ref PVA_COMPLEX_IOVA + */ +#define PVA_COMPLEX_IOVA_V(v) (PVA_COMPLEX_IOVA | ((uint64_t)(v) << 32ULL)) + +/** @brief Special Marker for @ref pva_vpu_instance_data_t */ +#define PVA_SYS_INSTANCE_DATA_V1_IOVA (PVA_COMPLEX_IOVA_V(1) | 0x00000001ULL) + +/** + * @brief The minimuim size of the VPU parameter for it to be considered + * as a large parameter + */ +#define PVA_DMA_VMEM_COPY_THRESHOLD (uint32_t)(256U) + +/** + * @brief The maximum combined size of all VMEM parameters + * that will be supported by PVA + */ +#define VMEM_PARAMETER_BUFFER_MAX_SIZE (uint32_t)(8192U) + +/** + * @brief The maximum number of symbols that will be supported + * for one task + */ +#define TASK_VMEM_PARAMETER_MAX_SYMBOLS (uint32_t)(128U) + +/** + * @brief Information of the VPU instance data passed to VPU kernel. + */ +typedef struct PVA_PACKED { + /** @brief ID of the VPU assigned to the task */ + uint16_t pve_id; + /** @brief Variable to indicate that ppe task was launched or not */ + uint16_t ppe_task_launched; + /** @brief Base of the VMEM memory */ + uint32_t vmem_base; + /** @brief Base of the DMA descriptor SRAM memory */ + uint32_t dma_descriptor_base; + /** @brief Base of L2SRAM allocated for the task executed */ + uint32_t l2ram_base; + /** @brief Size of L2SRAM allocated for the task executed */ + uint32_t l2ram_size; +} pva_vpu_instance_data_t; + +#endif /* PVA_SYS_PARAMS_H */ diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-types.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-types.h new file mode 100644 index 00000000..101208b7 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Utility Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_TYPES_H +#define PVA_TYPES_H +#include + +/** + * @brief Used to represent address (IOVA) in PVA system. + */ +typedef uint64_t pva_iova; + +/** + * @brief Used to store Queue IDs, that represent the + * actual hardware queue id between FW and KMD. + */ +typedef uint8_t pva_queue_id_t; + +/** + * @brief Used to store PVE ID, that represents which + * PVE is being referred to . + */ +typedef uint8_t pva_pve_id_t; + +/** + * @brief Used to store Status interface ID, that is used + * to know through which status needs to be written. 
+ */ +typedef uint8_t pva_status_interface_id_t; + +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-version.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-version.h new file mode 100644 index 00000000..c6f46317 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-version.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2021 NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +/* + * Unit: Host Interface Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_VERSION_H +#define PVA_VERSION_H + +#include +#include +#include + +/** + * @brief Calculate a 32-bit build version with @ref PVA_VERSION_SUBMINOR, + * @ref PVA_VERSION_MINOR, @ref PVA_VERSION_MAJOR and @ref VERSION_TYPE macros. + * + * @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation + * features were enabled when the firmware was compiled. + * + * @param [in] \_major\_ an unsigned, 8-bit value containing the major version of the + * compiled firmware. + * + * @param [in] \_minor\_ an unsigned, 8-bit value containing the minor version of the + * compiled firmware. + * + * @param [in] \_subminor\_ an unsigned, 8-bit value containing the sub-minor version + * of the compiled firmware. + @verbatim + | ------------- | ---------------------| + | Bit Ranges | Function | + | ------------- | ---------------------| + | 7-0 | subminor version | + | 15-8 | minor version | + | 23-16 | major version | + | 31-24 | version type | + ---------------------------------------- + @endverbatim + */ +#define PVA_MAKE_VERSION(_type_, _major_, _minor_, _subminor_) \ + (PVA_INSERT(_type_, 31, 24) | PVA_INSERT(_major_, 23, 16) | \ + PVA_INSERT(_minor_, 15, 8) | PVA_INSERT(_subminor_, 7, 0)) + +/** + * @brief Calculate PVA R5 FW binary version by calling @ref PVA_MAKE_VERSION macro. + * + * @param [in] \_type\_ an 8-bit bitfield containing flags indicating which compilation + * features were enabled when the firmware was compiled. + * + * @see VERSION_TYPE For details on how to construct the @p \_type\_ field. + * + * @see PVA_VERSION_MAJOR, PVA_VERSION_MINOR, PVA_VERSION_SUBMINOR for details + * on the values used at the time this documentation was produced. + */ +#define PVA_VERSION(_type_) \ + PVA_MAKE_VERSION(_type_, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \ + PVA_VERSION_SUBMINOR) + +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h new file mode 100644 index 00000000..ebaa0c8a --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/baremetal/include/pva-vpu-syscall-interface.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved. 
+ * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +/* + * Unit: VPU Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +/** + * @file pva-vpu-syscall-interface.h + * + * @brief Syscall command specification + * + * VPU uses syscall commands to request services from R5. A syscall command is a + * 32bit value that consists of a 8 bit syscall ID and 24 bit parameter. If more + * information needs to be passed to R5, the parameter field will be a pointer + * to a VMEM location. + */ + +#ifndef PVA_VPU_SYSCALL_INTERFACE_H +#define PVA_VPU_SYSCALL_INTERFACE_H + +#include + +/** + * @defgroup PVA_VPU_SYSCALL + * + * @brief PVA VPU SYS call IDs for each type of + * SYS call. + * @{ + */ + +//! @cond DISABLE_DOCUMENTATION + +/** + * @brief VPU Syscall id for vpu printf write. + */ +#define PVA_FW_PE_SYSCALL_ID_WRITE (1U) +//! @endcond +/** + * @brief VPU Syscall id for Icache prefetch. + */ +#define PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH (2U) + +/** + * @brief VPU Syscall id for masking exceptions. + */ +#define PVA_FW_PE_SYSCALL_ID_MASK_EXCEPTION (3U) + +/** + * @brief VPU Syscall id for unmasking exceptions. + */ +#define PVA_FW_PE_SYSCALL_ID_UNMASK_EXCEPTION (4U) +//! @cond DISABLE_DOCUMENTATION +/** + * @brief VPU Syscall id for sampling VPU performance counters + */ +#define PVA_FW_PE_SYSCALL_ID_PERFMON_SAMPLE (5U) +//! @endcond +/** @} */ + +/** + * @defgroup PVA_VPU_SYSCALL_WRITE_PARAM_GROUP + * + * @brief Parameter specification for syscall write + */ + +/** + * @defgroup PVA_VPU_SYSCALL_COMMAND_FIELDS_GROUP + * + * @brief The command format to be used while issuing vpu syscall command from VPU kernel to R5. + * The fields mentioned in this group is used for submitting the command + * through the Signal_R5 interface from VPU kernel. + * + * @{ + */ + +/** + * @brief The most significant bit of the vpu syscall ID field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_ID_MSB (31U) + +/** + * @brief The least significant bit of the vpu syscall ID field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_ID_LSB (24U) + +/** + * @brief The most significant bit of the vpu syscall parameter field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_PARAM_MSB (23U) + +/** + * @brief The least significant bit of the vpu syscall parameter field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_PARAM_LSB (0U) +/** @} */ + +/** + * @defgroup PVA_VPU_SYSCALL_ICACHE_PREFETCH_PARAM_FIELDS_GROUP + * + * @brief The parameter format to be used while issuing vpu syscall command from VPU kernel to R5 for syscall icache prefetch. + * The fields mentioned in this group is used for submitting the icache prefetch command + * through the Signal_R5 interface from VPU kernel. 
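+ *
+ * As a sketch only (the variables line_count and vmem_addr below are
+ * hypothetical, and no such packing macro is provided by this header), a
+ * prefetch command word could be assembled from these fields as:
+ *
+ *   cmd = ((uint32_t)PVA_FW_PE_SYSCALL_ID_ICACHE_PREFETCH
+ *             << PVA_FW_PE_SYSCALL_ID_LSB)
+ *         | (line_count << PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_LSB)
+ *         | (vmem_addr & 0xFFFFU);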
+ * + * @{ + */ + +/** + * @brief The most significant bit of the prefetch cache line count field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_MSB (23U) + +/** + * @brief The least significant bit of the prefetch cache line count field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_PREFETCH_CACHE_LINE_COUNT_LSB (16U) + +/** + * @brief The most significant bit of the prefetch address field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_MSB (15U) + +/** + * @brief The least significant bit of the prefetch address field in + * the vpu syscall command interface + */ +#define PVA_FW_PE_SYSCALL_PREFETCH_ADDR_LSB (0U) +/** @} */ + +/** + * @defgroup PVA_VPU_SYSCALL_MASK_UNMASK_PARAM_FIELDS_GROUP + * + * @brief The parameter format to be used while issuing vpu syscall command from VPU kernel + * to R5 for masking or unmasking FP NaN Exception. + * The fields mentioned in this group is used for submitting the mask and unmask FP NaN eception command + * through the Signal_R5 interface from VPU kernel. + * + * @{ + */ + +/** + * @brief Parameter specification for syscall mask/unmask exceptions + */ +#define PVA_FW_PE_MASK_FP_INV_NAN (1U << 2U) +/** @} */ + +/** + * @breif Write syscall parameter will be a pointer to this struct + * @{ + */ +typedef union { + struct { + uint32_t addr; + uint32_t size; + } in; + struct { + uint32_t written_size; + } out; +} pva_fw_pe_syscall_write; +/** @} */ + +/** + * @defgroup PVA_VPU_SYSCALL_PERFMON_SAMPLE_PARAM_GROUP + * + * @brief Parameter specification for syscall perfmon_sample + * + * @{ + */ + +/** + * @brief Perfmon sample syscall parameter will be a pointer to this struct + */ +typedef struct { + /** counter_mask[0] is for ID: 0-31; counter_mask[1] is for ID: 32-63 */ + uint32_t counter_mask[2]; + uint32_t output_addr; +} pva_fw_pe_syscall_perfmon_sample; + +/** + * @brief Index for t26x performance counters for VPU + */ +#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T26X (0U) +#define PERFMON_COUNTER_ID_VPS_ID_VALID_T26X (1U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T26X (2U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T26X (3U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T26X (4U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T26X (5U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T26X (6U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T26X (7U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T26X (8U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T26X (9U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T26X (10U) +#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T26X (11U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T26X (12U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T26X (13U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T26X (14U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T26X (15U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T26X (16U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T26X (17U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T26X (18U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T26X (19U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_FETCH_REQ_T26X (20U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_T26X (21U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_PREEMPT_T26X (22U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_LINES_T26X (23U) +#define PERFMON_COUNTER_ID_VPS_ICACHE_MISS_DUR_T26X (24U) +#define 
PERFMON_COUNTER_ID_VPS_ICACHE_PREFETCH_DUR_T26X (25U) +#define PERFMON_COUNTER_ID_DLUT_BUSY_T26X (26U) +#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T26X (27U) +#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T26X (28U) +#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T26X (29U) +#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T26X (30U) +#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T26X (31U) +#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T26X (32U) +#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T26X (33U) +#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T26X (34U) +#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T26X (35U) +#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T26X (36U) + +/** + * @brief Index for t23x performance counters + */ +#define PERFMON_COUNTER_ID_VPS_STALL_ID_NO_VAL_INSTR_T23X (0U) +#define PERFMON_COUNTER_ID_VPS_ID_VALID_T23X (1U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_REG_DEPEND_T23X (2U) +#define PERFMON_COUNTER_ID_VPS_STALL_ID_ONLY_T23X (3U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX1_ONLY_T23X (4U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RSC_HZRD_T23X (5U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_DATA_HZRD_T23X (6U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX4_RAMIC_HI_PRI_T23X (7U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX5_APB_T23X (8U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RSC_HZRD_T23X (9U) +#define PERFMON_COUNTER_ID_VPS_STALL_EX8_RAMIC_HI_PRI_T23X (10U) +#define PERFMON_COUNTER_ID_VPS_WFE_GPI_EX_STATE_T23X (11U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L01_T23X (12U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L01_T23X (13U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_L23_T23X (14U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_RD_REQ_ACT_L23_T23X (15U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L01_T23X (16U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L01_T23X (17U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_L23_T23X (18U) +#define PERFMON_COUNTER_ID_VMEMIF_RAMIC_WR_REQ_ACT_L23_T23X (19U) +#define PERFMON_COUNTER_ID_ICACHE_FETCH_REQ_T23X (20U) +#define PERFMON_COUNTER_ID_ICACHE_MISS_T23X (21U) +#define PERFMON_COUNTER_ID_ICACHE_PREEMP_T23X (22U) +#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_LINES_T23X (23U) +#define PERFMON_COUNTER_ID_ICACHE_MISS_DUR_T23X (24U) +#define PERFMON_COUNTER_ID_ICACHE_PREFETCH_DUR_T23X (25U) +#define PERFMON_COUNTER_ID_DLUT_BUSY_T23X (26U) +#define PERFMON_COUNTER_ID_DLUT_VPU_BOTH_BUSY_T23X (27U) +#define PERFMON_COUNTER_ID_VPU_WAIT_FOR_DLUT_T23X (28U) +#define PERFMON_COUNTER_ID_DLUT_WAIT_FOR_VPU_T23X (29U) +#define PERFMON_COUNTER_ID_DLUT_IDX_TRANS_T23X (30U) +#define PERFMON_COUNTER_ID_DLUT_LUT_TRANS_T23X (31U) +#define PERFMON_COUNTER_ID_DLUT_OUT_TRANS_T23X (32U) +#define PERFMON_COUNTER_ID_DLUT_IDX_REQ_ACT_T23X (33U) +#define PERFMON_COUNTER_ID_DLUT_LUT_REQ_ACT_T23X (34U) +#define PERFMON_COUNTER_ID_DLUT_OUT_REQ_ACT_T23X (35U) +#define PERFMON_COUNTER_ID_DLUT_NULL_GROUPS_T23X (36U) + +/** + * @brief Index for t26x performance counters for PPE + */ +#define PERFMON_COUNTER_ID_PPS_STALL_ID_NO_VAL_INSTR_T26X (0U) +#define PERFMON_COUNTER_ID_PPS_ID_VALID_T26X (1U) +#define PERFMON_COUNTER_ID_PPS_STALL_ID_REG_DEPEND_T26X (2U) +#define PERFMON_COUNTER_ID_PPS_STALL_ID_ONLY_T26X (3U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX1_ONLY_T26X (4U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_LD_DEPENDENCY_T26X (5U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_ST_DEPENDENCY_T26X (6U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_IORF_DEPENDENCY_T26X (7U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STRM_STORE_FLUSH_T26X (8U) +#define 
PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_STORE_FLUSH_T26X (9U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STORE_FLUSH_T26X (10U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_LD_T26X (11U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_ST_T26X (12U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STREAM_START_T26X (13U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LD_T26X (14U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_ST_T26X (15U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_SCALAR_LDST_T26X (16U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_PUSHBACK_T26X (17U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_STQ_PUSHBACK_T26X (18U) +#define PERFMON_COUNTER_ID_PPS_STALL_EX3_LDQ_FLUSH_T26X (19U) +#define PERFMON_COUNTER_ID_PPS_WFE_GPI_EX_STATE_T26X (20U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_FETCH_REQ_T26X (21U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_T26X (22U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_PREEMPT_T26X (23U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_LINES_T26X (24U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_MISS_DUR_T26X (25U) +#define PERFMON_COUNTER_ID_PPS_ICACHE_PREFETCH_DUR_T26X (26U) +/** @} */ + +#endif /*PVA_VPU_SYSCALL_INTERFACE_H*/ diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h new file mode 100644 index 00000000..89b0ab51 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_FW_H +#define PVA_FW_H +#include "pva_api.h" +#include "pva_bit.h" +#include "pva_constants.h" +#include "pva_fw_address_map.h" +#include "pva_math_utils.h" + +/* The sizes of these structs must be explicitly padded to align to 4 bytes */ + +struct pva_fw_prefence { + uint8_t offset_hi; + uint8_t pad0[3]; + uint32_t offset_lo; + uint32_t resource_id; + uint32_t value; +}; + +struct pva_fw_postfence { + uint8_t offset_hi; + uint8_t ts_offset_hi; +/** Privileged user queue may need to trigger fence that exists in user's own + * resource table. Set this flags to tell FW to use user's resource table when + * writing this post fence. This also applies to timestamp resource ID. 
*/ +#define PVA_FW_POSTFENCE_FLAGS_USER_FENCE (1 << 0) + uint8_t flags; + uint8_t pad0; + uint32_t offset_lo; + uint32_t resource_id; + uint32_t value; + + /* Timestamp part */ + uint32_t ts_resource_id; + uint32_t ts_offset_lo; +}; + +struct pva_fw_memory_addr { + uint8_t offset_hi; + uint8_t pad0[3]; + uint32_t resource_id; + uint32_t offset_lo; +}; + +struct pva_fw_cmdbuf_submit_info { + uint8_t num_prefence; + uint8_t num_postfence; + uint8_t num_input_status; + uint8_t num_output_status; +#define PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB (1) +#define PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_LSB (0) + uint8_t flags; + uint8_t first_chunk_offset_hi; + /** First chunk size*/ + uint16_t first_chunk_size; + struct pva_fw_prefence prefences[PVA_MAX_NUM_PREFENCES]; + struct pva_fw_memory_addr input_statuses[PVA_MAX_NUM_INPUT_STATUS]; + /** Resource ID of the first chunk */ + uint32_t first_chunk_resource_id; + /** First chunk offset within the resource*/ + uint32_t first_chunk_offset_lo; + /** Execution Timeout */ + uint32_t execution_timeout_ms; + struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS]; + struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES]; +}; + +/* This is the header of the circular buffer */ +struct pva_fw_submit_queue_header { + /** + * Head index of the circular buffer. Updated by R5, read by CCPLEX + * (UMD/KMD). + */ + volatile uint32_t cb_head; + /** + * Tail index of the circular buffer. Updated by CCPLEX. + * + * CCPLEX informs R5 the tail index through CCQ. In case KMD needs to + * flush the queue. KMD may need to read the tail from here. + */ + volatile uint32_t cb_tail; + /* Immediately followed by an array of struct pva_cmdbuf_submit_info */ +}; + +static inline uint32_t pva_fw_queue_count(uint32_t head, uint32_t tail, + uint32_t size) +{ + if (tail >= head) { + return safe_subu32(tail, head); + } else { + return safe_addu32(safe_subu32(size, head), tail); + } +} + +static inline uint32_t pva_fw_queue_space(uint32_t head, uint32_t tail, + uint32_t size) +{ + return safe_subu32( + safe_subu32(size, pva_fw_queue_count(head, tail, size)), 1u); +} + +/* CCQ commands: KMD -> R5, through CCQ FIFO */ + +/* + * Most CCQ commands are meant to be used at init time. + * During runtime, only use PVA_FW_CCQ_OP_UPDATE_TAIL + */ +#define PVA_FW_CCQ_OPCODE_MSB 63 +#define PVA_FW_CCQ_OPCODE_LSB 60 + +/* + * tail value bit field: 31 - 0 + * queue id bit field: 40 - 32 + */ +#define PVA_FW_CCQ_OP_UPDATE_TAIL 0 +#define PVA_FW_CCQ_TAIL_MSB 31 +#define PVA_FW_CCQ_TAIL_LSB 0 +#define PVA_FW_CCQ_QUEUE_ID_MSB 40 +#define PVA_FW_CCQ_QUEUE_ID_LSB 32 + +/* + * resource table IOVA addr bit field: 39 - 0 + * resource table number of entries bit field: 59 - 40 + */ +#define PVA_FW_CCQ_OP_SET_RESOURCE_TABLE 1 +#define PVA_FW_CCQ_RESOURCE_TABLE_ADDR_MSB 39 +#define PVA_FW_CCQ_RESOURCE_TABLE_ADDR_LSB 0 +#define PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB 59 +#define PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB 40 + +/* + * submission queue IOVA addr bit field: 39 - 0 + * submission queue number of entries bit field: 59 - 40 + */ +#define PVA_FW_CCQ_OP_SET_SUBMISSION_QUEUE 2 +#define PVA_FW_CCQ_QUEUE_ADDR_MSB 39 +#define PVA_FW_CCQ_QUEUE_ADDR_LSB 0 +#define PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB 59 +#define PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB 40 + +/* KMD and FW communicate using messages. + * + * Message can contain up to 6 uint32_t. + * + * The first uint32_t is the header that contains message type and length. + */ +#define PVA_FW_MSG_MAX_LEN 6 + +/* KMD send messages to R5 using CCQ FIFO. 
The message length is always 64 bit. */ + +/* When R5 send messages to KMD using CCQ statuses, we use status 3 - 8 + * + * msg[0] = STATUS8 -> generate interrupt to KMD + * msg[1] = STATUS3 + * msg[2] = STATUS4 + * msg[3] = STATUS5 + * msg[4] = STATUS6 + * msg[5] = STATUS7 + */ +#define PVA_FW_MSG_STATUS_BASE 3 +#define PVA_FW_MSG_STATUS_LAST 8 + +#define PVA_FW_MSG_TYPE_MSB 30 +#define PVA_FW_MSG_TYPE_LSB 25 +#define PVA_FW_MSG_LEN_MSB 24 +#define PVA_FW_MSG_LEN_LSB 22 +/* The remaining bits (0 - 21) of msg[0] can be used for message specific + * payload */ + +/* Message types: R5 -> CCPLEX */ +#define PVA_FW_MSG_TYPE_ABORT 1 +#define PVA_FW_MSG_TYPE_BOOT_DONE 2 +#define PVA_FW_MSG_TYPE_FLUSH_PRINT 3 +#define PVA_FW_MSG_TYPE_RESOURCE_UNREGISTER 3 + +/* Message types: CCPLEX -> R5 */ +#define PVA_FW_MSG_TYPE_UPDATE_TAIL 32 + +/* Parameters for message ABORT + * ABORT message contains a short string (up to 22 chars). + * The first two charactors are in the message header (bit 15 - 0). + */ +#define PVA_FW_MSG_ABORT_STR_MAX_LEN 22 + +/* Parameters for message BOOT_DONE */ +#define PVA_FW_MSG_R5_START_TIME_LO_IDX 1 +#define PVA_FW_MSG_R5_START_TIME_HI_IDX 2 +#define PVA_FW_MSG_R5_READY_TIME_LO_IDX 3 +#define PVA_FW_MSG_R5_READY_TIME_HI_IDX 4 + +/* Parameters for message FLUSH PRINT */ +struct pva_fw_print_buffer_header { +#define PVA_FW_PRINT_BUFFER_OVERFLOWED (1 << 0) +#define PVA_FW_PRINT_FAILURE (1 << 1) + uint32_t flags; + uint32_t tail; + /* Followed by print content */ +}; + +/* Parameters for message resource unregister */ +/* Table ID is stored in msg[0], bit: 0 - 7 */ +#define PVA_FW_MSG_RESOURCE_TABLE_ID_MSB 7 +#define PVA_FW_MSG_RESOURCE_TABLE_ID_LSB 0 +/* Followed by up to 5 resource IDs. The actual number of resource ID is + * indicated by the message length. */ + +/** @brief Circular buffer based data channel to share data between R5 and CCPLEX */ +struct pva_data_channel { + uint32_t size; +#define PVA_DATA_CHANNEL_OVERFLOW (1U << 0U) + uint32_t flags; + uint32_t head; + /** + * Offset location in the circular buffer where from VPU printf data will be written by FW + */ + uint32_t tail; + /* Immediately followed by circular buffer data */ +}; + +/* PVA FW Event profiling definitions */ + +// Event identifiers +#define PVA_FW_EVENT_DO_CMD PVA_BIT8(1) +#define PVA_FW_EVENT_SCAN_QUEUES PVA_BIT8(2) +#define PVA_FW_EVENT_SCAN_SLOTS PVA_BIT8(3) +#define PVA_FW_EVENT_RUN_VPU PVA_BIT8(4) + +// Event message format +struct pva_fw_event_message { + uint32_t event : 5; + uint32_t type : 3; + uint32_t arg1 : 8; + uint32_t arg2 : 8; + uint32_t arg3 : 8; +}; + +// Each event is one of the following types. 
This should fit within 3 bits +enum pva_fw_events_type { + EVENT_TRY = 0U, + EVENT_START, + EVENT_YIELD, + EVENT_DONE, + EVENT_ERROR, + EVENT_TYPE_MAX = 7U +}; + +static inline const char *event_type_to_string(enum pva_fw_events_type status) +{ + switch (status) { + case EVENT_TRY: + return "TRY"; + case EVENT_START: + return "START"; + case EVENT_YIELD: + return "YIELD"; + case EVENT_DONE: + return "DONE"; + case EVENT_ERROR: + return "ERROR"; + default: + return ""; + } +} + +enum pva_fw_timestamp_t { + TIMESTAMP_TYPE_TSE = 0, + TIMESTAMP_TYPE_CYCLE_COUNT = 1 +}; + +struct pva_fw_profiling_buffer_header { +#define PVA_FW_PROFILING_BUFFER_OVERFLOWED (1 << 0) +#define PVA_FW_PROFILING_FAILURE (1 << 1) + uint32_t flags; + uint32_t tail; + /* Followed by print content */ +}; +/* End of PVA FW Event profiling definitions */ + +struct pva_kmd_fw_tegrastats { + uint64_t window_start_time; + uint64_t window_end_time; + uint64_t total_utilization[PVA_NUM_PVE]; +}; + +#endif // PVA_FW_H diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw_address_map.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw_address_map.h new file mode 100644 index 00000000..2c4c01fa --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw_address_map.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. + */ + +/* + * Unit: Boot Unit + * SWUD Document: + * p4sw-swarm.nvidia.com/view/sw/embedded/docs/projects/active/DRIVE_6.0/QNX/PLC_Work_Products/Element_WPs/Autonomous_Middleware/PVA/04_Unit_Design/PVA_FW/SWE-PVAFW-006-SWUD.pdf + */ +#ifndef PVA_FW_ADDRESS_MAP_H +#define PVA_FW_ADDRESS_MAP_H + +/** + * @brief Starting R5 address where FW code and data is placed. + * This address is expected to be programmed in PVA_CFG_AR1PRIV_START by KMD. + * This address is also expected to be used as offset where PVA_CFG_R5PRIV_LSEGREG1 + * and PVA_CFG_R5PRIV_USEGREG1 registers would point. + */ +#define FW_CODE_DATA_START_ADDR 0x60000000 + +/** + * @brief R5 address where FW code and data is expected to end. + * This address is expected to be programmed in PVA_CFG_AR1PRIV_END by KMD. + */ +#if PVA_DEV_MAIN_COMPATIBLE == 1 +#define FW_CODE_DATA_END_ADDR 0x60220000 +#else +#define FW_CODE_DATA_END_ADDR 0x62000000 +#endif +/** + * @defgroup PVA_EXCEPTION_VECTORS + * + * @brief Following macros define R5 addresses that are expected to be + * programmed by KMD in EVP registers as is. + * @{ + */ +/** + * @brief R5 address of reset exception vector + */ +#define EVP_RESET_VECTOR 0x60040C00 +/** + * @brief R5 address of undefined instruction exception vector + */ +#define EVP_UNDEFINED_INSTRUCTION_VECTOR (EVP_RESET_VECTOR + 0x400 * 1) +/** + * @brief R5 address of svc exception vector + */ +#define EVP_SVC_VECTOR (EVP_RESET_VECTOR + 0x400 * 2) +/** + * @brief R5 address of prefetch abort exception vector + */ +#define EVP_PREFETCH_ABORT_VECTOR (EVP_RESET_VECTOR + 0x400 * 3) +/** + * @brief R5 address of data abort exception vector + */ +#define EVP_DATA_ABORT_VECTOR (EVP_RESET_VECTOR + 0x400 * 4) +/** + * @brief R5 address of reserved exception vector. 
+ * It points to a dummy handler. + */ +#define EVP_RESERVED_VECTOR (EVP_RESET_VECTOR + 0x400 * 5) +/** + * @brief R5 address of IRQ exception vector + */ +#define EVP_IRQ_VECTOR (EVP_RESET_VECTOR + 0x400 * 6) +/** + * @brief R5 address of FIQ exception vector + */ +#define EVP_FIQ_VECTOR (EVP_RESET_VECTOR + 0x400 * 7) +/** @} */ + +/** + * @defgroup PVA_DEBUG_BUFFERS + * + * @brief These buffers are arranged in the following order: + * TRACE_BUFFER followed by CODE_COVERAGE_BUFFER followed by DEBUG_LOG_BUFFER. + * @{ + */ +/** + * @brief Maximum size of trace buffer in bytes. + */ +#define FW_TRACE_BUFFER_SIZE 0x40000 +/** + * @brief Maximum size of code coverage buffer in bytes. + */ +#define FW_CODE_COVERAGE_BUFFER_SIZE 0x80000 +/** + * @brief Maximum size of debug log buffer in bytes. + */ +#if PVA_DEV_MAIN_COMPATIBLE == 1 +#define FW_DEBUG_LOG_BUFFER_SIZE 0x40000 +#else +#define FW_DEBUG_LOG_BUFFER_SIZE 0x400000 +#endif +/** @} */ + +/** + * @brief Total size of buffers used for FW debug in bytes. + * TBD: Update this address based on build configuration once KMD changes are merged. + */ +#define FW_DEBUG_DATA_TOTAL_SIZE \ + (FW_TRACE_BUFFER_SIZE + FW_DEBUG_LOG_BUFFER_SIZE + \ + FW_CODE_COVERAGE_BUFFER_SIZE) + +/** + * @brief Starting R5 address where FW debug related data is placed. + * This address is expected to be programmed in PVA_CFG_AR2PRIV_START by KMD. + * This address is also expected to be used as offset where PVA_CFG_R5PRIV_LSEGREG2 + * and PVA_CFG_R5PRIV_USEGREG2 registers would point. + */ +#define FW_DEBUG_DATA_START_ADDR (0x70000000) //1879048192 0x70000000 + +/** + * @brief R5 address where FW debug related data is expected to end. + * This address is expected to be programmed in PVA_CFG_AR2PRIV_END by KMD. + */ +#define FW_DEBUG_DATA_END_ADDR \ + (FW_DEBUG_DATA_START_ADDR + FW_DEBUG_DATA_TOTAL_SIZE) + +/** + * @brief Starting R5 address where FW expects shared buffers between KMD and FW to be placed. + * This is to be used as offset when programming PVA_CFG_R5USER_LSEGREG and PVA_CFG_R5USER_USEGREG. + */ +#define FW_SHARED_MEMORY_START (0x80000000U) //2147483648 0x80000000 + +/** + * @defgroup PVA_HYP_SCR_VALUES + * + * @brief Following macros specify SCR firewall values that are expected to be + * programmed by Hypervisor. + * @{ + */ +/** + * @brief EVP SCR firewall to enable only CCPLEX read/write access. + */ +#define PVA_EVP_SCR_VAL 0x19000202 + +/** + * @brief PRIV SCR firewall to enable only CCPLEX and R5 read/write access. + */ +#define PVA_PRIV_SCR_VAL 0x1F008282 + +/** + * @brief CCQ SCR firewall to enable only CCPLEX write access and R5 read access. + */ +#define PVA_CCQ_SCR_VAL 0x19000280 + +/** + * @brief Status Ctl SCR firewall to enable only CCPLEX read access and R5 read/write access. + */ +#define PVA_STATUS_CTL_SCR_VAL 0x1f008082 +/** @} */ + +/** + * @defgroup PVA_KMD_SCR_VALUES + * + * @brief Following macros specify SCR firewall values that are expected to be + * programmed by KMD. + * @{ + */ +/** + * @brief SECEXT_INTR SCR firewall to enable only CCPLEX and R5 read/write access. + */ +#define PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL 0x39008282U +/** + * @brief PROC SCR firewall to enable only CCPLEX read/write access and R5 read only access. 
+ */ +#define PVA_PROC_SCR_PROC_VAL 0x39000282U +/** @} */ + +#endif diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h new file mode 100644 index 00000000..c0a938b6 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw_hyp.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_FW_HYP_H +#define PVA_FW_HYP_H + +/** + * @defgroup PVA_BOOT_TIME_MBOX + * + * @brief This group defines the mailboxes used by KMD to pass start iovas required for + * user segment and priv2 segment configuration during boot. + * @{ + */ +/** + * @brief Used to pass bits 31-0 of start iova of user segment. + */ +#define PVA_MBOXID_USERSEG_L (1U) +/** + * @brief Used to pass bits 39-32 of start iova of user segment. + */ +#define PVA_MBOXID_USERSEG_H (2U) +/** + * @brief Used to pass bits 31-0 of start iova of priv2 segment. + */ +#define PVA_MBOXID_PRIV2SEG_L (3U) +/** + * @brief Used to pass bits 39-32 of start iova of priv2 segment. + */ +#define PVA_MBOXID_PRIV2SEG_H (4U) +/** @} */ + +/** + * @defgroup PVA_SHARED_SEMAPHORE_STATUS_GROUP + * + * @brief The status bits for the shared semaphore which are mentioned in + * the group are used to communicate various information between KMD and + * PVA R5 FW. The highest 16 bits are used to send information from KMD to + * R5 FW and the lower 16 bits are used to send information from R5 FW to KMD by + * writing to the @ref PVA_BOOT_SEMA semaphore + * + * The bit-mapping of the semaphore is described below. The table below shows the mapping which + * is sent by KMD to FW. + * + * | Bit Position | Bit Field Name | Description | + * |:------------:|:---------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------:| + * | 31 | BOOT INT | To indicate that KMD is expecting an interrupt from R5 once boot is complete | + * | 30 | Reserved | Reserved for future use | + * | 27-25 | Reserved | Reserved for future use | + * | 23-21 | Reserved | Reserved for future use | + * | 20 | CG DISABLE | To indicate the PVA R5 FW should disable the clock gating feature | + * | 19 | VMEM RD WAR DISABLE | To disable the VMEM Read fail workaround feature | + * | 18-16 | Reserved | Reserved for future use | + * + * The table below shows the mapping which is sent by FW to KMD + * + * | Bit Position | Bit Field Name | Description | + * |:------------:|:---------------------:|:-----------------------------------------------------------------------------------------------------------:| + * | 15-11 | Reserved | Reserved for future use | + * | 07-03 | Reserved | Reserved for future use | + * | 02 | HALTED | To indicate to KMD that the PVA R5 FW has halted execution | + * | 01 | BOOT DONE | To indicate to KMD that the PVA R5 FW booting is complete | + * + * @{ + */ + +//! @endcond + +/** + * @brief This field is used to indicate that the R5 FW should + * disable the clock gating feature + */ +#define PVA_BOOT_SEMA_CG_DISABLE PVA_BIT(20U) +//! 
@cond DISABLE_DOCUMENTATION + +/** Tell firmware that block linear surfaces are in XBAR_RAW format instead of + * TEGRA_RAW format */ +#define PVA_BOOT_SEMA_USE_XBAR_RAW PVA_BIT(17U) + +#define PVA_BOOT_SEMA 0U + +/** + * @brief This macro has the value to be set by KMD in the shared semaphores + * @ref PVA_PREFENCE_SYNCPT_REGION_IOVA_SEM or @ref PVA_POSTFENCE_SYNCPT_REGION_IOVA_SEM + * if the syncpoint reserved region must not be configured as uncached + * in R5 MPU. + */ +#define PVA_R5_SYNCPT_REGION_IOVA_OFFSET_NOT_SET (0xFFFFFFFFU) +/** @} */ + +/* Runtime mailbox messages between firmware and hypervisor */ + +/* When hypervisor send messages to R5 through mailboxes, we use mailbox 0 - 1 + * msg[0] = mailbox 1 -> generate interrupt to R5 + * msg[1] = mailbox 0 + */ +#define PVA_FW_MBOX_TO_R5_BASE 0 +#define PVA_FW_MBOX_TO_R5_LAST 1 + +/* When R5 send messages to hypervisor through mailboxes, we use mailbox 2 - 7 + * msg[0] = mailbox 7 -> generate interrupt to hypervisor + * msg[1] = mailbox 2 + * msg[2] = mailbox 3 + * msg[3] = mailbox 4 + * msg[4] = mailbox 5 + * msg[5] = mailbox 6 + */ +#define PVA_FW_MBOX_TO_HYP_BASE 2 +#define PVA_FW_MBOX_TO_HYP_LAST 7 + +#define PVA_FW_MBOX_FULL_BIT PVA_BIT(31) + +#endif // PVA_FW_HYP_H diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h b/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h new file mode 100644 index 00000000..82b5efea --- /dev/null +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_resource.h @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_RESOURCE_H +#define PVA_RESOURCE_H +#include "pva_api.h" +#include "pva_api_dma.h" +#include "pva_bit.h" +#include "pva_constants.h" +#include "pva_utils.h" +#include "pva_math_utils.h" + +/* The sizes of these structs must be explicitly padded to align to 4 bytes */ + +struct pva_fw_dma_descriptor { + uint8_t transfer_control0; + uint8_t link_did; + uint8_t src_adr1; + uint8_t dst_adr1; + uint32_t src_adr0; + uint32_t dst_adr0; + uint16_t tx; + uint16_t ty; + uint16_t slp_adv; + uint16_t dlp_adv; + uint32_t srcpt1_cntl; + uint32_t dstpt1_cntl; + uint32_t srcpt2_cntl; + uint32_t dstpt2_cntl; + uint32_t srcpt3_cntl; + uint32_t dstpt3_cntl; + uint16_t sb_start; + uint16_t db_start; + uint16_t sb_size; + uint16_t db_size; + uint16_t trig_ch_events; + uint16_t hw_sw_trig_events; + uint8_t px; + uint8_t py; + uint8_t transfer_control1; + uint8_t transfer_control2; + uint8_t cb_ext; + uint8_t rsvd; + uint16_t frda; +}; + +/** Each slot is mapped to number of pva_fw_dma_reloc. When + * bind_dram/vmem_slot command is executed, the slot_id will be an index into + * the slot array. The slot contains starting index and count of reloc structs. + * All descriptor fields identified by the reloc structs will be patched. 
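+ *
+ * Illustrative sketch only (not the actual FW code; patch_address_field is a
+ * hypothetical helper): binding a slot to a base IOVA conceptually does
+ *
+ *   for (i = 0; i < slot->reloc_count; i++) {
+ *       r = &relocs[slot->reloc_start_idx + i];
+ *       d = &descriptors[r->desc_index];
+ *       // The identified field holds an offset; add the bound base IOVA.
+ *       // (src/dst addresses are split across the *_adr0/*_adr1 fields.)
+ *       patch_address_field(d, r->field, base_iova);
+ *   }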
+ */ +struct pva_fw_dma_slot { +/** This slot can be bound to a DRAM buffer */ +#define PVA_FW_DMA_SLOT_FLAG_DRAM (1u << 0u) +/** This slot can be bound to a L2SRAM buffer */ +#define PVA_FW_DMA_SLOT_FLAG_L2SRAM (1u << 1u) +/** This slot can be bound to a VMEM DATA buffer */ +#define PVA_FW_DMA_SLOT_FLAG_VMEM_DATA (1u << 2u) +/** This slot can be bound to a VMEM VPU config table buffer */ +#define PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE (1u << 3u) +/** This slot has enabled circular buffer. Slot with this flags cannot be bound + * to block linear surface. */ +#define PVA_FW_DMA_SLOT_FLAG_CB (1u << 4u) +#define PVA_FW_DMA_SLOT_FLAG_BOUND (1u << 5u) + uint8_t flags; + uint8_t pad; + /** Bitmask of channels that use this slot */ + uint16_t ch_use_mask; + + /** The number of descriptor fields that share this slot. Each field + * will have a pva_fw_dma_reloc struct + */ + uint16_t reloc_count; + /** Starting index in the pva_fw_dma_reloc array */ + uint16_t reloc_start_idx; + + int64_t start_addr; + int64_t end_addr; +}; + +static inline uint32_t get_slot_size(struct pva_fw_dma_slot const *slot) +{ + uint32_t size = UINT32_MAX; + int64_t tmp_size = 0; + if (slot->end_addr < slot->start_addr) { + return size; + } + tmp_size = slot->end_addr - slot->start_addr; + if (tmp_size > (int64_t)UINT32_MAX) { + return size; + } + size = (uint32_t)tmp_size; + return size; +} + +/** + * A relocate struct identifies an address field (src, dst or dst2) in + * the descriptor. The identified address field contains an offset instead of + * absolute address. The base address will be added to the offset during + * binding. + * + * This struct only has 2 bytes, so an array of this struct must have an even + * number of elements to satisfy alignment requirement. + */ +struct pva_fw_dma_reloc { + uint8_t desc_index; +/** This relocation is for source field */ +#define PVA_FW_DMA_RELOC_FIELD_SRC 1u +/** This relocation is for destination field */ +#define PVA_FW_DMA_RELOC_FIELD_DST 2u +/** This relocation is for destination 2 field */ +#define PVA_FW_DMA_RELOC_FIELD_DST2 3u + uint8_t field; +}; + +struct pva_fw_dma_channel { + uint32_t cntl0; + uint32_t cntl1; + uint32_t boundary_pad; + uint32_t hwseqcntl; + uint32_t hwseqfscntl; +}; + +struct pva_fw_data_section_info { + uint32_t data_buf_off; /*< offset in data section data byte array */ + uint32_t vmem_addr; + uint32_t size; +}; + +struct pva_dma_resource_map { +// TODO: These macros should be derived using the maximum limits across platforms +// Today, they are being hardcoded. 
Make this automatic.
+#define PVA_DMA_NUM_CHANNEL_PARTITIONS \
+	((PVA_MAX_NUM_DMA_CHANNELS) / (PVA_DMA_CHANNEL_ALIGNMENT))
+#define PVA_DMA_NUM_DESCRIPTOR_PARTITIONS \
+	((PVA_MAX_NUM_DMA_DESC) / (PVA_DMA_DESCRIPTOR_ALIGNMENT))
+#define PVA_DMA_NUM_ADB_PARTITIONS \
+	((PVA_MAX_NUM_ADB_BUFFS) / (PVA_DMA_ADB_ALIGNMENT))
+#define PVA_DMA_NUM_HWSEQ_WORD_PARTITIONS \
+	((PVA_MAX_NUM_HWSEQ_WORDS) / (PVA_DMA_HWSEQ_WORD_ALIGNMENT))
+
+	uint64_t channels : PVA_DMA_NUM_CHANNEL_PARTITIONS;
+	uint64_t descriptors : PVA_DMA_NUM_DESCRIPTOR_PARTITIONS;
+	uint64_t adbs : PVA_DMA_NUM_ADB_PARTITIONS;
+	uint64_t hwseq_words : PVA_DMA_NUM_HWSEQ_WORD_PARTITIONS;
+	uint64_t triggers : 1;
+};
+
+static inline void
+pva_dma_resource_map_reset(struct pva_dma_resource_map *resource_map)
+{
+	resource_map->channels = 0u;
+	resource_map->descriptors = 0u;
+	resource_map->adbs = 0u;
+	resource_map->hwseq_words = 0u;
+	resource_map->triggers = 0u;
+}
+
+// Note: the following pva_dma_resource_map_* APIs assume an alignment requirement
+// on the 'start' index. The requirement is not enforced here; if it is not met,
+// the FW may falsely predict resource conflicts between commands. However, this
+// will not impact functionality or correctness.
+static inline void
+pva_dma_resource_map_add_channels(struct pva_dma_resource_map *map,
+				  uint16_t start, uint16_t count)
+{
+	map->channels |= pva_mask64(start, count, PVA_DMA_CHANNEL_ALIGNMENT);
+}
+
+static inline void
+pva_dma_resource_map_add_descriptors(struct pva_dma_resource_map *map,
+				     uint16_t start, uint16_t count)
+{
+	map->descriptors |=
+		pva_mask64(start, count, PVA_DMA_DESCRIPTOR_ALIGNMENT);
+}
+
+static inline void
+pva_dma_resource_map_add_adbs(struct pva_dma_resource_map *map, uint16_t start,
+			      uint16_t count)
+{
+	map->adbs |= pva_mask64(start, count, PVA_DMA_ADB_ALIGNMENT);
+}
+
+static inline void
+pva_dma_resource_map_add_hwseq_words(struct pva_dma_resource_map *map,
+				     uint16_t start, uint16_t count)
+{
+	map->hwseq_words |=
+		pva_mask64(start, count, PVA_DMA_HWSEQ_WORD_ALIGNMENT);
+}
+
+static inline void
+pva_dma_resource_map_add_triggers(struct pva_dma_resource_map *map)
+{
+	// If an application is running on VPU, it has access to all the triggers.
+	// Only FW and DMA-only workloads can initiate transfers in parallel to
+	// a running VPU application, but they do not require triggers.
+ map->triggers |= 1; +} + +static inline void +pva_dma_resource_map_copy_channels(struct pva_dma_resource_map *dst_map, + struct pva_dma_resource_map *src_map) +{ + dst_map->channels |= src_map->channels; +} + +static inline void +pva_dma_resource_map_copy_descriptors(struct pva_dma_resource_map *dst_map, + struct pva_dma_resource_map *src_map) +{ + dst_map->descriptors |= src_map->descriptors; +} + +static inline void +pva_dma_resource_map_copy_adbs(struct pva_dma_resource_map *dst_map, + struct pva_dma_resource_map *src_map) +{ + dst_map->adbs |= src_map->adbs; +} + +static inline void +pva_dma_resource_map_copy_triggers(struct pva_dma_resource_map *dst_map, + struct pva_dma_resource_map *src_map) +{ + dst_map->triggers |= src_map->triggers; +} + +static inline void +pva_dma_resource_map_copy_hwseq_words(struct pva_dma_resource_map *dst_map, + struct pva_dma_resource_map *src_map) +{ + dst_map->hwseq_words |= src_map->hwseq_words; +} + +struct pva_dma_config_resource { + uint8_t base_descriptor; + uint8_t base_channel; + uint8_t num_descriptors; + uint8_t num_channels; + + uint16_t num_dynamic_slots; + /** Must be an even number to satisfy padding requirement. */ + uint16_t num_relocs; + /** Indices of channels. Once the corresponding bit is set, the block height of + * this channel should not be changed. */ + uint16_t ch_block_height_fixed_mask; + + uint16_t base_hwseq_word; + uint16_t num_hwseq_words; + uint16_t pad; + + uint32_t vpu_exec_resource_id; + uint32_t common_config; + uint32_t output_enable[PVA_NUM_DMA_TRIGGERS]; + + struct pva_dma_resource_map dma_resource_map; + /* Followed by of pva_fw_dma_slot */ + /* Followed by of pva_fw_dma_reloc */ + /* Followed by an array of pva_fw_dma_channel */ + /* Followed by an array of pva_fw_dma_descriptor */ + + /* ===================================================================== + * The following fields do not need to be fetched into TCM. 
The DMA config + * resource size (as noted in the resource table) does not include these + * fields */ + + /* Followed by an array of hwseq words */ +}; + +struct pva_fw_vmem_buffer { +#define PVA_FW_SYM_TYPE_MSB 31 +#define PVA_FW_SYM_TYPE_LSB 29 +#define PVA_FW_VMEM_ADDR_MSB 28 +#define PVA_FW_VMEM_ADDR_LSB 0 + uint32_t addr; + uint32_t size; +}; + +struct pva_exec_bin_resource { + uint8_t code_addr_hi; + uint8_t data_section_addr_hi; + uint8_t num_data_sections; + uint8_t pad; + + uint32_t code_addr_lo; + uint32_t data_section_addr_lo; + uint32_t code_size; + uint32_t num_vmem_buffers; + + /* Followed by number of pva_fw_data_section_info */ + /* Followed by number of pva_fw_vmem_buffer */ +}; + +static inline struct pva_fw_dma_slot * +pva_dma_config_get_slots(struct pva_dma_config_resource *dma_config) +{ + return (struct pva_fw_dma_slot + *)((uint8_t *)dma_config + + sizeof(struct pva_dma_config_resource)); +} + +static inline struct pva_fw_dma_reloc * +pva_dma_config_get_relocs(struct pva_dma_config_resource *dma_config) +{ + return (struct pva_fw_dma_reloc + *)((uint8_t *)pva_dma_config_get_slots(dma_config) + + sizeof(struct pva_fw_dma_slot) * + dma_config->num_dynamic_slots); +} + +static inline struct pva_fw_dma_channel * +pva_dma_config_get_channels(struct pva_dma_config_resource *dma_config) +{ + return (struct pva_fw_dma_channel *)((uint8_t *) + pva_dma_config_get_relocs( + dma_config) + + sizeof(struct pva_fw_dma_reloc) * + dma_config->num_relocs); +} + +static inline struct pva_fw_dma_descriptor * +pva_dma_config_get_descriptors(struct pva_dma_config_resource *dma_config) +{ + return (struct pva_fw_dma_descriptor + *)((uint8_t *)pva_dma_config_get_channels(dma_config) + + sizeof(struct pva_fw_dma_channel) * + dma_config->num_channels); +} + +#endif // PVA_RESOURCE_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api.h b/drivers/video/tegra/host/pva/src/include/pva_api.h new file mode 100644 index 00000000..4614819f --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api.h @@ -0,0 +1,349 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_API_H +#define PVA_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pva_api_types.h" +#include "pva_api_dma.h" +#include "pva_api_vpu.h" +#include "pva_api_cmdbuf.h" + +/* Core APIs */ + +/** + * @brief Create a PVA context. + * + * @param[in] pva_index Select which PVA instance to use if there are multiple PVAs + * in the SOC. + * @param[in] max_resource_count Maximum number of resources this context can have. + * @param[out] ctx Pointer to the created context. + */ +enum pva_error pva_context_create(uint32_t pva_index, + uint32_t max_resource_count, + struct pva_context **ctx); + +/** + * @brief Destroy a PVA context. + * + * A context can only be destroyed after all queues are destroyed. + * + * @param[in] ctx Pointer to the context to destroy. + */ +void pva_context_destroy(struct pva_context *ctx); + +/** + * @brief Create a PVA queue. + * + * @param[in] ctx Pointer to the context. + * @param[in] max_submission_count Max number of submissions that can be queued. 
+ * @param[out] queue Pointer to the created queue. + */ +enum pva_error pva_queue_create(struct pva_context *ctx, + uint32_t max_submission_count, + struct pva_queue **queue); + +/** + * @brief Destroy a PVA queue. + * + * @param[in] queue Pointer to the queue to destroy. + */ +void pva_queue_destroy(struct pva_queue *queue); + +/** + * @brief Allocate DRAM memory that can be mapped PVA's device space + * + * @param[in] size Size of the memory to allocate. + * @param[out] out_mem Pointer to the allocated memory. + */ +enum pva_error pva_memory_alloc(uint64_t size, struct pva_memory **out_mem); + +/** + * @brief Map the memory to CPU's virtual space. + * + * @param[in] mem Pointer to the memory to map. + * @param[in] access_mode Access mode for the memory. PVA_ACCESS_RD or + * PVA_ACCESS_RW. + * @param[out] out_va Pointer to the virtual address of the mapped memory. + */ +enum pva_error pva_memory_cpu_map(struct pva_memory *mem, uint32_t access_mode, + void **out_va); + +/** + * @brief Unmap the memory from CPU's virtual space. + * + * @param[in] mem Pointer to the memory to unmap. + * @param[in] va Previously mapped virtual address. + */ +enum pva_error pva_memory_cpu_unmap(struct pva_memory *mem, void *va); + +/** + * @brief Free the memory. + * + * Freeing a registered memory is okay since KMD holds a reference to the memory. + * + * @param mem Pointer to the memory to free. + */ +void pva_memory_free(struct pva_memory *mem); + +/** + * @brief Wait for a syncpoint to reach a value. + * + * @param[in] ctx Pointer to the context. + * @param[in] syncpiont_id Syncpoint ID to wait on. + * @param[in] value Value to wait for. + * @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite. + */ +enum pva_error pva_syncpoint_wait(struct pva_context *ctx, + uint32_t syncpiont_id, uint32_t value, + uint64_t timeout_us); + +/** + * @brief Submit a batch of command buffers. + * + * @param[in] queue Pointer to the queue. + * @param[in] submit_infos Array of submit info structures. + * @param[in] count Number of submit info structures. + * @param[in] timeout_us Timeout in microseconds. PVA_TIMEOUT_INF for infinite. + * + * @note Concurrent submission to the same queue needs to be serialized by the + * caller. + */ +enum pva_error +pva_cmdbuf_batch_submit(struct pva_queue *queue, + struct pva_cmdbuf_submit_info *submit_infos, + uint32_t count, uint64_t timeout_us); + +/** + * @brief Get the symbol table for a registered executable. + * + * @param[in] ctx Pointer to the context. + * @param[in] exe_resource_id Resource ID of the executable. + * @param[out] out_info Pointer to the symbol info array. + * @param[in] max_num_symbols Maximum number of symbols to return. + */ +enum pva_error pva_executable_get_symbols(struct pva_context *ctx, + uint32_t exe_resource_id, + struct pva_symbol_info *out_info, + uint32_t max_num_symbols); + +/** + * @brief Submit a list of asynchronous registration operations to KMD. + * + * The operations can be: + * - Memory registration + * - Executable registration + * - DMA config registration + * + * The response buffer will contain the resource IDs of the registered + * resources. Any command buffers that use these resources should wait on the + * returned post fence. + * + * @param[in] ctx Pointer to the context. + * @param[in] fence Pointer to the post fence to wait on. If NULL, it means the + * caller is not interested in waiting. This usually only applies to unregister + * operations. + * @param[in] Input buffer containing the list of operations. 
+ * @param[out] Output buffer to store the response. + * + * @note Input and output buffer may be the same buffer. + */ +enum pva_error pva_ops_submit_async(struct pva_context *ctx, + struct pva_fence *fence, + struct pva_ops_buffer const *input_buffer, + struct pva_ops_buffer *output_buffer); + +/** + * @brief Perform a list of registration operations synchronously. + * + * The operations can be: + * - Memory registration + * - Executable registration + * - DMA config registration + * + * The response buffer will contain the resource IDs of the registered + * resources. + * + * @param[in] ctx Pointer to the context. + * @param[in] Input buffer containing the list of operations. + * @param[out] Output buffer to store the response. + * + * @note Input and output buffer may be the same buffer. + * + */ +enum pva_error pva_ops_submit(struct pva_context *ctx, + struct pva_ops_buffer const *input_buffer, + struct pva_ops_buffer *output_buffer); + +/** Size of the ops buffer header. When user allocates memory for ops buffer, + * this size needs to be added. */ +#define PVA_OPS_BUFFER_HEADER_SIZE 64 +/** + * @brief Initialize pva_ops_buffer to keep track of the state of + * operations buffer during preparation. + * + * @param[out] buf_handle Pointer to the pva_ops_buffer object to initialize. + * @param[in] buf Pointer to the buffer that will store the operations. + * @param[in] size Size of the buffer. + */ +enum pva_error pva_ops_buffer_init(struct pva_ops_buffer *buf_handle, void *buf, + uint32_t size); + +#define PVA_OPS_MEMORY_REG_SIZE 64 +/** + * @brief Append a memory registration operation to the operations buffer. + * + * @param[in] ctx Pointer to the context. + * @param[in] mem Pointer to the memory to register. + * @param[in] segment Memory segment to register. + * @param[in] access_flags Access flags for the memory. + * @param[out] op_buf Pointer to the operations buffer. + */ +enum pva_error pva_ops_append_memory_register(struct pva_context *ctx, + struct pva_memory *mem, + enum pva_memory_segment segment, + uint32_t access_flags, + struct pva_ops_buffer *op_buf); +#define PVA_OPS_EXEC_REG_HEADER_SIZE 16 +/** + * @brief Append an executable registration operation to the operations. + * + * @param[in] ctx Pointer to the context. + * @param[in] executable Pointer to the executable binary content. + * @param[in] executable_size Size of the executable. + * @param[out] op_buf Pointer to the operations buffer. + */ +enum pva_error pva_ops_append_executable_register( + struct pva_context *ctx, void const *executable, + uint32_t executable_size, struct pva_ops_buffer *op_buf); + +#define PVA_OPS_DMA_CONFIG_REG_SIZE (24 * 1024) +/** + * @brief Append a DMA config registration operation to the operations. + * @param[in] ctx Pointer to the context. + * @param[in] dma_config Pointer to the DMA config. + * @param[out] op_buf Pointer to the operations buffer. + */ +enum pva_error +pva_ops_append_dma_config_register(struct pva_context *ctx, + struct pva_dma_config const *dma_config, + struct pva_ops_buffer *op_buf); + +#define PVA_OPS_UNREG_SIZE 16 +enum pva_error pva_ops_append_unregister(struct pva_context *ctx, + uint32_t resource_id, + struct pva_ops_buffer *op_buf); + +/** + * @brief Parse the response buffer to get the resource ID of the registered + * memory or DMA configuration. + * + * @param[in] resp_buf Pointer to the response buffer. + * @param[out] resource_id output resource ID. 
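+ *
+ * Illustrative usage (a sketch with error handling omitted; the segment and
+ * access-flag arguments are placeholders chosen by the caller):
+ *
+ *   uint8_t buf[PVA_OPS_BUFFER_HEADER_SIZE + PVA_OPS_MEMORY_REG_SIZE];
+ *   struct pva_ops_buffer ops;
+ *   uint32_t resource_id;
+ *
+ *   pva_ops_buffer_init(&ops, buf, sizeof(buf));
+ *   pva_ops_append_memory_register(ctx, mem, segment, access_flags, &ops);
+ *   pva_ops_submit(ctx, &ops, &ops);   // input and output may be the same
+ *   pva_ops_parse_register_resp(&ops, &resource_id);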
+ */ +enum pva_error pva_ops_parse_register_resp(struct pva_ops_buffer *resp_buf, + uint32_t *resource_id); + +/** + * @brief Parse the response buffer to get the resource ID of the registered + * executable. + * + * @param[in] resp_buf Pointer to the response buffer. + * @param[out] num_symbols Number of symbols in the executable. + * @param[out] resource_id output resource ID. + */ +enum pva_error pva_ops_parse_exec_register_resp(struct pva_ops_buffer *op_buf, + uint32_t *num_symbols, + uint32_t *resource_id); + +#define PVA_DATA_CHANNEL_HEADER_SIZE 32 +/** + * @brief Initialize VPU print buffer + * + * @param[in] data Pointer to VPU print buffer. + * @param[in] size Size of VPU print buffer. + */ +struct pva_data_channel; +enum pva_error pva_init_data_channel(void *data, uint32_t size, + struct pva_data_channel **data_channel); + +/** + * @brief Read VPU print buffer + * + * @param[in] data Pointer to VPU print buffer. + * @param[out] read_buffer Pointer to output buffer in which data will be read. + * @param[in] bufferSize Size of output buffer. + * @param[out] read_size Size of actual data read in output buffer. + */ +enum pva_error pva_read_data_channel(struct pva_data_channel *data_channel, + uint8_t *read_buffer, uint32_t bufferSize, + uint32_t *read_size); + +/** + * @brief Duplicate PVA memory object. + * + * This function duplicates a PVA memory object. The new object will have shared + * ownership of the memory. + * + * @param[in] src Pointer to the source memory object. + * @param[in] access_mode Access mode for the new memory object. It should be + * more restrictive than the source memory. Passing 0 will use the same access + * mode as the source memory. + * @param[out] dst Resulting duplicated memory object. + */ +enum pva_error pva_memory_duplicate(struct pva_memory *src, + uint32_t access_mode, + struct pva_memory **dst); + +/** + * @brief Get memory attributes. + * + * @param[in] mem Pointer to the memory. + * @param[out] out_attrs Pointer to the memory attributes. + */ +void pva_memory_get_attrs(struct pva_memory const *mem, + struct pva_memory_attrs *out_attrs); + +/** \brief Specifies the PVA system software major version. */ +#define PVA_SYSSW_MAJOR_VERSION (2U) + +/** \brief Specifies the PVA system software minor version. */ +#define PVA_SYSSW_MINOR_VERSION (7U) + +/** + * @brief Get PVA system software version. + * + * PVA system software version is defined as the latest version of cuPVA which is fully supported + * by this version of the PVA system software. + * + * @param[out] version version of currently running system SW, computed as: + (PVA_SYSSW_MAJOR_VERSION * 1000) + PVA_SYSSW_MINOR_VERSION + * @return PVA_SUCCESS on success, else error code indicating the failure. + */ +enum pva_error pva_get_version(uint32_t *version); + +/** + * @brief Get the hardware characteristics of the PVA. + * + * @param[out] pva_hw_char Pointer to the hardware characteristics. + */ +enum pva_error +pva_get_hw_characteristics(struct pva_characteristics *pva_hw_char); + +#ifdef __cplusplus +} +#endif + +#endif // PVA_API_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h b/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h new file mode 100644 index 00000000..c4365a5a --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h @@ -0,0 +1,627 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. 
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_API_CMDBUF_H +#define PVA_API_CMDBUF_H +#include "pva_api_types.h" + +//Maximum number of slots for maintaining Timestamps +#define PVA_MAX_QUERY_SLOTS_COUNT 32U + +/** The common header for all commands. + */ +struct pva_cmd_header { +#define PVA_CMD_PRIV_OPCODE_FLAG (1U << 7U) + /** Opcode for the command. MSB of opcode indicates whether this command is + * privileged or not */ + uint8_t opcode; + /** Command specific flags */ + uint8_t flags; + /** + * For pva_cmd_barrier: barrier_group specifies which group this barrier + * waits for. + * For pva_cmd_retire_barrier_group: barrier_group specifies which id will + * be retired. Retired ids can be re-used by future commands and will refer + * to a new logical group. + * For all other commands: barrier_group specifies which barrier group this + * command belongs to. Other commands are able to defer execution until all + * commands in the barrier group have completed, or stall the cmd buffer + * until such a time. Note that asynchronous commands may complete in an + * order different to the order in which they appear in the commmand + * buffer. + */ + uint8_t barrier_group; + /** Length in 4-bytes, including this header. */ + uint8_t len; +}; + +struct pva_user_dma_allowance { +#define PVA_USER_DMA_ALLOWANCE_ADB_STEP_SIZE 8 + uint32_t channel_idx : 4; + uint32_t desc_start_idx : 7; + uint32_t desc_count : 7; + uint32_t adb_start_idx : 6; + uint32_t adb_count : 6; +}; + +/* Basic Commands */ + +/** Does nothing. It can be used as a place holder in the command buffer. */ +struct pva_cmd_noop { +#define PVA_CMD_OPCODE_NOOP 0U + struct pva_cmd_header header; +}; + +/** Link next chunk. This command can be placed anywhere in the command buffer. + * Firmware will start fetching the next chunk when this command is executed. */ +struct pva_cmd_link_chunk { +#define PVA_CMD_OPCODE_LINK_CHUNK 1U + struct pva_cmd_header header; + uint8_t next_chunk_offset_hi; + uint8_t pad; + uint16_t next_chunk_size; /**< Size of next chunk in bytes */ + uint32_t next_chunk_resource_id; + uint32_t next_chunk_offset_lo; + struct pva_user_dma_allowance user_dma; +}; + +/** Barrier command. The user can assign a barrier group to any asynchronous + * command. The barrier command blocks FW execution until the specified group of + * asynchronous commands have completed. Up to 8 barrier groups are supported. + * + * @note A barrier command is not typically required since FW stalls + * automatically in the event of hardware conflicts or when issuing a command is + * deemed unsafe according to the state machines. However, if a stall is needed + * for other reasons, the barrier command can be utilized. + */ +struct pva_cmd_barrier { +#define PVA_CMD_OPCODE_BARRIER 2U + struct pva_cmd_header header; +}; + +/** Acquire one or more PVE systems, each of which includes a VPS, DMA and PPE. + * It blocks until specified number of engines are acquired. + * By default, the lowest engine ID acquired is set as the current engine. + * Acquired engines will be automatically released when this command buffer finishes. + * They can also be released using release_engine command. 
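+ *
+ * Illustrative encoding only (a sketch based on the pva_cmd_header
+ * definition above; the field values are example choices, not requirements):
+ *
+ *   struct pva_cmd_acquire_engine cmd = {0};
+ *   cmd.header.opcode = PVA_CMD_OPCODE_ACQUIRE_ENGINE;
+ *   cmd.header.len    = sizeof(cmd) / 4U;  // length in 4-byte words
+ *   cmd.engine_count  = 1U;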
+ */ +struct pva_cmd_acquire_engine { +#define PVA_CMD_OPCODE_ACQUIRE_ENGINE 3U + struct pva_cmd_header header; + uint8_t engine_count; + uint8_t pad[3]; +}; + +/** Release all PVE systems acquired. It is legal to release engine when engine + * is still running. The released engine won’t be available to be acquired until + * it finishes and becomes idle again. */ +struct pva_cmd_release_engine { +#define PVA_CMD_OPCODE_RELEASE_ENGINE 4U + struct pva_cmd_header header; +}; + +/** Set a PVE engine as current. Following commands will modify this engine. The + * zero-based engine index must be less than the acquired engine number. */ +struct pva_cmd_set_current_engine { +#define PVA_CMD_OPCODE_SET_CURRENT_ENGINE 5U + struct pva_cmd_header header; + uint8_t engine_index; + uint8_t pad[3]; +}; + +/** This command specifies the executable to use for the following VPU launches. + * It doesn’t do anything other than setting the context for the following + * commands. + * + * Note: This command cannot be initiated if any of the DMA sets (that access + * VMEM) are in a running state, in order to prevent mismatches between DMA sets + * and VPU executables. The command buffer will stall until these DMA sets are + * finished. */ +struct pva_cmd_set_vpu_executable { +#define PVA_CMD_OPCODE_SET_VPU_EXECUTABLE 6U + struct pva_cmd_header header; + uint32_t vpu_exec_resource_id; +}; + +/** This command clears the entire VMEM. User may choose to skip VMEM clear if + * there are no bss sections in the VPU executable. Since VMEM can be accessed + * by both VPU and PPE, this command drives both the VPU state machine and the + * PPE state machine. As a result, it can only be started if both VPU state + * machine and PPE state machine are in valid states (Idle or Binded). */ +struct pva_cmd_clear_vmem { +#define PVA_CMD_OPCODE_CLEAR_VMEM 7U + struct pva_cmd_header header; +}; + +/** This command configures VPU hardware. Specifically, it configures code + * segment register and copies data sections. */ +struct pva_cmd_init_vpu_executable { +#define PVA_CMD_OPCODE_INIT_VPU_EXECUTABLE 8U + struct pva_cmd_header header; + struct pva_user_dma_allowance user_dma; +}; + +/** Start VPU instruction prefetch from specified entry point. Currently, the + * entry point index must be 0. More entry points will be supported in the + * future. Note that this command merely triggers the prefetch but does not wait + * for the prefetch to complete. Therefore, this command is synchronous. */ +struct pva_cmd_prefetch_vpu_code { +#define PVA_CMD_OPCODE_PREFETCH_VPU_CODE 9U + struct pva_cmd_header header; + uint32_t entry_point_index; +}; + +/** Run the VPU program from the specified entry point until finish. The + * lifetime of this command covers the entire VPU program execution. Since this + * command is asynchronous, it doesn’t block the following commands from + * execution. */ +struct pva_cmd_run_vpu { +#define PVA_CMD_OPCODE_RUN_VPU 10U + struct pva_cmd_header header; + uint32_t entry_point_index; +}; + +/** Copy data from opaque payload to a VPU variable. Firmware may choose to copy + * with R5 or DMA. If using DMA, channel 0 will be used. */ +struct pva_cmd_set_vpu_parameter { +#define PVA_CMD_OPCODE_SET_VPU_PARAMETER 11U + struct pva_cmd_header header; + uint16_t data_size; + uint16_t pad; + uint32_t symbol_id; + uint32_t vmem_offset; + /* Followed by number of bytes, padded to 4 bytes */ +}; + +/** Copy data from a DRAM buffer to a VPU variable. DMA will be used to perform + * the copy. 
The user can optionally provide a user channel, a descriptor and + * ADBs to speed up the copy. */ +struct pva_cmd_set_vpu_parameter_with_buffer { +#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_BUFFER 12U + struct pva_cmd_header header; + struct pva_user_dma_allowance user_dma; + uint8_t src_dram_offset_hi; + uint8_t pad[3]; + uint32_t data_size; + uint32_t dst_symbol_id; + uint32_t dst_vmem_offset; + uint32_t src_dram_resource_id; + uint32_t src_dram_offset_lo; +}; + +/** For set_vpu_parameter_with_address command, set this flag in header.flags to + * indicate that the target symbol is the legacy pointer symbol type: + * pva_fw_vpu_legacy_ptr_symbol, which only supports 32bit offset and 32bit + * size. */ +#define PVA_CMD_FLAGS_USE_LEGACY_POINTER 0x1 +/** Copy the address of a DRAM buffer to a VPU variable. The variable must be + * laid out exactly according to pva_fw_vpu_ptr_symbol + */ +struct pva_cmd_set_vpu_parameter_with_address { +#define PVA_CMD_OPCODE_SET_VPU_PARAMETER_WITH_ADDRESS 13U + struct pva_cmd_header header; + uint8_t dram_offset_hi; + uint8_t pad[3]; + uint32_t symbol_id; + uint32_t dram_resource_id; + uint32_t dram_offset_lo; +}; + +#define PVA_MAX_DMA_SETS_PER_DMA_ENGINE 4 +#define PVA_DMA_CONFIG_FETCH_BUFFER_PER_DMA_ENGINE 1 + +/** This command first acquires the TCM scratch and then fetches DMA configuration + * into the scratch. The command does not modify DMA + * hardware, allowing FW to continue using user channels for data transfer after + * its execution. This command only uses channel 0 to fetch the DMA + * configuration. However, user can still help speed up the process by + * providing additional ADBs. This command will block if there’s no TCM scratch + * available. If there’s no pending commands AND there’s no TCM scratch, then it + * means we encountered a dead lock, the command buffer will be aborted. */ +struct pva_cmd_fetch_dma_configuration { +#define PVA_CMD_OPCODE_FETCH_DMA_CONFIGURATION 14U + struct pva_cmd_header header; + uint8_t dma_set_id; + uint8_t pad[3]; + uint32_t resource_id; + struct pva_user_dma_allowance user_dma; +}; + +/** Setup DMA hardware registers using previously fetched DMA configuration. FW + * uses channel 0 to copy DMA descriptors into descriptor RAM. The user can + * provide additional ADBs to speed up the process. The command will block until + * the needed channels, descriptors and hwseq words are acquired. The command must + * also validate that all source and destinations fields of each DMA descriptor + * being programmed is bound to a resource. + */ +struct pva_cmd_setup_dma { +#define PVA_CMD_OPCODE_SETUP_DMA 15U + struct pva_cmd_header header; + struct pva_user_dma_allowance user_dma; + uint8_t dma_set_id; + uint8_t pad[3]; +}; + +/** Run DMA channels according to the current DMA configuration until they are + * finished. The lifetime of this command covers the entire DMA transfer. The + * command shall block until the needed VDBs/ADBs and triggers (GPIOs) are + * acquired. + + * @note This command checks that the DMA set to be started is indeed paired + * with the currently bound VPU executable. If not, this constitutes a + * programming error, and the command buffer will be aborted. */ +struct pva_cmd_run_dma { +#define PVA_CMD_OPCODE_RUN_DMA 16U + struct pva_cmd_header header; + uint8_t dma_set_id; + uint8_t pad[3]; +}; + +/** This command specifies the executable to use for the following PPE launches. + * It doesn’t do anything other than setting the context for the following + * commands. 
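+ *
+ * A plausible PPE launch sequence (illustrative only; the actual ordering
+ * constraints are defined by the individual command descriptions below):
+ * set_ppe_executable -> init_ppe_executable -> prefetch_ppe_code -> run_ppe.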
*/ +struct pva_cmd_set_ppe_executable { +#define PVA_CMD_OPCODE_SET_PPE_EXECUTABLE 17U + struct pva_cmd_header header; + uint32_t ppe_exec_resource_id; +}; + +/** Start PPE instruction prefetch from specified entry point. Currently, the + * entry point index must be 0. Note that this command merely triggers the + * prefetch but does not wait for the prefetch to complete. Therefore, this + * command is synchronous. */ +struct pva_cmd_prefetch_ppe_code { +#define PVA_CMD_OPCODE_PREFETCH_PPE_CODE 18U + struct pva_cmd_header header; + uint32_t entry_point_index; +}; + +/** Setup PPE code segment and data sections. */ +struct pva_cmd_init_ppe_executable { +#define PVA_CMD_OPCODE_INIT_PPE_EXECUTABLE 19U + struct pva_cmd_header header; + struct pva_user_dma_allowance user_dma; +}; + +/** Run the PPE program until finish. This lifetime of this command covers the + * entire PPE program execution. */ +struct pva_cmd_run_ppe { +#define PVA_CMD_OPCODE_RUN_PPE 20U + struct pva_cmd_header header; + uint32_t entry_point_index; +}; + +#define PVA_BARRIER_GROUP_0 0U +#define PVA_BARRIER_GROUP_1 1U +#define PVA_BARRIER_GROUP_2 2U +#define PVA_BARRIER_GROUP_3 3U +#define PVA_BARRIER_GROUP_4 4U +#define PVA_BARRIER_GROUP_5 5U +#define PVA_BARRIER_GROUP_6 6U +#define PVA_BARRIER_GROUP_7 7U + +#define PVA_MAX_BARRIER_GROUPS 8U + +#define PVA_BARRIER_GROUP_INVALID 0xFFU + +/** + * @brief Captures a timestamp to DRAM + * + * This command allows you to capture a timestamp using one of three modes: + * + * - **IMMEDIATE_MODE**: Captures the timestamp immediately. + * - **VPU START MODE**: Enqueue a timestamp to be captured the next time the + * current VPU starts. Up to 8 VPU start timestamps may be active at a time + * for a given engine. + * - **VPU DONE MODE**: Enqueue a timestamp to be captured the next time the + * current VPU enters done state. Up to 8 VPU done timestamps may be active at + * a time for a given engine. + * - **DEFER MODE**: Defers the timestamp capture by specifying a barrier group. + * The timestamp will be captured once the commands in the specified barrier + * group have completed. Each barrier group allows one timestamp to be active + * at a time. + * + * The timestamp will be available in DRAM after waiting on any postfence. + * + * @note This command is asynchronous, ensuring it does not block the next command. + */ +struct pva_cmd_capture_timestamp { +#define PVA_CMD_OPCODE_CAPTURE_TIMESTAMP 21U + struct pva_cmd_header header; + uint8_t offset_hi; + uint8_t defer_barrier_group; +#define PVA_CMD_CAPTURE_MODE_IMMEDIATE 0U +#define PVA_CMD_CAPTURE_MODE_VPU_START 1U +#define PVA_CMD_CAPTURE_MODE_VPU_DONE 2U +#define PVA_CMD_CAPTURE_MODE_DEFER 3U + uint8_t capture_mode; + uint8_t pad; + uint32_t resource_id; + uint32_t offset_lo; +}; + +/** Set the address of the status buffer. FW will output detailed command buffer + * status in case of command buffer abort. */ +struct pva_cmd_request_status { +#define PVA_CMD_OPCODE_CAPTURE_STATUS 22U + struct pva_cmd_header header; + uint8_t offset_hi; + uint8_t pad[3]; + uint32_t resource_id; + uint32_t offset_lo; +}; + +/** Blocks until l2ram is available. To prevent deadlock with other command + * buffers, l2ram must be acquired prior to acquiring any engine. It will be + * automatically freed when this command buffer finishes. If persistence is + * required, it must be saved to DRAM. One command buffer may only hold one + * L2SRAM allocation at a time. 
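+ *
+ * Illustrative ordering (a sketch, not a normative sequence):
+ *
+ *   bind_l2sram -> acquire_engine -> ... DMA/VPU work using L2SRAM ... ->
+ *   flush_l2sram (if the contents must persist in DRAM) -> release_l2sram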
*/ +struct pva_cmd_bind_l2sram { +#define PVA_CMD_OPCODE_BIND_L2SRAM 23U + struct pva_cmd_header header; + uint8_t dram_offset_hi; +#define FILL_ON_MISS (1U << 0U) +#define FLUSH_ON_EVICTION (1U << 1U) + uint8_t access_policy; + uint8_t pad[2]; + uint32_t dram_resource_id; + uint32_t dram_offset_lo; + uint32_t l2sram_size; + struct pva_user_dma_allowance user_dma; +}; + +/** Free previously allocated l2ram. This command is asynchronous because it + * needs to wait for all commands that are started before it to complete. */ +struct pva_cmd_release_l2sram { +#define PVA_CMD_OPCODE_RELEASE_L2SRAM 24U + struct pva_cmd_header header; +}; + +/* + * This command writes data to a DRAM region. The DRAM region is described + * by resource ID, offset and size fields. The data to be written is placed + * right after the command struct. For this command to successfully execute, + * the following conditions must be met: + * 1. 'resource_id' should point to a valid resource in DRAM. + * 2. the offset and size fields should add up to be less than or equal to the size of DRAM resource. + */ +struct pva_cmd_write_dram { +#define PVA_CMD_OPCODE_WRITE_DRAM 25U + struct pva_cmd_header header; + uint8_t offset_hi; + uint8_t pad; + uint16_t write_size; + uint32_t resource_id; + uint32_t offset_lo; + /* Followed by write_size bytes, padded to 4 bytes boundary */ +}; + +/** Set this bit to @ref pva_surface_format to indicate if the surface format is + * block linear or pitch linear. + * + * For block linear surfaces, the starting address for a descriptor is: + * IOVA_OF(resource_id) + surface_base_offset + PL2BL(slot_offset + desc_offset). + * + * For pitch linear surfaces, the starting address for a descriptor is: + * IOVA_OF(resource_id) + surface_base_offset + slot_offset + desc_offset + */ +#define PVA_CMD_FLAGS_SURFACE_FORMAT_MSB 0U +#define PVA_CMD_FLAGS_SURFACE_FORMAT_LSB 0U +/** MSB of log2 block height in flags field of the command header */ +#define PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_MSB 3U +/** LSB of log2 block height in flags field of the command header */ +#define PVA_CMD_FLAGS_LOG2_BLOCK_HEIGHT_LSB 1U +/** Bind a DRAM surface to a slot. The surface can be block linear or pitch + * linear. */ +struct pva_cmd_bind_dram_slot { +#define PVA_CMD_OPCODE_BIND_DRAM_SLOT 26U + /** flags field will contain block linear flag and block height */ + struct pva_cmd_header header; + uint8_t dma_set_id; /**< ID of the DMA set */ + uint8_t slot_offset_hi; + uint8_t surface_base_offset_hi; + uint8_t slot_id; /**< ID of slot to bind */ + uint32_t resource_id; /**< Resource ID of the DRAM allocation for the surface */ + uint32_t slot_offset_lo; /**< Per-slot offset in pitch linear domain, from slot base to surface base */ + uint32_t surface_base_offset_lo; /**< Surface base offset in bytes, from surface base to allocation base */ +}; + +struct pva_cmd_bind_vmem_slot { +#define PVA_CMD_OPCODE_BIND_VMEM_SLOT 27U + struct pva_cmd_header header; + uint8_t dma_set_id; + uint8_t slot_id; + uint8_t pad[2]; + uint32_t symbol_id; + uint32_t offset; +}; + +/** @brief Unregisters a resource. + * + * This command immediately removes the specified resource from the resource + * table upon execution. However, FW does not immediately notify KMD to + * deallocate the resource as it may still be in use by other concurrently + * running command buffers in the same context. 
+ * + * The FW takes note of the currently running command buffers and notifies the + * KMD to deallocate the resource once these command buffers have completed + * their execution. + * + * @note If a command buffer in the same context either hangs or executes for an + * extended period, no resources can be effectively freed, potentially leading + * to resource exhaustion. + */ +struct pva_cmd_unregister_resource { +#define PVA_CMD_OPCODE_UNREGISTER_RESOURCE 28U + struct pva_cmd_header header; + uint32_t resource_id; +}; + +/** Write instance parameter to a VMEM symbol. */ +struct pva_cmd_set_vpu_instance_parameter { +#define PVA_CMD_OPCODE_SET_VPU_INSTANCE_PARAMETER 29U + struct pva_cmd_header header; + uint32_t symbol_id; +}; + +struct pva_cmd_run_unit_tests { +#define PVA_CMD_OPCODE_RUN_UNIT_TESTS 30U + struct pva_cmd_header header; +#define PVA_FW_UTESTS_MAX_ARGC 16U + uint8_t argc; + uint8_t pad[3]; + uint32_t in_resource_id; + uint32_t in_offset; + uint32_t in_size; + uint32_t out_resource_id; + uint32_t out_offset; + uint32_t out_size; +}; + +struct pva_cmd_set_vpu_print_cb { +#define PVA_CMD_OPCODE_SET_VPU_PRINT_CB 31U + struct pva_cmd_header header; + uint32_t cb_resource_id; + uint32_t cb_offset; +}; + +struct pva_cmd_invalidate_l2sram { +#define PVA_CMD_OPCODE_INVALIDATE_L2SRAM 32U + struct pva_cmd_header header; + uint8_t dram_offset_hi; + uint8_t pad[3]; + uint32_t dram_resource_id; + uint32_t dram_offset_lo; + uint32_t l2sram_size; +}; + +struct pva_cmd_flush_l2sram { +#define PVA_CMD_OPCODE_FLUSH_L2SRAM 33U + struct pva_cmd_header header; + struct pva_user_dma_allowance user_dma; +}; + +struct pva_cmd_err_inject { +#define PVA_CMD_OPCODE_ERR_INJECT 34U + struct pva_cmd_header header; + enum pva_error_inject_codes err_inject_code; +}; + +struct pva_cmd_patch_l2sram_offset { +#define PVA_CMD_OPCODE_PATCH_L2SRAM_OFFSET 35U + struct pva_cmd_header header; + uint8_t dma_set_id; + uint8_t slot_id; + uint8_t pad[2]; + uint32_t offset; +}; + +/** After retiring a barrier group, all future commands which refer to that barrier group id will be + * mapped to a new logical barrier group. This allows re-using barrier ids within a command buffer. + */ +struct pva_cmd_retire_barrier_group { +#define PVA_CMD_OPCODE_RETIRE_BARRIER_GROUP 36U + struct pva_cmd_header header; +}; + +#define PVA_CMD_OPCODE_COUNT 37U + +struct pva_cmd_init_resource_table { +#define PVA_CMD_OPCODE_INIT_RESOURCE_TABLE (0U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + /**< Resource table id is from 0 to 7, 0 is the device's resource table, + * 1-7 are users'. 
*/ + uint8_t resource_table_id; + uint8_t resource_table_addr_hi; + uint8_t pad[2]; + uint32_t resource_table_addr_lo; + uint32_t max_n_entries; +}; + +struct pva_cmd_deinit_resource_table { +#define PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE (1U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t resource_table_id; + uint8_t pad[3]; +}; + +struct pva_cmd_update_resource_table { +#define PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE (2U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t resource_table_id; + uint8_t pad[3]; + uint32_t resource_id; + struct pva_resource_entry entry; +}; + +struct pva_cmd_init_queue { +#define PVA_CMD_OPCODE_INIT_QUEUE (3U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t ccq_id; + uint8_t queue_id; + uint8_t queue_addr_hi; + uint8_t pad; + uint32_t queue_addr_lo; + uint32_t max_n_submits; +}; + +struct pva_cmd_deinit_queue { +#define PVA_CMD_OPCODE_DEINIT_QUEUE (4U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t ccq_id; + uint8_t queue_id; + uint8_t pad[2]; +}; + +struct pva_cmd_enable_fw_profiling { +#define PVA_CMD_OPCODE_ENABLE_FW_PROFILING (5U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t buffer_offset_hi; + uint8_t timestamp_type; + uint8_t pad[2]; + uint32_t buffer_resource_id; + uint32_t buffer_size; + uint32_t buffer_offset_lo; + uint32_t filter; +}; + +struct pva_cmd_disable_fw_profiling { +#define PVA_CMD_OPCODE_DISABLE_FW_PROFILING (6U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; +}; + +struct pva_cmd_get_tegra_stats { +#define PVA_CMD_OPCODE_GET_TEGRA_STATS (7U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint8_t buffer_offset_hi; + bool enabled; + uint8_t pad[2]; + uint32_t buffer_resource_id; + uint32_t buffer_size; + uint32_t buffer_offset_lo; +}; + +struct pva_cmd_suspend_fw { +#define PVA_CMD_OPCODE_SUSPEND_FW (8U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; +}; + +struct pva_cmd_resume_fw { +#define PVA_CMD_OPCODE_RESUME_FW (9U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; +}; + +#define PVA_CMD_PRIV_OPCODE_COUNT 10U + +#define PVA_MAX_CMDBUF_CHUNK_LEN 1024 +#define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN) + +#endif // PVA_API_CMDBUF_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h b/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h new file mode 100644 index 00000000..5a912208 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_cuda.h @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +#ifndef PVA_API_CUDA_H +#define PVA_API_CUDA_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cuda.h" +#include "pva_api_types.h" + +/** + * @brief Structure for cuExtend queue data needed for command submission. + */ +struct pva_cuextend_queue_data { + /*! Holds a pointer to pva queue object */ + struct pva_queue *queue; + /*! 
Holds engine affinity for command submission*/ + uint32_t affinity; +}; + +/** + * @brief Function type for cuExtend register memory callback + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[in] mem The pointer to a \ref pva_memory object. This register memory callback shall transfer the + * ownership of the memory to the client, and it is client's responsibility to release the memory. + * @param[in] cuda_ptr CUDA device pointer. + * @param[in] cached_flags The cached flags for the memory. + * @return \ref pva_error The completion status of register memory operation. + */ +typedef enum pva_error (*pva_cuextend_memory_register)(void *callback_args, + struct pva_memory *mem, + void *cuda_ptr, + uint32_t cached_flags); + +/** + * @brief Function type for cuExtend unregister memory callback. + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[in] cuda_ptr CUDA device pointer. + * @return \ref pva_error The completion status of unregister memory operation. + */ +typedef enum pva_error (*pva_cuextend_memory_unregister)(void *callback_args, + void *cuda_ptr); + +/** + * @brief Function type for cuExtend register stream callback. + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[out] stream_payload Client data associated with a CUDA stream. + * @param[in] flags Reserved for future. Must set to 0. + * @return \ref pva_error The completion status of register stream operation. + */ +typedef enum pva_error (*pva_cuextend_stream_register)(void *callback_args, + void **stream_payload, + uint64_t flags); + +/** + * @brief Function type for cuExtend unregister stream callback. + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register. + * @param[in] flags Reserved for future. Must set to 0. + * @return \ref pva_error The completion status of unregister stream operation. + */ +typedef enum pva_error (*pva_cuextend_stream_unregister)(void *callback_args, + void *stream_payload, + uint64_t flags); + +/** + * @brief Function type for cuExtend acquire queue callback. + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register. + * @param[out] queue_data Output pointer to a pva_cuextend_queue_data object. + * @return \ref pva_error The completion status of acquire queue operation. + */ +typedef enum pva_error (*pva_cuextend_queue_acquire)( + void *callback_args, void *stream_payload, + struct pva_cuextend_queue_data **queue_data); + +/** + * @brief Function type for cuExtend release queue callback. + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[in] stream_payload Client data returned by \ref pva_cuextend_stream_register. + * @return \ref pva_error The completion status of release queue operation. + */ +typedef enum pva_error (*pva_cuextend_queue_release)(void *callback_args, + void *stream_payload, + void *queue_data); + +/** + * @brief Function type for retrieving error code from cuExtend. + * + * @param[in] teardown_ctx Pointer to the cuExtend context pointer. 
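+ *
+ * Typically polled from the client's teardown callback, e.g. (illustrative
+ * sketch; tasks_pending is a hypothetical client-side check):
+ *
+ *   while (tasks_pending(callback_args)) {
+ *       if (get_error(teardown_ctx) != PVA_SUCCESS)
+ *           break;  // CUDA reported an error: stop waiting
+ *   }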
+ */ +typedef enum pva_error (*pva_cuextend_get_error)(void *teardown_ctx); + +/** + * @brief Function type for cuExtend teardown callback. + * + * It is expected that the client does the following necessary actions in this callback: + * Blocking wait for all pending tasks on all queues. In the wait loop, periodically check for CUDA error by calling \ref pva_cuextend_get_error, + * hop out then loop if there is an error. + * + * @param[in] callback_args Pointer to the callback arguments provided by client during cuExtend initialization. + * @param[in] teardown_ctx Pointer to a teardown context passed by cuExtend teardown callback. + * @param[in] get_error Function pointer to get CUDA error function. + * @return \ref pva_error The completion status of release queue operation. + */ +typedef enum pva_error (*pva_cuextend_teardown)( + void *callback_args, void *teardown_ctx, + pva_cuextend_get_error get_error); + +/** + * @brief Structure for cuExtend callbacks provided by the caller during cuExtend initialization. + */ +struct pva_cuextend_callbacks { + /*! Holds the register memory callback */ + pva_cuextend_memory_register mem_reg; + /*! Holds the unregister memory callback */ + pva_cuextend_memory_unregister mem_unreg; + /*! Holds the register stream callback */ + pva_cuextend_stream_register stream_reg; + /*! Holds the unregister stream callback */ + pva_cuextend_stream_unregister stream_unreg; + /*! Holds the acquire queue callback */ + pva_cuextend_queue_acquire queue_acquire; + /*! Holds the release queue callback */ + pva_cuextend_queue_release queue_release; + /*! Holds the teardown callback */ + pva_cuextend_teardown teardown; + /*! Pointer to the callback arguments provided by client during cuExtend initialization */ + void *args; +}; + +/** + * @brief Initialize cuExtend context. + * + * This function must be called before any other cuExtend functions. It does the following: + * + * 1. Load cuExtend library and retrieves function pointers to the library's exported functions. + * 2. Add PVA to CUDA unified context model. + * 3. Initialize the opaque cuExtend impl pointer. + * + * @param[in] ctx Pointer to a PVA context object. + * @param[in] callbacks Pointer to CUDA interop callbacks. + * @return \ref pva_error The completion status of the initialization operation. + */ +enum pva_error pva_cuextend_init(struct pva_context *ctx, + struct pva_cuextend_callbacks *callbacks); + +/** + * @brief De-initialize cuExtend context. + * + * This function must be called at the context destructor in the client. It does the following: + * + * 1. Clear the opaque cuExtend impl pointer in pva context object. + * 2. Remove PVA to from cuExtend context. + * 3. Unload cuExtend library and clear all the function pointers. + * + * @param[in] ctx Pointer to a PVA context object. + * @return \ref pva_error The completion status of the de-initialization operation. + */ +enum pva_error pva_cuextend_deinit(struct pva_context *ctx); + +/** + * @brief Import a memory region from a CUDA context into a PVA context. + * + * @param[in] ctx Pointer to a PVA context structure. + * @param[in] cuda_ptr Pointer to CUDA memory provided by client. + * @param[in] size Size of the memory region. + * @param[in] access_type Access flag provided by client. + * @param[out] out_mem Pointer to the imported memory object. + * @param[out] cached_flags Output cached flags for the memory. + * @return \ref pva_error The completion status of the initialization operation. 
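+ *
+ * Illustrative usage (error handling omitted; assumes the same access-mode
+ * values as \ref pva_memory_cpu_map, e.g. PVA_ACCESS_RW):
+ *
+ *   struct pva_memory *mem = NULL;
+ *   uint32_t cached_flags = 0U;
+ *   err = pva_cuextend_memory_import(ctx, cuda_ptr, size, PVA_ACCESS_RW,
+ *                                    &mem, &cached_flags);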
+ */ +enum pva_error pva_cuextend_memory_import(struct pva_context *ctx, + void *cuda_ptr, uint64_t size, + uint32_t access_mode, + struct pva_memory **out_mem, + uint32_t *cached_flags); + +/** + * @brief Submit a batch of command buffers via a CUDA stream. + * + * @param[in] queue Pointer to the queue. If queue is not NULL, this API will try to submit the client tasks to this queue directly. + * Otherwise, it will call queue_acquire callback to query a pva_queue object from stream payload, and then submit + * the tasks to the queried queue. + * @param[in] stream A CUDA stream. + * @param[in] submit_infos Array of submit info structures. + * @param[in] count Number of submit info structures. + * @param[in] timeout_ms Timeout in milliseconds. PVA_TIMEOUT_INF for infinite. + * @return \ref pva_error The completion status of the submit operation. + * + * @note Concurrent submission to the same queue needs to be serialized by the + * caller. + */ +enum pva_error +pva_cuextend_cmdbuf_batch_submit(struct pva_queue *queue, CUstream stream, + struct pva_cmdbuf_submit_info *submit_infos, + uint32_t count, uint64_t timeout_ms); + +#ifdef __cplusplus +} +#endif + +#endif // PVA_API_CUDA_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_dma.h b/drivers/video/tegra/host/pva/src/include/pva_api_dma.h new file mode 100644 index 00000000..eccbd02e --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_dma.h @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#ifndef PVA_API_DMA_H +#define PVA_API_DMA_H +#include "pva_api_types.h" + +/** Bit indices for VPU GPIO triggers */ +enum pva_gpio_bit { + GPIO_VPU_CFG_BIT = 4U, + GPIO_READ0_BIT = 16U, + GPIO_READ1_BIT = 17U, + GPIO_READ2_BIT = 18U, + GPIO_READ3_BIT = 19U, + GPIO_READ4_BIT = 20U, + GPIO_READ5_BIT = 21U, + GPIO_READ6_BIT = 22U, + GPIO_WRITE0_BIT = 23U, + GPIO_WRITE1_BIT = 24U, + GPIO_WRITE2_BIT = 25U, + GPIO_WRITE3_BIT = 26U, + GPIO_WRITE4_BIT = 27U, + GPIO_WRITE5_BIT = 28U, + GPIO_WRITE6_BIT = 29U +}; + +enum pva_dma_descriptor_id { + PVA_DMA_DESC_NONE = 0, + PVA_DMA_DESC0 = 1, + PVA_DMA_DESC1 = 2, + PVA_DMA_DESC2 = 3, + PVA_DMA_DESC3 = 4, + PVA_DMA_DESC4 = 5, + PVA_DMA_DESC5 = 6, + PVA_DMA_DESC6 = 7, + PVA_DMA_DESC7 = 8, + PVA_DMA_DESC8 = 9, + PVA_DMA_DESC9 = 10, + PVA_DMA_DESC10 = 11, + PVA_DMA_DESC11 = 12, + PVA_DMA_DESC12 = 13, + PVA_DMA_DESC13 = 14, + PVA_DMA_DESC14 = 15, + PVA_DMA_DESC15 = 16, + PVA_DMA_DESC16 = 17, + PVA_DMA_DESC17 = 18, + PVA_DMA_DESC18 = 19, + PVA_DMA_DESC19 = 20, + PVA_DMA_DESC20 = 21, + PVA_DMA_DESC21 = 22, + PVA_DMA_DESC22 = 23, + PVA_DMA_DESC23 = 24, + PVA_DMA_DESC24 = 25, + PVA_DMA_DESC25 = 26, + PVA_DMA_DESC26 = 27, + PVA_DMA_DESC27 = 28, + PVA_DMA_DESC28 = 29, + PVA_DMA_DESC29 = 30, + PVA_DMA_DESC30 = 31, + PVA_DMA_DESC31 = 32, + PVA_DMA_DESC32 = 33, + PVA_DMA_DESC33 = 34, + PVA_DMA_DESC34 = 35, + PVA_DMA_DESC35 = 36, + PVA_DMA_DESC36 = 37, + PVA_DMA_DESC37 = 38, + PVA_DMA_DESC38 = 39, + PVA_DMA_DESC39 = 40, + PVA_DMA_DESC40 = 41, + PVA_DMA_DESC41 = 42, + PVA_DMA_DESC42 = 43, + PVA_DMA_DESC43 = 44, + PVA_DMA_DESC44 = 45, + PVA_DMA_DESC45 = 46, + PVA_DMA_DESC46 = 47, + PVA_DMA_DESC47 = 48, + PVA_DMA_DESC48 = 49, + PVA_DMA_DESC49 = 50, + PVA_DMA_DESC50 = 51, + PVA_DMA_DESC51 = 52, + PVA_DMA_DESC52 = 53, + PVA_DMA_DESC53 = 54, + PVA_DMA_DESC54 = 55, + PVA_DMA_DESC55 = 56, + PVA_DMA_DESC56 = 57, + PVA_DMA_DESC57 = 58, + PVA_DMA_DESC58 = 59, + PVA_DMA_DESC59 = 60, + PVA_DMA_DESC60 = 61, + PVA_DMA_DESC61 = 62, + PVA_DMA_DESC62 = 63, + PVA_DMA_DESC63 = 64 +}; + +/** + * The values of the enum members conform to the definitions of DMA descriptors' + * trig_vpu_events field. Therefore, they can be assigned to trig_vpu_events + * directly. + */ +enum pva_dma_trigger { + PVA_DMA_NO_TRIG = 0, + PVA_DMA_TRIG_READ0, + PVA_DMA_TRIG_WRITE0, + PVA_DMA_TRIG_VPU_CFG, + PVA_DMA_TRIG_READ1, + PVA_DMA_TRIG_WRITE1, + PVA_DMA_TRIG_READ2, + PVA_DMA_TRIG_WRITE2, + PVA_DMA_TRIG_READ3, + PVA_DMA_TRIG_WRITE3, + PVA_DMA_TRIG_READ4, + PVA_DMA_TRIG_WRITE4, + PVA_DMA_TRIG_READ5, + PVA_DMA_TRIG_WRITE5, + PVA_DMA_TRIG_READ6, + PVA_DMA_TRIG_WRITE6, + PVA_DMA_TRIG_HWSEQ_RD, + PVA_DMA_TRIG_HWSEQ_WR, +}; + +enum pva_dma_trigger_mode { + PVA_DMA_TRIG_MODE_DIS = 0, + PVA_DMA_TRIG_MODE_4TH_DIM, + PVA_DMA_TRIG_MODE_3RD_DIM, + PVA_DMA_TRIG_MODE_TILE +}; + +enum pva_dma_transfer_mode { + PVA_DMA_TRANS_MODE_INVALID = 0, + PVA_DMA_TRANS_MODE_DRAM = 1, + PVA_DMA_TRANS_MODE_VMEM = 2, + PVA_DMA_TRANS_MODE_L2SRAM = 3, + PVA_DMA_TRANS_MODE_TCM = 4, + /** MMIO is valid as dst in VPU config mode only */ + PVA_DMA_TRANS_MODE_MMIO = 5, + PVA_DMA_TRANS_MODE_RSVD = 5, + /** VPU config mode, valid for src only */ + PVA_DMA_TRANS_MODE_VPUCFG = 7 +}; + +struct pva_dma_transfer_attr { + uint8_t rpt1; + uint8_t rpt2; + uint8_t rpt3; + uint8_t cb_enable; + uint8_t transfer_mode; + /** When dynamic slot flag is set, it means the memory location will be + * relocated by commands. 
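+ *
+ * For example (illustrative; assumes the slot id is carried in the
+ * PVA_DMA_SLOT_ID_MASK bits): a dynamic slot with id 3 would be encoded as
+ * (PVA_DMA_DYNAMIC_SLOT | 3u), and a static slot with id 3 as
+ * (PVA_DMA_STATIC_SLOT | 3u).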
+ */ +#define PVA_DMA_DYNAMIC_SLOT (1 << 15) +#define PVA_DMA_STATIC_SLOT (1 << 14) +#define PVA_DMA_SLOT_INVALID 0 +#define PVA_DMA_SLOT_ID_MASK 0xFF +#define PVA_DMA_MAX_NUM_SLOTS 256 + uint16_t slot; + /** Line pitch in pixels */ + uint16_t line_pitch; + uint32_t cb_start; + uint32_t cb_size; + int32_t adv1; + int32_t adv2; + int32_t adv3; + uint64_t offset; +}; + +struct pva_dma_descriptor { + /** + * Linked descriptor ID + * + * - 0: No linked descriptor + * - N (> 0): Linking to descriptor N - 1 in the descriptor array + */ + uint8_t link_desc_id; + uint8_t px; + uint8_t py; + /** enum pva_dma_trigger_mode */ + uint8_t trig_event_mode; + /** Trigger from enum pva_dma_trigger */ + uint8_t trig_vpu_events; + uint8_t desc_reload_enable; + /** + * Log2(number bytes per pixel). + * + * - 0: 1 byte per pixel + * - 1: 2 bytes per pixel + * - 2: 4 bytes per pixel + * - others: invalid + */ + uint8_t log2_pixel_size; + uint8_t px_direction; + uint8_t py_direction; + uint8_t boundary_pixel_extension; + /** TCM transfer size */ + uint8_t tts; + /** + * - 0: transfer true completion disabled + * - 1: transfer true completion enabled + */ + uint8_t trans_true_completion; + uint8_t prefetch_enable; + + uint16_t tx; + uint16_t ty; + uint16_t dst2_slot; + uint32_t dst2_offset; + struct pva_dma_transfer_attr src; + struct pva_dma_transfer_attr dst; +}; + +struct pva_dma_channel { + /** + * Starting descriptor index in the descriptor array + * + * Valid range is [0, max_num_descriptors - 1]. This is different from + * link_desc_id field, where 0 means no linked descriptor. + */ + uint8_t desc_index; + uint8_t vdb_count; + uint8_t vdb_offset; + uint8_t req_per_grant; + uint8_t prefetch_enable; + uint8_t ch_rep_factor; + uint8_t hwseq_enable; + uint8_t hwseq_traversal_order; + uint8_t hwseq_tx_select; + uint8_t hwseq_trigger_done; + uint8_t hwseq_frame_count; + uint8_t hwseq_con_frame_seq; + uint16_t hwseq_start; + uint16_t hwseq_end; + uint16_t adb_count; + uint16_t adb_offset; + /*! + * Holds the trigger signal this channel will react to. + * + * IAS: + * DMA_COMMON_DMA_OUTPUT_ENABLEn (4 Bytes) + * + * Mapping: + * chanId corresponding to this structure is allocated by KMD. 
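+	 * Each bit of outputEnableMask is placed into one of the
+	 * DMA_COMMON_DMA_OUTPUT_ENABLEn registers, at either bit[chanId] or
+	 * bit[16 + chanId], as listed below: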
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE0.bit[chanId] = outputEnableMask.bit[0];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE0.bit[16 + chanId] = outputEnableMask.bit[1];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE1.bit[chanId] = outputEnableMask.bit[2];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE1.bit[16 + chanId] = outputEnableMask.bit[3];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE2.bit[chanId] = outputEnableMask.bit[4];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE2.bit[16 + chanId] = outputEnableMask.bit[5];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE3.bit[chanId] = outputEnableMask.bit[6];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE3.bit[16 + chanId] = outputEnableMask.bit[7];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE4.bit[chanId] = outputEnableMask.bit[8];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE4.bit[16 + chanId] = outputEnableMask.bit[9];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE5.bit[chanId] = outputEnableMask.bit[10];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE5.bit[16 + chanId] = outputEnableMask.bit[11];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE6.bit[chanId] = outputEnableMask.bit[12];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE6.bit[16 + chanId] = outputEnableMask.bit[13];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE7.bit[chanId] = outputEnableMask.bit[14];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE8.bit[chanId] = outputEnableMask.bit[15];
+	 * DMA_COMMON_DMA_OUTPUT_ENABLE8.bit[16 + chanId] = outputEnableMask.bit[16];
+	 */
+	uint32_t output_enable_mask;
+	uint32_t pad_value;
+};
+
+struct pva_dma_config_header {
+/* To make the allocation and tracking of DMA resources efficient, DMA resources
+ * are allocated in groups. For example, descriptors may be allocated in groups of 4, which
+ * means that every allocation of descriptors will start at an alignment of 4. The following
+ * macros control the alignment/grouping requirement of DMA resources.
+ */
+// TODO: Add compile-time asserts to ensure the following alignment requirements don't result
+// in fractional resource partitions?
+#define PVA_DMA_CHANNEL_ALIGNMENT 1
+#define PVA_DMA_DESCRIPTOR_ALIGNMENT 4
+#define PVA_DMA_ADB_ALIGNMENT 16
+#define PVA_DMA_HWSEQ_WORD_ALIGNMENT 128
+	uint8_t base_channel;
+	uint8_t base_descriptor;
+	uint8_t num_channels;
+	uint8_t num_descriptors;
+
+	uint16_t num_static_slots;
+	uint16_t num_dynamic_slots;
+
+	uint16_t base_hwseq_word;
+	uint16_t num_hwseq_words;
+	uint32_t vpu_exec_resource_id;
+
+	/* For the serialized version of pva_dma_config, the following fields follow
+	 * immediately after this header.
The starting addresses of these fields + * must be aligned to 8 bytes */ + + /* An array of hwseq words */ + /* An array of pva_dma_channel */ + /* An array of pva_dma_descriptor */ + /* An array of pva_dma_slot_buffer */ +}; + +enum pva_dma_static_binding_type { + PVA_DMA_STATIC_BINDING_INVALID = 0, + PVA_DMA_STATIC_BINDING_DRAM, + PVA_DMA_STATIC_BINDING_VMEM, +}; + +/** Max block height is 32 GOB */ +#define PVA_DMA_MAX_LOG2_BLOCK_HEIGHT 5 + +struct pva_dma_dram_binding { + /** enum pva_surface_format */ + uint8_t surface_format; + uint8_t log2_block_height; + uint32_t resource_id; + uint64_t surface_base_offset; + uint64_t slot_offset; +}; + +struct pva_dma_vmem_binding { + struct pva_vmem_addr addr; +}; + +struct pva_dma_static_binding { + /** enum pva_dma_static_binding_type */ + uint8_t type; + union { + struct pva_dma_dram_binding dram; + struct pva_dma_vmem_binding vmem; + }; +}; + +struct pva_dma_config { + struct pva_dma_config_header header; + uint32_t *hwseq_words; + struct pva_dma_channel *channels; + struct pva_dma_descriptor *descriptors; + struct pva_dma_static_binding *static_bindings; +}; + +#endif // PVA_API_DMA_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_nvsci.h b/drivers/video/tegra/host/pva/src/include/pva_api_nvsci.h new file mode 100644 index 00000000..90d21d9b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_nvsci.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_API_NVSCI_H +#define PVA_API_NVSCI_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pva_api_types.h" +#include "nvscibuf.h" +#include "nvscisync.h" + +/** + * @brief Fill NvSciBuf attributes required by PVA. + * + * @param[out] scibuf_attr The NvSciBuf attribute list to be filled with PVA-specific attributes. + */ +enum pva_error pva_nvsci_buf_fill_attrs(NvSciBufAttrList scibuf_attr); + +/** + * @brief Fill NvSciSync attributes required by PVA. + * + * @param[in] access_mode Access mode for the sync object, determining how PVA + * will interact with the sync object (read, write, etc.) + * @param[out] attr_list The NvSciSync attribute list to be populated with attributes. + */ +enum pva_error pva_nvsci_sync_fill_attrs(uint32_t access_mode, + NvSciSyncAttrList attr_list); + +/** + * @brief Holds the metadata for a NvSci plane. + */ +struct pva_plane_attrs { + uint32_t line_pitch; + uint32_t width_in_bytes; + uint32_t height; + uint64_t offset; +}; + +#define PVA_SURFACE_ATTRS_MAX_NUM_PLANES 6U + +/** + * @brief Holds the metadata for a NvSci surface. + */ +struct pva_surface_attrs { + bool is_surface; + enum pva_surface_format format; + uint32_t n_planes; + uint64_t size; + struct pva_plane_attrs planes[PVA_SURFACE_ATTRS_MAX_NUM_PLANES]; + uint8_t log2_gobs_per_block_y[PVA_SURFACE_ATTRS_MAX_NUM_PLANES]; +}; + +/** + * @brief Import an NvSciBuf object into PVA. + * + * This function imports an NvSciBuf buffer object into PVA for further + * operations. It creates a PVA memory object representing the buffer and + * retrieves surface information about the buffer. 
+ * + * The caller is responsible for freeing the PVA memory object. + * + * @param[in] obj The NvSciBuf object to be imported. + * @param[in] access_mode Access mode for the buffer, determining the PVA's permissions for interaction. + * @param[out] out_obj A pointer to the PVA memory object representing the imported buffer. + * @param[out] out_surf_info Surface metadata of the buffer + */ +enum pva_error pva_nvsci_buf_import(NvSciBufObj obj, uint32_t access_mode, + struct pva_memory **out_obj, + struct pva_surface_attrs *out_surf_info); + +/** + * @brief An opaque object representing an imported NvSciSync object. + */ +struct pva_nvsci_syncobj; + +/** + * @brief Describes the attributes of an imported NvSciSync object. + * + * This structure contains details about the memory buffers associated with the + * imported NvSciSync object. + */ +struct pva_nvsci_syncobj_attrs { + struct pva_memory * + semaphore_buf; /**< Pointer to the semaphore memory buffer; NULL if syncpoints are used. */ + struct pva_memory * + timestamp_buf; /**< Pointer to the timestamp memory buffer; NULL if unused. */ + struct pva_memory + *status_buf; /**< Pointer to the status memory buffer. */ +}; + +/** + * @brief Import an NvSciSync object into the PVA. + * + * This function imports an NvSciSync object into PVA, enabling it to be used + * for synchronization of operations. + * + * @param[in] ctx The PVA context in which the sync object is to be used. + * @param[in] nvsci_obj The NvSciSync object to be imported. + * @param[in] access_mode The access mode for the sync object, indicating how PVA will use it. + * @param[out] out_obj A pointer to the resulting PVA sync object handle. + */ +enum pva_error pva_nvsci_syncobj_import(struct pva_context *ctx, + NvSciSyncObj nvsci_obj, + uint32_t access_mode, + struct pva_nvsci_syncobj **out_obj); + +/** + * @brief Retrieve the attributes of an imported NvSciSync object. + * + * This function fills in the provided attribute structure with details from + * the imported NvSciSync object, including information relevant for semaphores, + * timestamps, and status. + * + * @param[in] syncobj The NvSciSync object whose attributes are to be retrieved. + * @param[out] out_attrs The structure to be filled with the sync object's attributes. + */ +void pva_nvsci_syncobj_get_attrs(struct pva_nvsci_syncobj const *syncobj, + struct pva_nvsci_syncobj_attrs *out_attrs); + +/** + * @brief Free an imported NvSciSync object. + * + * This function releases the resources associated with a PVA NvSciSync object, + * including PVA memory objects for semaphores, timestamps and statuses. + * + * @param[in] syncobj The PVA sync object to be freed. + */ +void pva_nvsci_syncobj_free(struct pva_nvsci_syncobj *syncobj); + +/** + * @brief Get the next status slot for a new fence. + * + * @param[in] syncobj The imported NvSciSyncObj + * @param[out] out_status_slot The status slot index for the next fence. + */ +enum pva_error pva_nvsci_syncobj_next_status(struct pva_nvsci_syncobj *syncobj, + uint32_t *out_status_slot); + +/** + * @brief Get the next timestamp slot for a new fence. + * + * @param[in] syncobj The imported NvSciSyncObj + * @param[out] out_timestamp_slot The timestamp slot index for the next fence. + */ +enum pva_error +pva_nvsci_syncobj_next_timestamp(struct pva_nvsci_syncobj *syncobj, + uint32_t *out_timestamp_slot); + +/** + * @brief Fence data for import and export. + */ +struct pva_nvsci_fence_info { + uint32_t index; /**< The index of the fence. 
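+				 It is used to fill the index field of
+				 struct pva_fence: typically the syncpoint ID,
+				 or the semaphore index for semaphore-backed
+				 sync objects.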
*/ + uint32_t value; /**< The value of the fence. */ + uint32_t status_slot; /**< The slot index for the status. */ + uint32_t timestamp_slot; /**< The slot index for the timestamp. */ +}; +/** + * @brief Import a NvSciSync fence into a PVA fence. + * + * @param[in] nvsci_fence The NvSciSync fence to be imported. + * @param[in] pva_syncobj The previously imported NvSciSyncObj that's associated with the fence. + * @param[out] out_fence_info The information about the NvSci fence. It can be used to fill a pva_fence. + * + * @note This function only fills the index and value field of the pva_fence. + * The user needs to set the semaphore resource ID if the sync object is a + * semaphore. + * + */ +enum pva_error +pva_nvsci_fence_import(NvSciSyncFence const *nvsci_fence, + struct pva_nvsci_syncobj const *pva_syncobj, + struct pva_nvsci_fence_info *out_fence_info); + +/** + * @brief Export a PVA fence into an NvSciSync fence. + * + * @param[in] fence_info The information about the fence to be exported. + * @param[in] syncobj The previously imported NvSciSyncObj that's associated with the fence. + * @param[out] out_nvsci_fence The resulting NvSciSync fence object. + */ +enum pva_error +pva_nvsci_fence_export(struct pva_nvsci_fence_info const *fence_info, + struct pva_nvsci_syncobj const *syncobj, + NvSciSyncFence *out_nvsci_fence); + +#ifdef __cplusplus +} +#endif + +#endif // PVA_API_NVSCI_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_types.h b/drivers/video/tegra/host/pva/src/include/pva_api_types.h new file mode 100644 index 00000000..ccfe04e0 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_types.h @@ -0,0 +1,396 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#ifndef PVA_API_TYPES_H +#define PVA_API_TYPES_H +#if !defined(__KERNEL__) +#include +#include +#include +#define container_of(ptr, type, member) \ + (type *)((char *)(ptr) - (char *)&((type *)0)->member) +#else +#include +#include +#include +#include +#define UINT64_MAX U64_MAX +#define UINT32_MAX U32_MAX +#endif + +#ifndef NULL +#define NULL ((void *)0) +#endif + +#define FOREACH_ERR(ACT) \ + ACT(PVA_SUCCESS) \ + ACT(PVA_UNKNOWN_ERROR) \ + ACT(PVA_BAD_PARAMETER_ERROR) \ + ACT(PVA_NOT_IMPL) \ + ACT(PVA_NOENT) \ + ACT(PVA_NOMEM) \ + ACT(PVA_INVAL) \ + ACT(PVA_TIMEDOUT) \ + ACT(PVA_INTERNAL) \ + ACT(PVA_CMDBUF_NOT_FOUND) \ + ACT(PVA_CMDBUF_INVALID) \ + ACT(PVA_CMDBUF_TOO_LARGE) \ + ACT(PVA_RES_OUT_OF_RANGE) \ + ACT(PVA_AGAIN) \ + ACT(PVA_NO_RESOURCE_ID) \ + ACT(PVA_INVALID_RESOURCE) \ + ACT(PVA_INVALID_RESOURCE_SIZE) \ + ACT(PVA_INVALID_RESOURCE_ALIGNMENT) \ + ACT(PVA_QUEUE_FULL) \ + ACT(PVA_INVALID_IOVA) \ + ACT(PVA_NO_PERM) \ + ACT(PVA_INVALID_CMD_OPCODE) \ + ACT(PVA_BUF_OUT_OF_RANGE) \ + ACT(PVA_CMDBUF_NO_BEGIN) \ + ACT(PVA_NO_CCQ) \ + ACT(PVA_INPUT_STATUS_ERROR) \ + ACT(PVA_ENOSPC) \ + ACT(PVA_EACCES) \ + ACT(PVA_ERANGE) \ + ACT(PVA_BAD_SURFACE_BASE_ALIGNMENT) \ + ACT(PVA_BAD_DESC_ADDR_ALIGNMENT) \ + ACT(PVA_INVALID_DMA_CONFIG) \ + ACT(PVA_INVALID_SYMBOL) \ + ACT(PVA_INVALID_BINDING) \ + ACT(PVA_EINTR) \ + ACT(PVA_FILL_NVSCIBUF_ATTRS_FAILED) \ + ACT(PVA_NVSCIBUF_SET_ATTR_FAILED) \ + ACT(PVA_IMPORT_FROM_NVSCIBUF_FAILED) \ + ACT(PVA_NVSCISYNC_SET_ATTR_FAILED) \ + ACT(PVA_RETRIEVE_DATA_FROM_NVSCISYNC_FAILED) \ + ACT(PVA_UPDATE_DATA_TO_NVSCISYNC_FAILED) \ + ACT(PVA_UNSUPPORTED_NVSCISYNC_TIMESTAMP_FORMAT) \ + ACT(PVA_INVALID_NVSCISYNC_FENCE) \ + ACT(PVA_ERR_CMD_NOT_SUPPORTED) \ + ACT(PVA_CUDA_INITIALIZED) \ + ACT(PVA_CUDA_LOAD_LIBRARY_FAILED) \ + ACT(PVA_CUDA_ADD_CLIENT_FAILED) \ + ACT(PVA_CUDA_REMOVE_CLIENT_FAILED) \ + ACT(PVA_CUDA_INIT_FAILED) \ + ACT(PVA_CUDA_SUBMIT_FAILED) \ + ACT(PVA_CUDA_GET_RM_HANDLE_FAILED) \ + ACT(PVA_CUDA_INTERNAL_ERROR) \ + ACT(PVA_ERR_CMD_INVALID_VPU_STATE) \ + ACT(PVA_ERR_CMD_VMEM_BUF_OUT_OF_RANGE) \ + ACT(PVA_ERR_CMD_L2SRAM_BUF_OUT_OF_RANGE) \ + ACT(PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE) \ + ACT(PVA_ERR_CMD_INVALID_BLOCK_HEIGHT) \ + ACT(PVA_ERR_CMD_PAYLOAD_TOO_SMALL) \ + ACT(PVA_ERR_CMD_ENGINE_NOT_ACQUIRED) \ + ACT(PVA_ERR_CMD_INVALID_SYMBOL_TYPE) \ + ACT(PVA_ERR_CMD_INVALID_ENGINE) \ + ACT(PVA_ERR_CMD_INVALID_DMA_SET_ID) \ + ACT(PVA_ERR_CMD_INVALID_DMA_SLOT_ID) \ + ACT(PVA_ERR_CMD_INVALID_DMA_SLOT_TYPE) \ + ACT(PVA_ERR_CMD_INVALID_USER_ALLOWANCE) \ + ACT(PVA_ERR_CMD_INCOMPATIBLE_RESOURCE) \ + ACT(PVA_ERR_CMD_INSUFFICIENT_PRIVILEGE) \ + ACT(PVA_ERR_CMD_INVALID_BARRIER_ID) \ + ACT(PVA_ERR_CMD_CAPTURE_SLOTS_EXCEEDED) \ + ACT(PVA_ERR_CMD_INVALID_CAPTURE_MODE) \ + ACT(PVA_ERR_CMD_INVALID_L2SRAM_POLICY) \ + ACT(PVA_ERR_FW_DMA0_IRQ_ENABLE_FAILED) \ + ACT(PVA_ERR_FW_DMA1_IRQ_ENABLE_FAILED) \ + ACT(PVA_ERR_FW_BAD_DMA_STATE) \ + ACT(PVA_ERR_FW_RESOURCE_IN_USE) \ + ACT(PVA_ERR_FW_VPU_ERROR_STATE) \ + ACT(PVA_ERR_FW_VPU_RETCODE_NONZERO) \ + ACT(PVA_ERR_FW_INVALID_CMD_OPCODE) \ + ACT(PVA_ERR_FW_INVALID_VPU_CMD_SEQ) \ + ACT(PVA_ERR_FW_INVALID_DMA_CMD_SEQ) \ + ACT(PVA_ERR_FW_INVALID_L2SRAM_CMD_SEQ) \ + ACT(PVA_ERR_FW_ENGINE_NOT_RELEASED) \ + ACT(PVA_ERR_FW_UTEST) \ + ACT(PVA_ERR_VPU_ERROR_STATE) \ + ACT(PVA_ERR_VPU_RETCODE_NONZERO) \ + ACT(PVA_ERR_VPU_ILLEGAL_INSTR) \ + ACT(PVA_ERR_VPU_DIVIDE_BY_0) \ + ACT(PVA_ERR_VPU_FP_NAN) \ + ACT(PVA_ERR_VPU_IN_DEBUG) \ + ACT(PVA_ERR_VPU_DLUT_CFG) \ + ACT(PVA_ERR_VPU_DLUT_MISS) \ + ACT(PVA_ERR_VPU_CP_ACCESS) \ + 
ACT(PVA_ERR_PPE_ILLEGAL_INSTR) \ + ACT(PVA_ERR_MATH_OP) \ + ACT(PVA_ERR_HWSEQ_INVALID) \ + ACT(PVA_ERR_CODE_COUNT) + +enum pva_error { +#define ADD_COMMA(name) name, + FOREACH_ERR(ADD_COMMA) +#undef ADD_COMMA +}; + +enum pva_chip_id { + PVA_CHIP_T19X, + PVA_CHIP_T23X, + PVA_CHIP_T26X, + PVA_CHIP_OTHERS +}; + +enum pva_hw_gen { + PVA_HW_GEN1, + PVA_HW_GEN2, + PVA_HW_GEN3, +}; + +/* Opaque API data types */ +struct pva_context; +struct pva_queue; +struct pva_memory; + +struct pva_memory_attrs { + uint32_t access_mode; + uint64_t offset; + uint64_t size; +}; + +/** + * @brief A memory address accessible by PVA. + */ +struct pva_dram_addr { + uint32_t resource_id; + uint64_t offset; +}; + +struct pva_vmem_addr { + uint32_t symbol_id; + uint32_t offset; +}; + +/** + * @brief Represents a synchronization fence, which can be associated with + * either a memory semaphore or a syncpoint for signaling or waiting operations. + * + * The UMD handles semaphores and syncpoints differently when used as + * postfences: + * - Semaphores: UMD does not track future values. + * - Syncpoints: UMD tracks future values. + * + * To use semaphore for either prefences and postfences: + * - Set `semaphore_resource_id` to the resource ID of the memory backing the semaphore. + * - Set `index` to the byte offset divided by the semaphore size (`sizeof(uint32_t)`). + * - Set `value` to the semaphore's signaling or waiting value. + * + * To use syncpoint for prefences: + * - Set `semaphore_resource_id` to `PVA_RESOURCE_ID_INVALID`. + * - Set `index` to the syncpoint ID to wait for. + * - Set `value` to the waiting value. + * + * To use syncpoint for postfences: + * - Set `semaphore_resource_id` to `PVA_RESOURCE_ID_INVALID`. + * - Do not set `index` or `value`. + * - After submission, UMD will assign `index` to the queue syncpoint ID and `value` to the expected future value. + */ +struct pva_fence { + /** Resource ID of the memory semaphore. If resource ID is + * PVA_RESOURCE_ID_INVALID, then the sync object primitive is assumed to + * be syncpoint. */ + uint32_t semaphore_resouce_id; + /** Represents either the semaphore index or the syncpoint ID, depending + * on the sync object primitive type. + */ + uint32_t index; + /** Represents the semaphore or syncpoint value used for signaling or + * waiting. */ + uint32_t value; +}; + +struct pva_fw_vpu_ptr_symbol { + uint64_t base; + uint64_t offset; + uint64_t size; +}; + +struct pva_fw_vpu_legacy_ptr_symbol { + uint64_t base; + uint32_t offset; + uint32_t size; +}; + +enum pva_surface_format { + PVA_SURF_FMT_PITCH_LINEAR = 0, + PVA_SURF_FMT_BLOCK_LINEAR +}; + +enum pva_memory_segment { + /** Memory segment directly reachable by R5. Command buffer chunk + * memories need to be allocated from this segment */ + PVA_MEMORY_SEGMENT_R5 = 1, + /** Memory segment reachable only by DMA. User buffers should be + * allocated from this segment */ + PVA_MEMORY_SEGMENT_DMA = 2, +}; + +enum pva_symbol_type { + /*! Specifies the an invalid symbol type */ + PVA_SYM_TYPE_INVALID = 0, + /*! Specifies a data symbol */ + PVA_SYM_TYPE_DATA, + /*! Specifies a VPU config table symbol */ + PVA_SYM_TYPE_VPUC_TABLE, + /*! Specifies a Pointer symbol */ + PVA_SYM_TYPE_POINTER, + /*! Specifies a System symbol */ + PVA_SYM_TYPE_SYSTEM, + /*! Specifies an extended Pointer symbol */ + PVA_SYM_TYPE_POINTER_EX, + PVA_SYM_TYPE_MAX, +}; + +/** + * \brief Holds PVA Sync Client Type. + * Currently NvSciSync supports NvSciSyncFences with syncpoint primitive type only. + */ +enum pva_sync_client_type { + /*! 
For a given SyncObj PVA acts as a signaler. This type corresponds to + * postfences from PVA. */ + PVA_SYNC_CLIENT_TYPE_SIGNALER, + /*! For a given SyncObj PVA acts as a waiter. This type corresponds to + * prefences to PVA. */ + PVA_SYNC_CLIENT_TYPE_WAITER, + /*! For a given SyncObj PVA acts as both signaler and waiter. */ + PVA_SYNC_CLIENT_TYPE_SIGNALER_WAITER, + /*! Specifies the non inclusive upper bound of valid values. */ + PVA_SYNC_CLIENT_TYPE_MAX, + /*! Reserved bound of valid values. */ + PVA_SYNC_CLIENT_TYPE_RESERVED = 0x7FFFFFFF, +}; + +#define PVA_SYMBOL_ID_INVALID 0U +#define PVA_SYMBOL_ID_BASE 1U +#define PVA_MAX_SYMBOL_NAME_LEN 64U +struct pva_symbol_info { + char name[PVA_MAX_SYMBOL_NAME_LEN + 1U]; + enum pva_symbol_type symbol_type; + uint32_t size; + uint32_t vmem_addr; + /** Symbol ID local to this executable */ + uint32_t symbol_id; /*< Starting from PVA_SYMBOL_ID_BASE */ +}; + +#define PVA_RESOURCE_ID_INVALID 0U +#define PVA_RESOURCE_ID_BASE 1U +struct pva_resource_entry { +#define PVA_RESOURCE_TYPE_INVALID 0U +#define PVA_RESOURCE_TYPE_DRAM 1U +#define PVA_RESOURCE_TYPE_EXEC_BIN 2U +#define PVA_RESOURCE_TYPE_DMA_CONFIG 3U + uint8_t type; + uint8_t smmu_context_id; + uint8_t addr_hi; + uint8_t size_hi; + uint32_t addr_lo; + uint32_t size_lo; +}; + +/** \brief Maximum number of queues per context */ +#define PVA_MAX_QUEUES_PER_CONTEXT (8) + +/** \brief Specifies the memory is GPU CACHED. */ +#define PVA_GPU_CACHED_MEMORY (1u << 1u) + +#define PVA_ACCESS_RO (1U << 0) /**< Read only access */ +#define PVA_ACCESS_WO (1U << 1) /**< Write only access */ +#define PVA_ACCESS_RW \ + (PVA_ACCESS_RO | PVA_ACCESS_WO) /**< Read and write access */ + +#define PVA_TIMEOUT_INF UINT64_MAX /**< Infinite timeout */ + +#define PVA_MAX_NUM_INPUT_STATUS 2 /**< Maximum number of input statuses */ +#define PVA_MAX_NUM_OUTPUT_STATUS 2 /**< Maximum number of output statuses */ +#define PVA_MAX_NUM_PREFENCES 2 /**< Maximum number of pre-fences */ +#define PVA_MAX_NUM_POSTFENCES 2 /**< Maximum number of post-fences */ +/** Maximum number of timestamps */ +#define PVA_MAX_NUM_TIMESTAMPS PVA_MAX_NUM_POSTFENCES + +struct pva_cmdbuf_submit_info { + uint8_t num_prefences; + uint8_t num_postfences; + uint8_t num_input_status; + uint8_t num_output_status; + uint8_t num_timestamps; +#define PVA_ENGINE_AFFINITY_NONE 0 +#define PVA_ENGINE_AFFINITY_ENGINE0 (1 << 0) +#define PVA_ENGINE_AFFINITY_ENGINE1 (1 << 1) +#define PVA_ENGINE_AFFINITY_ANY \ + (PVA_ENGINE_AFFINITY_ENGINE0 | PVA_ENGINE_AFFINITY_ENGINE1) + uint8_t engine_affinity; + /** Size of the first chunk */ + uint16_t first_chunk_size; + /** Resource ID of the first chunk */ + uint32_t first_chunk_resource_id; + /** Offset of the first chunk within the resource */ + uint64_t first_chunk_offset; +#define PVA_EXEC_TIMEOUT_REUSE 0xFFFFFFFFU +#define PVA_EXEC_TIMEOUT_INF 0U + /** Execution Timeout */ + uint32_t execution_timeout_ms; + struct pva_fence prefences[PVA_MAX_NUM_PREFENCES]; + struct pva_fence postfences[PVA_MAX_NUM_POSTFENCES]; + struct pva_dram_addr input_statuses[PVA_MAX_NUM_INPUT_STATUS]; + struct pva_dram_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS]; + struct pva_dram_addr timestamps[PVA_MAX_NUM_TIMESTAMPS]; +}; + +struct pva_ops_buffer { + void *base; /**< Buffer holding a list of async operations */ + uint32_t offset; /**< First unused byte in the buffer */ + uint32_t size; /**< Size of the buffer */ +}; + +struct pva_cmdbuf_status { + /** Timestamp reflecting when the status was updated. 
This is in resolution of ns */ + uint64_t timestamp; + /** Additional status information for the engine state */ + uint32_t info32; + /** Additional status information for the engine state */ + uint16_t info16; + /** Error code. Type: enum pva_error */ + uint16_t status; +}; + +/** \brief Holds the PVA capabilities. */ +struct pva_characteristics { + /*! Holds the number of PVA engines. */ + uint32_t pva_engine_count; + /*! Holds the number of VPUs per PVA engine. */ + uint32_t pva_pve_count; + /*! Holds the PVA generation information */ + enum pva_hw_gen hw_version; + uint16_t max_desc_count; + uint16_t max_ch_count; + uint16_t max_adb_count; + uint16_t max_hwseq_word_count; + uint16_t max_vmem_region_count; + uint16_t reserved_desc_start; + uint16_t reserved_desc_count; + uint16_t reserved_adb_start; + uint16_t reserved_adb_count; +}; + +enum pva_error_inject_codes { + PVA_ERR_INJECT_WDT_HW_ERR, // watchdog Hardware error + PVA_ERR_INJECT_WDT_TIMEOUT, // watchdog Timeout error +}; + +/* + * !!!! DO NOT MODIFY !!!!!! + * These values are defined as per DriveOS guidelines + */ +#define PVA_INPUT_STATUS_SUCCESS (0) +#define PVA_INPUT_STATUS_INVALID (0xFFFF) + +#endif // PVA_API_TYPES_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h b/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h new file mode 100644 index 00000000..16651803 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_vpu.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_API_VPU_H +#define PVA_API_VPU_H +#include "pva_api_types.h" + +/** + * @brief Information of the VPU instance data passed to VPU kernel. + */ +struct pva_vpu_instance_data { + /** @brief ID of the VPU assigned to the task */ + uint16_t engine_id; + /** @brief Variable to indicate that ppe task was launched or not */ + uint16_t ppe_task_launched; + /** @brief Base of the VMEM memory */ + uint32_t vmem_base; + /** @brief Base of the DMA descriptor SRAM memory */ + uint32_t dma_descriptor_base; + /** @brief Base of L2SRAM allocated for the task executed */ + uint32_t l2ram_base; + /** @brief Size of L2SRAM allocated for the task executed */ + uint32_t l2ram_size; +}; + +#endif // PVA_API_VPU_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c new file mode 100644 index 00000000..8c62c0b5 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.c @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_block_allocator.h" +#include "pva_kmd_utils.h" +#include "pva_api.h" + +#define INVALID_ID 0xFFFFFFFF +enum pva_error +pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator, + void *block_mem, uint32_t base_id, + uint32_t block_size, uint32_t max_num_blocks) +{ + enum pva_error err = PVA_SUCCESS; + + allocator->free_slot_head = INVALID_ID; + allocator->next_free_slot = 0; + allocator->max_num_blocks = max_num_blocks; + allocator->block_size = block_size; + allocator->base_id = base_id; + + allocator->blocks = block_mem; + + allocator->slot_in_use = pva_kmd_zalloc( + sizeof(*allocator->slot_in_use) * max_num_blocks); + if (!allocator->slot_in_use) { + err = PVA_NOMEM; + goto err_out; + } + + return PVA_SUCCESS; +err_out: + return err; +} + +void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator) +{ + pva_kmd_free(allocator->slot_in_use); +} + +static inline void *get_block(struct pva_kmd_block_allocator *allocator, + uint32_t slot) +{ + uintptr_t base = (uintptr_t)allocator->blocks; + uintptr_t addr = base + (slot * allocator->block_size); + return (void *)addr; +} + +static inline uint32_t next_slot(struct pva_kmd_block_allocator *allocator, + uint32_t slot) +{ + uint32_t *next = (uint32_t *)get_block(allocator, slot); + return *next; +} + +void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator, + uint32_t *out_id) +{ + void *block = NULL; + uint32_t slot = INVALID_ID; + + if (allocator->free_slot_head != INVALID_ID) { + slot = allocator->free_slot_head; + allocator->free_slot_head = + next_slot(allocator, allocator->free_slot_head); + } else { + if (allocator->next_free_slot < allocator->max_num_blocks) { + slot = allocator->next_free_slot; + allocator->next_free_slot++; + } else { + goto err_out; + } + } + allocator->slot_in_use[slot] = true; + + *out_id = slot + allocator->base_id; + block = get_block(allocator, slot); + return block; +err_out: + return NULL; +} + +static bool is_slot_valid(struct pva_kmd_block_allocator *allocator, + uint32_t slot) +{ + if (slot >= allocator->max_num_blocks) { + return false; + } + + return allocator->slot_in_use[slot]; +} + +void *pva_kmd_get_block(struct pva_kmd_block_allocator *allocator, uint32_t id) +{ + uint32_t slot = id - allocator->base_id; + if (!is_slot_valid(allocator, slot)) { + return NULL; + } + + return get_block(allocator, slot); +} + +enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator, + uint32_t id) +{ + uint32_t slot = id - allocator->base_id; + uint32_t *next; + if (!is_slot_valid(allocator, slot)) { + return PVA_INVAL; + } + + allocator->slot_in_use[slot] = false; + next = (uint32_t *)get_block(allocator, slot); + *next = allocator->free_slot_head; + allocator->free_slot_head = slot; + + return PVA_SUCCESS; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h new file mode 100644 index 00000000..35162d4b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_block_allocator.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_BLOCK_ALLOCATOR_H +#define PVA_KMD_BLOCK_ALLOCATOR_H + +#include "pva_api.h" + +struct pva_kmd_block_allocator { + uint32_t free_slot_head; + uint32_t base_id; + uint32_t max_num_blocks; + uint32_t next_free_slot; + uint32_t block_size; + void *blocks; + bool *slot_in_use; +}; + +enum pva_error +pva_kmd_block_allocator_init(struct pva_kmd_block_allocator *allocator, + void *chunk_mem, uint32_t base_id, + uint32_t chunk_size, uint32_t max_num_chunks); + +void *pva_kmd_alloc_block(struct pva_kmd_block_allocator *allocator, + uint32_t *out_id); +static inline void * +pva_kmd_zalloc_block(struct pva_kmd_block_allocator *allocator, + uint32_t *out_id) +{ + void *ptr = pva_kmd_alloc_block(allocator, out_id); + if (ptr != NULL) { + memset(ptr, 0, allocator->block_size); + } + return ptr; +} + +void *pva_kmd_get_block(struct pva_kmd_block_allocator *allocator, uint32_t id); +enum pva_error pva_kmd_free_block(struct pva_kmd_block_allocator *allocator, + uint32_t id); + +void pva_kmd_block_allocator_deinit(struct pva_kmd_block_allocator *allocator); + +#endif // PVA_KMD_BLOCK_ALLOCATOR_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.c new file mode 100644 index 00000000..9cb2af20 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.c @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_cmdbuf.h" +#include "pva_api_cmdbuf.h" +#include "pva_kmd_utils.h" +#include "pva_math_utils.h" + +#define CHUNK_STATE_INVALID 0 +#define CHUNK_STATE_FENCE_TRIGGERED 1 + +static uint32_t * +get_chunk_states(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool) +{ + return (uint32_t *)pva_offset_pointer( + cmdbuf_chunk_pool->mem_base_va, + cmdbuf_chunk_pool->chunk_states_offset); +} + +static void *get_chunk(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, + uint32_t chunk_id) +{ + return pva_offset_pointer(cmdbuf_chunk_pool->mem_base_va, + cmdbuf_chunk_pool->chunk_size * chunk_id); +} + +static uint32_t get_chunk_id_from_res_offset( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint64_t offset) +{ + ASSERT(offset >= cmdbuf_chunk_pool->mem_offset); + offset -= cmdbuf_chunk_pool->mem_offset; + return offset / cmdbuf_chunk_pool->chunk_size; +} + +enum pva_error pva_kmd_cmdbuf_chunk_pool_init( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, + uint32_t mem_resource_id, uint64_t mem_offset, uint32_t mem_size, + uint16_t chunk_size, uint32_t num_chunks, void *mem_base_va) +{ + uint32_t *chunk_states; + uint32_t i; + enum pva_error err; + + ASSERT(mem_size >= pva_kmd_cmdbuf_pool_get_required_mem_size( + chunk_size, num_chunks)); + + cmdbuf_chunk_pool->mem_resource_id = mem_resource_id; + cmdbuf_chunk_pool->mem_offset = mem_offset; + cmdbuf_chunk_pool->mem_size = mem_size; + cmdbuf_chunk_pool->chunk_size = chunk_size; + cmdbuf_chunk_pool->num_chunks = num_chunks; + cmdbuf_chunk_pool->mem_base_va = mem_base_va; + cmdbuf_chunk_pool->chunk_states_offset = chunk_size * num_chunks; + chunk_states = get_chunk_states(cmdbuf_chunk_pool); + for (i = 0; i < num_chunks; i++) { + chunk_states[i] = CHUNK_STATE_INVALID; + } + + err = pva_kmd_block_allocator_init(&cmdbuf_chunk_pool->block_allocator, + mem_base_va, 0, chunk_size, + num_chunks); + return err; +} + +void pva_kmd_cmdbuf_chunk_pool_deinit(struct pva_kmd_cmdbuf_chunk_pool *pool) +{ + pva_kmd_block_allocator_deinit(&pool->block_allocator); +} + +void pva_kmd_free_linked_cmdbuf_chunks(struct pva_kmd_cmdbuf_chunk_pool *pool, + uint32_t chunk_id) +{ + struct pva_cmd_link_chunk *begin; + uint32_t *chunk_states; + uint64_t offset; + uint32_t resource_id; + + chunk_states = get_chunk_states(pool); + while (true) { + begin = get_chunk(pool, chunk_id); + chunk_states[chunk_id] = CHUNK_STATE_INVALID; + offset = assemble_addr(begin->next_chunk_offset_hi, + begin->next_chunk_offset_lo); + resource_id = begin->next_chunk_resource_id; + pva_kmd_free_block(&pool->block_allocator, chunk_id); + if (resource_id == PVA_RESOURCE_ID_INVALID) { + break; + } + ASSERT(resource_id == pool->mem_resource_id); + /* Free next chunk */ + chunk_id = get_chunk_id_from_res_offset(pool, offset); + } +} + +static bool recycle_chunks(struct pva_kmd_cmdbuf_chunk_pool *pool) +{ + uint32_t *chunk_states; + uint32_t i; + bool freed = false; + + chunk_states = get_chunk_states(pool); + for (i = 0; i < pool->num_chunks; i++) { + if (chunk_states[i] == CHUNK_STATE_FENCE_TRIGGERED) { + pva_kmd_free_linked_cmdbuf_chunks(pool, i); + freed = true; + break; + } + } + + return freed; +} + +enum pva_error +pva_kmd_alloc_cmdbuf_chunk(struct pva_kmd_cmdbuf_chunk_pool *pool, + uint32_t *out_chunk_id) +{ + enum pva_error err = PVA_SUCCESS; + void *chunk; + + chunk = pva_kmd_alloc_block(&pool->block_allocator, out_chunk_id); + if (chunk == NULL) { + if (recycle_chunks(pool)) { + chunk = pva_kmd_alloc_block(&pool->block_allocator, + out_chunk_id); + ASSERT(chunk != 
NULL); + } else { + err = PVA_NOMEM; + } + } + + return err; +} + +void pva_kmd_get_free_notifier_fence(struct pva_kmd_cmdbuf_chunk_pool *pool, + uint32_t chunk_id, + struct pva_fw_postfence *fence) +{ + uint64_t offset_sum = + safe_addu64(pool->mem_offset, pool->chunk_states_offset); + uint64_t chunk_size = + (uint64_t)safe_mulu32((uint32_t)sizeof(uint32_t), chunk_id); + uint64_t state_offset = safe_addu64(offset_sum, chunk_size); + memset(fence, 0, sizeof(*fence)); + fence->resource_id = pool->mem_resource_id; + fence->offset_lo = iova_lo(state_offset); + fence->offset_hi = iova_hi(state_offset); + fence->value = CHUNK_STATE_FENCE_TRIGGERED; + fence->ts_resource_id = PVA_RESOURCE_ID_INVALID; +} + +static void *current_cmd(struct pva_kmd_cmdbuf_builder *builder) +{ + return pva_offset_pointer( + pva_kmd_get_cmdbuf_chunk_va(builder->pool, + builder->current_chunk_id), + builder->current_chunk_offset); +} + +static void begin_chunk(struct pva_kmd_cmdbuf_builder *builder) +{ + struct pva_cmd_link_chunk *cmd = pva_kmd_get_cmdbuf_chunk_va( + builder->pool, builder->current_chunk_id); + memset(cmd, 0, sizeof(*cmd)); + cmd->header.opcode = PVA_CMD_OPCODE_LINK_CHUNK; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->next_chunk_resource_id = PVA_RESOURCE_ID_INVALID; + builder->current_chunk_offset = sizeof(*cmd); +} + +static void end_chunk(struct pva_kmd_cmdbuf_builder *builder) +{ + /* Size of this chunk is now known. Update the header of the previous chunk. */ + *builder->chunk_size_ptr = builder->current_chunk_offset; +} + +static void link_chunk(struct pva_kmd_cmdbuf_builder *builder, + uint32_t new_chunk_id) +{ + struct pva_cmd_link_chunk *old_link; + uint64_t new_chunk_offset; + + old_link = (struct pva_cmd_link_chunk *)pva_kmd_get_cmdbuf_chunk_va( + builder->pool, builder->current_chunk_id); + new_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset(builder->pool, + new_chunk_id); + old_link->next_chunk_resource_id = builder->pool->mem_resource_id; + old_link->next_chunk_offset_lo = iova_lo(new_chunk_offset); + old_link->next_chunk_offset_hi = iova_hi(new_chunk_offset); + /* The new chunk size is still unknown. We record the pointer here. */ + builder->chunk_size_ptr = &old_link->next_chunk_size; +} + +void *pva_kmd_reserve_cmd_space(struct pva_kmd_cmdbuf_builder *builder, + uint16_t size) +{ + uint16_t max_size; + enum pva_error err; + void *cmd_start; + + max_size = safe_subu16(builder->pool->chunk_size, + (uint16_t)sizeof(struct pva_cmd_link_chunk)); + + ASSERT(size <= max_size); + + if ((builder->current_chunk_offset + size) > + builder->pool->chunk_size) { + /* Not enough space in the current chunk. Allocate a new one. 
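+		 * The new chunk is linked from the current chunk's
+		 * link_chunk header (see end_chunk() and link_chunk()) and
+		 * command emission continues in the new chunk.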
*/ + uint32_t new_chunk_id; + + err = pva_kmd_alloc_cmdbuf_chunk(builder->pool, &new_chunk_id); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("No more chunk in the pool"); + goto err_out; + } + end_chunk(builder); + link_chunk(builder, new_chunk_id); + + builder->current_chunk_id = new_chunk_id; + builder->current_chunk_offset = 0; + begin_chunk(builder); + } + + cmd_start = current_cmd(builder); + (void)memset(cmd_start, 0, size); + + builder->current_chunk_offset += size; + + return cmd_start; +err_out: + return NULL; +} + +enum pva_error +pva_kmd_cmdbuf_builder_init(struct pva_kmd_cmdbuf_builder *builder, + struct pva_kmd_cmdbuf_chunk_pool *chunk_pool) +{ + enum pva_error err = PVA_SUCCESS; + uint32_t const min_chunk_size = sizeof(struct pva_cmd_link_chunk); + + ASSERT(chunk_pool->chunk_size >= min_chunk_size); + + builder->pool = chunk_pool; + err = pva_kmd_alloc_cmdbuf_chunk(chunk_pool, + &builder->current_chunk_id); + if (err != PVA_SUCCESS) { + goto err_out; + } + builder->current_chunk_offset = 0; + builder->first_chunk_size = 0; + builder->first_chunk_id = builder->current_chunk_id; + builder->chunk_size_ptr = &builder->first_chunk_size; + + begin_chunk(builder); + + return PVA_SUCCESS; +err_out: + return err; +} + +void pva_kmd_cmdbuf_builder_finalize(struct pva_kmd_cmdbuf_builder *builder, + uint32_t *out_first_chunk_id, + uint16_t *out_first_chunk_size) +{ + end_chunk(builder); + *out_first_chunk_id = builder->first_chunk_id; + *out_first_chunk_size = builder->first_chunk_size; +} + +void pva_kmd_cmdbuf_builder_cancel(struct pva_kmd_cmdbuf_builder *builder) +{ + pva_kmd_free_linked_cmdbuf_chunks(builder->pool, + builder->first_chunk_id); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h new file mode 100644 index 00000000..407970e2 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_CMDBUF_H +#define PVA_KMD_CMDBUF_H +#include "pva_fw.h" +#include "pva_resource.h" +#include "pva_kmd_block_allocator.h" +#include "pva_kmd_mutex.h" +#include "pva_api_cmdbuf.h" +#include "pva_utils.h" +#include "pva_math_utils.h" + +struct pva_kmd_queue; + +/** + * A fixed-size pool of command buffer chunks. + * + * We can allocate chunks from this pool. When submitting the chunks, we should + * request a post fence from the pool for the first chunk. When the post fence + * is triggered, the chain of chunks will be considered free by the pool. 
+ */ +struct pva_kmd_cmdbuf_chunk_pool { + uint16_t chunk_size; + uint32_t num_chunks; + uint32_t mem_resource_id; + uint64_t mem_size; + uint64_t mem_offset; /**< Starting offset in the resource that can be + * used by this pool */ + uint64_t chunk_states_offset; + void *mem_base_va; + struct pva_kmd_block_allocator block_allocator; +}; + +static inline uint64_t +pva_kmd_cmdbuf_pool_get_required_mem_size(uint16_t chunk_size, + uint32_t num_chunks) +{ + /* Add storage required for free notifier fences */ + return (chunk_size + sizeof(uint32_t)) * num_chunks; +} + +/** + * Initialize the chunk pool. + * + * @param[out] Pointer to the pool. + * + * @param[in] mem_resource_id Resource ID of the memory to be used for the pool. + * + * @param[in] mem_offset Offset of the memory to be used for the pool. + + * @param[in] mem_size Size of the memory to be used for the pool. + * + * @param[in] chunk_size Size of each chunk in the pool. + * + * @param[in] num_chunks Number of chunks in the pool. + * + * @param[in] mem_base_va Virtual address of the memory to be used for the pool. + * The virtual address is the base address of the resource. + */ +enum pva_error pva_kmd_cmdbuf_chunk_pool_init( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, + uint32_t mem_resource_id, uint64_t mem_offset, uint32_t mem_size, + uint16_t chunk_size, uint32_t num_chunks, void *mem_base_va); + +void pva_kmd_cmdbuf_chunk_pool_deinit( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool); + +/** + * Allocate a chunk from the pool. + * + * If the chunk is submitted, then free will be done automatically when + * free-notifier fence is triggered. + */ +enum pva_error +pva_kmd_alloc_cmdbuf_chunk(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, + uint32_t *out_chunk_id); + +/** + * Free a linked list of chunks. + * + * We only need to call this function if we decide not to submit the chunks, + * usually in error path. + */ +void pva_kmd_free_linked_cmdbuf_chunks( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id); + +/** + * Get the free-notifier fence. + * + * @param[in] The first chunk of the command buffer to be submitted. + * + * @param[out] The free-notifier fence that should be submitted with the command buffer. + */ +void pva_kmd_get_free_notifier_fence( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id, + struct pva_fw_postfence *fence); + +static inline void * +pva_kmd_get_cmdbuf_chunk_va(struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, + uint32_t chunk_id) +{ + return (void *)((uintptr_t)cmdbuf_chunk_pool->mem_base_va + + chunk_id * cmdbuf_chunk_pool->chunk_size); +} + +static inline uint64_t pva_kmd_get_cmdbuf_chunk_res_offset( + struct pva_kmd_cmdbuf_chunk_pool *cmdbuf_chunk_pool, uint32_t chunk_id) +{ + uint64_t chunk_size = (uint64_t)safe_mulu32( + chunk_id, (uint32_t)cmdbuf_chunk_pool->chunk_size); + return safe_addu64(cmdbuf_chunk_pool->mem_offset, chunk_size); +} + +/** + * Utility for building a command buffer with multiple chunks. + * + * The builder will automatically allocate chunks from the pool when the current + * chunk is full. 
+ */ +struct pva_kmd_cmdbuf_builder { + uint16_t first_chunk_size; + uint16_t current_chunk_offset; + uint32_t first_chunk_id; + uint32_t current_chunk_id; + struct pva_kmd_cmdbuf_chunk_pool *pool; + uint16_t *chunk_size_ptr; /**< Pointer to the chunk size field of the previous link_chunk command */ +}; + +enum pva_error +pva_kmd_cmdbuf_builder_init(struct pva_kmd_cmdbuf_builder *builder, + struct pva_kmd_cmdbuf_chunk_pool *chunk_pool); + +void *pva_kmd_reserve_cmd_space(struct pva_kmd_cmdbuf_builder *builder, + uint16_t size); +void pva_kmd_cmdbuf_builder_finalize(struct pva_kmd_cmdbuf_builder *builder, + uint32_t *out_first_chunk_id, + uint16_t *out_first_chunk_size); + +void pva_kmd_cmdbuf_builder_cancel(struct pva_kmd_cmdbuf_builder *builder); + +static inline void pva_kmd_set_cmd_init_resource_table( + struct pva_cmd_init_resource_table *cmd, uint8_t resource_table_id, + uint64_t iova_addr, uint32_t max_num_entries) +{ + cmd->header.opcode = PVA_CMD_OPCODE_INIT_RESOURCE_TABLE; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->resource_table_id = resource_table_id; + cmd->resource_table_addr_lo = iova_lo(iova_addr); + cmd->resource_table_addr_hi = iova_hi(iova_addr); + cmd->max_n_entries = max_num_entries; +} + +static inline void +pva_kmd_set_cmd_deinit_resource_table(struct pva_cmd_deinit_resource_table *cmd, + uint8_t resource_table_id) +{ + cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_RESOURCE_TABLE; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->resource_table_id = resource_table_id; +} + +static inline void pva_kmd_set_cmd_init_queue(struct pva_cmd_init_queue *cmd, + uint8_t ccq_id, uint8_t queue_id, + uint64_t iova_addr, + uint32_t max_num_submit) +{ + cmd->header.opcode = PVA_CMD_OPCODE_INIT_QUEUE; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->ccq_id = ccq_id; + cmd->queue_id = queue_id; + cmd->queue_addr_lo = iova_lo(iova_addr); + cmd->queue_addr_hi = iova_hi(iova_addr); + cmd->max_n_submits = max_num_submit; +} + +static inline void +pva_kmd_set_cmd_deinit_queue(struct pva_cmd_deinit_queue *cmd, uint8_t ccq_id, + uint8_t queue_id) +{ + cmd->header.opcode = PVA_CMD_OPCODE_DEINIT_QUEUE; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->ccq_id = ccq_id; + cmd->queue_id = queue_id; +} + +static inline void pva_kmd_set_cmd_update_resource_table( + struct pva_cmd_update_resource_table *cmd, uint32_t resource_table_id, + uint32_t resource_id, struct pva_resource_entry const *entry) +{ + cmd->header.opcode = PVA_CMD_OPCODE_UPDATE_RESOURCE_TABLE; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->resource_table_id = resource_table_id; + cmd->resource_id = resource_id; + cmd->entry = *entry; +} + +static inline void +pva_kmd_set_cmd_unregister_resource(struct pva_cmd_unregister_resource *cmd, + uint32_t resource_id) +{ + cmd->header.opcode = PVA_CMD_OPCODE_UNREGISTER_RESOURCE; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->resource_id = resource_id; +} + +static inline void +pva_kmd_set_cmd_enable_fw_profiling(struct pva_cmd_enable_fw_profiling *cmd, + uint32_t buffer_resource_id, + uint32_t buffer_size, uint64_t offset, + uint32_t filter, uint8_t timestamp_type) +{ + cmd->header.opcode = PVA_CMD_OPCODE_ENABLE_FW_PROFILING; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->buffer_resource_id = buffer_resource_id; + cmd->buffer_offset_hi = iova_hi(offset); + cmd->buffer_offset_lo = iova_lo(offset); + cmd->buffer_size = buffer_size; + cmd->filter = filter; + cmd->timestamp_type = timestamp_type; +} + +static 
inline void +pva_kmd_set_cmd_disable_fw_profiling(struct pva_cmd_disable_fw_profiling *cmd) +{ + cmd->header.opcode = PVA_CMD_OPCODE_DISABLE_FW_PROFILING; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); +} + +static inline void pva_kmd_set_cmd_get_tegra_stats( + struct pva_cmd_get_tegra_stats *cmd, uint32_t buffer_resource_id, + uint32_t buffer_size, uint64_t offset, bool enabled) +{ + cmd->header.opcode = PVA_CMD_OPCODE_GET_TEGRA_STATS; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->buffer_resource_id = buffer_resource_id; + cmd->buffer_offset_hi = iova_hi(offset); + cmd->buffer_offset_lo = iova_lo(offset); + cmd->buffer_size = buffer_size; + cmd->enabled = enabled; +} + +static inline void pva_kmd_set_cmd_suspend_fw(struct pva_cmd_suspend_fw *cmd) +{ + uint64_t len = (sizeof(*cmd) / sizeof(uint32_t)); + cmd->header.opcode = PVA_CMD_OPCODE_SUSPEND_FW; + ASSERT(len <= 255u); + cmd->header.len = (uint8_t)(len); +} + +static inline void pva_kmd_set_cmd_resume_fw(struct pva_cmd_resume_fw *cmd) +{ + uint64_t len = (sizeof(*cmd) / sizeof(uint32_t)); + cmd->header.opcode = PVA_CMD_OPCODE_RESUME_FW; + ASSERT(len <= 255u); + cmd->header.len = (uint8_t)(len); +} +#endif // PVA_KMD_CMDBUF_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h new file mode 100644 index 00000000..51e7d7c0 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_constants.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#ifndef PVA_KMD_CONSTANTS_H +#define PVA_KMD_CONSTANTS_H +#include "pva_constants.h" +/* Limits related to KMD's own submission*/ +#define PVA_KMD_MAX_NUM_KMD_RESOURCES 32 +#define PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS 1 +#define PVA_KMD_MAX_NUM_KMD_CHUNKS 32 +#define PVA_KMD_MAX_NUM_KMD_SUBMITS 32 + +/* Limits related to User's privileged submission */ +#define PVA_KMD_MAX_NUM_PRIV_CHUNKS 256 +#define PVA_KMD_MAX_NUM_PRIV_SUBMITS 256 + +#define PVA_KMD_USER_CONTEXT_ID_BASE 1u +#define PVA_KMD_PVA0_T23x_REG_BASE 0x16000000 +#define PVA_KMD_PVA0_T23x_REG_SIZE 0x800000 + +#define PVA_KMD_TIMEOUT_INF UINT64_MAX + +// clang-format off +#if PVA_BUILD_MODE == PVA_BUILD_MODE_SIM + #define PVA_KMD_TIMEOUT_FACTOR 100 +#else + #define PVA_KMD_TIMEOUT_FACTOR 1 +#endif +// clang-format on + +#define PVA_KMD_TIMEOUT(val) (val * PVA_KMD_TIMEOUT_FACTOR) + +#define PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS PVA_KMD_TIMEOUT(100) /*< 100 ms */ +#define PVA_KMD_WAIT_FW_TIMEOUT_US PVA_KMD_TIMEOUT(1000000) /*< 1 second*/ +#define PVA_KMD_WAIT_FW_POLL_INTERVAL_US PVA_KMD_TIMEOUT(100) /*< 100 us*/ +#define PVA_KMD_FW_BOOT_TIMEOUT_MS PVA_KMD_TIMEOUT(1000) /*< 1 seconds */ + +#define PVA_NUM_RW_SYNCPTS 56 + +// clang-format off +#if PVA_DEV_MAIN_COMPATIBLE == 1 + #define PVA_KMD_LOAD_FROM_GSC_DEFAULT true + #if PVA_SAFETY == 1 + #define PVA_KMD_APP_AUTH_DEFAULT true + #else + #define PVA_KMD_APP_AUTH_DEFAULT false + #endif +#else + #define PVA_KMD_LOAD_FROM_GSC_DEFAULT false + #define PVA_KMD_APP_AUTH_DEFAULT false +#endif +// clang-format on + +#endif // PVA_KMD_CONSTANTS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c new file mode 100644 index 00000000..84bbf25f --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#include "pva_kmd_utils.h" +#include "pva_constants.h" +#include "pva_api_cmdbuf.h" +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_context.h" +#include "pva_kmd_constants.h" + +struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva) +{ + uint32_t alloc_id; + enum pva_error err; + struct pva_kmd_context *ctx; + + ctx = pva_kmd_zalloc_block(&pva->context_allocator, &alloc_id); + if (ctx == NULL) { + goto err_out; + } + ctx->ccq_id = alloc_id; + ctx->resource_table_id = ctx->ccq_id; + ctx->smmu_ctx_id = ctx->ccq_id; + ctx->pva = pva; + ctx->max_n_queues = PVA_MAX_NUM_QUEUES_PER_CONTEXT; + ctx->ccq0_lock_ptr = &pva->ccq0_lock; + pva_kmd_mutex_init(&ctx->ccq_lock); + pva_kmd_mutex_init(&ctx->resource_table_lock); + ctx->queue_allocator_mem = pva_kmd_zalloc(sizeof(struct pva_kmd_queue) * + ctx->max_n_queues); + if (ctx->queue_allocator_mem == NULL) { + goto free_ctx; + } + + err = pva_kmd_block_allocator_init(&ctx->queue_allocator, + ctx->queue_allocator_mem, 0, + sizeof(struct pva_kmd_queue), + ctx->max_n_queues); + if (err != PVA_SUCCESS) { + goto free_queue_mem; + } + + return ctx; +free_queue_mem: + pva_kmd_free(ctx->queue_allocator_mem); +free_ctx: + pva_kmd_free(ctx); +err_out: + return NULL; +} + +static enum pva_error notify_fw_context_init(struct pva_kmd_context *ctx) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter; + struct pva_cmd_init_resource_table *res_cmd; + struct pva_cmd_init_queue *queue_cmd; + struct pva_cmd_update_resource_table *update_cmd; + struct pva_resource_entry entry = { 0 }; + uint32_t fence_val; + enum pva_error err; + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + res_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*res_cmd)); + ASSERT(res_cmd != NULL); + + pva_kmd_set_cmd_init_resource_table( + res_cmd, ctx->resource_table_id, + ctx->ctx_resource_table.table_mem->iova, + ctx->ctx_resource_table.n_entries); + + queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd)); + ASSERT(queue_cmd != NULL); + + pva_kmd_set_cmd_init_queue( + queue_cmd, PVA_PRIV_CCQ_ID, + ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/ + ctx->ctx_queue.queue_memory->iova, + ctx->ctx_queue.max_num_submit); + + update_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*update_cmd)); + ASSERT(update_cmd != NULL); + + err = pva_kmd_make_resource_entry(&ctx->pva->dev_resource_table, + ctx->submit_memory_resource_id, + &entry); + ASSERT(err == PVA_SUCCESS); + + pva_kmd_set_cmd_update_resource_table(update_cmd, + 0, /* KMD's resource table ID */ + ctx->submit_memory_resource_id, + &entry); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + // Error is either QUEUE_FULL or TIMEDOUT + goto cancel_builder; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when initializing context"); + goto err_out; + } + + return PVA_SUCCESS; + +cancel_builder: + pva_kmd_cmdbuf_builder_cancel(&builder); +err_out: + return err; +} + +static enum pva_error notify_fw_context_deinit(struct pva_kmd_context *ctx) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &ctx->pva->submitter; + struct pva_cmd_deinit_resource_table *deinit_table_cmd; + 
struct pva_cmd_deinit_queue *deinit_queue_cmd; + uint32_t fence_val; + enum pva_error err; + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + + deinit_queue_cmd = + pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_queue_cmd)); + ASSERT(deinit_queue_cmd != NULL); + pva_kmd_set_cmd_deinit_queue( + deinit_queue_cmd, PVA_PRIV_CCQ_ID, + ctx->ccq_id /* For privileged queues, queue ID == user CCQ ID*/ + ); + + deinit_table_cmd = + pva_kmd_reserve_cmd_space(&builder, sizeof(*deinit_table_cmd)); + ASSERT(deinit_table_cmd != NULL); + pva_kmd_set_cmd_deinit_resource_table(deinit_table_cmd, + ctx->resource_table_id); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto cancel_builder; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when deinitializing context"); + goto err_out; + } + + return PVA_SUCCESS; +cancel_builder: + pva_kmd_cmdbuf_builder_cancel(&builder); +err_out: + return err; +} + +enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, + uint32_t res_table_capacity) +{ + enum pva_error err; + uint32_t queue_mem_size; + uint64_t chunk_mem_size; + struct pva_fw_postfence post_fence = { 0 }; + struct pva_syncpt_rw_info *syncpts; + uint64_t size; + + /* Power on PVA if not already */ + err = pva_kmd_device_busy(ctx->pva); + if (err != PVA_SUCCESS) { + goto err_out; + } + + /* Allocate RW syncpoints for this context */ + syncpts = (struct pva_syncpt_rw_info *)pva_kmd_alloc_block( + &ctx->pva->syncpt_allocator, &ctx->syncpt_block_index); + ASSERT(syncpts != NULL); + + /* Init resource table for this context */ + err = pva_kmd_resource_table_init(&ctx->ctx_resource_table, ctx->pva, + ctx->smmu_ctx_id, res_table_capacity, + res_table_capacity); + if (err != PVA_SUCCESS) { + goto drop_device; + } + + /* Init privileged queue for this context */ + queue_mem_size = pva_get_submission_queue_memory_size( + PVA_KMD_MAX_NUM_PRIV_SUBMITS); + ctx->ctx_queue_mem = + pva_kmd_device_memory_alloc_map(queue_mem_size, ctx->pva, + PVA_ACCESS_RW, + PVA_R5_SMMU_CONTEXT_ID); + if (ctx->ctx_queue_mem == NULL) { + err = PVA_NOMEM; + goto deinit_table; + } + + pva_kmd_queue_init( + &ctx->ctx_queue, ctx->pva, PVA_PRIV_CCQ_ID, + ctx->ccq_id, /* Context's PRIV queue ID is identical to CCQ ID */ + &ctx->pva->ccq0_lock, ctx->ctx_queue_mem, + PVA_KMD_MAX_NUM_PRIV_SUBMITS); + + /* Allocate memory for submission */ + chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size( + PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_PRIV_CHUNKS); + /* Allocate one post fence at the end. This memory will be added to + * KMD's own resource table. We don't need to explicitly free it. It + * will be freed after we drop the resource. 
*/ + size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t)); + ctx->submit_memory = pva_kmd_device_memory_alloc_map( + size, ctx->pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + if (ctx->submit_memory == NULL) { + err = PVA_NOMEM; + goto queue_deinit; + } + + /* Add submit memory to resource table */ + pva_kmd_mutex_lock(&ctx->pva->resource_table_lock); + err = pva_kmd_add_dram_buffer_resource(&ctx->pva->dev_resource_table, + ctx->submit_memory, + &ctx->submit_memory_resource_id); + pva_kmd_mutex_unlock(&ctx->pva->resource_table_lock); + if (err != PVA_SUCCESS) { + goto free_submit_memory; + } + + /* Init chunk pool */ + err = pva_kmd_cmdbuf_chunk_pool_init( + &ctx->chunk_pool, ctx->submit_memory_resource_id, + 0 /* offset */, chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE, + PVA_KMD_MAX_NUM_PRIV_CHUNKS, ctx->submit_memory->va); + if (err != PVA_SUCCESS) { + goto free_dram_buffer_resource; + } + + /* Init fence */ + ctx->fence_offset = chunk_mem_size; + + /* Init submitter */ + pva_kmd_mutex_init(&ctx->submit_lock); + pva_kmd_mutex_init(&ctx->chunk_pool_lock); + post_fence.resource_id = ctx->submit_memory_resource_id; + post_fence.offset_lo = iova_lo(ctx->fence_offset); + post_fence.offset_hi = iova_hi(ctx->fence_offset); + post_fence.ts_resource_id = PVA_RESOURCE_ID_INVALID; + pva_kmd_submitter_init( + &ctx->submitter, &ctx->ctx_queue, &ctx->submit_lock, + &ctx->chunk_pool, &ctx->chunk_pool_lock, + pva_offset_pointer(ctx->submit_memory->va, ctx->fence_offset), + &post_fence); + + /* Use KMD's queue to inform FW */ + err = notify_fw_context_init(ctx); + if (err != PVA_SUCCESS) { + goto deinit_submitter; + } + ctx->inited = true; + + return PVA_SUCCESS; + +deinit_submitter: + pva_kmd_mutex_deinit(&ctx->chunk_pool_lock); + pva_kmd_mutex_deinit(&ctx->submit_lock); + pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool); +free_dram_buffer_resource: + pva_kmd_drop_resource(&ctx->pva->dev_resource_table, + ctx->submit_memory_resource_id); +free_submit_memory: + pva_kmd_device_memory_free(ctx->submit_memory); +queue_deinit: + pva_kmd_queue_deinit(&ctx->ctx_queue); + pva_kmd_device_memory_free(ctx->ctx_queue_mem); +deinit_table: + pva_kmd_resource_table_deinit(&ctx->ctx_resource_table); +drop_device: + pva_kmd_device_idle(ctx->pva); +err_out: + return err; +} + +void pva_kmd_context_deinit(struct pva_kmd_context *ctx) +{ + enum pva_error err; + + if (ctx->inited) { + err = notify_fw_context_deinit(ctx); + ASSERT(err == PVA_SUCCESS); + pva_kmd_verify_all_resources_free(&ctx->ctx_resource_table); + pva_kmd_device_idle(ctx->pva); + pva_kmd_mutex_deinit(&ctx->submit_lock); + pva_kmd_mutex_deinit(&ctx->chunk_pool_lock); + pva_kmd_cmdbuf_chunk_pool_deinit(&ctx->chunk_pool); + pva_kmd_mutex_lock(&ctx->pva->resource_table_lock); + pva_kmd_drop_resource(&ctx->pva->dev_resource_table, + ctx->submit_memory_resource_id); + pva_kmd_mutex_unlock(&ctx->pva->resource_table_lock); + pva_kmd_queue_deinit(&ctx->ctx_queue); + pva_kmd_device_memory_free(ctx->ctx_queue_mem); + pva_kmd_resource_table_deinit(&ctx->ctx_resource_table); + pva_kmd_free_block(&ctx->pva->syncpt_allocator, + ctx->syncpt_block_index); + ctx->inited = false; + } +} + +static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx) +{ + enum pva_error err; + struct pva_kmd_queue_destroy_in_args args; + + for (uint32_t queue_id = 0u; queue_id < ctx->max_n_queues; queue_id++) { + struct pva_kmd_queue *queue = + pva_kmd_get_block(&ctx->queue_allocator, queue_id); + if (queue != NULL) { + args.queue_id = queue_id; + err = 
pva_kmd_queue_destroy(ctx, &args); + ASSERT(err == PVA_SUCCESS); + } + } +} + +void pva_kmd_context_destroy(struct pva_kmd_context *ctx) +{ + enum pva_error err; + + pva_kmd_destroy_all_queues(ctx); + pva_kmd_context_deinit(ctx); + pva_kmd_block_allocator_deinit(&ctx->queue_allocator); + pva_kmd_free(ctx->queue_allocator_mem); + pva_kmd_mutex_deinit(&ctx->ccq_lock); + pva_kmd_mutex_deinit(&ctx->resource_table_lock); + err = pva_kmd_free_block(&ctx->pva->context_allocator, ctx->ccq_id); + ASSERT(err == PVA_SUCCESS); +} + +struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva, + uint8_t alloc_id) +{ + return pva_kmd_get_block(&pva->context_allocator, alloc_id); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h new file mode 100644 index 00000000..32c138b8 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_CONTEXT_H +#define PVA_KMD_CONTEXT_H +#include "pva_api.h" +#include "pva_constants.h" +#include "pva_kmd_block_allocator.h" +#include "pva_kmd_resource_table.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_mutex.h" +#include "pva_kmd_submitter.h" + +struct pva_kmd_device; + +/** + * @brief This struct manages a user context in KMD. + * + * One KMD user context is uniquely mapped to a UMD user context. Each context + * is assigned a unique CCQ block and, on QNX and Linux, a unique file + * descriptor. + */ +struct pva_kmd_context { + struct pva_kmd_device *pva; + uint8_t resource_table_id; + uint8_t ccq_id; + uint8_t smmu_ctx_id; + + bool inited; + + pva_kmd_mutex_t resource_table_lock; + struct pva_kmd_resource_table ctx_resource_table; + + struct pva_kmd_submitter submitter; + /** The lock protects the submission to the queue, including + * incrementing the post fence */ + pva_kmd_mutex_t submit_lock; + /** Privileged queue owned by this context. It uses the privileged + * resource table (ID 0). */ + struct pva_kmd_device_memory *ctx_queue_mem; + + /** Privileged queue owned by the context */ + struct pva_kmd_queue ctx_queue; + /** Pointer to the ccq0 lock owned by device*/ + pva_kmd_mutex_t *ccq0_lock_ptr; + + /** memory needed for submission: including command buffer chunks and fences */ + struct pva_kmd_device_memory *submit_memory; + /** Resource ID of the submission memory, registered with the privileged resource table (ID 0) */ + uint32_t submit_memory_resource_id; + uint64_t fence_offset; /**< fence offset within submit_memory*/ + + pva_kmd_mutex_t chunk_pool_lock; + struct pva_kmd_cmdbuf_chunk_pool chunk_pool; + + uint32_t max_n_queues; + void *queue_allocator_mem; + struct pva_kmd_block_allocator queue_allocator; + + /** This lock protects the context's own CCQ access. We don't really use + * it because we don't do user queue submission in KMD. 
+ */ + pva_kmd_mutex_t ccq_lock; + void *plat_data; + uint64_t ccq_shm_handle; + + /** Index of block of syncpoints allocated for this context */ + uint32_t syncpt_block_index; + uint32_t syncpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT]; +}; + +/** + * @brief Allocate a KMD context. + */ +struct pva_kmd_context *pva_kmd_context_create(struct pva_kmd_device *pva); + +/** + * @brief Destroy a KMD context. + */ +void pva_kmd_context_destroy(struct pva_kmd_context *ctx); + +/** + * @brief Initialize a KMD context. + * + * Sets up the context's resource table (with res_table_capacity entries), its + * privileged queue and its submission memory, then notifies FW via KMD's own queue. + */ +enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, + uint32_t res_table_capacity); + +void pva_kmd_context_deinit(struct pva_kmd_context *ctx); + +struct pva_kmd_context *pva_kmd_get_context(struct pva_kmd_device *pva, + uint8_t alloc_id); + +#endif // PVA_KMD_CONTEXT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c new file mode 100644 index 00000000..e309d2ff --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_kmd_device.h" +#include "pva_kmd_debugfs.h" +#include "pva_kmd_fw_profiler.h" +#include "pva_kmd_silicon_utils.h" +#include "pva_kmd_vpu_ocd.h" +#include "pva_kmd_tegra_stats.h" +#include "pva_kmd_vpu_app_auth.h" + +void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva) +{ + static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3", + "ocd_vpu1_v3" }; + pva_kmd_debugfs_create_bool(pva, "stats_enable", + &pva->debugfs_context.stats_enable); + pva_kmd_debugfs_create_bool(pva, "vpu_debug", + &pva->debugfs_context.vpu_debug); + pva_kmd_debugfs_create_u32(pva, "profile_level", + &pva->debugfs_context.profile_level); + pva->debugfs_context.vpu_fops.read = &update_vpu_stats; + pva->debugfs_context.vpu_fops.pdev = pva; + pva_kmd_debugfs_create_file(pva, "vpu_stats", + &pva->debugfs_context.vpu_fops); + for (uint32_t i = 0; i < NUM_VPU_BLOCKS; i++) { + pva->debugfs_context.vpu_ocd_fops[i].open = + &pva_kmd_vpu_ocd_open; + pva->debugfs_context.vpu_ocd_fops[i].release = + &pva_kmd_vpu_ocd_release; + pva->debugfs_context.vpu_ocd_fops[i].read = + &pva_kmd_vpu_ocd_read; + pva->debugfs_context.vpu_ocd_fops[i].write = + &pva_kmd_vpu_ocd_write; + pva->debugfs_context.vpu_ocd_fops[i].pdev = pva; + pva->debugfs_context.vpu_ocd_fops[i].file_data = + (void *)&pva->regspec.vpu_dbg_instr_reg_offset[i]; + pva_kmd_debugfs_create_file( + pva, vpu_ocd_names[i], + &pva->debugfs_context.vpu_ocd_fops[i]); + } + + pva->debugfs_context.allowlist_fops.write = &update_vpu_allowlist; + pva->debugfs_context.allowlist_fops.pdev = pva; + pva_kmd_debugfs_create_file(pva, "vpu_app_authentication", + &pva->debugfs_context.allowlist_fops); + + pva_kmd_device_init_profiler(pva); + pva_kmd_device_init_tegra_stats(pva); +} + +void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva) +{ + pva_kmd_device_deinit_tegra_stats(pva); +
pva_kmd_device_deinit_profiler(pva); + pva_kmd_debugfs_remove_nodes(pva); +} + +static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats, + uint8_t *out_buffer, uint64_t len) +{ + char kernel_buffer[256]; + int64_t formatted_len; + + formatted_len = snprintf( + kernel_buffer, sizeof(kernel_buffer), + "%llu\n%llu\n%llu\n%llu\n", + (long long unsigned int)(kmd_tegra_stats->window_start_time), + (long long unsigned int)(kmd_tegra_stats->window_end_time), + (long long unsigned int) + kmd_tegra_stats->average_vpu_utilization[0], + (long long unsigned int) + kmd_tegra_stats->average_vpu_utilization[1]); + + if (formatted_len <= 0) { + return 0; + } + + formatted_len++; //accounting for null terminating character + + if (len < (uint64_t)formatted_len) { + return 0; + } + + // Copy the formatted string from kernel buffer to user buffer + if (pva_kmd_copy_data_to_user(out_buffer, kernel_buffer, + formatted_len)) { + pva_kmd_log_err("failed to copy read buffer to user"); + return 0; + } + + return formatted_len; +} + +int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data, + uint8_t *out_buffer, uint64_t offset, uint64_t size) +{ + uint64_t size_read = 0U; + struct pva_kmd_tegrastats kmd_tegra_stats; + + kmd_tegra_stats.window_start_time = 0; + kmd_tegra_stats.window_end_time = 0; + kmd_tegra_stats.average_vpu_utilization[0] = 0; + kmd_tegra_stats.average_vpu_utilization[1] = 0; + + pva_kmd_log_err("Reading VPU stats"); + pva_kmd_notify_fw_get_tegra_stats(dev, &kmd_tegra_stats); + + size_read = print_vpu_stats(&kmd_tegra_stats, out_buffer, size); + + return size_read; +} + +int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, + const uint8_t *in_buffer, uint64_t offset, + uint64_t size) +{ + char strbuf[2]; // 1 byte for '0' or '1' and another 1 byte for the Null character + uint32_t pva_auth_enable; + unsigned long retval; + retval = pva_kmd_copy_data_from_user(strbuf, in_buffer, sizeof(strbuf)); + if (retval != 0u) { + pva_kmd_log_err("Failed to copy write buffer from user"); + return -1; + } + + pva_auth_enable = pva_kmd_strtol(strbuf, 16); + + pva->pva_auth->pva_auth_enable = (pva_auth_enable == 1) ? true : false; + + if (pva->pva_auth->pva_auth_enable) + pva->pva_auth->pva_auth_allow_list_parsed = false; + + return 2; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h new file mode 100644 index 00000000..8040aeb2 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#ifndef PVA_KMD_DEBUGFS_H +#define PVA_KMD_DEBUGFS_H +#include "pva_kmd.h" +#include "pva_kmd_shim_debugfs.h" +#include "pva_kmd_fw_profiler.h" + +#define NUM_VPU_BLOCKS 2U + +/** + * Maximum length of file operation + */ +#define MAX_FILE_LEN 256U + +struct pva_kmd_file_ops { + int (*open)(struct pva_kmd_device *dev); + int (*release)(struct pva_kmd_device *dev); + int64_t (*read)(struct pva_kmd_device *dev, void *file_data, + uint8_t *data, uint64_t offset, uint64_t size); + int64_t (*write)(struct pva_kmd_device *dev, void *file_data, + const uint8_t *data, uint64_t offset, uint64_t size); + void *pdev; + void *file_data; +}; + +struct pva_kmd_debugfs_context { + bool stats_enable; + bool vpu_debug; + bool vpu_print_enable; + char *allowlist_path; + uint32_t profile_level; + struct pva_kmd_file_ops vpu_fops; + struct pva_kmd_file_ops allowlist_fops; + struct pva_kmd_file_ops hwpm_fops; + void *data_hwpm; + struct pva_kmd_file_ops vpu_ocd_fops[NUM_VPU_BLOCKS]; + struct pva_kmd_fw_profiling_config g_fw_profiling_config; +}; + +void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev); +void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev); +int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data, + uint8_t *out_buffer, uint64_t offset, uint64_t size); +int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, + const uint8_t *in_buffer, uint64_t offset, + uint64_t size); +#endif //PVA_KMD_DEBUGFS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c new file mode 100644 index 00000000..95d299f3 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_api_types.h" +#include "pva_kmd_fw_debug.h" +#include "pva_kmd_utils.h" +#include "pva_api_cmdbuf.h" +#include "pva_api.h" +#include "pva_kmd_constants.h" +#include "pva_fw.h" +#include "pva_bit.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device.h" +#include "pva_kmd_context.h" +#include "pva_kmd_t23x.h" +#include "pva_kmd_t26x.h" +#include "pva_kmd_regs.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_fw_profiler.h" +#include "pva_kmd_vpu_app_auth.h" +#include "pva_utils.h" +#include "pva_kmd_debugfs.h" +#include "pva_kmd_tegra_stats.h" +#include "pva_kmd_shim_silicon.h" + +/** + * @brief Send address and size of the resource table to FW through CCQ. + * + * Initialization through CCQ is only intended for KMD's own resource table (the + * first resource table created). 
+ */ +void pva_kmd_send_resource_table_info_by_ccq( + struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table) +{ + enum pva_error err; + uint64_t addr = res_table->table_mem->iova; + uint32_t n_entries = res_table->n_entries; + uint64_t ccq_entry = + PVA_INSERT64(PVA_FW_CCQ_OP_SET_RESOURCE_TABLE, + PVA_FW_CCQ_OPCODE_MSB, PVA_FW_CCQ_OPCODE_LSB) | + PVA_INSERT64(addr, PVA_FW_CCQ_RESOURCE_TABLE_ADDR_MSB, + PVA_FW_CCQ_RESOURCE_TABLE_ADDR_LSB) | + PVA_INSERT64(n_entries, PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_MSB, + PVA_FW_CCQ_RESOURCE_TABLE_N_ENTRIES_LSB); + + pva_kmd_mutex_lock(&pva->ccq0_lock); + err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + ASSERT(err == PVA_SUCCESS); + pva_kmd_mutex_unlock(&pva->ccq0_lock); +} + +/** + * @brief Send address and size of the queue to FW through CCQ. + * + * Initialization through CCQ is only intended for KMD's own queue (the first + * queue created). + */ +void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, + struct pva_kmd_queue *queue) +{ + enum pva_error err; + uint64_t addr = queue->queue_memory->iova; + uint32_t max_submit = queue->max_num_submit; + uint64_t ccq_entry = + PVA_INSERT64(PVA_FW_CCQ_OP_SET_SUBMISSION_QUEUE, + PVA_FW_CCQ_OPCODE_MSB, PVA_FW_CCQ_OPCODE_LSB) | + PVA_INSERT64(addr, PVA_FW_CCQ_QUEUE_ADDR_MSB, + PVA_FW_CCQ_QUEUE_ADDR_LSB) | + PVA_INSERT64(max_submit, PVA_FW_CCQ_QUEUE_N_ENTRIES_MSB, + PVA_FW_CCQ_QUEUE_N_ENTRIES_LSB); + pva_kmd_mutex_lock(&pva->ccq0_lock); + err = pva_kmd_ccq_push_with_timeout(pva, PVA_PRIV_CCQ_ID, ccq_entry, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + ASSERT(err == PVA_SUCCESS); + pva_kmd_mutex_unlock(&pva->ccq0_lock); +} + +/** + * Initialize submission related data structures for this device. + * + * - Create a resource table. + * - Add DRAM resources to the resource table. These are used for command buffer + * chunks and post fences. + * - Create a queue. + */ +static void pva_kmd_device_init_submission(struct pva_kmd_device *pva) +{ + uint32_t queue_mem_size; + uint64_t chunk_mem_size; + uint64_t size; + enum pva_error err; + struct pva_fw_postfence post_fence = { 0 }; + + /* Init KMD's queue */ + queue_mem_size = pva_get_submission_queue_memory_size( + PVA_KMD_MAX_NUM_KMD_SUBMITS); + + pva->queue_memory = pva_kmd_device_memory_alloc_map( + queue_mem_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + ASSERT(pva->queue_memory != NULL); + + pva_kmd_queue_init(&pva->dev_queue, pva, PVA_PRIV_CCQ_ID, + 0 /* KMD's queue ID is 0 */, &pva->ccq0_lock, + pva->queue_memory, PVA_KMD_MAX_NUM_KMD_SUBMITS); + + /* Init KMD's resource table */ + err = pva_kmd_resource_table_init(&pva->dev_resource_table, pva, + PVA_R5_SMMU_CONTEXT_ID, + PVA_KMD_MAX_NUM_KMD_RESOURCES, + PVA_KMD_MAX_NUM_KMD_DMA_CONFIGS); + ASSERT(err == PVA_SUCCESS); + + /* Allocate memory for submission*/ + chunk_mem_size = pva_kmd_cmdbuf_pool_get_required_mem_size( + PVA_MAX_CMDBUF_CHUNK_SIZE, PVA_KMD_MAX_NUM_KMD_CHUNKS); + + size = safe_addu64(chunk_mem_size, (uint64_t)sizeof(uint32_t)); + /* Allocate one post fence at the end. We don't need to free this memory + * explicitly as it will be freed after we drop the resource. 
*/ + pva->submit_memory = pva_kmd_device_memory_alloc_map( + size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + ASSERT(pva->submit_memory != NULL); + + /* Add submit memory to resource table */ + err = pva_kmd_add_dram_buffer_resource(&pva->dev_resource_table, + pva->submit_memory, + &pva->submit_memory_resource_id); + ASSERT(err == PVA_SUCCESS); + pva_kmd_update_fw_resource_table(&pva->dev_resource_table); + + /* Init chunk pool */ + pva_kmd_cmdbuf_chunk_pool_init( + &pva->chunk_pool, pva->submit_memory_resource_id, 0, + chunk_mem_size, PVA_MAX_CMDBUF_CHUNK_SIZE, + PVA_KMD_MAX_NUM_KMD_CHUNKS, pva->submit_memory->va); + + /* Init fence */ + pva->fence_offset = chunk_mem_size; + + /* Init submitter */ + pva_kmd_mutex_init(&pva->submit_lock); + pva_kmd_mutex_init(&pva->chunk_pool_lock); + post_fence.resource_id = pva->submit_memory_resource_id; + post_fence.offset_lo = iova_lo(pva->fence_offset); + post_fence.offset_hi = iova_hi(pva->fence_offset); + post_fence.ts_resource_id = PVA_RESOURCE_ID_INVALID; + pva_kmd_submitter_init( + &pva->submitter, &pva->dev_queue, &pva->submit_lock, + &pva->chunk_pool, &pva->chunk_pool_lock, + pva_offset_pointer(pva->submit_memory->va, pva->fence_offset), + &post_fence); +} + +static void pva_kmd_device_deinit_submission(struct pva_kmd_device *pva) +{ + pva_kmd_mutex_deinit(&pva->chunk_pool_lock); + pva_kmd_mutex_deinit(&pva->submit_lock); + pva_kmd_cmdbuf_chunk_pool_deinit(&pva->chunk_pool); + /* Submit memory will be freed after dropping the resource */ + pva_kmd_drop_resource(&pva->dev_resource_table, + pva->submit_memory_resource_id); + pva_kmd_resource_table_deinit(&pva->dev_resource_table); + pva_kmd_queue_deinit(&pva->dev_queue); + pva_kmd_device_memory_free(pva->queue_memory); +} + +struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, + uint32_t device_index, + bool app_authenticate) +{ + struct pva_kmd_device *pva; + enum pva_error err; + uint32_t chunk_size; + uint32_t size; + + pva = pva_kmd_zalloc_nofail(sizeof(*pva)); + + pva->device_index = device_index; + pva->load_from_gsc = false; + pva->is_hv_mode = true; + pva->max_n_contexts = PVA_MAX_NUM_USER_CONTEXTS; + pva_kmd_mutex_init(&pva->powercycle_lock); + pva_kmd_mutex_init(&pva->ccq0_lock); + pva_kmd_mutex_init(&pva->resource_table_lock); + pva_kmd_sema_init(&pva->fw_boot_sema, 0); + size = safe_mulu32((uint32_t)sizeof(struct pva_kmd_context), + pva->max_n_contexts); + pva->context_mem = pva_kmd_zalloc(size); + ASSERT(pva->context_mem != NULL); + + err = pva_kmd_block_allocator_init(&pva->context_allocator, + pva->context_mem, + PVA_KMD_USER_CONTEXT_ID_BASE, + sizeof(struct pva_kmd_context), + pva->max_n_contexts); + ASSERT(err == PVA_SUCCESS); + + if (chip_id == PVA_CHIP_T23X) { + pva_kmd_device_init_t23x(pva); + } else if (chip_id == PVA_CHIP_T26X) { + pva_kmd_device_init_t26x(pva); + } else { + FAULT("SOC not supported"); + } + + pva_kmd_device_plat_init(pva); + + chunk_size = safe_mulu32((uint32_t)sizeof(struct pva_syncpt_rw_info), + (uint32_t)PVA_NUM_RW_SYNCPTS_PER_CONTEXT); + err = pva_kmd_block_allocator_init(&pva->syncpt_allocator, + pva->syncpt_rw, 0, chunk_size, + PVA_MAX_NUM_USER_CONTEXTS); + ASSERT(err == PVA_SUCCESS); + + pva_kmd_device_init_submission(pva); + + err = pva_kmd_init_vpu_app_auth(pva, app_authenticate); + ASSERT(err == PVA_SUCCESS); + + pva->is_suspended = false; + + return pva; +} + +static void pva_kmd_wait_for_active_contexts(struct pva_kmd_device *pva) +{ + uint8_t allocated = 0; + + /* Make sure no context is active by allocating all contexts 
here. */ + while (allocated < pva->max_n_contexts) { + uint32_t unused_id; + struct pva_kmd_context *ctx; + + ctx = pva_kmd_alloc_block(&pva->context_allocator, &unused_id); + if (ctx != NULL) { + allocated = safe_addu32(allocated, 1U); + } else { + pva_kmd_sleep_us(1000); + } + } +} + +void pva_kmd_device_destroy(struct pva_kmd_device *pva) +{ + pva_kmd_wait_for_active_contexts(pva); + pva_kmd_device_deinit_submission(pva); + pva_kmd_device_plat_deinit(pva); + pva_kmd_block_allocator_deinit(&pva->syncpt_allocator); + pva_kmd_block_allocator_deinit(&pva->context_allocator); + pva_kmd_free(pva->context_mem); + pva_kmd_mutex_deinit(&pva->ccq0_lock); + pva_kmd_mutex_deinit(&pva->resource_table_lock); + pva_kmd_mutex_deinit(&pva->powercycle_lock); + pva_kmd_free(pva->pva_auth); + pva_kmd_free(pva); +} + +enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva) +{ + enum pva_error err = PVA_SUCCESS; + + pva_kmd_mutex_lock(&pva->powercycle_lock); + if (pva->refcount == 0) { + pva_kmd_allocate_syncpts(pva); + + err = pva_kmd_power_on(pva); + if (err != PVA_SUCCESS) { + goto unlock; + } + + err = pva_kmd_init_fw(pva); + if (err != PVA_SUCCESS) { + goto unlock; + } + /* Reset KMD queue */ + pva->dev_queue.queue_header->cb_head = 0; + pva->dev_queue.queue_header->cb_tail = 0; + + pva_kmd_send_resource_table_info_by_ccq( + pva, &pva->dev_resource_table); + pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue); + pva_kmd_notify_fw_enable_profiling(pva); + } + pva->refcount = safe_addu32(pva->refcount, 1U); + +unlock: + pva_kmd_mutex_unlock(&pva->powercycle_lock); + return err; +} + +void pva_kmd_device_idle(struct pva_kmd_device *pva) +{ + pva_kmd_mutex_lock(&pva->powercycle_lock); + ASSERT(pva->refcount > 0); + pva->refcount--; + if (pva->refcount == 0) { + /* Disable FW profiling */ + /* TODO: once debugfs is up, move these calls */ + // pva_kmd_notify_fw_disable_profiling(pva); + // pva_kmd_drain_fw_profiling_buffer(pva, + // &pva->fw_profiling_buffer); + pva_kmd_deinit_fw(pva); + pva_kmd_power_off(pva); + } + pva_kmd_mutex_unlock(&pva->powercycle_lock); +} + +enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva, + uint8_t ccq_id, uint64_t ccq_entry, + uint64_t sleep_interval_us, + uint64_t timeout_us) +{ + /* spin until we have space or timeout reached */ + while (pva_kmd_get_ccq_space(pva, ccq_id) == 0) { + if (timeout_us == 0) { + pva_kmd_log_err( + "pva_kmd_ccq_push_with_timeout Timed out"); + return PVA_TIMEDOUT; + } + pva_kmd_sleep_us(sleep_interval_us); + timeout_us = sat_sub64(timeout_us, sleep_interval_us); + } + /* TODO: memory write barrier is needed here */ + pva_kmd_ccq_push(pva, ccq_id, ccq_entry); + + return PVA_SUCCESS; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h new file mode 100644 index 00000000..99687f6a --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#ifndef PVA_KMD_DEVICE_H +#define PVA_KMD_DEVICE_H +#include "pva_constants.h" +#include "pva_kmd_cmdbuf.h" +#include "pva_kmd_utils.h" +#include "pva_kmd_mutex.h" +#include "pva_kmd_block_allocator.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_resource_table.h" +#include "pva_kmd_submitter.h" +#include "pva_kmd_regs.h" +#include "pva_kmd_thread_sema.h" +#include "pva_kmd_fw_debug.h" +#include "pva_kmd_shim_init.h" +#include "pva_kmd_shim_ccq.h" +#include "pva_kmd_fw_profiler.h" +#include "pva_kmd_constants.h" +#include "pva_kmd_debugfs.h" + +struct pva_syncpt_rw_info { + /** Don't switch the order: syncpt_id and syncpt_iova are prefilled during KMD boot, + * and the first field gets updated by pva_kmd_allocator every time it is freed. */ + uint32_t syncpt_value; + uint32_t syncpt_id; + uint64_t syncpt_iova; +}; + +/** A struct holding the start and end addresses of a VMEM region */ +struct vmem_region { + /** Start address of the VMEM region */ + uint32_t start; + /** End address of the VMEM region */ + uint32_t end; +}; + +struct pva_kmd_hw_constants { + enum pva_hw_gen hw_gen; + uint8_t n_vmem_regions; + uint32_t n_dma_descriptors; + uint32_t n_user_dma_channels; + uint32_t n_hwseq_words; + uint32_t n_dynamic_adb_buffs; + uint32_t n_smmu_contexts; +}; + +/** + * @brief This struct manages a single PVA cluster. + * + * Fields in this struct should be common across all platforms. Platform- + * specific data is stored in the plat_data field. + */ +struct pva_kmd_device { + uint32_t device_index; + uint32_t r5_image_smmu_context_id; + uint32_t stream_ids[PVA_MAX_NUM_SMMU_CONTEXTS]; + + struct pva_kmd_hw_constants hw_consts; + + uint64_t reg_phy_base[PVA_KMD_APERTURE_COUNT]; + uint64_t reg_size[PVA_KMD_APERTURE_COUNT]; + + struct pva_kmd_regspec regspec; + + uint8_t max_n_contexts; + void *context_mem; + struct pva_kmd_block_allocator context_allocator; + + pva_kmd_mutex_t resource_table_lock; + struct pva_kmd_resource_table dev_resource_table; + + struct pva_kmd_submitter submitter; + /** The lock protects the submission to the queue, including + * incrementing the post fence */ + pva_kmd_mutex_t submit_lock; + struct pva_kmd_device_memory *queue_memory; + struct pva_kmd_queue dev_queue; + pva_kmd_mutex_t ccq0_lock; + + /** memory needed for submission: including command buffer chunks and fences */ + struct pva_kmd_device_memory *submit_memory; + uint32_t submit_memory_resource_id; + uint64_t fence_offset; /**< fence offset within submit_memory */ + + pva_kmd_mutex_t chunk_pool_lock; + struct pva_kmd_cmdbuf_chunk_pool chunk_pool; + + pva_kmd_mutex_t powercycle_lock; + uint32_t refcount; + + /** The ISR posts this semaphore when FW completes boot */ + pva_kmd_sema_t fw_boot_sema; + + struct pva_kmd_device_memory *fw_debug_mem; + struct pva_kmd_device_memory *fw_bin_mem; + struct pva_kmd_device_memory *fw_profiling_buffer_memory; + uint32_t fw_profiling_buffer_resource_id; + struct pva_kmd_fw_profiling_buffer fw_profiling_buffer; + struct pva_kmd_fw_print_buffer fw_print_buffer; + + struct pva_kmd_device_memory *tegra_stats_memory; + uint32_t tegra_stats_resource_id; + uint32_t tegra_stats_buf_size; + + bool load_from_gsc; + bool is_hv_mode; + struct pva_kmd_debugfs_context debugfs_context; + /** Sector packing format for block linear surfaces */ + uint8_t bl_sector_pack_format; + + /** Offset between 2 syncpoints */ + uint32_t syncpt_offset; + uint64_t syncpt_ro_iova; + uint64_t syncpt_rw_iova; + uint32_t num_syncpts; + struct pva_syncpt_rw_info syncpt_rw[PVA_NUM_RW_SYNCPTS]; + struct pva_kmd_block_allocator
syncpt_allocator; + + struct vmem_region *vmem_regions_tab; + bool support_hwseq_frame_linking; + + void *plat_data; + void *fw_handle; + + struct pva_vpu_auth *pva_auth; + bool is_suspended; +}; + +struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, + uint32_t device_index, + bool app_authenticate); + +void pva_kmd_device_destroy(struct pva_kmd_device *pva); + +enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva); +void pva_kmd_device_idle(struct pva_kmd_device *pva); + +enum pva_error pva_kmd_ccq_push_with_timeout(struct pva_kmd_device *pva, + uint8_t ccq_id, uint64_t ccq_entry, + uint64_t sleep_interval_us, + uint64_t timeout_us); + +void pva_kmd_send_resource_table_info_by_ccq( + struct pva_kmd_device *pva, struct pva_kmd_resource_table *res_table); + +void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, + struct pva_kmd_queue *queue); +#endif // PVA_KMD_DEVICE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c new file mode 100644 index 00000000..af69bfe5 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_kmd_dma_cfg.h" +#include "pva_utils.h" +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device.h" + +#define PVA_KMD_INVALID_CH_IDX 0xFF + +void pva_kmd_unload_dma_config(struct pva_kmd_dma_resource_aux *dma_aux) +{ + uint32_t i; + + for (i = 0; i < dma_aux->dram_res_count; i++) { + pva_kmd_drop_resource(dma_aux->res_table, + dma_aux->static_dram_res_ids[i]); + } + + if (dma_aux->vpu_bin_res_id != PVA_RESOURCE_ID_INVALID) { + pva_kmd_drop_resource(dma_aux->res_table, + dma_aux->vpu_bin_res_id); + } +} + +static void trace_dma_channels(struct pva_dma_config const *dma_config, + uint8_t *desc_to_ch) +{ + uint32_t ch_index; + struct pva_dma_config_header const *cfg_hdr = &dma_config->header; + struct pva_dma_channel *channel; + uint32_t num_descs = dma_config->header.num_descriptors; + + for (ch_index = 0; ch_index < cfg_hdr->num_channels; ch_index++) { + uint8_t desc_index; + + channel = &dma_config->channels[ch_index]; + desc_index = channel->desc_index; + for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) { + desc_index = array_index_nospec(desc_index, num_descs); + if (desc_to_ch[desc_index] != PVA_KMD_INVALID_CH_IDX) { + //Already traced this descriptor + break; + } + desc_to_ch[desc_index] = ch_index; + desc_index = sat_sub8( + dma_config->descriptors[desc_index].link_desc_id, + 1); + } + } +} + +enum pva_error +pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, + void *dma_config_payload, uint32_t dma_config_size, + struct pva_kmd_dma_resource_aux *dma_aux, + void *fw_dma_cfg, uint32_t *out_fw_fetch_size) +{ + enum pva_error err = PVA_SUCCESS; + uint32_t fw_fetch_size; + struct pva_dma_config dma_config; + struct pva_fw_dma_slot *dyn_slots; + struct pva_fw_dma_reloc *dyn_relocs; + struct pva_fw_dma_slot *static_slots = dma_aux->static_slots; + struct pva_fw_dma_reloc *static_relocs = 
dma_aux->static_relocs; + struct pva_kmd_dma_access *access_sizes = dma_aux->access_sizes; + // Mapping descriptor index to channel index + uint8_t desc_to_ch[PVA_MAX_NUM_DMA_DESC]; + + for (uint32_t i = 0; i < PVA_MAX_NUM_DMA_DESC; i++) { + desc_to_ch[i] = PVA_KMD_INVALID_CH_IDX; + } + + //set access_sizes to 0 by default + (void)memset( + access_sizes, 0, + (PVA_MAX_NUM_DMA_DESC * sizeof(struct pva_kmd_dma_access))); + + err = pva_kmd_parse_dma_config(dma_config_payload, dma_config_size, + &dma_config, + &resource_table->pva->hw_consts); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_validate_dma_config(&dma_config, + &resource_table->pva->hw_consts, + access_sizes, + dma_aux->hw_dma_descs_mask); + if (err != PVA_SUCCESS) { + goto err_out; + } + + trace_dma_channels(&dma_config, desc_to_ch); + + err = pva_kmd_compute_dma_access(&dma_config, access_sizes, + dma_aux->hw_dma_descs_mask); + if (err != PVA_SUCCESS) { + goto err_out; + } + + dyn_slots = pva_offset_pointer(fw_dma_cfg, + sizeof(struct pva_dma_config_resource)); + + dyn_relocs = pva_offset_pointer(dyn_slots, + dma_config.header.num_dynamic_slots * + sizeof(*dyn_slots)); + + pva_kmd_collect_relocs(&dma_config, access_sizes, static_slots, + dma_config.header.num_static_slots, + static_relocs, dyn_slots, + dma_config.header.num_dynamic_slots, dyn_relocs, + desc_to_ch); + + pva_kmd_write_fw_dma_config( + &dma_config, fw_dma_cfg, &fw_fetch_size, + resource_table->pva->support_hwseq_frame_linking); + + dma_aux->res_table = resource_table; + err = pva_kmd_dma_use_resources(&dma_config, dma_aux); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_bind_static_buffers(fw_dma_cfg, dma_aux, static_slots, + dma_config.header.num_static_slots, + static_relocs, + dma_config.static_bindings, + dma_config.header.num_static_slots); + if (err != PVA_SUCCESS) { + goto drop_res; + } + + *out_fw_fetch_size = fw_fetch_size; + + return PVA_SUCCESS; +drop_res: + pva_kmd_unload_dma_config(dma_aux); +err_out: + return err; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h new file mode 100644 index 00000000..3a3c277b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#ifndef PVA_KMD_DMA_CFG_H +#define PVA_KMD_DMA_CFG_H + +#include "pva_kmd.h" +#include "pva_resource.h" + +/* Mask to extract the GOB offset from the Surface address */ +#define PVA_DMA_BL_GOB_OFFSET_MASK 0x3E00U + +/* Right shift value for moving GOB offset value extracted from surface address to LSB */ +#define PVA_DMA_BL_GOB_OFFSET_MASK_RSH 6U + +struct pva_kmd_dma_access_entry { + int64_t start_addr; + int64_t end_addr; +}; +struct pva_kmd_dma_access { + struct pva_kmd_dma_access_entry src; + struct pva_kmd_dma_access_entry dst; + struct pva_kmd_dma_access_entry dst2; +}; + +struct pva_kmd_resource_table; +struct pva_kmd_hw_constants; + +/** Auxiliary information needed for managing DMA resources: + * + * - Hold references to DRAM buffers and VPU bin used by the DMA configuration. + * - Scratch buffers needed during DMA configuration loading. + */ +struct pva_kmd_dma_resource_aux { + struct pva_kmd_resource_table *res_table; + uint32_t vpu_bin_res_id; + + uint32_t dram_res_count; + /** DRAM buffers statically referenced by the DMA configuration */ + uint32_t static_dram_res_ids[PVA_KMD_MAX_NUM_DMA_DRAM_SLOTS]; + + /* Below are work buffers need during DMA configuration loading. They + * don't fit on stack. */ + struct pva_fw_dma_slot static_slots[PVA_KMD_MAX_NUM_DMA_SLOTS]; + struct pva_fw_dma_reloc static_relocs[PVA_KMD_MAX_NUM_DMA_SLOTS]; + struct pva_kmd_dma_access access_sizes[PVA_MAX_NUM_DMA_DESC]; + uint64_t hw_dma_descs_mask[((PVA_MAX_NUM_DMA_DESC / 64ULL) + 1ULL)]; +}; + +enum pva_error +pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size, + struct pva_dma_config *out_cfg, + struct pva_kmd_hw_constants const *hw_consts); + +enum pva_error +pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg, + struct pva_kmd_dma_resource_aux *dma_aux); + +enum pva_error +pva_kmd_validate_dma_config(struct pva_dma_config const *dma_cfg, + struct pva_kmd_hw_constants const *hw_consts, + struct pva_kmd_dma_access *access_sizes, + uint64_t *hw_dma_descs_mask); + +enum pva_error +pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg, + struct pva_kmd_dma_access *access_sizes, + uint64_t *hw_dma_descs_mask); + +void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg, + struct pva_kmd_dma_access const *access_sizes, + struct pva_fw_dma_slot *out_static_slots, + uint16_t num_static_slots, + struct pva_fw_dma_reloc *out_static_relocs, + struct pva_fw_dma_slot *out_dyn_slots, + uint16_t num_dyn_slots, + struct pva_fw_dma_reloc *out_dyn_relocs, + uint8_t const *desc_to_ch); + +/** + * @brief Bind static buffers to the DMA configuration. + * + * When binding static buffers, we edit pva_dma_config in-place and replace the + * offset field with the final addresses of static buffers. + * + * We also validate that the DMA configuration does not access those static + * buffers out of range. + */ +enum pva_error pva_kmd_bind_static_buffers( + struct pva_dma_config_resource *fw_dma_cfg, + struct pva_kmd_dma_resource_aux *dma_aux, + struct pva_fw_dma_slot const *static_slots, uint16_t num_static_slots, + struct pva_fw_dma_reloc const *static_relocs, + struct pva_dma_static_binding const *static_bindings, + uint32_t num_static_bindings); + +/** + * @brief Convert user DMA configuration to firmware format. + */ +void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg, + void *fw_dma_config, + uint32_t *out_fw_fetch_size, + bool support_hwseq_frame_linking); + +/** + * @brief Load DMA configuration into firmware format. 
+ * + * This function mostly does the following things: + * + * - Validate the DMA configuration. + * - Bind static resources (buffers) and embed their addresses directly in the + * firmware DMA configuration. + * - Hold references to DRAM buffers and VPU bin used by the DMA configuration. + * - Convert the DMA configuration into firmware format. + * + * @param resource_table the resource table for the context. + * @param dma_config DMA configuration from user space. + * @param dma_config_size Size of the dma_config buffer. + * @param dma_aux Auxiliary information needed for loading the DMA + * configuration. + * @param fw_dma_cfg Output buffer for the firmware DMA configuration. + * @param out_fw_fetch_size Size of the firmware DMA configuration that needs to + * be fetched into TCM. + */ +enum pva_error +pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, + void *dma_config, uint32_t dma_config_size, + struct pva_kmd_dma_resource_aux *dma_aux, + void *fw_dma_cfg, uint32_t *out_fw_fetch_size); + +void pva_kmd_unload_dma_config(struct pva_kmd_dma_resource_aux *dma_aux); +#endif // PVA_KMD_DMA_CFG_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c new file mode 100644 index 00000000..fce869ff --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c @@ -0,0 +1,369 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device_memory.h" +#include "pva_api.h" +#include "pva_kmd_dma_cfg.h" +#include "pva_api_dma.h" +#include "pva_kmd_constants.h" +#include "pva_kmd_device.h" + +static uint32_t get_slot_line_pitch(struct pva_fw_dma_descriptor *descs, + struct pva_fw_dma_reloc const *relocs, + struct pva_fw_dma_slot const *slot) +{ + struct pva_fw_dma_reloc const *reloc = &relocs[slot->reloc_start_idx]; + uint32_t first_desc_index = reloc->desc_index; + struct pva_fw_dma_descriptor *first_desc = &descs[first_desc_index]; + uint8_t log2_bpp = + PVA_EXTRACT(first_desc->transfer_control1, 1, 0, uint8_t); + + if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) { + return first_desc->slp_adv << log2_bpp; + } else { + return first_desc->dlp_adv << log2_bpp; + } +} + +static enum pva_error +set_channel_block_height(struct pva_dma_config_resource *dma_config, + uint16_t ch_mask, uint8_t log2_block_height) +{ + struct pva_fw_dma_channel *channels = + pva_dma_config_get_channels(dma_config); + + // max block height is 32 GOB + if (log2_block_height > PVA_DMA_MAX_LOG2_BLOCK_HEIGHT) { + pva_kmd_log_err("Invalid block height"); + return PVA_ERR_CMD_INVALID_BLOCK_HEIGHT; + } + + while (ch_mask > 0) { + uint8_t ch_index = __builtin_ctz(ch_mask); + if (dma_config->ch_block_height_fixed_mask & (1 << ch_index)) { + /* If this bit is already set, it means block height cannot be changed. 
*/ + uint8_t set_bh = PVA_EXTRACT(channels[ch_index].cntl0, + 27, 25, uint8_t); + if (set_bh != log2_block_height) { + pva_kmd_log_err("Conflicting block height"); + return PVA_INVAL; + } + } else { + channels[ch_index].cntl0 &= ~PVA_MASK(27, 25); + channels[ch_index].cntl0 |= + PVA_INSERT(log2_block_height, 27, 25); + + dma_config->ch_block_height_fixed_mask |= + (1 << ch_index); + } + + ch_mask &= ~(1 << ch_index); + } + return PVA_SUCCESS; +} + +static enum pva_error +bind_static_dram_slot(struct pva_dma_config_resource *dma_config, + struct pva_kmd_dma_resource_aux *dma_aux, + struct pva_fw_dma_slot const *slot, + struct pva_fw_dma_reloc const *static_relocs, + struct pva_dma_dram_binding const *dram_bd) +{ + struct pva_fw_dma_descriptor *descs = + pva_dma_config_get_descriptors(dma_config); + enum pva_error err = PVA_SUCCESS; + struct pva_fw_dma_reloc const *relocs; + bool is_block_linear = + (dram_bd->surface_format == PVA_SURF_FMT_BLOCK_LINEAR); + uint32_t line_pitch = get_slot_line_pitch(descs, static_relocs, slot); + uint8_t log2_block_height = dram_bd->log2_block_height; + struct pva_kmd_dram_resource *dram_res = + &pva_kmd_peek_resource(dma_aux->res_table, dram_bd->resource_id) + ->dram; + uint64_t slot_offset_pl = dram_bd->slot_offset; + uint64_t surface_base_addr = + sat_add64(dram_bd->surface_base_offset, dram_res->mem->iova); + /* When binding a buffer, we add the binding->surface_base_offset to the + * buffer base address. Therefore, the effective buffer size is + * reduced by the offset. */ + uint64_t max_surface_size = + sat_sub64(dram_res->mem->size, dram_bd->surface_base_offset); + uint64_t sector_pack_format = 0; + int64_t slot_access_start_addr = 0LL; + int64_t slot_access_end_addr = 0LL; + uint64_t slot_surface_combined_offset = 0ULL; + pva_math_error math_error = MATH_OP_SUCCESS; + + if ((slot->flags & PVA_FW_DMA_SLOT_FLAG_DRAM) == 0) { + pva_kmd_log_err("Binding DRAM buffer to incompatible slot"); + err = PVA_INVALID_BINDING; + goto out; + } + + if (is_block_linear) { + if (slot->flags & PVA_FW_DMA_SLOT_FLAG_CB) { + pva_kmd_log_err( + "Block linear surface is not compatible with circular buffer"); + err = PVA_INVALID_BINDING; + goto out; + } + max_surface_size = + pva_max_bl_surface_size(max_surface_size, + log2_block_height, line_pitch, + &math_error); + if (math_error != MATH_OP_SUCCESS) { + pva_kmd_log_err( + "bind_static_dram_slot pva_max_bl_surface_size triggered a math error"); + err = PVA_ERR_MATH_OP; + goto out; + } + + if (!pva_is_512B_aligned(surface_base_addr)) { + pva_kmd_log_err( + "BL surface base address is not 512B aligned"); + err = PVA_BAD_SURFACE_BASE_ALIGNMENT; + goto out; + } + + err = set_channel_block_height(dma_config, slot->ch_use_mask, + dram_bd->log2_block_height); + if (err != PVA_SUCCESS) { + goto out; + } + sector_pack_format = + dma_aux->res_table->pva->bl_sector_pack_format; + } + + slot_surface_combined_offset = addu64( + slot_offset_pl, dram_bd->surface_base_offset, &math_error); + + if (slot_surface_combined_offset >= (uint64_t)MAX_INT64) { + pva_kmd_log_err("Slot surface offset too large"); + return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE; + } + + slot_access_start_addr = + adds64(slot->start_addr, (int64_t)slot_surface_combined_offset, + &math_error); + + slot_access_end_addr = + adds64(slot->end_addr, (int64_t)slot_surface_combined_offset, + &math_error); + + max_surface_size = addu64(max_surface_size, + dram_bd->surface_base_offset, &math_error); + + if (max_surface_size >= (uint64_t)MAX_INT64) { + pva_kmd_log_err("DRAM buffer too 
large for slot binding"); + return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE; + } + + if (math_error != MATH_OP_SUCCESS) { + pva_kmd_log_err("Math error during slot binding"); + return PVA_ERR_MATH_OP; + } + + if (slot_access_start_addr < 0LL) { + pva_kmd_log_err( + "DRAM buffer offset underflows for slot binding"); + return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE; + } + + if (slot_access_end_addr > (int64_t)max_surface_size) { + pva_kmd_log_err("DRAM buffer too small for slot binding"); + return PVA_ERR_CMD_DRAM_BUF_OUT_OF_RANGE; + } + + relocs = &static_relocs[slot->reloc_start_idx]; + for (uint32_t i = 0; i < slot->reloc_count; i++) { + struct pva_fw_dma_reloc const *reloc = &relocs[i]; + struct pva_fw_dma_descriptor *desc = &descs[reloc->desc_index]; + uint8_t *addr_hi_ptr; + uint32_t *addr_lo_ptr; + uint32_t format_field_shift = 0; + uint64_t addr; + uint64_t desc_offset_pl; + uint64_t offset; + + if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) { + addr_hi_ptr = &desc->src_adr1; + addr_lo_ptr = &desc->src_adr0; + format_field_shift = 3; //SRC_TF in TRANSFER_CONTROL0 + } else if (reloc->field == PVA_FW_DMA_RELOC_FIELD_DST) { + addr_hi_ptr = &desc->dst_adr1; + addr_lo_ptr = &desc->dst_adr0; + format_field_shift = 7; //DST_TF in TRANSFER_CONTROL0 + } else { /* PVA_FW_DMA_RELOC_FIELD_DST2 */ + pva_kmd_log_err("Binding DRAM buffer to DST2 slot"); + err = PVA_INVAL; + goto out; + } + desc_offset_pl = assemble_addr(*addr_hi_ptr, *addr_lo_ptr); + offset = sat_add64(slot_offset_pl, desc_offset_pl); + desc->transfer_control0 &= ~(1 << format_field_shift); + if (is_block_linear) { + /* We need to insert bits surface_base_addr[13, 9] to + * transfer_control2[7:3] as specified by DMA IAS. This helps the + * HW identify starting GOB index inside a block. */ + desc->transfer_control2 &= ~PVA_MASK(7, 3); + desc->transfer_control2 |= + PVA_INSERT8(PVA_EXTRACT64(surface_base_addr, 13, + 9, uint8_t), + 7, 3); + desc->transfer_control0 |= 1 << format_field_shift; + + offset = pva_pl_to_bl_offset(offset, line_pitch, + log2_block_height, + &math_error); + if (math_error != MATH_OP_SUCCESS) { + pva_kmd_log_err( + "pva_fw_do_cmd_bind_dram_slot pva_pl_to_bl_offset triggered a math error"); + err = PVA_ERR_MATH_OP; + goto out; + } + if (!pva_is_64B_aligned(offset)) { + pva_kmd_log_err( + "Descriptor starting address is not aligned to 64 bytes"); + err = PVA_BAD_DESC_ADDR_ALIGNMENT; + goto out; + } + } + addr = sat_add64(surface_base_addr, offset); + addr |= (sector_pack_format << PVA_BL_SECTOR_PACK_BIT_SHIFT); + *addr_hi_ptr = iova_hi(addr); + *addr_lo_ptr = iova_lo(addr); + } +out: + return err; +} + +static enum pva_error +bind_static_vmem_slot(struct pva_dma_config_resource *dma_config, + struct pva_kmd_dma_resource_aux *dma_aux, + struct pva_fw_dma_slot const *slot, + struct pva_fw_dma_reloc const *static_relocs, + struct pva_dma_vmem_binding const *vmem_bd) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_fw_dma_descriptor *descs = + pva_dma_config_get_descriptors(dma_config); + struct pva_kmd_vpu_bin_resource *vpu_bin; + struct pva_symbol_info *sym; + uint32_t buffer_size, buffer_addr; + struct pva_fw_dma_reloc const *relocs; + enum pva_symbol_type needed_sym_type; + + if (slot->flags & PVA_FW_DMA_SLOT_FLAG_VMEM_DATA) { + needed_sym_type = PVA_SYM_TYPE_DATA; + } else if (slot->flags & PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE) { + needed_sym_type = PVA_SYM_TYPE_VPUC_TABLE; + } else { + pva_kmd_log_err("Unexpected VMEM slot flags"); + err = PVA_INTERNAL; + goto out; + } + +#if defined(WAR_PVAAS16267) + needed_sym_type = 
PVA_SYM_TYPE_DATA; +#endif + + vpu_bin = &pva_kmd_peek_resource(dma_aux->res_table, + dma_aux->vpu_bin_res_id) + ->vpu_bin; + sym = pva_kmd_get_symbol_with_type(&vpu_bin->symbol_table, + vmem_bd->addr.symbol_id, + needed_sym_type); + if (sym == NULL) { + err = PVA_INVALID_SYMBOL; + goto out; + } + + buffer_size = sat_sub32(sym->size, vmem_bd->addr.offset); + buffer_addr = sat_add32(sym->vmem_addr, vmem_bd->addr.offset); + + if (buffer_size < get_slot_size(slot)) { + pva_kmd_log_err("VMEM buffer too small for slot binding"); + err = PVA_RES_OUT_OF_RANGE; + goto out; + } + + relocs = &static_relocs[slot->reloc_start_idx]; + for (uint32_t i = 0; i < slot->reloc_count; i++) { + struct pva_fw_dma_reloc const *reloc = &relocs[i]; + struct pva_fw_dma_descriptor *desc = &descs[reloc->desc_index]; + + if (reloc->field == PVA_FW_DMA_RELOC_FIELD_SRC) { + desc->src_adr0 = sat_add32(buffer_addr, desc->src_adr0); + } else if (reloc->field == PVA_FW_DMA_RELOC_FIELD_DST) { + desc->dst_adr0 = sat_add32(buffer_addr, desc->dst_adr0); + } else { + if (!pva_is_64B_aligned(buffer_addr)) { + pva_kmd_log_err( + "VMEM replication address not aligned to 64 bytes"); + err = PVA_INVAL; + goto out; + } + + desc->frda = + ((uint16_t)(buffer_addr >> 6U) + desc->frda) & + 0x3FFF; + } + } + +out: + return err; +} + +enum pva_error pva_kmd_bind_static_buffers( + struct pva_dma_config_resource *fw_dma_cfg_hdr, + struct pva_kmd_dma_resource_aux *dma_aux, + struct pva_fw_dma_slot const *static_slots, uint16_t num_static_slots, + struct pva_fw_dma_reloc const *static_relocs, + struct pva_dma_static_binding const *static_bindings, + uint32_t num_static_bindings) +{ + uint32_t slot_id; + enum pva_error err = PVA_SUCCESS; + + if (num_static_bindings != num_static_slots) { + pva_kmd_log_err("Invalid number of static bindings"); + err = PVA_INVAL; + goto out; + } + + // Reset BL status for each channel + fw_dma_cfg_hdr->ch_block_height_fixed_mask = 0U; + + for (slot_id = 0U; slot_id < num_static_slots; slot_id++) { + struct pva_fw_dma_slot const *st_slot = &static_slots[slot_id]; + struct pva_dma_static_binding const *binding = + &static_bindings[slot_id]; + + if (binding->type == PVA_DMA_STATIC_BINDING_DRAM) { + err = bind_static_dram_slot(fw_dma_cfg_hdr, dma_aux, + st_slot, static_relocs, + &binding->dram); + + } else { // PVA_FW_DMA_SLOT_FLAG_VMEM + err = bind_static_vmem_slot(fw_dma_cfg_hdr, dma_aux, + st_slot, static_relocs, + &binding->vmem); + } + + if (err != PVA_SUCCESS) { + goto out; + } + } + +out: + return err; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c new file mode 100644 index 00000000..34e4f06b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c @@ -0,0 +1,821 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_hwseq_validate.h" +#include "pva_api.h" +#include "pva_kmd_dma_cfg.h" +#include "pva_api_dma.h" +#include "pva_kmd_device.h" +#include "pva_math_utils.h" + +struct pva_fw_dma_reloc_slot_info { + struct pva_fw_dma_slot *slots; + struct pva_fw_dma_reloc *relocs; + uint16_t num_slots; + uint8_t *reloc_off; +}; +struct pva_fw_dma_reloc_slots { + struct pva_fw_dma_reloc_slot_info dyn_slot; + struct pva_fw_dma_reloc_slot_info static_slot; +}; + +static enum pva_error +validate_channel_mapping(struct pva_dma_config const *out_cfg, + struct pva_kmd_hw_constants const *hw_consts) +{ + struct pva_dma_channel *channel; + struct pva_dma_config_header const *cfg_hdr = &out_cfg->header; + pva_math_error math_err = MATH_OP_SUCCESS; + + for (uint8_t i = 0U; i < cfg_hdr->num_channels; i++) { + channel = &out_cfg->channels[i]; + if ((channel->desc_index >= out_cfg->header.num_descriptors) || + (pva_is_reserved_desc(channel->desc_index))) { + pva_kmd_log_err( + "ERR: Invalid Channel Descriptor Index"); + return PVA_INVAL; + } + if (addu8(channel->vdb_count, channel->vdb_offset, &math_err) > + PVA_NUM_DYNAMIC_VDB_BUFFS) { + pva_kmd_log_err("ERR: Invalid Channel control data"); + return PVA_INVAL; + } + if (addu16(channel->adb_count, channel->adb_offset, &math_err) > + hw_consts->n_dynamic_adb_buffs) { + pva_kmd_log_err("ERR: Invalid ADB Buff Size or Offset"); + return PVA_INVAL; + } + } + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("validate_channel_mapping math error"); + return PVA_ERR_MATH_OP; + } + + return PVA_SUCCESS; +} + +static enum pva_error validate_padding(struct pva_dma_descriptor *desc) +{ + if ((desc->px != 0U) && (desc->px >= desc->tx)) { + return PVA_INVAL; + } + + if ((desc->py != 0U) && (desc->py >= desc->ty)) { + return PVA_INVAL; + } + + return PVA_SUCCESS; +} + +static bool is_valid_vpu_trigger_mode(struct pva_dma_descriptor *desc) +{ + bool valid = true; + if (desc->trig_event_mode != 0U) { + switch (desc->trig_vpu_events) { + case PVA_DMA_NO_TRIG: + //HW Sequencer check + break; + case PVA_DMA_TRIG_VPU_CFG: + if (desc->src.transfer_mode != + PVA_DMA_TRANS_MODE_VPUCFG) { + valid = false; + } + break; + case PVA_DMA_TRIG_READ0: + case PVA_DMA_TRIG_READ1: + case PVA_DMA_TRIG_READ2: + case PVA_DMA_TRIG_READ3: + case PVA_DMA_TRIG_READ4: + case PVA_DMA_TRIG_READ5: + case PVA_DMA_TRIG_READ6: + if ((desc->src.transfer_mode != + (uint8_t)PVA_DMA_TRANS_MODE_VPUCFG) && + (desc->dst.transfer_mode != + (uint8_t)PVA_DMA_TRANS_MODE_VMEM)) { + valid = false; + } + break; + case PVA_DMA_TRIG_WRITE0: + case PVA_DMA_TRIG_WRITE1: + case PVA_DMA_TRIG_WRITE2: + case PVA_DMA_TRIG_WRITE3: + case PVA_DMA_TRIG_WRITE4: + case PVA_DMA_TRIG_WRITE5: + case PVA_DMA_TRIG_WRITE6: + if ((desc->src.transfer_mode != + (uint8_t)PVA_DMA_TRANS_MODE_VPUCFG) && + (desc->src.transfer_mode != + (uint8_t)PVA_DMA_TRANS_MODE_VMEM)) { + valid = false; + } + break; + default: + valid = false; + break; + } + } + return valid; +} + +static bool validate_src_dst_adv_val(struct pva_dma_descriptor *desc, + bool relax_dim3_check) +{ + uint8_t is_any_rpt_zero = 0U; + + is_any_rpt_zero = desc->src.rpt1 & desc->src.rpt2 & desc->dst.rpt1 & + desc->dst.rpt2; + + if ((desc->trig_event_mode == (uint8_t)PVA_DMA_TRIG_MODE_4TH_DIM) && + (is_any_rpt_zero == 0U)) { + return false; + } + + if (desc->trig_event_mode == ((uint8_t)PVA_DMA_TRIG_MODE_3RD_DIM)) { + if (false == relax_dim3_check) { + if (((desc->src.rpt1 == 0U) && + (desc->dst.rpt1 == 
0U))) { + return false; + } + } else { + if (((desc->dst.rpt1 == 0U) || + (desc->src.rpt1 > desc->dst.rpt1))) { + return false; + } + } + } + + return true; +} + +static enum pva_error +validate_dma_desc_trans_cntl2(struct pva_dma_descriptor *desc) +{ + if ((desc->prefetch_enable != 0U) && + ((desc->tx == 0U) || (desc->ty == 0U) || + (desc->src.transfer_mode != (uint32_t)PVA_DMA_TRANS_MODE_DRAM) || + (desc->dst.transfer_mode != (uint32_t)PVA_DMA_TRANS_MODE_VMEM))) { + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +static enum pva_error +validate_descriptor(struct pva_dma_descriptor *desc, + struct pva_dma_config_header const *cfg_hdr) +{ + enum pva_error err = PVA_SUCCESS; + + err = validate_padding(desc); + if ((desc->dst.transfer_mode == PVA_DMA_TRANS_MODE_VMEM) && + (err != PVA_SUCCESS)) { + return err; + } + + if (!(is_valid_vpu_trigger_mode(desc))) { + pva_kmd_log_err("Bad trigger"); + return PVA_INVAL; + } + + /** Check src/dstADV values with respect to ECET bits */ + if (false == validate_src_dst_adv_val(desc, false)) { + pva_kmd_log_err( + "Invalid src/dst ADV values with respect to ECET"); + return PVA_INVAL; + } + + /* DMA_DESC_TRANS CNTL2 */ + if (PVA_SUCCESS != validate_dma_desc_trans_cntl2(desc)) { + pva_kmd_log_err("Bad trans cntl 2"); + return PVA_INVAL; + } + + /* DMA_DESC_LDID */ + if ((desc->link_desc_id > cfg_hdr->num_descriptors) || + ((desc->link_desc_id != 0) && + pva_is_reserved_desc(desc->link_desc_id - PVA_DMA_DESC0))) { + pva_kmd_log_err("ERR: Invalid linker Desc ID"); + return PVA_INVAL; + } + + return PVA_SUCCESS; +} + +static bool +is_dma_config_header_valid(struct pva_dma_config_header const *cfg_hdr, + struct pva_kmd_hw_constants const *hw_consts) +{ + if (((cfg_hdr->base_descriptor + cfg_hdr->num_descriptors) > + hw_consts->n_dma_descriptors) || + ((cfg_hdr->base_channel + cfg_hdr->num_channels) > + (hw_consts->n_user_dma_channels + 1U)) || + ((cfg_hdr->base_hwseq_word + cfg_hdr->num_hwseq_words) > + hw_consts->n_hwseq_words) || + (cfg_hdr->num_static_slots > PVA_KMD_MAX_NUM_DMA_SLOTS) || + (cfg_hdr->num_dynamic_slots > PVA_KMD_MAX_NUM_DMA_RELOCS) || + (cfg_hdr->base_channel == 0U)) { + return false; + } + return true; +} + +enum pva_error +pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size, + struct pva_dma_config *out_cfg, + struct pva_kmd_hw_constants const *hw_consts) +{ + struct pva_dma_config_header const *cfg_hdr = dma_config; + uintptr_t offset = 0; + + if (dma_config_size < sizeof(*cfg_hdr)) { + pva_kmd_log_err("DMA configuration too small"); + return PVA_INVAL; + } + + out_cfg->header = *cfg_hdr; + if (!(is_dma_config_header_valid(cfg_hdr, hw_consts))) { + pva_kmd_log_err("Invalid PVA DMA Configuration Header"); + return PVA_INVAL; + } + + offset += PVA_ALIGN8(sizeof(*cfg_hdr)); + + out_cfg->hwseq_words = pva_offset_pointer(dma_config, offset); + offset += PVA_ALIGN8(cfg_hdr->num_hwseq_words * + sizeof(*out_cfg->hwseq_words)); + + out_cfg->channels = pva_offset_pointer(dma_config, offset); + offset += + PVA_ALIGN8(cfg_hdr->num_channels * sizeof(*out_cfg->channels)); + + out_cfg->descriptors = pva_offset_pointer(dma_config, offset); + offset += PVA_ALIGN8(cfg_hdr->num_descriptors * + sizeof(*out_cfg->descriptors)); + + out_cfg->static_bindings = pva_offset_pointer(dma_config, offset); + offset += PVA_ALIGN8(cfg_hdr->num_static_slots * + sizeof(*out_cfg->static_bindings)); + + if (offset > dma_config_size) { + pva_kmd_log_err("DMA configuration is smaller than expected"); + return PVA_INVAL; + } + + return PVA_SUCCESS; +} + 
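+/*
+ * Layout expected by pva_kmd_parse_dma_config() above: the sections follow
+ * the header in a fixed order, each padded to an 8-byte boundary. As a
+ * sketch only (min_size below is not a variable used by this driver), the
+ * smallest acceptable dma_config_size is
+ *
+ *   min_size = PVA_ALIGN8(sizeof(struct pva_dma_config_header)) +
+ *              PVA_ALIGN8(num_hwseq_words  * sizeof(*out_cfg->hwseq_words)) +
+ *              PVA_ALIGN8(num_channels     * sizeof(*out_cfg->channels)) +
+ *              PVA_ALIGN8(num_descriptors  * sizeof(*out_cfg->descriptors)) +
+ *              PVA_ALIGN8(num_static_slots * sizeof(*out_cfg->static_bindings));
+ *
+ * and any blob shorter than that is rejected with PVA_INVAL by the final
+ * offset check above.
+ */
+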
+static enum pva_error +validate_descriptors(struct pva_dma_config const *dma_config) +{ + uint32_t i = 0U; + enum pva_error err = PVA_SUCCESS; + struct pva_dma_config_header const *cfg_hdr = &dma_config->header; + struct pva_dma_descriptor *desc; + + for (i = 0; i < cfg_hdr->num_descriptors; i++) { + if (pva_is_reserved_desc(i)) { + // skip over the reserved descriptor range + i = PVA_RESERVED_DESCRIPTORS_END; + continue; + } + + desc = &dma_config->descriptors[i]; + err = validate_descriptor(desc, cfg_hdr); + if (err != PVA_SUCCESS) { + return err; + } + } + + return err; +} + +enum pva_error +pva_kmd_validate_dma_config(struct pva_dma_config const *dma_config, + struct pva_kmd_hw_constants const *hw_consts, + struct pva_kmd_dma_access *access_sizes, + uint64_t *hw_dma_descs_mask) +{ + enum pva_error err = PVA_SUCCESS; + + err = validate_channel_mapping(dma_config, hw_consts); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Bad Channels"); + return err; + } + + err = validate_descriptors(dma_config); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Bad Descriptors"); + return err; + } + + if (dma_config->header.num_hwseq_words != 0U) { + err = validate_hwseq(dma_config, hw_consts, access_sizes, + hw_dma_descs_mask); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Bad HW Sequencer Blob"); + return err; + } + } + + return err; +} + +enum pva_error +pva_kmd_dma_use_resources(struct pva_dma_config const *dma_cfg, + struct pva_kmd_dma_resource_aux *dma_aux) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_vpu_bin_resource *vpu_bin = NULL; + uint32_t i; + + /* Increment reference count for VPU bin */ + if (dma_cfg->header.vpu_exec_resource_id != PVA_RESOURCE_ID_INVALID) { + struct pva_kmd_resource_record *vpu_bin_rec; + + vpu_bin_rec = pva_kmd_use_resource( + dma_aux->res_table, + dma_cfg->header.vpu_exec_resource_id); + if (vpu_bin_rec == NULL) { + pva_kmd_log_err( + "VPU exec resource id used by DMA config does not exist"); + err = PVA_INVAL; + goto err_out; + } + if (vpu_bin_rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) { + pva_kmd_log_err( + "Invalid VPU exec resource id used by DMA config"); + err = PVA_INVAL; + goto drop_vpu_bin; + } + vpu_bin = &vpu_bin_rec->vpu_bin; + } + + dma_aux->vpu_bin_res_id = dma_cfg->header.vpu_exec_resource_id; + + dma_aux->dram_res_count = 0; + /* Increment reference count for all static DRAM buffers; For static + * VMEM buffers, check that symbol ID is valid. 
*/ + for (i = 0; i < dma_cfg->header.num_static_slots; i++) { + struct pva_dma_static_binding const *slot_buf = + &dma_cfg->static_bindings[i]; + + if (slot_buf->type == PVA_DMA_STATIC_BINDING_DRAM) { + struct pva_kmd_resource_record *rec; + + rec = pva_kmd_use_resource(dma_aux->res_table, + slot_buf->dram.resource_id); + if (rec == NULL) { + pva_kmd_log_err( + "DRAM buffers used by DMA config do not exist"); + err = PVA_INVAL; + goto drop_dram; + } + + dma_aux->static_dram_res_ids[dma_aux->dram_res_count] = + slot_buf->dram.resource_id; + dma_aux->dram_res_count += 1; + + if (rec->type != PVA_RESOURCE_TYPE_DRAM) { + pva_kmd_log_err( + "Invalid DRAM resource id used DMA config"); + err = PVA_INVAL; + goto drop_dram; + } + } else if (slot_buf->type == PVA_DMA_STATIC_BINDING_VMEM) { + if (vpu_bin == NULL) { + pva_kmd_log_err( + "VPU bin resource not found for static VMEM buffer"); + err = PVA_INVAL; + goto drop_dram; + } + + if (pva_kmd_get_symbol(&vpu_bin->symbol_table, + slot_buf->vmem.addr.symbol_id) == + NULL) { + pva_kmd_log_err("Invalid VMEM symbol ID"); + err = PVA_INVAL; + goto drop_dram; + } + } else { + pva_kmd_log_err("Invalid slot buffer type"); + err = PVA_INVAL; + goto drop_dram; + } + } + + return PVA_SUCCESS; +drop_dram: + for (i = 0; i < dma_aux->dram_res_count; i++) { + pva_kmd_drop_resource(dma_aux->res_table, + dma_aux->static_dram_res_ids[i]); + } +drop_vpu_bin: + if (dma_aux->vpu_bin_res_id != PVA_RESOURCE_ID_INVALID) { + pva_kmd_drop_resource(dma_aux->res_table, + dma_aux->vpu_bin_res_id); + } +err_out: + return err; +} + +static uint16_t get_slot_id(uint16_t slot) +{ + return slot & PVA_DMA_SLOT_ID_MASK; +} + +static uint8_t get_slot_flag(uint8_t transfer_mode, bool cb_enable) +{ + uint8_t flags = 0; + if (transfer_mode == PVA_DMA_TRANS_MODE_VMEM) { + flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_DATA; + } else if (transfer_mode == PVA_DMA_TRANS_MODE_L2SRAM) { + flags |= PVA_FW_DMA_SLOT_FLAG_L2SRAM; + } else if (transfer_mode == PVA_DMA_TRANS_MODE_DRAM) { + flags |= PVA_FW_DMA_SLOT_FLAG_DRAM; + } else if (transfer_mode == PVA_DMA_TRANS_MODE_VPUCFG) { + flags |= PVA_FW_DMA_SLOT_FLAG_VMEM_VPUC_TABLE; + } + + if (cb_enable) { + flags |= PVA_FW_DMA_SLOT_FLAG_CB; + } + return flags; +} + +static void update_reloc_count(uint16_t slot, uint8_t transfer_mode, + bool cb_enable, + struct pva_fw_dma_slot *out_static_slots, + uint16_t num_static_slots, + struct pva_fw_dma_slot *out_dyn_slots, + uint16_t num_dyn_slots) +{ + uint8_t slot_id = get_slot_id(slot); + + if (slot & PVA_DMA_DYNAMIC_SLOT) { + out_dyn_slots[slot_id].reloc_count = + safe_addu16(out_dyn_slots[slot_id].reloc_count, 1U); + out_dyn_slots[slot_id].flags |= + get_slot_flag(transfer_mode, cb_enable); + } else if (slot & PVA_DMA_STATIC_SLOT) { + out_static_slots[slot_id].reloc_count = + safe_addu16(out_static_slots[slot_id].reloc_count, 1U); + ; + out_static_slots[slot_id].flags |= + get_slot_flag(transfer_mode, cb_enable); + } +} + +static void count_relocs(struct pva_dma_config const *dma_cfg, + struct pva_fw_dma_slot *out_static_slots, + uint16_t num_static_slots, + struct pva_fw_dma_slot *out_dyn_slots, + uint16_t num_dyn_slots) +{ + uint8_t i; + struct pva_dma_descriptor *desc; + + for (i = 0U; i < dma_cfg->header.num_descriptors; i++) { + if (pva_is_reserved_desc(i)) { + // skip over the reserved descriptor range + i = PVA_RESERVED_DESCRIPTORS_END; + continue; + } + desc = &dma_cfg->descriptors[i]; + + update_reloc_count(desc->src.slot, desc->src.transfer_mode, + desc->src.cb_enable, out_static_slots, + num_static_slots, 
out_dyn_slots, + num_dyn_slots); + + update_reloc_count(desc->dst.slot, desc->dst.transfer_mode, + desc->dst.cb_enable, out_static_slots, + num_static_slots, out_dyn_slots, + num_dyn_slots); + + update_reloc_count(desc->dst2_slot, desc->dst.transfer_mode, + desc->dst.cb_enable, out_static_slots, + num_static_slots, out_dyn_slots, + num_dyn_slots); + } +} + +static void write_one_reloc(uint8_t ch_index, uint32_t desc_index, + uint16_t slot, uint8_t transfer_mode, + uint8_t reloc_field, + struct pva_fw_dma_reloc_slot_info *info, + struct pva_kmd_dma_access_entry const *access_entry) +{ + uint16_t slot_id = get_slot_id(slot); + uint16_t reloc_id = safe_addu16(info->slots[slot_id].reloc_start_idx, + info->reloc_off[slot_id]); + + int64_t old_start_addr = info->slots[slot_id].start_addr; + int64_t old_end_addr = info->slots[slot_id].end_addr; + + info->slots[slot_id].start_addr = + mins64(access_entry->start_addr, old_start_addr); + info->slots[slot_id].end_addr = + maxs64(access_entry->end_addr, old_end_addr); + + info->slots[slot_id].ch_use_mask |= (1U << (ch_index & 0x1FU)); + + info->relocs[reloc_id].desc_index = desc_index; + info->relocs[reloc_id].field = reloc_field; + + info->reloc_off[slot_id] = safe_addu8(info->reloc_off[slot_id], 1U); +} + +static void handle_reloc(uint16_t slot, uint8_t transfer_mode, + struct pva_kmd_dma_access_entry const *access_entry, + struct pva_fw_dma_reloc_slots *rel_info, + uint8_t reloc_field, uint8_t ch_index, + uint8_t desc_index) +{ + if (slot & PVA_DMA_DYNAMIC_SLOT) { + write_one_reloc(ch_index, desc_index, slot, transfer_mode, + reloc_field, &rel_info->dyn_slot, access_entry); + } else if (slot & PVA_DMA_STATIC_SLOT) { + write_one_reloc(ch_index, desc_index, slot, transfer_mode, + reloc_field, &rel_info->static_slot, + access_entry); + } +} + +static void write_relocs(struct pva_dma_config const *dma_cfg, + struct pva_kmd_dma_access const *access_sizes, + struct pva_fw_dma_reloc_slots *rel_info, + uint8_t const *desc_to_ch) +{ + uint32_t i; + uint16_t start_idx = 0U; + struct pva_dma_descriptor *desc = NULL; + uint8_t ch_index = 0U; + + for (i = 0U; i < rel_info->dyn_slot.num_slots; i++) { + rel_info->dyn_slot.slots[i].reloc_start_idx = start_idx; + start_idx = safe_addu16( + start_idx, rel_info->dyn_slot.slots[i].reloc_count); + } + + for (i = 0U; i < rel_info->static_slot.num_slots; i++) { + rel_info->static_slot.slots[i].reloc_start_idx = start_idx; + start_idx = safe_addu16( + start_idx, rel_info->static_slot.slots[i].reloc_count); + } + + for (i = 0U; i < dma_cfg->header.num_descriptors; i++) { + if (pva_is_reserved_desc(i)) { + // skip over the reserved descriptor range + i = PVA_RESERVED_DESCRIPTORS_END; + continue; + } + desc = &dma_cfg->descriptors[i]; + ch_index = desc_to_ch[i]; + + handle_reloc(desc->src.slot, desc->src.transfer_mode, + &access_sizes[i].src, rel_info, + PVA_FW_DMA_RELOC_FIELD_SRC, ch_index, i); + handle_reloc(desc->dst.slot, desc->dst.transfer_mode, + &access_sizes[i].dst, rel_info, + PVA_FW_DMA_RELOC_FIELD_DST, ch_index, i); + handle_reloc(desc->dst2_slot, desc->dst.transfer_mode, + &access_sizes[i].dst2, rel_info, + PVA_FW_DMA_RELOC_FIELD_DST2, ch_index, i); + } +} + +static enum pva_error +validate_descriptor_tile_and_padding(struct pva_dma_descriptor *desc, + bool is_dst) +{ + enum pva_error err = PVA_SUCCESS; + + if (desc->ty == 0U) { + err = PVA_INVALID_DMA_CONFIG; + return err; + } + + if (!is_dst) { + if ((desc->tx <= desc->px) || (desc->ty <= desc->py)) { + // invalid tile size/padding config + err = 
PVA_INVALID_DMA_CONFIG; + return err; + } + } + + return PVA_SUCCESS; +} + +static enum pva_error get_access_size(struct pva_dma_descriptor *desc, + struct pva_kmd_dma_access_entry *entry, + bool is_dst, + struct pva_kmd_dma_access_entry *dst2) + +{ + struct pva_dma_transfer_attr *attr = NULL; + uint32_t tx = 0U; + uint32_t ty = 0U; + uint64_t tile_size = 0U; + int64_t start = 0; + int64_t end = 0; + int32_t dim_offset = 0; + uint32_t dim_offset_U = 0U; + uint32_t num_bytes = 0U; + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + + // early out for empty tiles + if (desc->tx == 0U) { + return err; + } + + err = validate_descriptor_tile_and_padding(desc, is_dst); + if (err != PVA_SUCCESS) { + return err; + } + + if (is_dst) { + attr = &desc->dst; + tx = desc->tx; + ty = desc->ty; + } else { + attr = &desc->src; + tx = subu32((uint32_t)desc->tx, (uint32_t)desc->px, &math_err); + ty = subu32((uint32_t)desc->ty, (uint32_t)desc->py, &math_err); + } + + if (attr->offset > (uint64_t)(MAX_INT64)) { + err = PVA_INVALID_DMA_CONFIG; + pva_kmd_log_err("Offset is too large"); + goto err_out; + } + + dim_offset_U = mulu32((uint32_t)(attr->line_pitch), + subu32(ty, 1U, &math_err), &math_err); + + if (attr->cb_enable != 0U) { + tile_size = addu32(dim_offset_U, tx, &math_err); + tile_size = tile_size + << (desc->log2_pixel_size & MAX_BYTES_PER_PIXEL); + + if (tile_size > attr->cb_size) { + pva_kmd_log_err( + "Tile size is bigger than circular buffer size"); + err = PVA_INVALID_DMA_CONFIG; + } + start = 0LL; + end = (int64_t)attr->cb_size; + goto end; + } + + end += adds64((int64_t)dim_offset_U, (int64_t)tx, &math_err); + + // 3rd dim + // 3rd dim + dim_offset = muls32((attr->adv1), (int32_t)(attr->rpt1), &math_err); + start += mins32(dim_offset, 0); + end += maxs32(dim_offset, 0); + // 4th dim + dim_offset = muls32((attr->adv2), (int32_t)(attr->rpt2), &math_err); + start += mins32(dim_offset, 0); + end += maxs32(dim_offset, 0); + // 5th dim + dim_offset = muls32((attr->adv3), (int32_t)(attr->rpt3), &math_err); + start += mins32(dim_offset, 0); + end += maxs32(dim_offset, 0); + // convert to byte range + num_bytes = + ((uint32_t)1U << (desc->log2_pixel_size & MAX_BYTES_PER_PIXEL)); + start *= (int64_t)num_bytes; + end *= (int64_t)num_bytes; + + if (math_err != MATH_OP_SUCCESS) { + err = PVA_ERR_MATH_OP; + pva_kmd_log_err("get_access_size math error"); + goto err_out; + } + +end: + entry->start_addr = + adds64(mins64(start, end), convert_to_signed_s64(attr->offset), + &math_err); + entry->end_addr = + adds64(maxs64(start, end), convert_to_signed_s64(attr->offset), + &math_err); + + if (is_dst) { + dst2->start_addr = + adds64(mins64(start, end), (int64_t)desc->dst2_offset, + &math_err); + + dst2->end_addr = adds64(maxs64(start, end), + (int64_t)desc->dst2_offset, &math_err); + } + if (math_err != MATH_OP_SUCCESS) { + err = PVA_ERR_MATH_OP; + pva_kmd_log_err("get_access_size math error"); + } +err_out: + return err; +} + +enum pva_error +pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg, + struct pva_kmd_dma_access *access_sizes, + uint64_t *hw_dma_descs_mask) +{ + uint32_t i; + struct pva_dma_descriptor *desc = NULL; + enum pva_error err = PVA_SUCCESS; + bool skip_swseq_size_compute = false; + + for (i = 0; i < dma_cfg->header.num_descriptors; i++) { + /** + * Check if DMA descriptor has been used in HW Sequencer. 
+		 * If used, skip_swseq_size_compute = true
+		 * else skip_swseq_size_compute = false
+		 *
+		 * If skip_swseq_size_compute == true then set access_sizes to 0
+		 * else go ahead with the access_sizes calculation.
+		 */
+		skip_swseq_size_compute = ((hw_dma_descs_mask[i / 64ULL] &
+					    (1ULL << (i & 0x3FU))) != 0U);
+		if (pva_is_reserved_desc(i)) {
+			// skip over the reserved descriptor range
+			i = PVA_RESERVED_DESCRIPTORS_END;
+			continue;
+		}
+
+		if (skip_swseq_size_compute == true) {
+			continue;
+		}
+
+		desc = &dma_cfg->descriptors[i];
+
+		//Calculate src_size
+		err = get_access_size(desc, &access_sizes[i].src, false,
+				      &access_sizes[i].dst2);
+		if (err != PVA_SUCCESS) {
+			goto out;
+		}
+
+		//Calculate dst_size
+		err = get_access_size(desc, &access_sizes[i].dst, true,
+				      &access_sizes[i].dst2);
+
+		if (err != PVA_SUCCESS) {
+			goto out;
+		}
+	}
+
+out:
+	return err;
+}
+
+void pva_kmd_collect_relocs(struct pva_dma_config const *dma_cfg,
+			    struct pva_kmd_dma_access const *access_sizes,
+			    struct pva_fw_dma_slot *out_static_slots,
+			    uint16_t num_static_slots,
+			    struct pva_fw_dma_reloc *out_static_relocs,
+			    struct pva_fw_dma_slot *out_dyn_slots,
+			    uint16_t num_dyn_slots,
+			    struct pva_fw_dma_reloc *out_dyn_relocs,
+			    uint8_t const *desc_to_ch)
+{
+	struct pva_fw_dma_reloc_slots rel_info = { 0 };
+	uint8_t static_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
+	uint8_t dyn_reloc_off[PVA_MAX_NUM_DMA_DESC * 3];
+
+	memset(out_static_slots, 0,
+	       num_static_slots * sizeof(*out_static_slots));
+	memset(out_dyn_slots, 0, num_dyn_slots * sizeof(*out_dyn_slots));
+
+	/* First pass: count the number of relocs for each slot */
+	count_relocs(dma_cfg, out_static_slots, num_static_slots, out_dyn_slots,
+		     num_dyn_slots);
+
+	memset(static_reloc_off, 0U, sizeof(static_reloc_off));
+	memset(dyn_reloc_off, 0U, sizeof(dyn_reloc_off));
+
+	rel_info.dyn_slot.slots = out_dyn_slots;
+	rel_info.dyn_slot.relocs = out_dyn_relocs;
+	rel_info.dyn_slot.num_slots = num_dyn_slots;
+	rel_info.dyn_slot.reloc_off = dyn_reloc_off;
+
+	rel_info.static_slot.slots = out_static_slots;
+	rel_info.static_slot.relocs = out_static_relocs;
+	rel_info.static_slot.num_slots = num_static_slots;
+	rel_info.static_slot.reloc_off = static_reloc_off;
+
+	/* Second pass: write reloc info */
+	write_relocs(dma_cfg, access_sizes, &rel_info, desc_to_ch);
+}
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c
new file mode 100644
index 00000000..11499da5
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and
+ * proprietary rights in and to this software and related documentation. Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */ +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device_memory.h" +#include "pva_api.h" +#include "pva_api_types.h" +#include "pva_kmd_dma_cfg.h" +#include "pva_resource.h" +#include "pva_kmd_hwseq_validate.h" + +static void write_dma_channel(struct pva_dma_channel const *ch, + uint8_t base_desc_index, + struct pva_fw_dma_channel *fw_ch, + struct pva_dma_resource_map *dma_resource_map, + bool support_hwseq_frame_linking) +{ + /* DMA_CHANNEL_CNTL0_CHSDID: DMA_CHANNEL_CNTL0[0] = descIndex + 1;*/ + fw_ch->cntl0 = + (((ch->desc_index + base_desc_index + 1U) & 0xFFU) << 0U); + + /* DMA_CHANNEL_CNTL0_CHVMEMOREQ */ + fw_ch->cntl0 |= ((ch->vdb_count & 0xFFU) << 8U); + + /* DMA_CHANNEL_CNTL0_CHBH */ + fw_ch->cntl0 |= ((ch->adb_count & 0x1FFU) << 16U); + + /* DMA_CHANNEL_CNTL0_CHPREF */ + fw_ch->cntl0 |= ((ch->prefetch_enable & 1U) << 30U); + + /* DMA_CHANNEL_CNTL1_CHPWT */ + fw_ch->cntl1 = ((ch->req_per_grant & 0x7U) << 2U); + + /* DMA_CHANNEL_CNTL1_CHVDBSTART */ + fw_ch->cntl1 |= ((ch->vdb_offset & 0x7FU) << 16U); + + /* DMA_CHANNEL_CNTL1_CHADBSTART */ + fw_ch->cntl1 |= ((ch->adb_offset & 0x1FFU) << 23U); + + fw_ch->boundary_pad = ch->pad_value; + + fw_ch->cntl1 |= ((ch->ch_rep_factor & 0x7U) << 8U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQSTART */ + fw_ch->hwseqcntl = ((ch->hwseq_start & 0x1FFU) << 0U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEND */ + fw_ch->hwseqcntl |= ((ch->hwseq_end & 0x1FFU) << 12U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTD */ + fw_ch->hwseqcntl |= ((ch->hwseq_trigger_done & 0x3U) << 24U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTS */ + fw_ch->hwseqcntl |= ((ch->hwseq_tx_select & 0x1U) << 27U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTO */ + fw_ch->hwseqcntl |= ((ch->hwseq_traversal_order & 0x1U) << 30U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEN */ + fw_ch->hwseqcntl |= ((ch->hwseq_enable & 0x1U) << 31U); + + /* DMA_CHANNEL_HWSEQFSCNTL_CHHWSEQFCNT*/ + fw_ch->hwseqfscntl |= + (((uint32_t)ch->hwseq_con_frame_seq & 0x1U) << 0U); + + /* DMA_CHANNEL_HWSEQFSCNTL_CHHWSEQCFS*/ + fw_ch->hwseqfscntl |= + (((uint32_t)ch->hwseq_frame_count & 0x3FU) << 16U); + + pva_dma_resource_map_add_adbs(dma_resource_map, ch->adb_offset, + ch->adb_count); +} + +static uint32_t assemble_rpt_cntl(uint8_t rpt, uint32_t adv) +{ + return PVA_INSERT(rpt, 31, 24) | PVA_INSERT(adv, 23, 0); +} + +static void write_dma_descriptor(struct pva_dma_descriptor const *desc, + struct pva_fw_dma_descriptor *fw_desc) +{ + fw_desc->src_adr0 = iova_lo(desc->src.offset); + fw_desc->src_adr1 = iova_hi(desc->src.offset); + + fw_desc->dst_adr0 = iova_lo(desc->dst.offset); + fw_desc->dst_adr1 = iova_hi(desc->dst.offset); + + /* DMA_DESC_TRANS CNTL0 */ + fw_desc->transfer_control0 = PVA_INSERT(desc->src.transfer_mode, 2, 0) | + PVA_INSERT(desc->dst.transfer_mode, 6, 4); + /* DMA_DESC_TRANS CNTL1 */ + fw_desc->transfer_control1 = + PVA_INSERT(desc->log2_pixel_size, 1, 0) | + PVA_INSERT(desc->px_direction, 2, 2) | + PVA_INSERT(desc->py_direction, 3, 3) | + PVA_INSERT(desc->boundary_pixel_extension, 4, 4) | + PVA_INSERT(desc->tts, 5, 5) | + PVA_INSERT(desc->trans_true_completion, 7, 7); + /* DMA_DESC_TRANS CNTL2 */ + fw_desc->transfer_control2 = PVA_INSERT(desc->prefetch_enable, 0, 0) | + PVA_INSERT(desc->dst.cb_enable, 1, 1) | + PVA_INSERT(desc->src.cb_enable, 2, 2); + + fw_desc->link_did = desc->link_desc_id; + + /* DMA_DESC_TX */ + fw_desc->tx = desc->tx; + /* DMA_DESC_TY */ + fw_desc->ty = desc->ty; + /* DMA_DESC_DLP_ADV */ + fw_desc->dlp_adv = desc->dst.line_pitch; + /* DMA_DESC_SLP_ADV */ + fw_desc->slp_adv = desc->src.line_pitch; 
+ /* DMA_DESC_DB_START */ + fw_desc->db_start = desc->dst.cb_start; + /* DMA_DESC_DB_SIZE */ + fw_desc->db_size = desc->dst.cb_size; + /* DMA_DESC_SB_START */ + fw_desc->sb_start = desc->src.cb_start; + /* DMA_DESC_SB_SIZE */ + fw_desc->sb_size = desc->src.cb_size; + /* DMA_DESC_TRIG_CH */ + /* Channel events are not supported */ + fw_desc->trig_ch_events = 0U; + /* DMA_DESC_HW_SW_TRIG */ + fw_desc->hw_sw_trig_events = + PVA_INSERT(desc->trig_event_mode, 1, 0) | + PVA_INSERT(desc->trig_vpu_events, 5, 2) | + PVA_INSERT(desc->desc_reload_enable, 12, 12); + /* DMA_DESC_PX */ + fw_desc->px = desc->px; + /* DMA_DESC_PY */ + fw_desc->py = desc->py; + /* DMA_DESC_FRDA */ + fw_desc->frda = ((desc->dst2_offset >> 6U) & 0x3FFF); + + /* DMA_DESC_NDTM_CNTL0 */ + fw_desc->cb_ext = (((desc->src.cb_start >> 16) & 0x1) << 0) | + (((desc->dst.cb_start >> 16) & 0x1) << 2) | + (((desc->src.cb_size >> 16) & 0x1) << 4) | + (((desc->dst.cb_size >> 16) & 0x1) << 6); + + /* DMA_DESC_NS1_ADV & DMA_DESC_ST1_ADV */ + fw_desc->srcpt1_cntl = + assemble_rpt_cntl(desc->src.rpt1, desc->src.adv1); + fw_desc->srcpt2_cntl = + assemble_rpt_cntl(desc->src.rpt2, desc->src.adv2); + fw_desc->srcpt3_cntl = + assemble_rpt_cntl(desc->src.rpt3, desc->src.adv3); + fw_desc->dstpt1_cntl = + assemble_rpt_cntl(desc->dst.rpt1, desc->dst.adv1); + fw_desc->dstpt2_cntl = + assemble_rpt_cntl(desc->dst.rpt2, desc->dst.adv2); + fw_desc->dstpt3_cntl = + assemble_rpt_cntl(desc->dst.rpt3, desc->dst.adv3); +} + +static void write_triggers(struct pva_dma_config const *dma_cfg, + struct pva_dma_config_resource *fw_cfg, + struct pva_dma_resource_map *dma_resource_map) +{ + uint32_t i, j; + bool trigger_required = false; + + memset(fw_cfg->output_enable, 0, sizeof(fw_cfg->output_enable)); + + for (i = 0; i < dma_cfg->header.num_channels; i++) { + struct pva_dma_channel const *ch = &dma_cfg->channels[i]; + uint8_t ch_num = i + dma_cfg->header.base_channel; + uint32_t mask; + + mask = ch->output_enable_mask; + /* READ/STORE triggers */ + for (j = 0; j < 7; j++) { + fw_cfg->output_enable[j] |= + (((mask >> 2 * j) & 1U) << ch_num); + fw_cfg->output_enable[j] |= + (((mask >> (2 * j + 1)) & 1U) + << (ch_num + 16U)); + } + + /* VPU config trigger */ + fw_cfg->output_enable[7] |= (((mask >> 14) & 1U) << ch_num); + /* HWSEQ tirgger */ + fw_cfg->output_enable[8] |= (((mask >> 15) & 1U) << ch_num); + fw_cfg->output_enable[8] |= + (((mask >> 16) & 1U) << (ch_num + 16U)); + + if (mask != 0) { + trigger_required = true; + } + } + + if (trigger_required) { + pva_dma_resource_map_add_triggers(dma_resource_map); + } +} + +void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg, + void *fw_dma_config, + uint32_t *out_fw_fetch_size, + bool support_hwseq_frame_linking) +{ + struct pva_dma_config_resource *hdr; + struct pva_fw_dma_channel *fw_channels; + struct pva_fw_dma_descriptor *fw_descs; + struct pva_fw_dma_slot *fw_slots, *last_slot; + struct pva_dma_resource_map *dma_resource_map; + uint32_t *hwseq_words; + uintptr_t offset; + uint32_t i; + + hdr = fw_dma_config; + hdr->base_channel = dma_cfg->header.base_channel; + hdr->base_descriptor = dma_cfg->header.base_descriptor; + hdr->base_hwseq_word = dma_cfg->header.base_hwseq_word; + hdr->num_channels = dma_cfg->header.num_channels; + hdr->num_descriptors = dma_cfg->header.num_descriptors; + hdr->num_hwseq_words = dma_cfg->header.num_hwseq_words; + hdr->vpu_exec_resource_id = dma_cfg->header.vpu_exec_resource_id; + hdr->num_dynamic_slots = dma_cfg->header.num_dynamic_slots; + + dma_resource_map = 
&hdr->dma_resource_map; + pva_dma_resource_map_reset(dma_resource_map); + pva_dma_resource_map_add_channels(dma_resource_map, + dma_cfg->header.base_channel, + dma_cfg->header.num_channels); + pva_dma_resource_map_add_descriptors(dma_resource_map, + dma_cfg->header.base_descriptor, + dma_cfg->header.num_descriptors); + pva_dma_resource_map_add_hwseq_words(dma_resource_map, + dma_cfg->header.base_hwseq_word, + dma_cfg->header.num_hwseq_words); + + offset = sizeof(*hdr); + fw_slots = pva_offset_pointer(fw_dma_config, offset); + + if (hdr->num_dynamic_slots > 0) { + last_slot = &fw_slots[hdr->num_dynamic_slots - 1]; + + hdr->num_relocs = safe_addu16(last_slot->reloc_start_idx, + last_slot->reloc_count); + /* Round of the number of relocs to satisfy alignment requirement */ + hdr->num_relocs = safe_pow2_roundup_u16(hdr->num_relocs, 2U); + + offset += sizeof(struct pva_fw_dma_slot) * + hdr->num_dynamic_slots + + sizeof(struct pva_fw_dma_reloc) * hdr->num_relocs; + } else { + hdr->num_relocs = 0; + } + + fw_channels = pva_offset_pointer(fw_dma_config, offset); + offset += sizeof(*fw_channels) * hdr->num_channels; + + fw_descs = pva_offset_pointer(fw_dma_config, offset); + offset += sizeof(*fw_descs) * hdr->num_descriptors; + + /* Do not include fields beyond descriptors as they are not fetched to + * TCM */ + *out_fw_fetch_size = offset; + + for (i = 0; i < hdr->num_channels; i++) { + write_dma_channel(&dma_cfg->channels[i], + dma_cfg->header.base_descriptor, + &fw_channels[i], dma_resource_map, + support_hwseq_frame_linking); + } + + for (i = 0; i < dma_cfg->header.num_descriptors; i++) { + if (pva_is_reserved_desc(i)) { + // skip over the reserved descriptor range + i = PVA_RESERVED_DESCRIPTORS_END; + continue; + } + write_dma_descriptor(&dma_cfg->descriptors[i], &fw_descs[i]); + } + + write_triggers(dma_cfg, fw_dma_config, dma_resource_map); + + hwseq_words = pva_offset_pointer(fw_dma_config, offset); + + memcpy(hwseq_words, dma_cfg->hwseq_words, + sizeof(*hwseq_words) * hdr->num_hwseq_words); + + /*TODO: write hdr->common_config for hwseq and MISR*/ +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h new file mode 100644 index 00000000..283c7d66 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#ifndef PVA_KMD_EXECUTABLE_H +#define PVA_KMD_EXECUTABLE_H +#include "pva_kmd.h" +#include "pva_resource.h" +#include "pva_kmd_utils.h" + +struct pva_kmd_device; +struct pva_kmd_device_memory; + +struct pva_kmd_exec_symbol_table { + uint32_t n_symbols; + struct pva_symbol_info *symbols; +}; + +static inline struct pva_symbol_info * +pva_kmd_get_symbol(struct pva_kmd_exec_symbol_table *symbol_table, + uint32_t symbol_id) +{ + struct pva_symbol_info *symbol = NULL; + uint32_t idx = symbol_id - PVA_SYMBOL_ID_BASE; + + if (idx >= symbol_table->n_symbols) { + pva_kmd_log_err("Symbol ID out of range\n"); + return NULL; + } + + symbol = &symbol_table->symbols[idx]; + return symbol; +} + +static inline struct pva_symbol_info * +pva_kmd_get_symbol_with_type(struct pva_kmd_exec_symbol_table *symbol_table, + uint32_t symbol_id, + enum pva_symbol_type symbol_type) +{ + struct pva_symbol_info *symbol = NULL; + + symbol = pva_kmd_get_symbol(symbol_table, symbol_id); + if (!symbol) { + return NULL; + } + +#if !defined(PVA_SKIP_SYMBOL_TYPE_CHECK) + if (symbol->symbol_type != symbol_type) { + pva_kmd_log_err("Unexpected symbol type\n"); + return NULL; + } +#endif + + return symbol; +} + +enum pva_error +pva_kmd_load_executable(void *executable_data, uint32_t executable_size, + struct pva_kmd_device *pva, uint8_t dma_smmu_id, + struct pva_kmd_exec_symbol_table *out_symbol_table, + struct pva_kmd_device_memory **out_metainfo, + struct pva_kmd_device_memory **out_sections); + +void pva_kmd_unload_executable(struct pva_kmd_exec_symbol_table *symbol_table, + struct pva_kmd_device_memory *metainfo, + struct pva_kmd_device_memory *sections); + +#endif // PVA_KMD_EXECUTABLE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c new file mode 100644 index 00000000..f0b11903 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_fw_debug.h" +#include "pva_kmd_utils.h" +#include "pva_api.h" + +void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer) +{ + uint32_t tail = print_buffer->buffer_info->tail; + + if (tail > print_buffer->size) { + pva_kmd_log_err( + "Firmware print tail is out of bounds! Refusing to print\n"); + pva_dbg_printf("Tail %u vs size %u\n", tail, + print_buffer->size); + return; + } + + while (print_buffer->head < tail) { + uint32_t max_len = tail - print_buffer->head; + const char *str = print_buffer->content + print_buffer->head; + uint32_t print_size; + + /* It must be null terminted */ + if (print_buffer->content[tail - 1] != '\0') { + pva_kmd_log_err( + "Firmware print is not null terminated! 
Refusing to print"); + } + print_size = strnlen(str, max_len); + pva_kmd_print_str(str); + + /* +1 for null terminator */ + print_buffer->head += print_size + 1; + } + + if (print_buffer->buffer_info->flags & PVA_FW_PRINT_BUFFER_OVERFLOWED) { + pva_kmd_log_err("Firmware print buffer overflowed!"); + } + + if (print_buffer->buffer_info->flags & PVA_FW_PRINT_FAILURE) { + pva_kmd_log_err("Firmware print failed!"); + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h new file mode 100644 index 00000000..ac367718 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_FW_DEBUG_H +#define PVA_KMD_FW_DEBUG_H +#include "pva_api.h" +#include "pva_fw.h" + +struct pva_kmd_fw_print_buffer { + struct pva_fw_print_buffer_header *buffer_info; + char const *content; + uint32_t size; + uint32_t head; +}; + +void pva_kmd_drain_fw_print(struct pva_kmd_fw_print_buffer *print_buffer); + +#endif // PVA_KMD_FW_DEBUG_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c new file mode 100644 index 00000000..001e212a --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_api_cmdbuf.h" +#include "pva_api_types.h" +#include "pva_bit.h" +#include "pva_fw.h" +#include "pva_kmd_cmdbuf.h" +#include "pva_kmd_device.h" +#include "pva_kmd_constants.h" +#include "pva_utils.h" +#include "pva_kmd_fw_profiler.h" + +// TODO: This is here temporarily just for testing. 
Should be moved to a common header +#define CMD_ID(x) PVA_EXTRACT(x, 6, 0, uint8_t) +#define CMD(name) [CMD_ID(PVA_CMD_OPCODE_##name)] = #name + +static const char *cmd_names[PVA_CMD_OPCODE_COUNT] = { + CMD(LINK_CHUNK), + CMD(BARRIER), + CMD(ACQUIRE_ENGINE), + CMD(RELEASE_ENGINE), + CMD(SET_CURRENT_ENGINE), + CMD(CLEAR_VMEM), + CMD(BIND_L2SRAM), + CMD(RELEASE_L2SRAM), + CMD(INVALIDATE_L2SRAM), + CMD(FLUSH_L2SRAM), + CMD(PATCH_L2SRAM_OFFSET), + CMD(SET_VPU_EXECUTABLE), + CMD(INIT_VPU_EXECUTABLE), + CMD(PREFETCH_VPU_CODE), + CMD(SET_VPU_PARAMETER), + CMD(SET_VPU_PARAMETER_WITH_ADDRESS), + CMD(SET_VPU_INSTANCE_PARAMETER), + CMD(SET_VPU_PARAMETER_WITH_BUFFER), + CMD(RUN_VPU), + CMD(SET_PPE_EXECUTABLE), + CMD(INIT_PPE_EXECUTABLE), + CMD(PREFETCH_PPE_CODE), + CMD(RUN_PPE), + CMD(FETCH_DMA_CONFIGURATION), + CMD(SETUP_DMA), + CMD(RUN_DMA), + CMD(BIND_DRAM_SLOT), + CMD(BIND_VMEM_SLOT), + CMD(UNREGISTER_RESOURCE), + CMD(WRITE_DRAM), + CMD(CAPTURE_TIMESTAMP), + CMD(RUN_UNIT_TESTS) +}; + +static const char *priv_cmd_names[PVA_CMD_PRIV_OPCODE_COUNT] = { + CMD(INIT_RESOURCE_TABLE), + CMD(DEINIT_RESOURCE_TABLE), + CMD(UPDATE_RESOURCE_TABLE), + CMD(INIT_QUEUE), + CMD(DEINIT_QUEUE), + CMD(ENABLE_FW_PROFILING), + CMD(DISABLE_FW_PROFILING), + CMD(SUSPEND_FW), + CMD(RESUME_FW) +}; + +static inline const char *pva_fw_get_cmd_name(uint32_t opcode) +{ + uint32_t cmd_id; + const char *name; + + cmd_id = CMD_ID(opcode); + + if (opcode & PVA_CMD_PRIV_OPCODE_FLAG) { + if (cmd_id >= PVA_CMD_PRIV_OPCODE_COUNT) { + return "INVALID"; + } + name = priv_cmd_names[cmd_id]; + } else { + if (cmd_id >= PVA_CMD_OPCODE_COUNT) { + return "INVALID"; + } + name = cmd_names[cmd_id]; + } + + if (name == NULL) { + return "UNKNOWN"; + } else { + return name; + } +} + +void pva_kmd_device_init_profiler(struct pva_kmd_device *pva) +{ + enum pva_error err = PVA_SUCCESS; + const uint32_t profiling_buffer_size = PVA_KMD_FW_PROFILING_BUFFER_SIZE; + + struct pva_kmd_fw_profiling_buffer *fw_profiling_buffer = + &pva->fw_profiling_buffer; + + // Event message should be 32-bit to keep logging latency low + ASSERT(sizeof(struct pva_fw_event_message) == sizeof(uint32_t)); + + pva->fw_profiling_buffer_memory = + pva_kmd_device_memory_alloc_map(profiling_buffer_size, pva, + PVA_ACCESS_RW, + PVA_R5_SMMU_CONTEXT_ID); + ASSERT(pva->fw_profiling_buffer_memory != NULL); + + /* Add profiling memory to resource table */ + err = pva_kmd_add_dram_buffer_resource( + &pva->dev_resource_table, pva->fw_profiling_buffer_memory, + &pva->fw_profiling_buffer_resource_id); + ASSERT(err == PVA_SUCCESS); + pva_kmd_update_fw_resource_table(&pva->dev_resource_table); + + fw_profiling_buffer->buffer_info = + (struct pva_fw_profiling_buffer_header *) + pva->fw_profiling_buffer_memory->va; + fw_profiling_buffer->content = + pva_offset_pointer(pva->fw_profiling_buffer_memory->va, + sizeof(*fw_profiling_buffer->buffer_info)); + fw_profiling_buffer->size = pva->fw_profiling_buffer_memory->size; + fw_profiling_buffer->head = 0U; + fw_profiling_buffer->buffer_info->flags = 0U; + fw_profiling_buffer->buffer_info->tail = 0U; + + pva->debugfs_context.g_fw_profiling_config.enabled = false; + pva->debugfs_context.g_fw_profiling_config.filter = 0x0; +} + +void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva) +{ + pva_kmd_drop_resource(&pva->dev_resource_table, + pva->fw_profiling_buffer_resource_id); + pva->debugfs_context.g_fw_profiling_config.enabled = false; +} + +enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva) +{ + struct 
pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + struct pva_cmd_enable_fw_profiling *cmd; + uint64_t buffer_offset = 0U; + uint32_t filter = 0U; + uint8_t timestamp_type = TIMESTAMP_TYPE_CYCLE_COUNT; + uint32_t fence_val; + enum pva_error err; + + // filter |= PVA_FW_EVENT_DO_CMD; + filter |= PVA_FW_EVENT_RUN_VPU; + + if (pva->debugfs_context.g_fw_profiling_config.enabled) { + return PVA_SUCCESS; + } + + pva->fw_profiling_buffer.head = 0U; + pva->fw_profiling_buffer.buffer_info->flags = 0U; + pva->fw_profiling_buffer.buffer_info->tail = 0U; + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); + ASSERT(cmd != NULL); + pva_kmd_set_cmd_enable_fw_profiling( + cmd, pva->fw_profiling_buffer_resource_id, + pva->fw_profiling_buffer.size, buffer_offset, filter, + timestamp_type); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when initializing context"); + goto err_out; + } + + pva->debugfs_context.g_fw_profiling_config.enabled = true; + pva->debugfs_context.g_fw_profiling_config.filter = filter; + pva->debugfs_context.g_fw_profiling_config.timestamp_type = + timestamp_type; + pva->debugfs_context.g_fw_profiling_config.timestamp_size = + (pva->debugfs_context.g_fw_profiling_config.timestamp_type == + TIMESTAMP_TYPE_TSE) ? + 8 : + 4; + + return PVA_SUCCESS; +err_out: + return err; +} + +enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + struct pva_cmd_disable_fw_profiling *cmd; + uint32_t fence_val; + enum pva_error err; + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); + ASSERT(cmd != NULL); + pva_kmd_set_cmd_disable_fw_profiling(cmd); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when initializing context"); + goto err_out; + } + + pva->debugfs_context.g_fw_profiling_config.enabled = false; + pva->debugfs_context.g_fw_profiling_config.filter = 0x0; + + return PVA_SUCCESS; +err_out: + return err; +} + +static void decode_and_print_event(unsigned long walltime, + unsigned long relative_time, + struct pva_fw_event_message message, + char *msg_string) +{ + switch (PVA_BIT(message.event)) { + case PVA_FW_EVENT_DO_CMD: { + sprintf(msg_string, + "pva_fw@%lu: [%8lu] event=%-12s type=%-7s slot=%u idx=%-5u opcode=%s", + walltime, relative_time, "DO_CMD", + event_type_to_string(message.type), message.arg2, + message.arg3, pva_fw_get_cmd_name(message.arg1)); + } break; + case PVA_FW_EVENT_SCAN_QUEUES: { + sprintf(msg_string, + "pva_fw@%lu: [%8lu] event=%-12s type=%-7s found=%u ccq_id=%-5u queue_id=%u", + walltime, relative_time, "SCAN_QUEUES", + event_type_to_string(message.type), message.arg1, + message.arg2, message.arg3); + } break; + case 
PVA_FW_EVENT_SCAN_SLOTS: {
+		sprintf(msg_string,
+			"pva_fw@%lu: [%8lu] event=%-12s type=%-7s state=%u slot=%u",
+			walltime, relative_time, "SCAN_SLOTS",
+			event_type_to_string(message.type), message.arg1,
+			message.arg2);
+	} break;
+	case PVA_FW_EVENT_RUN_VPU: {
+		sprintf(msg_string,
+			"pva_fw@%lu: [%8lu] event=%-12s type=%-7s slot=%u idx=%-5u opcode=%s",
+			walltime, relative_time, "RUN_VPU",
+			event_type_to_string(message.type), message.arg2,
+			message.arg3, pva_fw_get_cmd_name(message.arg1));
+	} break;
+	default:
+		pva_dbg_printf("Unknown event type\n");
+		break;
+	}
+}
+
+void pva_kmd_drain_fw_profiling_buffer(
+	struct pva_kmd_device *pva,
+	struct pva_kmd_fw_profiling_buffer *profiling_buffer)
+{
+	char msg_string[200] = { '\0' };
+	struct pva_fw_event_message message;
+	uint64_t prev_walltime = 0U;
+	uint64_t timestamp = 0U;
+	uint64_t relative_time = 0U;
+	uint32_t buffer_space;
+
+	// TODO: R5 frequency is hard-coded for now. Get this at runtime.
+	static const uint32_t r5_freq = 716800000U;
+	static const unsigned long r5_cycle_duration = 1000000000000 / r5_freq;
+	unsigned long walltime = 0U; // in nanoseconds
+	uint64_t walltime_diff;
+
+	const uint32_t message_size =
+		sizeof(message) +
+		pva->debugfs_context.g_fw_profiling_config.timestamp_size;
+	uint32_t *profiling_buffer_head = &profiling_buffer->head;
+	uint32_t profiling_buffer_tail = profiling_buffer->buffer_info->tail;
+	while (*profiling_buffer_head < profiling_buffer_tail) {
+		buffer_space = safe_addu32(*profiling_buffer_head,
+					   safe_subu32(message_size, 1U));
+		ASSERT(buffer_space <= profiling_buffer_tail);
+		memcpy(&message,
+		       &profiling_buffer->content[*profiling_buffer_head],
+		       sizeof(message));
+		memcpy(&timestamp,
+		       &profiling_buffer->content[*profiling_buffer_head +
+						  sizeof(message)],
+		       pva->debugfs_context.g_fw_profiling_config
+			       .timestamp_size);
+
+		if (pva->debugfs_context.g_fw_profiling_config.timestamp_type ==
+		    TIMESTAMP_TYPE_TSE) {
+			walltime = (timestamp << 5);
+		} else if (pva->debugfs_context.g_fw_profiling_config
+				   .timestamp_type ==
+			   TIMESTAMP_TYPE_CYCLE_COUNT) {
+			timestamp = PVA_LOW32(timestamp);
+			walltime = (r5_cycle_duration * timestamp) / 1000U;
+		}
+		walltime_diff = safe_subu64((uint64_t)walltime, prev_walltime);
+		relative_time = (prev_walltime == 0U) ? 0U : walltime_diff;
+		decode_and_print_event(walltime, relative_time, message,
+				       &msg_string[0]);
+		pva_kmd_print_str(msg_string);
+		*profiling_buffer_head = *profiling_buffer_head + message_size;
+		prev_walltime = walltime;
+	}
+
+	return;
+}
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h
new file mode 100644
index 00000000..76fcbead
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and
+ * proprietary rights in and to this software and related documentation. Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */ +#ifndef PVA_KMD_FW_PROFILER_H +#define PVA_KMD_FW_PROFILER_H +#include "pva_kmd_device.h" + +struct pva_kmd_fw_profiling_buffer { +#define PVA_KMD_FW_PROFILING_BUFFER_SIZE (512 * 1024) + struct pva_fw_profiling_buffer_header *buffer_info; + char const *content; + uint32_t size; + uint32_t head; +}; + +struct pva_kmd_fw_profiling_config { + uint32_t filter; + enum pva_fw_timestamp_t timestamp_type; + uint8_t timestamp_size; + uint8_t enabled; +}; + +void pva_kmd_device_init_profiler(struct pva_kmd_device *pva); + +void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva); + +void pva_kmd_drain_fw_profiling_buffer( + struct pva_kmd_device *pva, + struct pva_kmd_fw_profiling_buffer *profiling_buffer); + +enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva); + +enum pva_error pva_kmd_notify_fw_disable_profiling(struct pva_kmd_device *pva); +#endif diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c new file mode 100644 index 00000000..df045684 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c @@ -0,0 +1,1608 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_hwseq_validate.h" +#include "pva_api_dma.h" +#include "pva_kmd_device.h" +//TODO: Use nv_speculate barrier +//#include "nv_speculation_barrier.h" + +#define MAX_DESC_ID 0x3FU +#define HWSEQ_MIN_WORDS 5U + +static inline const void *read_hwseq_blob(struct pva_hwseq_buffer *buffer, + uint32_t num_bytes) +{ + const uint8_t *ret = NULL; + if (num_bytes > buffer->bytes_left) { + return NULL; + } + ret = buffer->data; + buffer->data += num_bytes; + buffer->bytes_left -= num_bytes; + return (const void *)ret; +} + +#if 0 +// Debug code which can be removed later after HW Sequencer checks are done +static void print_hwseq_blob(const uint8_t *blob, uint16_t start_address, + uint16_t end_address) +{ + uint16_t i = 0; + printf(" Start Address = %d End Address = %d", start_address, + end_address); + for (i = (start_address << 2U); i <= (end_address << 2U); i++) { + printf("\nblob[%d] = %x", i, blob[i]); + } + printf("\n"); +} +#endif +/** + * \brief Validates the descriptor entry in HW Sequencer Blob + * + * This function ensures that the descriptor entry read from HW Sequencer Blob + * passes the following checks: + * - Non-NULL Descriptor Entry. + * - Descriptor ID != 0 and Descriptor ID < num_descriptors. + * + * \param[in] desc_entry A pointer to the desc_entry read from the HW Sequencer Blob. + * \param[in] num_descriptors Number of DMA Descriptors in the current DMA Config. + * + * \return + * - PVA_SUCCESS if above checks pass. + * - PVA_INVAL if any of the above checks fail. 
+ */ +static enum pva_error +validate_desc_entry(struct pva_dma_hwseq_desc_entry const *desc_entry, + const uint8_t num_descriptors) +{ + if (desc_entry == NULL) { + pva_kmd_log_err("Hwseq buffer too small"); + return PVA_INVAL; + } + + if ((desc_entry->did == 0U) || (desc_entry->did > num_descriptors)) { + pva_kmd_log_err("Invalid Descriptor ID found in HW Sequencer"); + return PVA_INVAL; + } + + return PVA_SUCCESS; +} + +static inline uint8_t get_head_desc_did(struct pva_hwseq_priv const *hwseq) +{ + return hwseq->dma_descs[0].did; +} + +static inline enum pva_error check_adv_params(int32_t adv1, int32_t adv2, + int32_t adv3, uint8_t rpt1, + uint8_t rpt2, uint8_t rpt3, + bool has_dim3) +{ + if (!has_dim3 && ((adv1 != 0) || (adv2 != 0) || (adv3 != 0) || + ((rpt1 + rpt2 + rpt3) != 0U))) { + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +/** + * \brief Validates advancement parameters of Head Descriptor of HW Sequencer + * + * This function validates the advancement paramters of the head descriptor of + * HW Seqeuncer. + * This checks that the advancement parameters for both source and destination of + * DMA Transfer to be zero and the addition of all repetition parameters per transfer + * mode add up to zero + * + * \param[in] attr Pointer to a valid DMA Transfer Attributes + * \param[in] has_dim3 Boolean to indicate if Tensor Data Flow is in use + * Range: False: TDF is not in use + * True: TDF is in use + * + * \return + * - PVA_SUCCESS if above checks pass + * - PVA_INVAL if above checks fail + */ +static inline enum pva_error +validate_adv_params(struct pva_dma_transfer_attr const *attr, bool has_dim3) +{ + enum pva_error err = PVA_SUCCESS; + + err = check_adv_params(attr->adv1, attr->adv2, attr->adv3, attr->rpt1, + attr->rpt2, attr->rpt3, has_dim3); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("descriptor source tile looping not allowed"); + return err; + } + if (attr->adv1 < 0) { + pva_kmd_log_err( + "source advance amount on dim1 can not be negative"); + return PVA_INVAL; + } + + if ((attr->adv1 * ((int32_t)(attr->rpt1) + 1)) != attr->adv2) { + pva_kmd_log_err( + "Invalid source advance amount on dim1 or dim2"); + return PVA_INVAL; + } + return err; +} + +static enum pva_error calculate_tx(uint32_t *tx, + const struct pva_hwseq_priv *hwseq) +{ + const struct pva_dma_descriptor *d0 = + (hwseq->hdr->to >= 0) ? hwseq->head_desc : hwseq->tail_desc; + const struct pva_dma_descriptor *d1 = + (hwseq->hdr->to >= 0) ? hwseq->tail_desc : hwseq->head_desc; + + if (((d0->tx + hwseq->hdr->padl) > 0xFFFFU) || + ((d1->tx + hwseq->hdr->padr) > 0xFFFFU)) { + pva_kmd_log_err("Invalid Tx + Pad X in HW Sequencer"); + return PVA_INVAL; + } + *tx = maxu32(((uint32_t)(d0->tx) + hwseq->hdr->padl), + ((uint32_t)(d1->tx) + hwseq->hdr->padr)); + return PVA_SUCCESS; +} + +static enum pva_error calculate_ty(uint32_t *ty, + const struct pva_hwseq_priv *hwseq) +{ + const struct pva_dma_descriptor *d0 = + (hwseq->hdr->to >= 0) ? hwseq->head_desc : hwseq->tail_desc; + const struct pva_dma_descriptor *d1 = + (hwseq->hdr->to >= 0) ? 
hwseq->tail_desc : hwseq->head_desc; + + if (((d0->ty + hwseq->hdr->padt) > 0xFFFFU) || + ((d1->ty + hwseq->hdr->padb) > 0xFFFFU)) { + pva_kmd_log_err("Invalid Ty + Pad Y in HW Sequencer"); + return PVA_INVAL; + } + *ty = maxu32(((uint32_t)(d0->ty) + hwseq->hdr->padt), + ((uint32_t)(d1->ty) + hwseq->hdr->padb)); + return PVA_SUCCESS; +} + +/** + * \brief Validates the Tiles in Circular Buffer can be contained in the vmem + * + * This function returns error if Circular Buffer size is more than the specified + * vmem_size indicating that the Circular buffer cannot be contained in the VMEM + * on DMA transfer to VMEM + * + * This function also calculates the tile size of the tile being addressed by the head + * and tail descriptors of the HW Sequencer and ensures that the tile size lies within + * the specified Circular Buffer Size + * + * \param[in] hwseq A pointer to a populated struct \ref pva_hwseq_priv + * \param[in] entry Pointer to a valid DMA Access Entry + * \param[in] has_dim3 Boolean to indicate if Tensor Data Flow is in use + * Range: False: TDF is not in use + * True: TDF is in use + * + * \return + * - PVA_SUCCESS if above checks pass + * - PVA_INVAL if any of the above checks fail + * - PVA_ERR_MATH_OP if any math operation fails + */ +static enum pva_error validate_cb_tiles(struct pva_hwseq_priv *hwseq, + struct pva_kmd_dma_access_entry *entry, + bool has_dim3) +{ + const struct pva_dma_descriptor *head_desc = hwseq->head_desc; + const struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + uint32_t tx = 0; + uint32_t ty = 0; + int64_t end_addr = 0LL; + pva_math_error math_err = MATH_OP_SUCCESS; + + if (hwseq->is_split_padding) { + if (hwseq->is_raster_scan) { + ty = head_desc->ty; + if (calculate_tx(&tx, hwseq) != PVA_SUCCESS) { + return PVA_INVAL; + } + } else { + tx = head_desc->tx; + if (calculate_ty(&ty, hwseq) != PVA_SUCCESS) { + return PVA_INVAL; + } + } + } else { + tx = maxu32(head_desc->tx, tail_desc->tx); + ty = maxu32(head_desc->ty, tail_desc->ty); + } + + end_addr = + adds64(muls64((int64_t)head_desc->dst.line_pitch, + subs64((int64_t)ty, 1LL, &math_err), &math_err), + (int64_t)tx, &math_err); + + end_addr = adds64(end_addr, + muls64((int64_t)head_desc->src.rpt1, + head_desc->dst.adv1, &math_err), + &math_err); + + if ((head_desc->dst.adv2 > 0) && (end_addr > head_desc->dst.adv2)) { + pva_kmd_log_err( + "Tile voxel size exceeds destination advance amount on dim2"); + return PVA_INVAL; + } + + end_addr = muls64(end_addr, + convert_to_signed_s64(1ULL + << (head_desc->log2_pixel_size & + MAX_BYTES_PER_PIXEL)), + &math_err); + entry->start_addr = mins64(end_addr, 0LL); + entry->end_addr = maxs64(end_addr, 0LL); + + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("Math error in tile size calculation"); + return PVA_ERR_MATH_OP; + } + + return PVA_SUCCESS; +} + +/** + * \brief This function check if advancement parameters in DMA Descriptor + * are set correctly for handling valid VMEM transfer + * + * This function ensures that if vmem_tile_count is more than 1, then advancement paramters + * for the DMA Descriptor are set to 0 else error is returned + * + * If vmem is dst, then dst.adv parameters are to be set to 0 + * If vmem is src, then src.adv parameters are to be set to 0 + * + * \param[in] attr Pointer to a valid DMA Transfer Attributes + * \param[in] vmem_tile_count Number of VMEM tile present + * \param[in] has_dim3 Boolean to indicate if Tensor Data Flow is in use + * Range: False: TDF is not in use + * True: TDF is in use + * + * \return + * - 
PVA_SUCCESS if above checks are valid + * - PVA_INVAL if any of the above checks fail + */ +static inline enum pva_error +check_vmem_setup(struct pva_dma_transfer_attr const *attr, + uint32_t vmem_tile_count, bool has_dim3) +{ + if ((!has_dim3) && (vmem_tile_count > 1U) && + ((attr->adv1 != 0) || (attr->adv2 != 0) || (attr->adv3 != 0))) { + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +/** + * \brief Validates Transfer Modes allowed with HW Sequener + * + * This function checks if the transfer modes allowed for HW Sequencer as per + * DMA IAS are set correctly in the DMA Descriptor + * Allowed Transfer Modes: + * ---------------------------------------------- + * | Source | Destination | + * ---------------------------------------------- + * | VMEM | MC/L2SRAM | + * ---------------------------------------------- + * | MC/L2SRAM | VMEM | + * ---------------------------------------------- + * + * \param[in] dma_desc DMA Descriptor of type \ref nvpva_dma_descriptor + * + * \return + * - PVA_SUCCESS if valid source/destination pair is found + * - PVA_INVAL if invalid source/destination pair is found + */ +static enum pva_error validate_xfer_mode(struct pva_dma_descriptor *dma_desc) +{ + enum pva_error err = PVA_SUCCESS; + + switch (dma_desc->src.transfer_mode) { + case (uint8_t)PVA_DMA_TRANS_MODE_VMEM: + if (!((dma_desc->dst.transfer_mode == + (uint8_t)PVA_DMA_TRANS_MODE_DRAM) || + (dma_desc->dst.transfer_mode == + (uint8_t)PVA_DMA_TRANS_MODE_L2SRAM)) || + (dma_desc->dst.cb_enable == 1U)) { + pva_kmd_log_err( + "HWSequncer: Invalid dst.transfer_mode"); + err = PVA_INVAL; + } + break; + case (uint8_t)PVA_DMA_TRANS_MODE_L2SRAM: + case (uint8_t)PVA_DMA_TRANS_MODE_DRAM: + if ((dma_desc->dst.transfer_mode != + (uint8_t)PVA_DMA_TRANS_MODE_VMEM) || + (dma_desc->src.cb_enable == 1U)) { + /* Source or destination Circular Buffer mode should not be used for MC or L2 + in frame addressing mode due to rtl bug 3136383 */ + pva_kmd_log_err( + "HW Sequencer: Invalid src.transfer_mode"); + err = PVA_INVAL; + } + break; + default: + err = PVA_INVAL; + pva_kmd_log_err("Unreachable branch"); + break; + } + + return err; +} + +static inline uint32_t +get_vmem_tile_count(struct pva_dma_transfer_attr const *attr, bool has_dim3) +{ + uint32_t rpt1_plus_1 = 0U; + uint32_t rpt2_plus_1 = 0U; + uint32_t rpt3_plus_1 = 0U; + uint32_t temp = 0U; + + if (has_dim3) { + return ((uint32_t)attr->rpt3 + 1U); + } + + // Calculate intermediate results first to avoid multiple evaluations + rpt1_plus_1 = ((uint32_t)attr->rpt1 + 1U); + rpt2_plus_1 = ((uint32_t)attr->rpt2 + 1U); + rpt3_plus_1 = ((uint32_t)attr->rpt3 + 1U); + + // Perform multiplications sequentially with error checking + temp = safe_mulu32(rpt1_plus_1, rpt2_plus_1); + return safe_mulu32(temp, rpt3_plus_1); +} + +/** + * \brief Validate HW Sequencer for VMEM at Destination of DMA Transfer + * + * This function validates the DMA Configuration in the HW Seqeuncer + * where VMEM is the destination of the DMA Transfer. + * + * The function does the following checks: + * - Validate Transfer Mode src/dst pair. + * - Validate advancement paramters. + * - Obtain VMEM Size by getting the buffer size associated with the HW Sequencer DMA Descriptor + * - If Circular Buffer is present, validate Circular Buffer can handle the vmem tiles. 
+ * This is done by calling function \ref validate_cb_tiles + * - If Circular Buffer is not present do the following checks + * - Reject Split Padding if no Circular Buffer is used for the destination + * - Check VMEM Setup for the Destination Mode + * This is done by calling check_vmem_setup + * - Calculate end address of the DMA Transfer + * Populate the start and end address of the DMA Transfer in the DMA Access Entry + * + * \param[in] hwseq A const structure of type \ref pva_hwseq_priv + * \param[in] vmem_tile_count Number of VMEM tiles + * \param[in] has_dim3 Boolean to indicate if Tensor Data Flow is in use + * Range: False: TDF is not in use + * True: TDF is in use + * + * \return + * - PVA_SUCCESS if above checks pass + * - PVA_INVAL if any of the above checks fail + * - PVA_ERR_MATH_OP if any math operation fails + */ +static enum pva_error validate_dst_vmem(struct pva_hwseq_priv *hwseq, + uint32_t *vmem_tile_count, + bool has_dim3) +{ + enum pva_error err = PVA_SUCCESS; + uint32_t tx = 0U; + uint32_t ty = 0U; + int64_t end_addr = 0LL; + int64_t num_bytes = 0LL; + int64_t offset = 0LL; + struct pva_dma_descriptor *head_desc = hwseq->head_desc; + struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + uint8_t head_desc_id = get_head_desc_did(hwseq); + pva_math_error math_err = MATH_OP_SUCCESS; + + num_bytes = convert_to_signed_s64( + 1ULL << (head_desc->log2_pixel_size & MAX_BYTES_PER_PIXEL)); + offset = convert_to_signed_s64(head_desc->src.offset); + + *vmem_tile_count = get_vmem_tile_count(&head_desc->dst, has_dim3); + + err = validate_adv_params(&head_desc->src, has_dim3); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Descriptor source tile looping not allowed"); + return PVA_INVAL; + } + + if (head_desc->dst.cb_enable != 0U) { + err = validate_cb_tiles(hwseq, + &hwseq->access_sizes[head_desc_id].dst, + has_dim3); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "VMEM address range validation failed for dst vmem with cb"); + return PVA_INVAL; + } + } else { + if (hwseq->is_split_padding) { + pva_kmd_log_err( + "Split padding not supported without circular buffer"); + return PVA_INVAL; + } + + err = check_vmem_setup(&head_desc->dst, *vmem_tile_count, + has_dim3); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Invalid VMEM destination setup"); + return PVA_INVAL; + } + + if (head_desc->src.adv1 < 0) { + pva_kmd_log_err("Source adv1 can not be negative"); + return PVA_INVAL; + } + + tx = maxu32(head_desc->tx, tail_desc->tx); + ty = maxu32(head_desc->ty, tail_desc->ty); + end_addr = + muls64((int64_t)head_desc->dst.line_pitch, + subs64((int64_t)ty, 1LL, &math_err), &math_err); + + end_addr = adds64(end_addr, (int64_t)tx, &math_err); + end_addr = adds64(muls64((int64_t)head_desc->src.rpt1, + head_desc->dst.adv1, &math_err), + end_addr, &math_err); + + end_addr = adds64(muls64(end_addr, num_bytes, &math_err), + offset, &math_err); + + hwseq->access_sizes[head_desc_id].dst.start_addr = + mins64(end_addr, 0LL); + hwseq->access_sizes[head_desc_id].dst.end_addr = + maxs64(end_addr, 0LL); + } + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("Invalid VMEM destination setup"); + return PVA_ERR_MATH_OP; + } + + return err; +} + +/** + * \brief Ensures no padding is set in the Header of the HW Sequencer + * + * This function ensures that no padding is present in the HW Sequencer Header + * i.e. 
all padding values are 0 + * If any of the padding values are found to be not 0, an error is returned + * + * \param[in] header A struct of type \ref pva_dma_hwseq_hdr_t + * which is the HW Sequencer Header + * + * \return + * - PVA_SUCCESS if all padding values in the header are zero + * - PVA_INVAL if any of the padding values in the header is non-zero + */ +static inline enum pva_error +check_no_padding(const struct pva_dma_hwseq_hdr *header) +{ + if ((header->padl != 0U) || (header->padr != 0U) || + (header->padt != 0U) || (header->padb != 0U)) { + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +/** + * \brief Validate HW Sequencer for VMEM at Source of DMA Transfer + * + * This function validates the DMA Configuration in the HW Seqeuncer + * where VMEM is the Source of the DMA Transfer. + * + * The function does the following checks: + * - Validate Transfer Mode src/dst pair. + * - Validate advancement paramters. + * - Check no padding in HW Sequencer header. + * - Obtain VMEM Size by getting the buffer size associated with the HW Sequencer DMA Descriptor, + * - Calculate tile_size of the memory transfer using head and tail descriptors in the HW Sequencer. + * - If Circular Buffer is present: + * - Validate Circular Buffer size is less than VMEM size previously obtained + * - Validate tile_size obtained is less than Circular Buffer Size + * - If Circular Buffer is not present do the following checks + * - Check VMEM Setup for Source Mode + * This is done by calling check_vmem_setup + * Calculate end address of the DMA Transfer + * Populate the start and end address of the DMA Transfer in the DMA Access Entry + * + * \param[in] hwseq A const structure of type \ref pva_hwseq_priv + * \param[in] vmem_tile_count Number of VMEM tiles + * \param[in] has_dim3 Boolean to indicate if Tensor Data Flow is in use + * Range: False: TDF is not in use + * True: TDF is in use + * + * \return + * - PVA_SUCCESS if above checks pass + * - PVA_INVAL if any of the above checks fail + * - PVA_ERR_MATH_OP if any math operation fails + */ +static enum pva_error validate_src_vmem(struct pva_hwseq_priv *hwseq, + uint32_t *vmem_tile_count, + bool has_dim3) +{ + struct pva_dma_descriptor *head_desc = hwseq->head_desc; + struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + uint8_t head_desc_id = get_head_desc_did(hwseq); + uint32_t tx = 0U; + uint32_t ty = 0U; + int64_t end_addr = 0LL; + int64_t num_bytes = 0LL; + int64_t offset = 0LL; + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + + num_bytes = convert_to_signed_s64( + 1ULL << (head_desc->log2_pixel_size & MAX_BYTES_PER_PIXEL)); + offset = convert_to_signed_s64(head_desc->src.offset); + + *vmem_tile_count = get_vmem_tile_count(&head_desc->src, has_dim3); + + // make sure last 3 loop dimensions are not used + err = validate_adv_params(&head_desc->dst, has_dim3); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Descriptor destination tile looping not allowed"); + return PVA_INVAL; + } + + // since we don't support output padding, make sure hwseq program header has none + err = check_no_padding(hwseq->hdr); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("invalid padding value in hwseq program"); + return PVA_INVAL; + } + + tx = maxu32(head_desc->tx, tail_desc->tx); + ty = maxu32(head_desc->ty, tail_desc->ty); + + end_addr = + adds64(muls64((int64_t)head_desc->src.line_pitch, + subs64((int64_t)ty, 1LL, &math_err), &math_err), + (int64_t)tx, &math_err); + + if (0U != head_desc->src.cb_enable) { + end_addr = 
adds64(muls64((int64_t)head_desc->dst.rpt1, + head_desc->src.adv1, &math_err), + end_addr, &math_err); + + if ((head_desc->src.adv2 > 0) && + (end_addr > head_desc->src.adv2)) { + pva_kmd_log_err( + "Tile voxel size exceeds source advance amount on dim2"); + return PVA_INVAL; + } + end_addr = muls64(end_addr, num_bytes, &math_err); + + hwseq->access_sizes[head_desc_id].src.start_addr = + mins64(end_addr, 0LL); + hwseq->access_sizes[head_desc_id].src.end_addr = + maxs64(end_addr, 0LL); + } else { + err = check_vmem_setup(&head_desc->src, *vmem_tile_count, + has_dim3); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Invalid VMEM Source setup in hw sequencer"); + return PVA_INVAL; + } + + end_addr = adds64(muls64((int64_t)head_desc->dst.rpt1, + head_desc->src.adv1, &math_err), + end_addr, &math_err); + + end_addr = adds64(muls64(end_addr, num_bytes, &math_err), + offset, &math_err); + + hwseq->access_sizes[head_desc_id].src.start_addr = + mins64(end_addr, 0LL); + hwseq->access_sizes[head_desc_id].src.end_addr = + maxs64(end_addr, 0LL); + } + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err( + "Math error in VMEM Source setup in hw sequencer"); + return PVA_ERR_MATH_OP; + } + + return PVA_SUCCESS; +} + +/** + * \brief Validates if grid is large enough to support defined padding + * + * This function validates the following + * - Valid Horizontal padding/tile count + * This is done by ensuring if pad_x[0] and pad_x[1] are > 0 then + * grid_size_x should be atleast of size 2 + * - Valid Vertical Padding/tile count + * This is done by ensuring if pad_y[0] and pad_y[1] are > 0 then + * grid_size_y should be atlest of size 1 + * Also checked is tile_y == max(pad_y[0], pad_y[1]) + * - Constant tile height in raster mode + * This is done by ensuring tile_y[0] == tile_y[1] + * + * \param[in] gi Populated grid info of type \ref struct pva_hwseq_grid_info + * + * \return + * - PVA_SUCCESS if all the above checks pass + * - PVA_INVAL if any of the above checks fail + */ +static enum pva_error +validate_grid_padding(struct pva_hwseq_grid_info const *gi) +{ + // make sure grid is large enough to support defined padding + if ((gi->pad_x[0] > 0) && (gi->pad_x[1] > 0) && + (gi->grid_size_x < 2U)) { + pva_kmd_log_err("horizontal padding/tile count mismatch"); + return PVA_INVAL; + } + + // validate vertical padding + if (gi->tile_y[0] <= maxs32(gi->pad_y[0], gi->pad_y[1])) { + pva_kmd_log_err("invalid vertical padding"); + return PVA_INVAL; + } + // make sure ty is fixed + if (gi->tile_y[0] != gi->tile_y[1]) { + pva_kmd_log_err( + "tile height cannot change in raster-scan mode"); + return PVA_INVAL; + } + + return PVA_SUCCESS; +} + +/** + * \brief Validate Horizontal padding + * + * Ensures that pad_start and pad_end are not larger than grid tile_x and grid tile_y + * respectively + * + * \param[in] gi A valid pointer to object of type \ref struct pva_hwseq_grid_info. 
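+ *            Only the tile_x[0] and tile_x[1] fields of the grid info are
+ *            consulted by this check.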
+ * \param[in] pad_start Start Horizontal padding value + * \param[in] pad_end End Horizontal Padding value + * + * \return + * - PVA_SUCCESS if above checks pass + * - PVA_INVAL if grid tile_x <= pad_start or grid tile_y <= pad_end + */ +static inline enum pva_error +validate_horizontal_padding(struct pva_hwseq_grid_info const *gi, + int32_t pad_start, int32_t pad_end) +{ + if ((gi->tile_x[0] <= pad_start) || (gi->tile_x[1] <= pad_end)) { + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +/** + * \brief This function generates frame_info from the grid info + * + * This function generates a frame_info of type \ref struct pva_hwseq_frame_info + * from an input of type \ref struct pva_hwseq_grid_info. + * + * During conversion following checks are done: + * - validate_grid_padding. + * - get_grid_signed. + * - If split_padding is specified: + * - Reject overlapping tiles. + * This is done by ensuring left most tile x dimension + * is less than the grid step in x dimension. + * - If split_padding is not specified: + * - Ensure horizontal padding is valid. + * Function validate_horizontal_padding does this. + * + * \param[out] fi A pointer to object of type \ref struct pva_hwseq_frame_info + * which is populated with this function + * \param[in] gi A pointer to object of type \ref struct pva_hwseq_grid_info + * whic is the input for the conversion + * + * \return + * - PVA_SUCCESS if conversion is successfull and above checks pass + * - PVA_INVAL if any of the above checks fail + */ +static enum pva_error compute_frame_info(struct pva_hwseq_frame_info *fi, + struct pva_hwseq_grid_info const *gi) +{ + int32_t dim_offset = 0; + int32_t grid_size_x = 0; + int32_t grid_size_y = 0; + int32_t head_tile_count = 0; + int32_t left_tile_x = 0; + int32_t step_x = 0; + int32_t pad_start = 0; + int32_t pad_end = 0; + int64_t alt_start_x = 0; + int64_t alt_end_x = 0; + pva_math_error math_err = MATH_OP_SUCCESS; + + if (validate_grid_padding(gi) != PVA_SUCCESS) { + return PVA_INVAL; + } + + grid_size_x = convert_to_signed_s32(gi->grid_size_x); + grid_size_y = convert_to_signed_s32(gi->grid_size_y); + head_tile_count = convert_to_signed_s32(gi->head_tile_count); + + if (gi->grid_step_x >= 0) { + left_tile_x = gi->tile_x[0]; + pad_start = gi->pad_x[0]; + pad_end = gi->pad_x[1]; + step_x = gi->grid_step_x; + } else { + left_tile_x = gi->tile_x[1]; + pad_start = gi->pad_x[1]; + pad_end = gi->pad_x[0]; + step_x = ((-1) * gi->grid_step_x); + } + + // update X span (partial) + dim_offset = muls32(gi->grid_step_x, subs32(grid_size_x, 1, &math_err), + &math_err); + + fi->start_x = mins32(dim_offset, 0); + fi->end_x = maxs32(dim_offset, 0); + // update Y span (full) + dim_offset = muls32(gi->grid_step_y, subs32(grid_size_y, 1, &math_err), + &math_err); + + fi->start_y = mins32(dim_offset, 0); + + fi->start_z = 0; + + if (gi->grid_step_y < 0) { + // For reversed scans, when the padding is applied it will adjust the read offset + fi->start_y += gi->pad_y[0]; + } + fi->end_y = maxs32(dim_offset, 0); + fi->end_y += ((int64_t)gi->tile_y[1] - gi->pad_y[0] - gi->pad_y[1]); + + fi->end_z = muls64(gi->tile_z, (int64_t)gi->grid_size_z, &math_err); + + if (gi->is_split_padding) { + // update X span (final) + fi->end_x += gi->tile_x[1]; + + // disallow overlapping tiles + if (left_tile_x > step_x) { + pva_kmd_log_err( + "sequencer horizontal jump offset smaller than tile width"); + return PVA_INVAL; + } + } else { + // update X span (final) + // remove padding since it's already included in tx in this mode + fi->end_x += + 
((int64_t)gi->tile_x[1] - gi->pad_x[0] - gi->pad_x[1]); + // validate horizontal padding + // swap pad values if sequencing in reverse + if (validate_horizontal_padding(gi, pad_start, pad_end) != + PVA_SUCCESS) { + pva_kmd_log_err("invalid horizontal padding"); + return PVA_INVAL; + } + // compute alternative span from 1st descriptor + dim_offset = gi->grid_step_x * (head_tile_count - 1); + alt_start_x = mins32(dim_offset, 0); + if (gi->grid_step_x < 0) { + // For reversed scans, when the padding is applied it will adjust the read offset + fi->start_x += gi->pad_x[0]; + alt_start_x += gi->pad_x[0]; + } + alt_end_x = maxs32(dim_offset, 0); + alt_end_x += ((int64_t)gi->tile_x[0] - pad_start); + if (gi->head_tile_count == gi->grid_size_x) { + // if there is only a single tile configuration per grid row + // then we should subtract padding at the end below since + // repetitions of this single tile will include both pad at + // start and end + alt_end_x -= pad_end; + } + // pick the conservative span + fi->start_x = mins64(alt_start_x, fi->start_x); + fi->end_x = maxs64(alt_end_x, fi->end_x); + } + + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("Math error in frame info calculation"); + return PVA_ERR_MATH_OP; + } + + return PVA_SUCCESS; +} + +/** + * \brief Swaps Frame x and y co-ordinates of frame boundaries. + * + * This function is called to swap X and Y Co-ordinates of Frame info. + * This is called to get correct frame info when using Vertical Mining Mode. + * + * \param[in, out] frame_info A valid pointer to object of type \ref struct pva_hwseq_frame_info + * X and Y Co-ordinates of frame_info are swapped + * + * \return void + * + */ +static inline void +swap_frame_boundaries(struct pva_hwseq_frame_info *frame_info) +{ + int64_t tmp; + tmp = frame_info->start_x; + frame_info->start_x = frame_info->start_y; + frame_info->start_y = tmp; + tmp = frame_info->end_x; + frame_info->end_x = frame_info->end_y; + frame_info->end_y = tmp; +} + +/** + * \brief Checks padding for the tiles + * + * This function ensures the following: + * - px, py (Horizontal and Vertical Padding) are set to 0 for Head Descriptor. + * - desc_reload_enable (Descriptor Release Enable) is set to 0 for Head Descriptor. + * - tx, ty (Tile width and Tile Heiht) for both Head and Tail Descriptors are non-zero. + * + * \param[in] head_desc Pointer to the head descriptor of HW Sequencer + * of type \ref nvpva_dma_descriptor. + * \param[in] tail_desc Pointer to the tail descriptor of HW Sequencer + * of type \ref nvpva_dma_descriptor. 
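+ * Only the tile dimensions (tx, ty) of the tail descriptor are checked here.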
+ * + * \return + * - PVA_SUCCESS if above checks pass + * - PVA_INVAL if any of the above checks fail + */ +static enum pva_error check_padding_tiles(struct pva_dma_descriptor *head_desc, + struct pva_dma_descriptor *tail_desc) +{ + if ((head_desc->px != 0U) || (head_desc->py != 0U) || + (head_desc->desc_reload_enable != 0U)) { + pva_kmd_log_err("Invalid padding in descriptor"); + return PVA_INVAL; + } + + if ((head_desc->tx == 0U) || (head_desc->ty == 0U) || + (tail_desc->tx == 0U) || (tail_desc->ty == 0U)) { + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +static enum pva_error validate_vmem(struct pva_hwseq_priv *hwseq, + bool is_dst_vmem, + struct pva_hwseq_per_frame_info *fr_info, + uint32_t cr_index, bool has_dim3) +{ + enum pva_error err = PVA_SUCCESS; + uint32_t vmem_tile_count = 0U; + pva_math_error math_err = MATH_OP_SUCCESS; + + err = validate_xfer_mode(hwseq->head_desc); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Invalid transfer mode"); + return PVA_INVAL; + } + + if (is_dst_vmem) { + err = validate_dst_vmem(hwseq, &vmem_tile_count, has_dim3); + } else { + err = validate_src_vmem(hwseq, &vmem_tile_count, has_dim3); + } + + if (err != PVA_SUCCESS) { + return PVA_INVAL; + } + + if (cr_index == 0U) { + fr_info->vmem_tiles_per_frame = vmem_tile_count; + } + + // total count of tiles sequenced + fr_info->seq_tile_count = + addu32(fr_info->seq_tile_count, + mulu32(hwseq->tiles_per_packet, + ((uint32_t)hwseq->colrow->crr + 1U), &math_err), + &math_err); + + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("Math error in tile count calculation"); + return PVA_ERR_MATH_OP; + } + if ((fr_info->vmem_tiles_per_frame != fr_info->seq_tile_count) && + (cr_index == hwseq->hdr->nocr)) { + pva_kmd_log_err("hwseq/vmem tile count mismatch"); + err = PVA_INVAL; + } + return err; +} + +static enum pva_error +prepare_frame_info(struct pva_hwseq_priv *hwseq, + struct pva_hwseq_frame_info *frame_info) +{ + struct pva_hwseq_grid_info grid_info = { 0 }; + if (hwseq->is_raster_scan) { + grid_info.tile_x[0] = (int32_t)(hwseq->head_desc->tx); + grid_info.tile_x[1] = (int32_t)(hwseq->tail_desc->tx); + grid_info.tile_y[0] = (int32_t)(hwseq->head_desc->ty); + grid_info.tile_y[1] = (int32_t)(hwseq->tail_desc->ty); + grid_info.tile_z = convert_to_signed_s32( + (uint32_t)hwseq->head_desc->src.rpt1 + 1U); + grid_info.pad_x[0] = (int32_t)hwseq->hdr->padl; + grid_info.pad_x[1] = (int32_t)hwseq->hdr->padr; + grid_info.pad_y[0] = (int32_t)hwseq->hdr->padt; + grid_info.pad_y[1] = (int32_t)hwseq->hdr->padb; + grid_info.grid_size_x = hwseq->tiles_per_packet; + grid_info.grid_size_y = (uint32_t)hwseq->colrow->crr + 1U; + grid_info.grid_size_z = + (uint32_t)hwseq->head_desc->src.rpt2 + 1U; + grid_info.grid_step_x = hwseq->hdr->to; + grid_info.grid_step_y = hwseq->colrow->cro; + grid_info.head_tile_count = + (uint32_t)hwseq->dma_descs[0].dr + 1U; + grid_info.is_split_padding = hwseq->is_split_padding; + + if (compute_frame_info(frame_info, &grid_info) != PVA_SUCCESS) { + pva_kmd_log_err("Error in converting grid to frame"); + return PVA_INVAL; + } + } else { + // vertical-mining mode + // this is just raster-scan transposed so let's transpose the tile and padding + if (hwseq->is_split_padding) { + pva_kmd_log_err( + "vertical mining not supported with split padding"); + return PVA_INVAL; + } + grid_info.tile_x[0] = (int32_t)(hwseq->head_desc->ty); + grid_info.tile_x[1] = (int32_t)(hwseq->tail_desc->ty); + grid_info.tile_y[0] = (int32_t)(hwseq->head_desc->tx); + grid_info.tile_y[1] = 
(int32_t)(hwseq->tail_desc->tx); + grid_info.tile_z = convert_to_signed_s32( + (uint32_t)hwseq->head_desc->src.rpt1 + 1U); + grid_info.pad_x[0] = (int32_t)hwseq->hdr->padt; + grid_info.pad_x[1] = (int32_t)hwseq->hdr->padb; + grid_info.pad_y[0] = (int32_t)hwseq->hdr->padl; + grid_info.pad_y[1] = (int32_t)hwseq->hdr->padr; + grid_info.grid_size_x = hwseq->tiles_per_packet, + grid_info.grid_size_y = (uint32_t)hwseq->colrow->crr + 1U; + grid_info.grid_size_z = + (uint32_t)hwseq->head_desc->src.rpt2 + 1U; + grid_info.grid_step_x = hwseq->hdr->to; + grid_info.grid_step_y = hwseq->colrow->cro; + grid_info.head_tile_count = + (uint32_t)hwseq->dma_descs[0].dr + 1U; + grid_info.is_split_padding = false; + + if (compute_frame_info(frame_info, &grid_info) != PVA_SUCCESS) { + pva_kmd_log_err("Error in converting grid to frame"); + return PVA_INVAL; + } + swap_frame_boundaries(frame_info); + } + + return PVA_SUCCESS; +} + +static enum pva_error +validate_frame_buffer_addr(struct pva_hwseq_priv *hwseq, + const struct pva_hwseq_frame_info *frame_info, + bool sequencing_to_vmem, int64_t frame_buffer_offset, + uint16_t frame_line_pitch, bool has_dim3) +{ + int64_t frame_buffer_start = 0; + int64_t frame_buffer_end = 0; + uint32_t num_bytes = 0; + int64_t frame_plane_size = 0LL; + uint8_t head_desc_id = get_head_desc_did(hwseq); + const struct pva_dma_descriptor *head_desc = hwseq->head_desc; + pva_math_error math_err = MATH_OP_SUCCESS; + + frame_plane_size = + sequencing_to_vmem ? head_desc->src.adv1 : head_desc->dst.adv1; + frame_buffer_start = + adds64(muls64(frame_info->start_y, (int64_t)frame_line_pitch, + &math_err), + frame_info->start_x, &math_err); + + frame_buffer_end = muls64(subs64(frame_info->end_z, 1, &math_err), + frame_plane_size, &math_err); + + frame_buffer_end = + adds64(muls64(subs64(frame_info->end_y, 1, &math_err), + (int64_t)frame_line_pitch, &math_err), + frame_buffer_end, &math_err); + + frame_buffer_end = + adds64(frame_buffer_end, frame_info->end_x, &math_err); + + // convert to byte range + num_bytes = (uint32_t)1U + << (head_desc->log2_pixel_size & MAX_BYTES_PER_PIXEL); + + frame_buffer_start = + muls64(frame_buffer_start, (int64_t)num_bytes, &math_err); + + frame_buffer_end = + muls64(frame_buffer_end, (int64_t)num_bytes, &math_err); + + if (!sequencing_to_vmem) { + hwseq->access_sizes[head_desc_id].dst.start_addr = + adds64(mins64(frame_buffer_start, frame_buffer_end), + frame_buffer_offset, &math_err); + + hwseq->access_sizes[head_desc_id].dst.end_addr = + adds64(maxs64(frame_buffer_start, frame_buffer_end), + frame_buffer_offset, &math_err); + } else { + hwseq->access_sizes[head_desc_id].src.start_addr = + adds64(mins64(frame_buffer_start, frame_buffer_end), + frame_buffer_offset, &math_err); + hwseq->access_sizes[head_desc_id].src.end_addr = + adds64(maxs64(frame_buffer_start, frame_buffer_end), + frame_buffer_offset, &math_err); + } + + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err( + "Math error in frame buffer address calculation"); + return PVA_ERR_MATH_OP; + } + + return PVA_SUCCESS; +} + +static enum pva_error check_tile_offset(struct pva_hwseq_priv *hwseq) +{ + if ((hwseq->tiles_per_packet > 1U) && (hwseq->hdr->to == 0)) { + pva_kmd_log_err( + "unsupported hwseq program modality: Tile Offset = 0"); + return PVA_INVAL; + } + return PVA_SUCCESS; +} + +static void get_sequencing_and_dim3(struct pva_hwseq_priv *hwseq, + bool *sequencing_to_vmem, bool *has_dim3) +{ + struct pva_dma_descriptor *head_desc = hwseq->head_desc; + *sequencing_to_vmem = 
(head_desc->dst.transfer_mode == + (uint8_t)PVA_DMA_TRANS_MODE_VMEM); + // Check if this a 3D tensor transfer. + *has_dim3 = ((head_desc->src.rpt1 == head_desc->dst.rpt1) && + (head_desc->src.rpt2 == head_desc->dst.rpt2)); + *has_dim3 = *has_dim3 && + ((*sequencing_to_vmem) ? ((head_desc->src.adv1 > 0) && + (head_desc->src.adv2 > 0) && + (head_desc->dst.adv1 > 0)) : + ((head_desc->dst.adv1 > 0) && + (head_desc->dst.adv2 > 0) && + (head_desc->src.adv1 > 0))); +} + +/** + * \brief Checks if the DMA Transfers are valid and within memory bounds. + * + * This function does the following: + * - Rejects unsupported HW Sequencer program modality i.e. tile_offset == 0 when + * more than one tile is present per packet. + * - Check padding for the tiles by calling check_padding_tiles. + * - Validate VMEM tiles at source/destination as per configuration. + * Calls validate_dst_vmem is sequencing to VMEM. + * Calls validate_src_vmem if sequencing from VMEM. + * - Ensures number of tiles computed in validating VMEM tiles. + * is the same as sequencing tile count obtained from HW Sequencer Blob. + * - Populate Grid Info and convert it to Frame Info to obtain + * start and end co-ordinates of the Frame in memory. + * - Validate Frame Boundaries with the Frame Offset lies within + * the memory range of the DMA Transfer. + * + * \param[in] hwseq A valid pointer to object of type \ref pva_hwseq_priv. + * + * \return + * - PVA_SUCCESS if all the above checks pass + * - PVA_INVAL if any of the above checks fail + */ +static enum pva_error +validate_dma_boundaries(struct pva_hwseq_priv *hwseq, + struct pva_hwseq_per_frame_info *fr_info, + uint32_t num_cr) +{ + enum pva_error err = PVA_SUCCESS; + bool sequencing_to_vmem = false; + bool has_dim3 = false; + uint16_t frame_line_pitch = 0U; + int64_t frame_buffer_offset = 0; + struct pva_hwseq_frame_info frame_info = { 0 }; + struct pva_dma_descriptor *head_desc = hwseq->head_desc; + struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + + err = check_tile_offset(hwseq); + if (err != PVA_SUCCESS) { + return err; + } + + err = check_padding_tiles(head_desc, tail_desc); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("DMA Descriptors have empty tiles"); + return PVA_INVAL; + } + + get_sequencing_and_dim3(hwseq, &sequencing_to_vmem, &has_dim3); + + err = validate_vmem(hwseq, sequencing_to_vmem, fr_info, num_cr, + has_dim3); + if (err != PVA_SUCCESS) { + return PVA_INVAL; + } + + if (prepare_frame_info(hwseq, &frame_info) != PVA_SUCCESS) { + pva_kmd_log_err("Error in preparing the frame"); + return PVA_INVAL; + } + + frame_line_pitch = sequencing_to_vmem ? head_desc->src.line_pitch : + head_desc->dst.line_pitch; + frame_buffer_offset = + sequencing_to_vmem ? + convert_to_signed_s64(head_desc->src.offset) : + convert_to_signed_s64(head_desc->dst.offset); + + if (validate_frame_buffer_addr(hwseq, &frame_info, sequencing_to_vmem, + frame_buffer_offset, frame_line_pitch, + has_dim3) != PVA_SUCCESS) { + pva_kmd_log_err("sequencer address validation failed"); + return PVA_INVAL; + } + + return err; +} + +/** + * \brief Validates the HW Sequener when it is in Frame Addressing Mode + * + * This function performs the following initial checks on the HW Sequencer + * having Frame Addressing Mode + * - HW Sequencer has only 1 col/row header + * - Col/Row has at most 2 descriptors + * - Each descriptor has valid ID i.e. 
0 < Desc ID < num_dma_descriptors + * - tiles_per_packet is calculated as summation of all Descriptor repetition + * factors associated with each descriptor + * + * It populates the following fields in hwseq_info + * - hwseq_info->desc_count + * - hwseq_info->dma_descs + * - hwseq_info->head_desc + * - hwseq_info->tail_desc + * + * After the above checks and updates are done, validate_dma_boundaries is called + * to validate the DMA Transfer setup by HW Sequencer does not go out of memory bounds + * + * \param[in, out] hwseq_info A valid pointer to object of type \ref struct pva_hwseq_priv + * \param[in] task A valid pointer to object of type \ref pva_submit_task + * \param[in] dma_ch A valid pointer to object of type \ref pva_dma_channel + * \param[out] requires_block_height A valid pointer to a boolean. This is updated + * to indicate if current channel needs Block Height information. + * + * \return + * - PVA_SUCCESS if all above checks pass + * - PVA_INVAL in the following cases + * - More than one Col/Row Header + * - More than 2 Descriptors in Col/Row + * - Invalid Descriptor Entry as highlighted above + * - validate_dma_boundaries returns an error + */ +static enum pva_error validate_frame_mode(struct pva_hwseq_priv *hwseq_info, + uint64_t *hw_dma_descs_mask) +{ + struct pva_dma_hwseq_desc_entry *desc_entry = NULL; + struct pva_dma_hwseq_desc_entry *desc_entries = hwseq_info->dma_descs; + uint32_t num_descs = 0U; + uint32_t i = 0U; + uint32_t num_cr = 0U; + enum pva_error err = PVA_SUCCESS; + struct pva_hwseq_per_frame_info fr_info = { 0 }; + + if ((hwseq_info->hw_gen < PVA_HW_GEN3) && + (hwseq_info->hdr->nocr != 0U)) { + pva_kmd_log_err_u64( + "Cannot have more than 1 col/row header in GEN2", + hwseq_info->hdr->nocr); + return PVA_INVAL; + } + + for (num_cr = 0; num_cr <= hwseq_info->hdr->nocr; num_cr++) { + hwseq_info->tiles_per_packet = 0; + hwseq_info->colrow = + (struct pva_dma_hwseq_colrow_hdr *)(read_hwseq_blob( + &hwseq_info->blob, + (uint32_t)sizeof( + struct pva_dma_hwseq_colrow_hdr))); + + if (hwseq_info->colrow == NULL) { + pva_kmd_log_err( + "Cannot read HW sequencer col/row header"); + return PVA_INVAL; + } + + num_descs = hwseq_info->colrow->dec + (uint32_t)1U; + //Check that the col/row has a max of 2 descriptors + if (num_descs > 2U) { + pva_kmd_log_err( + "Cannot have more than 2 descriptors in HW Sequencer"); + return PVA_INVAL; + } + + for (i = 0; i < num_descs; i++) { + desc_entry = (struct pva_dma_hwseq_desc_entry + *)(read_hwseq_blob( + &hwseq_info->blob, + (uint32_t)sizeof( + struct pva_dma_hwseq_desc_entry))); + + if (validate_desc_entry( + desc_entry, + safe_addu8(hwseq_info->dma_config->header + .base_descriptor, + hwseq_info->dma_config->header + .num_descriptors)) != + PVA_SUCCESS) { + pva_kmd_log_err("Invalid DMA Descriptor Entry"); + return PVA_INVAL; + } + desc_entries[i].did = desc_entry->did - 1U; + //TODO enable nv_array_index_no_speculate later + // desc_entries[i].did = (uint8_t) nv_array_index_no_speculate_u32( + // desc_entries[i].did, max_num_descs); + + desc_entries[i].dr = desc_entry->dr; + hw_dma_descs_mask[(desc_entries[i].did / 64ULL)] |= + 1ULL << (desc_entries[i].did & MAX_DESC_ID); + + hwseq_info->tiles_per_packet += + ((uint32_t)desc_entry->dr + 1U); + } + if ((i == num_descs) && ((i % 2U) != 0U)) { + (void)read_hwseq_blob( + &hwseq_info->blob, + (uint32_t)sizeof( + struct pva_dma_hwseq_desc_entry)); + } + + hwseq_info->desc_count = num_descs; + hwseq_info->head_desc = + &hwseq_info->dma_config + ->descriptors[desc_entries[0].did - + 
hwseq_info->dma_config->header + .base_descriptor]; + hwseq_info->tail_desc = + &hwseq_info->dma_config + ->descriptors[desc_entries[num_descs - 1U].did - + hwseq_info->dma_config->header + .base_descriptor]; + + //TODO: User nv_array_index_no_speculate_u32 + // num_descs = nv_array_index_no_speculate_u32(num_descs, 3); + + err = validate_dma_boundaries(hwseq_info, &fr_info, num_cr); + if (err != PVA_SUCCESS) { + return err; + } + } + + return err; +} + +static enum pva_error validate_rra_mode(struct pva_hwseq_priv *hwseq_info, + uint64_t *hw_dma_descs_mask) +{ + const uint8_t *column = 0U; + uint32_t i = 0U; + uint32_t num_columns = 0U; + uint32_t end = hwseq_info->entry.ch->hwseq_end; + const uint8_t *blob_end = &(hwseq_info->blob.data[(end << 2) + 4U]); + + // In each NOCR entry, 4 bytes are used for CRO + // and 4 bytes are used for Desc info + const uint8_t column_entry_size = 8U; + + if (hwseq_info->entry.ch->hwseq_frame_count > + PVA_HWSEQ_RRA_MAX_FRAME_COUNT) { + pva_kmd_log_err("Invalid HWSEQ frame count"); + return PVA_INVAL; + } + + if (hwseq_info->hdr->nocr > PVA_HWSEQ_RRA_MAX_NOCR) { + pva_kmd_log_err("Invalid HWSEQ column count"); + return PVA_INVAL; + } + + if (hwseq_info->hdr->fr != 0) { + pva_kmd_log_err("Invalid HWSEQ repetition factor"); + return PVA_INVAL; + } + + num_columns = hwseq_info->hdr->nocr + 1U; + column = hwseq_info->blob.data + sizeof(struct pva_dma_hwseq_hdr); + + // Ensure there are sufficient CRO and Desc ID entries in the HWSEQ blob + if (((blob_end - column) / column_entry_size) < num_columns) { + pva_kmd_log_err("HWSEQ Program does not have enough columns"); + return PVA_INVAL; + } + + for (i = 0U; i < num_columns; i++) { + struct pva_dma_hwseq_desc_entry desc_entry; + uint32_t *desc_read_data = (uint32_t *)(read_hwseq_blob( + &hwseq_info->blob, column_entry_size)); + if (desc_read_data == NULL) { + pva_kmd_log_err( + "Failed to read descriptor data from HWSEQ blob"); + return PVA_INVAL; + } + + // Index 0 contains DEC and CRO, both of which are not used + // Index 1 contains the DID and DR of 1st and second desc + // In RRA mode, each HWSEQ column has only 1 descriptor + // Hence, we validate the first descriptor and ignore the second + // descriptor in each column + desc_entry.did = (desc_read_data[1U] & 0x000000FFU); + desc_entry.dr = ((desc_read_data[1U] & 0x0000FF00U) >> 8U); + if (validate_desc_entry(&desc_entry, PVA_MAX_NUM_DMA_DESC) != + PVA_SUCCESS) { + pva_kmd_log_err( + "Invalid Descriptor ID found in HW Sequencer"); + return PVA_INVAL; + } + desc_entry.did -= 1U; + hw_dma_descs_mask[(desc_entry.did / 64ULL)] |= + 1ULL << (desc_entry.did & MAX_DESC_ID); + } + + return 0; +} + +/** + * \brief Validates the HW Sequencer Blob if it has Descriptor Addressing Mode + * + * This function validates Descriptor Addressing Mode in HW Sequencer Blob + * + * The following checks are performed: + * - Frame Repeat Count is checked to be 0 + * - Reading of HW Sequencer Blob does not lead to out of bounds read + * - Loop through all the col/rows present in the HW Sequencer Blob. + * The count is obtained from HW Sequencer Blob Header + * - Ensure that any descriptor present in the HW Sequencer Blob + * have valid IDs i.e. ID > 0 and ID < num_dma_descriptors + * - Even number of descriptors are expected per col/row. 
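+ * Descriptor entries are two bytes each (descriptor ID plus repetition
+ * factor), so they are packed two per 32-bit word in the blob.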
+ * If odd number of descriptors are found, increment the read pointer to the + * HW Sequencer Blob to ensure compliance with DMA IAS + * + * \param[in, out] hwseq_info A valid pointer to object of type \ref struct pva_hwseq_priv + * + * \return + * - PVA_SUCCESS if all of the above checks pass + * - PVA_INVAL if any of the above checks fail + */ +static enum pva_error validate_desc_mode(struct pva_hwseq_priv *hwseq_info) +{ + enum pva_error err = PVA_SUCCESS; + const struct pva_dma_hwseq_colrow_hdr *colrow = NULL; + const struct pva_dma_hwseq_desc_entry *desc_entry = NULL; + uint32_t num_colrows = 0U; + uint32_t num_descs = 0U; + uint32_t i = 0U; + uint32_t j = 0U; + + if (hwseq_info->hdr->fr != 0U) { + pva_kmd_log_err( + "invalid hwseq modality (frame repeat count>1)"); + return PVA_INVAL; + } + + num_colrows = (uint32_t)hwseq_info->hdr->nocr + 1U; + for (i = 0U; i < num_colrows; i++) { + colrow = (const struct pva_dma_hwseq_colrow_hdr + *)(read_hwseq_blob( + &hwseq_info->blob, + (uint32_t)sizeof(struct pva_dma_hwseq_colrow_hdr))); + if (colrow == NULL) { + pva_kmd_log_err("Attempt to read out of bounds"); + return PVA_INVAL; + } + num_descs = (uint32_t)colrow->dec + 1U; + for (j = 0U; j < num_descs; j++) { + desc_entry = (const struct pva_dma_hwseq_desc_entry + *)(read_hwseq_blob( + &hwseq_info->blob, + (uint32_t)sizeof( + struct pva_dma_hwseq_desc_entry))); + + if (validate_desc_entry( + desc_entry, + safe_addu8(hwseq_info->dma_config->header + .base_descriptor, + hwseq_info->dma_config->header + .num_descriptors)) != + PVA_SUCCESS) { + pva_kmd_log_err("Invalid DMA Descriptor Entry"); + return PVA_INVAL; + } + } + if ((j == num_descs) && ((j % 2U) != 0U)) { + (void)read_hwseq_blob( + &hwseq_info->blob, + (uint32_t)sizeof( + struct pva_dma_hwseq_desc_entry)); + } + } + + return err; +} + +static enum pva_error +check_for_valid_hwseq_type(struct pva_hwseq_priv *hwseq_info, + struct hw_seq_blob_entry const *entry, + uint64_t *hw_dma_descs_mask) +{ + enum pva_error err = PVA_SUCCESS; + //Populate hwseq_info header + hwseq_info->hdr = (struct pva_dma_hwseq_hdr *)(read_hwseq_blob( + &hwseq_info->blob, (uint32_t)sizeof(struct pva_dma_hwseq_hdr))); + if (hwseq_info->hdr == NULL) { + pva_kmd_log_err("HW sequencer buffer does not contain header"); + return PVA_INVAL; + } + + if ((hwseq_info->hdr->fr != 0U) || (hwseq_info->hdr->fo != 0U)) { + return PVA_INVAL; + } + + if (hwseq_info->hdr->fid == (uint16_t)PVA_DMA_HWSEQ_DESC_MODE) { + err = validate_desc_mode(hwseq_info); + } else if (hwseq_info->hdr->fid == (uint16_t)PVA_DMA_HWSEQ_FRAME_MODE) { + err = validate_frame_mode(hwseq_info, hw_dma_descs_mask); + } else if (hwseq_info->hdr->fid == (uint16_t)PVA_DMA_HWSEQ_RRA_MODE) { + if (hwseq_info->hw_gen < PVA_HW_GEN3) { + pva_kmd_log_err( + "RRA Mode not supported for current device"); + return PVA_INVAL; + } + err = validate_rra_mode(hwseq_info, hw_dma_descs_mask); + } else { + pva_kmd_log_err("Invalid Header in HW Sequencer Blob"); + return PVA_INVAL; + } + + return err; +} + +static enum pva_error validate_hwseq_blob(struct pva_hwseq_priv *hwseq_info, + struct hw_seq_blob_entry const *entry, + uint64_t *hw_dma_descs_mask) +{ + uint32_t i = 0U; + enum pva_error err = PVA_SUCCESS; + + hwseq_info->entry.ch = entry->ch; + hwseq_info->is_split_padding = + (hwseq_info->entry.ch->hwseq_tx_select != 0U); + hwseq_info->is_raster_scan = + (hwseq_info->entry.ch->hwseq_traversal_order == 0U); + hwseq_info->entry.hwseq_start = entry->hwseq_start; + hwseq_info->entry.hwseq_end = entry->hwseq_end; + 
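+	/* A GEN3 channel may carry one sequencer program per frame; the loop
+	 * below validates each of them in turn from the same blob read cursor.
+	 */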
hwseq_info->entry.num_frames = entry->num_frames; + + for (i = 0U; i < entry->num_frames; i++) { + err = check_for_valid_hwseq_type(hwseq_info, entry, + hw_dma_descs_mask); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Invalid Header in HW Sequencer Blob"); + return err; + } + } + return err; +} + +static enum pva_error +validate_channel_accesses(struct pva_dma_channel const *ch, + struct pva_dma_config_header const *header, + enum pva_hw_gen hw_gen, + struct hw_seq_blob_entry *entry) +{ + if (ch->hwseq_end < ch->hwseq_start) { + return PVA_ERR_HWSEQ_INVALID; + } + //TODO: Confirm below checks. Is header->base_hwseq_word needed? + if (((ch->hwseq_end + 1U) + (header->base_hwseq_word)) > + (header->num_hwseq_words)) { + pva_kmd_log_err( + "Possible out of bounds read for HW Seqeuncer Blob"); + return PVA_ERR_HWSEQ_INVALID; + } + if ((uint16_t)(ch->hwseq_end - ch->hwseq_start) < 4U) { + pva_kmd_log_err("HW Sequencer too small for channel"); + return PVA_ERR_HWSEQ_INVALID; + } + entry->hwseq_start = ch->hwseq_start; + entry->hwseq_end = ch->hwseq_end; + entry->num_frames = 1U; + if (hw_gen == PVA_HW_GEN3) { + entry->num_frames = ch->hwseq_frame_count + 1U; + } + entry->ch = ch; + return PVA_SUCCESS; +} + +enum pva_error validate_hwseq(struct pva_dma_config const *dma_config, + struct pva_kmd_hw_constants const *hw_consts, + struct pva_kmd_dma_access *access_sizes, + uint64_t *hw_dma_descs_mask) +{ + uint32_t i = 0U; + struct pva_hwseq_priv hwseq_info = { 0 }; + enum pva_error err = PVA_SUCCESS; + struct pva_dma_channel *ch = NULL; + struct hw_seq_blob_entry entries[PVA_MAX_NUM_DMA_CHANNELS] = { 0 }; + uint8_t num_hwseqs = 0U; + uint8_t num_channels = dma_config->header.num_channels; + + hwseq_info.dma_config = (const struct pva_dma_config *)(dma_config); + hwseq_info.hw_gen = hw_consts->hw_gen; + hwseq_info.access_sizes = access_sizes; + + for (i = 0U; i < num_channels; i++) { + ch = &dma_config->channels[i]; + if (ch->hwseq_enable == 1) { + err = validate_channel_accesses(ch, &dma_config->header, + hwseq_info.hw_gen, + &entries[num_hwseqs]); + if (err != PVA_SUCCESS) { + return err; + } + num_hwseqs++; + } + } + + for (i = 0U; i < num_hwseqs; i++) { + uint32_t start_address = entries[i].hwseq_start; + uint32_t end_address = entries[i].hwseq_end + 1U; + uint32_t curr_offset = start_address << 2U; + uint32_t size = 0U; + //Populate hwseq blob + hwseq_info.blob.data = + (uint8_t *)((uintptr_t)(dma_config->hwseq_words) + + (curr_offset)); + + size = safe_subu32(end_address, start_address); + if (size > (hw_consts->n_hwseq_words >> 2U)) { + return PVA_ERR_HWSEQ_INVALID; + } + hwseq_info.blob.bytes_left = size << 2U; + + err = validate_hwseq_blob(&hwseq_info, &entries[i], + hw_dma_descs_mask); + if (err != PVA_SUCCESS) { + return PVA_ERR_HWSEQ_INVALID; + } + } + return PVA_SUCCESS; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h new file mode 100644 index 00000000..adbe32c8 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h @@ -0,0 +1,336 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_HWSEQ_VALIDATE_H +#define PVA_KMD_HWSEQ_VALIDATE_H + +#include "pva_api_dma.h" +#include "pva_kmd_device.h" + +#define PVA_HWSEQ_RRA_MAX_NOCR 31U +#define PVA_HWSEQ_RRA_MAX_FRAME_COUNT 63U + +/** + * List of valid Addressing Modes in HW Sequencer Header + */ +enum pva_dma_hwseq_fid { + PVA_DMA_HWSEQ_RRA_MODE = 0xC0DA, /*!< RRA addressing */ + PVA_DMA_HWSEQ_FRAME_MODE = 0xC0DE, /*!< frame addressing */ + PVA_DMA_HWSEQ_DESC_MODE = 0xDEAD /*!< descriptor addressing */ +}; + +/** + * Combine three headers common in HW Sequencer + * + * ---------------------------------------------------------------------------- + * | | byte 3 | byte 2 | byte 1 | byte 0 | + * |--------|---------------|--------------|-----------------|----------------| + * | Head 1 | NOCR | FR | FID1 | FID0 | + * | Head 2 | FO in LP 15:8 | FO in LP 7:0 | TO in P/LP 15:8 | TO in P/LP 7:0 | + * | Head 3 | padB | padL | padT | padR | + * ---------------------------------------------------------------------------- + **/ +struct pva_dma_hwseq_hdr { + //hdr_1 + uint16_t fid; /*!< addressing type: frame or descriptor */ + uint8_t fr; /*!< frame repetition factor */ + uint8_t nocr; /*!< number of descriptor column/row */ + //hdr_2 + int16_t to; /*!< tile offset in pixel/Line Pitch */ + int16_t fo; /*!< frame offset in Line Pitch */ + //hdr_3 + uint8_t padr; /*!< pad right */ + uint8_t padt; /*!< pad top */ + uint8_t padl; /*!< pad left */ + uint8_t padb; /*!< pad bottom */ +}; + +/** + * A struct which represents Column/Row Header in HW Sequencer + */ +struct pva_dma_hwseq_colrow_hdr { + uint8_t dec; /*!< descriptor entry count */ + uint8_t crr; /*!< col/row repetition factor */ + int16_t cro; /*!< col/row ofst in pixel/line pitch */ +}; + +/** + * A struct which represents a DMA Descriptor Header in HW Sequencer + */ +struct pva_dma_hwseq_desc_entry { + uint8_t did; /*!< desc id */ + uint8_t dr; /*!< desc repetition */ +}; + +/** + * A struct which represents a Column/Row Header Entry in HW Sequencer + */ +struct pva_dma_hwseq_colrow_entry_hdr { + struct pva_dma_hwseq_colrow_hdr hdr; /*!< Col/Row Header */ +}; + +/** + * A struct representing Grid Information + */ +struct pva_hwseq_grid_info { + /** + * tile co-ordinates + * In Raster Mode: + * - tile_x[0] = Tile width of the first tile in HW Seq DMA Transfer + * - tile_x[1] = Tile width of the last tile in HW Seq DMA Transfer + * In Vertical Mining Mode: + * - tile_x[0] = Tile height of the first tile in HW Seq DMA Transfer + * - tile_x[1] = Tile height of the last tile in HW Seq DMA Transfer + */ + int32_t tile_x[2]; + /** + * tile co-ordinates + * In Raster Mode: + * - tile_y[0] = Tile height of the first tile in HW Seq DMA Transfer + * - tile_y[1] = Tile height of the last tile in HW Seq DMA Transfer + * In Vertical Mining Mode: + * - tile_y[0] = Tile width of the first tile in HW Seq DMA Transfer + * - tile_y[1] = Tile width of the last tile in HW Seq DMA Transfer + */ + int32_t tile_y[2]; + /** + * tile co-ordinates + * In Tensor Data Flow Mode: + */ + int32_t tile_z; + /** + * Padding values + * In Raster Mode: + * - pad_x[0] = Left Padding + * - pad_x[1] = Right Padding + * In Vertical Mining Mode: + * - pad_x[0] = Top Padding + * - pad_x[1] = Bottom Padding + */ + int32_t pad_x[2]; + /** + * Padding values + * In Raster Mode: + * - pad_y[0] = Top Padding + * - pad_y[1] 
= Bottom Padding + * In Vertical Mining Mode: + * - pad_y[0] = Left Padding + * - pad_y[1] = Right Padding + */ + int32_t pad_y[2]; + /** + * Tiles per packet. Grid size in X dimension + */ + uint32_t grid_size_x; + /** + * Repeat Count + */ + uint32_t grid_size_y; + /** + * Grid Size in Z dimension for Tensor Data Flow + */ + uint32_t grid_size_z; + /** + * Tile Offset as specified in the HW Sequencer Header + */ + int32_t grid_step_x; + /** + * Col/Row Offset as specified in the HW Sequencer Col/Row Header + */ + int32_t grid_step_y; + /** + * Repetition factor for Head Descriptor in HW Sequencer Blob + */ + uint32_t head_tile_count; + /** + * Boolean value to indicate if HW Sequencer has split padding + */ + bool is_split_padding; +}; + +/** + * A struct representing a valid Frame Information + */ +struct pva_hwseq_frame_info { + /** + * X co-ordinate of start of Frame + */ + int64_t start_x; + /** + * Y co-ordinate of start of Frame + */ + int64_t start_y; + /** + * Z co-ordinates of starte of Frame + */ + int64_t start_z; + /** + * X co-ordinate of end of Frame + */ + int64_t end_x; + /** + * Y co-ordinate of end of Frame + */ + int64_t end_y; + /** + * Z co-ordinate of end of Frame + */ + int64_t end_z; +}; + +/** + * Struct which holds the HW Sequencer Buffer as received from User Space + */ +struct pva_hwseq_buffer { + /** + * Pointer to HW Sequencer Blob in Buffer + */ + const uint8_t *data; + /** + * Number of bytes left to be read from the data buffer + */ + uint32_t bytes_left; +}; + +/** + * @struct hw_seq_blob_entry + * @brief Structure to hold information about a hardware sequence blob entry. + * + * This structure is used to store the details of a DMA channel and the range of hardware sequencer + * associated with it, along with the number of frames involved. + */ +struct hw_seq_blob_entry { + /** + * Pointer to a const \ref pva_dma_channel which holds the current DMA Channel Information + * in which current HW Sequencer Blob is present + */ + struct pva_dma_channel const *ch; + /** + * The starting index of the hardware sequencer. + */ + uint16_t hwseq_start; + /** + * The ending index of the hardware sequencer. + */ + uint16_t hwseq_end; + /** + * The number of frames associated with the hardware sequencer. + */ + uint32_t num_frames; +}; + +/** + * TODO: Separate out pva_hwseq_priv to be more modular + * + * Items in pva_hwseq_main + * - dma_config + * - hw_gen + * - blob + * - num_hwseq_words + * Items per segment of main i.e. 
pva_hwseq_segment + * - hwseq_start, hwseq_end + * - channel id + * - hwseq_header, + * - desc_count + * - num_frames + * - head_desc, tail_desc + * - is_split_padding + * - is_raster_scan + */ + +/** + * A struct holding private data to HW Sequencer Blob being parsed + */ +struct pva_hwseq_priv { + /** + * Number of descriptors in the HW Sequencer Blob + */ + uint32_t desc_count; + /** + * Number of tiles in the packet + * This is the sum total of descriptor repetition factors + * present in the HW Sequencer Blob + */ + uint32_t tiles_per_packet; + int32_t max_tx; + int32_t max_ty; + + /** + * Struct that holds the entry info of HW Sequencer Blob + */ + struct hw_seq_blob_entry entry; + + /** + * Struct that holds HW Sequencer Blob to be read + */ + struct pva_hwseq_buffer blob; + + /** + * Boolean to indicate if split padding is present in the HW Sequener Blob + */ + bool is_split_padding; + /** + * Bool to indicate if HW Sequencer uses raster scan or Vertical mining + * TRUE: Raster Scan + * FALSE: Vertical Mining + */ + bool is_raster_scan; + + /** + * @brief Indicates the generation of PVA HW. + * Allowed values: 0 (GEN 1), 1 (GEN 2), 2 (GEN 3) + */ + enum pva_hw_gen hw_gen; + + /** + * @brief Pointer to the DMA configuration header. + */ + const struct pva_dma_config *dma_config; + + /** + * Pointer to \ref pva_dma_hwseq_hdr_t which holds the HW Sequencer Header + */ + const struct pva_dma_hwseq_hdr *hdr; + /** + * Pointer to \ref pva_dma_hwseq_colrow_hdr_t which holds the Header of the + * Col/Row inside HW Sequencer + */ + const struct pva_dma_hwseq_colrow_hdr *colrow; + + /** + * Pointer to the Head Descriptor of type \ref nvpva_dma_descriptor in the HW Sequencer + */ + struct pva_dma_descriptor *head_desc; + /** + * Pointer to the Tail Descriptor of type \ref nvpva_dma_descriptor in the HW Sequencer + */ + struct pva_dma_descriptor *tail_desc; + /** + * DMA Descriptor information obtained from HW Sequencer Blob of type + * \ref pva_dma_hwseq_desc_entry_t + */ + struct pva_dma_hwseq_desc_entry dma_descs[2]; + /** + * Access Sizes are calculated and stored here from HW Sequencer Blob + */ + struct pva_kmd_dma_access *access_sizes; +}; + +struct pva_hwseq_per_frame_info { + uint32_t seq_tile_count; + uint32_t vmem_tiles_per_frame; +}; + +enum pva_error validate_hwseq(struct pva_dma_config const *dma_config, + struct pva_kmd_hw_constants const *hw_consts, + struct pva_kmd_dma_access *access_sizes, + uint64_t *hw_dma_descs_mask); + +#endif diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c new file mode 100644 index 00000000..abc025fe --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#include "pva_kmd_msg.h" +#include "pva_fw.h" +#include "pva_kmd_utils.h" +#include "pva_kmd_thread_sema.h" +#include "pva_kmd_fw_debug.h" +#include "pva_kmd_device.h" +#include "pva_kmd_context.h" + +static uint8_t get_msg_type(uint32_t hdr) +{ + return PVA_EXTRACT(hdr, PVA_FW_MSG_TYPE_MSB, PVA_FW_MSG_TYPE_LSB, + uint32_t); +} + +void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len) +{ + struct pva_kmd_device *pva = pva_dev; + uint8_t type = get_msg_type(data[0]); + uint8_t updated_len = safe_subu8(len, 1U); + uint8_t size = safe_mulu8((uint8_t)sizeof(uint32_t), updated_len); + + switch (type) { + case PVA_FW_MSG_TYPE_BOOT_DONE: { + uint64_t r5_start_time = + pack64(data[PVA_FW_MSG_R5_START_TIME_HI_IDX], + data[PVA_FW_MSG_R5_START_TIME_LO_IDX]); + uint64_t r5_ready_time = + pack64(data[PVA_FW_MSG_R5_READY_TIME_HI_IDX], + data[PVA_FW_MSG_R5_READY_TIME_LO_IDX]); + + pva_kmd_log_err("Firmware boot completes"); + pva_kmd_log_err_u64("R5 start time (us)", + tsc_to_us(r5_start_time)); + pva_kmd_log_err_u64("R5 ready time (us)", + tsc_to_us(r5_ready_time)); + + pva_kmd_sema_post(&pva->fw_boot_sema); + } break; + case PVA_FW_MSG_TYPE_ABORT: { + char abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN + 1]; + + pva_kmd_drain_fw_print(&pva->fw_print_buffer); + + pva_kmd_log_err("Firmware aborted! The abort message is: "); + abort_msg[0] = PVA_EXTRACT(data[0], 7, 0, uint32_t); + abort_msg[1] = PVA_EXTRACT(data[0], 15, 8, uint32_t); + memcpy(abort_msg + 2, &data[1], size); + abort_msg[PVA_FW_MSG_ABORT_STR_MAX_LEN] = '\0'; + pva_kmd_log_err(abort_msg); + } break; + case PVA_FW_MSG_TYPE_FLUSH_PRINT: + pva_kmd_drain_fw_print(&pva->fw_print_buffer); + break; + + default: + FAULT("Unknown message type from firmware"); + } +} + +void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len) +{ + struct pva_kmd_device *pva = pva_dev; + + uint8_t type = get_msg_type(data[0]); + switch (type) { + case PVA_FW_MSG_TYPE_RESOURCE_UNREGISTER: { + uint8_t table_id = + PVA_EXTRACT(data[0], PVA_FW_MSG_RESOURCE_TABLE_ID_MSB, + PVA_FW_MSG_RESOURCE_TABLE_ID_LSB, uint8_t); + /* Resource table ID equals context id */ + struct pva_kmd_context *ctx = + pva_kmd_get_context(pva, table_id); + uint32_t i; + + pva_kmd_mutex_lock(&ctx->resource_table_lock); + for (i = 1; i < len; i++) { + pva_kmd_drop_resource(&ctx->ctx_resource_table, + data[i]); + } + pva_kmd_mutex_unlock(&ctx->resource_table_lock); + break; + } + default: + FAULT("Unexpected CCQ msg type from FW"); + break; + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h new file mode 100644 index 00000000..16e5aef0 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_api.h" + +/** + * @brief Handle messages from FW to hypervisor. + * + * This is just a provision for future hypervisor support. For now, this just + * handles all messages from mailboxes. 
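+ *
+ * @param pva_dev Opaque pointer to the owning struct pva_kmd_device.
+ * @param data    Message words read from the mailbox; data[0] carries the
+ *                header with the message type.
+ * @param len     Number of valid 32-bit words in @p data.
+ *
+ * Illustrative call pattern (the mailbox read helper named below is
+ * hypothetical and not part of this driver):
+ *
+ *   uint32_t words[16];
+ *   uint8_t n = read_mailbox_words(pva, words, 16);
+ *   pva_kmd_handle_hyp_msg(pva, words, n);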
+ */
+void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len);
+
+/**
+ * @brief Handle messages from FW to KMD.
+ *
+ * These messages come from CCQ0 status registers.
+ */
+void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len);
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_mutex.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_mutex.h
new file mode 100644
index 00000000..65b9c819
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_mutex.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and
+ * proprietary rights in and to this software and related documentation. Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */
+
+#ifndef PVA_KMD_MUTEX_H
+#define PVA_KMD_MUTEX_H
+
+#include "pva_api.h"
+
+#if defined(__KERNEL__) /* For Linux */
+
+#include <linux/mutex.h>
+typedef struct mutex pva_kmd_mutex_t;
+
+#else /* For user space code, including QNX KMD */
+
+#include <pthread.h>
+/* Mutex */
+typedef pthread_mutex_t pva_kmd_mutex_t;
+
+#endif
+
+enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m);
+void pva_kmd_mutex_lock(pva_kmd_mutex_t *m);
+void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m);
+void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m);
+
+#endif // PVA_KMD_MUTEX_H
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c
new file mode 100644
index 00000000..ab2aab0c
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c
@@ -0,0 +1,814 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and
+ * proprietary rights in and to this software and related documentation. Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */ +#include "pva_kmd_op_handler.h" +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_cmdbuf.h" +#include "pva_kmd_device.h" +#include "pva_kmd_cmdbuf.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_constants.h" +#include "pva_fw.h" +#include "pva_kmd_vpu_app_auth.h" +#include "pva_math_utils.h" + +struct pva_kmd_buffer { + void const *base; + uint32_t offset; + uint32_t size; +}; + +/* Offset will always be multiple of 8 bytes */ +static void incr_offset(struct pva_kmd_buffer *buf, uint32_t incr) +{ + buf->offset = safe_addu32(buf->offset, incr); + buf->offset = + safe_pow2_roundup_u32(buf->offset, (uint32_t)sizeof(uint64_t)); +} + +static bool access_ok(struct pva_kmd_buffer const *buf, uint32_t size) +{ + return safe_addu32(buf->offset, size) <= buf->size; +} + +static void *read_data(struct pva_kmd_buffer *buf, uint32_t size) +{ + void *data = (void *)((uint8_t *)buf->base + buf->offset); + incr_offset(buf, size); + return data; +} + +static void write_data(struct pva_kmd_buffer *buf, void const *data, + uint32_t size) +{ + memcpy((uint8_t *)buf->base + buf->offset, data, size); + incr_offset(buf, size); +} + +static enum pva_error +pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_memory_register_in_args *args; + struct pva_kmd_register_out_args out_args = { 0 }; + struct pva_kmd_device_memory *dev_mem; + struct pva_cmd_update_resource_table *update_cmd; + struct pva_resource_entry entry = { 0 }; + uint8_t smmu_ctx_id; + + uint32_t resource_id = 0; + + if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) { + return PVA_INVAL; + } + + if (!access_ok(in_buffer, + sizeof(struct pva_kmd_memory_register_in_args))) { + err = PVA_INVAL; + goto err_out; + } + + args = read_data(in_buffer, + sizeof(struct pva_kmd_memory_register_in_args)); + + dev_mem = pva_kmd_device_memory_acquire(args->memory_handle, + args->offset, args->size, ctx); + if (dev_mem == NULL) { + err = PVA_NOMEM; + goto err_out; + } + + if (args->segment == PVA_MEMORY_SEGMENT_R5) { + smmu_ctx_id = PVA_R5_SMMU_CONTEXT_ID; + } else { + smmu_ctx_id = ctx->smmu_ctx_id; + } + + err = pva_kmd_device_memory_iova_map(dev_mem, ctx->pva, + args->access_flags, smmu_ctx_id); + if (err != PVA_SUCCESS) { + goto release; + } + + if ((smmu_ctx_id == PVA_R5_SMMU_CONTEXT_ID) && + (dev_mem->iova < FW_SHARED_MEMORY_START)) { + pva_kmd_log_err( + "Not able to map memory in the R5 shared region"); + err = PVA_NOMEM; + goto unmap; + } + + pva_kmd_mutex_lock(&ctx->resource_table_lock); + err = pva_kmd_add_dram_buffer_resource(&ctx->ctx_resource_table, + dev_mem, &resource_id); + pva_kmd_mutex_unlock(&ctx->resource_table_lock); + if (err != PVA_SUCCESS) { + goto unmap; + } + + update_cmd = + pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); + if (update_cmd == NULL) { + pva_kmd_log_err("Unable to reserve command buffer space"); + err = PVA_NOMEM; + goto free_dram_buffer_resource; + } + + err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, + &entry); + if (err != PVA_SUCCESS) { + goto free_cmdbuf; + } + + pva_kmd_set_cmd_update_resource_table( + update_cmd, ctx->resource_table_id, resource_id, &entry); + + out_args.error = PVA_SUCCESS; + out_args.resource_id = resource_id; + write_data(out_buffer, &out_args, sizeof(out_args)); + return err; +free_cmdbuf: + 
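+	/* Error unwind: undo the steps above in reverse order of acquisition:
+	 * cancel the reserved command space, drop the resource-table entry,
+	 * unmap the IOVA mapping, free the device memory, and finally report
+	 * the error through the output buffer.
+	 */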
pva_kmd_cmdbuf_builder_cancel(cmdbuf_builder); +free_dram_buffer_resource: + pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id); +unmap: + pva_kmd_device_memory_iova_unmap(dev_mem); +release: + pva_kmd_device_memory_free(dev_mem); +err_out: + out_args.error = err; + write_data(out_buffer, &out_args, sizeof(out_args)); + return err; +} + +static enum pva_error pva_kmd_op_executable_register_async( + struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_executable_register_in_args *args; + struct pva_kmd_exec_register_out_args out_args = { 0 }; + struct pva_cmd_update_resource_table *update_cmd; + struct pva_resource_entry entry = { 0 }; + struct pva_kmd_resource_record *rec; + uint32_t num_symbols = 0; + void *exec_data; + + uint32_t resource_id = 0; + + if (!access_ok(out_buffer, + sizeof(struct pva_kmd_exec_register_out_args))) { + return PVA_INVAL; + } + + if (!access_ok(in_buffer, + sizeof(struct pva_kmd_executable_register_in_args))) { + err = PVA_INVAL; + goto err_out; + } + + args = read_data(in_buffer, + sizeof(struct pva_kmd_executable_register_in_args)); + + if (!access_ok(in_buffer, args->size)) { + err = PVA_INVAL; + goto err_out; + } + + exec_data = read_data(in_buffer, args->size); + + err = pva_kmd_verify_exectuable_hash(ctx->pva, (uint8_t *)exec_data, + args->size); + if (err != PVA_SUCCESS) { + goto err_out; + } + + pva_kmd_mutex_lock(&ctx->resource_table_lock); + err = pva_kmd_add_vpu_bin_resource(&ctx->ctx_resource_table, exec_data, + args->size, &resource_id); + if (err == PVA_SUCCESS) { + rec = pva_kmd_use_resource(&ctx->ctx_resource_table, + resource_id); + ASSERT(rec != NULL); + num_symbols = rec->vpu_bin.symbol_table.n_symbols; + pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id); + } + pva_kmd_mutex_unlock(&ctx->resource_table_lock); + if (err != PVA_SUCCESS) { + goto err_out; + } + + update_cmd = + pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); + if (update_cmd == NULL) { + pva_kmd_log_err("Unable to reserve memory in command buffer"); + err = PVA_NOMEM; + goto drop_resource; + } + ASSERT(update_cmd != NULL); + + err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, + &entry); + ASSERT(err == PVA_SUCCESS); + + pva_kmd_set_cmd_update_resource_table( + update_cmd, ctx->resource_table_id, resource_id, &entry); + + out_args.error = PVA_SUCCESS; + out_args.resource_id = resource_id; + out_args.num_symbols = num_symbols; + write_data(out_buffer, &out_args, sizeof(out_args)); + return err; +drop_resource: + pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id); +err_out: + out_args.error = err; + write_data(out_buffer, &out_args, sizeof(out_args)); + return err; +} + +static enum pva_error +pva_kmd_op_dma_register_async(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_dma_config_register_in_args *args; + struct pva_kmd_register_out_args out_args = { 0 }; + struct pva_cmd_update_resource_table *update_cmd; + struct pva_resource_entry entry = { 0 }; + void *dma_cfg_data; + uint32_t dma_cfg_payload_size; + uint32_t resource_id = 0; + uint32_t dma_config_size = 0; + + if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) { + return PVA_INVAL; + } + + if (!access_ok(in_buffer, + sizeof(struct 
pva_kmd_dma_config_register_in_args))) { + return PVA_INVAL; + } + + args = read_data(in_buffer, + sizeof(struct pva_kmd_dma_config_register_in_args)); + + dma_cfg_data = &args->dma_config_header; + dma_cfg_payload_size = in_buffer->size - in_buffer->offset; + // Discard the data we are about to pass to pva_kmd_add_dma_config_resource + read_data(in_buffer, dma_cfg_payload_size); + + pva_kmd_mutex_lock(&ctx->resource_table_lock); + dma_config_size = + safe_addu32(dma_cfg_payload_size, + (uint32_t)sizeof(args->dma_config_header)); + err = pva_kmd_add_dma_config_resource(&ctx->ctx_resource_table, + dma_cfg_data, dma_config_size, + &resource_id); + pva_kmd_mutex_unlock(&ctx->resource_table_lock); + if (err != PVA_SUCCESS) { + goto err_out; + } + + update_cmd = + pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); + if (update_cmd == NULL) { + err = PVA_NOMEM; + goto drop_dma_config; + } + + err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, + &entry); + ASSERT(err == PVA_SUCCESS); + + pva_kmd_set_cmd_update_resource_table( + update_cmd, ctx->resource_table_id, resource_id, &entry); + + out_args.error = PVA_SUCCESS; + out_args.resource_id = resource_id; + write_data(out_buffer, &out_args, sizeof(out_args)); + + return PVA_SUCCESS; +drop_dma_config: + pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id); +err_out: + out_args.error = err; + write_data(out_buffer, &out_args, sizeof(out_args)); + /* Error is reported in the output buffer. So we return success here. */ + return PVA_SUCCESS; +} + +static enum pva_error +pva_kmd_op_unregister_async(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_unregister_in_args *args; + struct pva_cmd_unregister_resource *unreg_cmd; + + if (!access_ok(in_buffer, sizeof(struct pva_kmd_unregister_in_args))) { + err = PVA_INVAL; + goto err_out; + } + + args = read_data(in_buffer, sizeof(struct pva_kmd_unregister_in_args)); + + unreg_cmd = + pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*unreg_cmd)); + if (unreg_cmd == NULL) { + pva_kmd_log_err( + "Unable to reserve memory for unregister command"); + err = PVA_NOMEM; + goto err_out; + } + + pva_kmd_set_cmd_unregister_resource(unreg_cmd, args->resource_id); + + return PVA_SUCCESS; +err_out: + return err; +} + +static enum pva_error pva_kmd_async_ops_handler( + struct pva_kmd_context *ctx, struct pva_fw_postfence *post_fence, + struct pva_kmd_buffer *in_arg, struct pva_kmd_buffer *out_arg) +{ + struct pva_kmd_cmdbuf_builder cmdbuf_builder; + enum pva_error err = PVA_SUCCESS; + uint32_t wait_time = 0; + + //first check if we have space in queue + while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) { + pva_kmd_sleep_us(PVA_KMD_WAIT_FW_POLL_INTERVAL_US); + wait_time += PVA_KMD_WAIT_FW_POLL_INTERVAL_US; + if (wait_time > PVA_KMD_WAIT_FW_TIMEOUT_US) { + err = PVA_TIMEDOUT; + goto out; + } + } + + err = pva_kmd_submitter_prepare(&ctx->submitter, &cmdbuf_builder); + if (err != PVA_SUCCESS) { + goto out; + } + + while (access_ok(in_arg, sizeof(struct pva_kmd_op_header))) { + struct pva_kmd_op_header *header = + read_data(in_arg, sizeof(struct pva_kmd_op_header)); + + if (header->op_type >= PVA_KMD_OP_MAX) { + err = PVA_INVAL; + goto out; + } + + switch (header->op_type) { + case PVA_KMD_OP_MEMORY_REGISTER: + err = pva_kmd_op_memory_register_async( + ctx, in_arg, out_arg, &cmdbuf_builder); + break; + + case 
PVA_KMD_OP_EXECUTABLE_REGISTER: + err = pva_kmd_op_executable_register_async( + ctx, in_arg, out_arg, &cmdbuf_builder); + break; + + case PVA_KMD_OP_DMA_CONFIG_REGISTER: + err = pva_kmd_op_dma_register_async( + ctx, in_arg, out_arg, &cmdbuf_builder); + break; + case PVA_KMD_OP_UNREGISTER: + err = pva_kmd_op_unregister_async(ctx, in_arg, out_arg, + &cmdbuf_builder); + break; + + default: + err = PVA_INVAL; + break; + } + + if (err != PVA_SUCCESS) { + break; + } + } + + /* This fence comes from user, so set the flag to inform FW */ + post_fence->flags |= PVA_FW_POSTFENCE_FLAGS_USER_FENCE; + err = pva_kmd_submitter_submit_with_fence(&ctx->submitter, + &cmdbuf_builder, post_fence); + ASSERT(err == PVA_SUCCESS); + +out: + return err; +} + +static enum pva_error pva_kmd_op_context_init(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer) +{ + struct pva_kmd_context_init_in_args *ctx_init_args; + struct pva_kmd_context_init_out_args ctx_init_out = { 0 }; + enum pva_error err; + + if (!access_ok(in_buffer, + sizeof(struct pva_kmd_context_init_in_args))) { + return PVA_INVAL; + } + + if (!access_ok(out_buffer, + sizeof(struct pva_kmd_context_init_out_args))) { + return PVA_INVAL; + } + + ctx_init_args = read_data(in_buffer, + sizeof(struct pva_kmd_context_init_in_args)); + + err = pva_kmd_context_init(ctx, ctx_init_args->resource_table_capacity); + ctx_init_out.error = err; + ctx_init_out.ccq_shm_hdl = (uint64_t)ctx->ccq_shm_handle; + + write_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out)); + + return err; +} + +static enum pva_error +pva_kmd_op_syncpt_register_async(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +{ + enum pva_error err; + struct pva_syncpt_rw_info *syncpts; + struct pva_kmd_device_memory dev_mem; + uint32_t resource_id = 0; + struct pva_cmd_update_resource_table *update_cmd; + struct pva_resource_entry entry = { 0 }; + struct pva_kmd_syncpt_register_out_args syncpt_register_out = { 0 }; + + /* Register RO syncpts */ + dev_mem.iova = ctx->pva->syncpt_ro_iova; + dev_mem.va = 0; + dev_mem.size = ctx->pva->syncpt_offset * ctx->pva->num_syncpts; + dev_mem.pva = ctx->pva; + dev_mem.smmu_ctx_idx = PVA_R5_SMMU_CONTEXT_ID; + pva_kmd_mutex_lock(&ctx->resource_table_lock); + err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem, + &resource_id); + pva_kmd_mutex_unlock(&ctx->resource_table_lock); + if (err != PVA_SUCCESS) { + goto err_out; + } + syncpt_register_out.syncpt_ro_res_id = resource_id; + syncpt_register_out.num_ro_syncpoints = ctx->pva->num_syncpts; + update_cmd = + pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); + ASSERT(update_cmd != NULL); + err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, + &entry); + ASSERT(err == PVA_SUCCESS); + pva_kmd_set_cmd_update_resource_table( + update_cmd, ctx->resource_table_id, resource_id, &entry); + + /* Register RW syncpts */ + syncpts = (struct pva_syncpt_rw_info *)pva_kmd_get_block( + &ctx->pva->syncpt_allocator, ctx->syncpt_block_index); + ASSERT(syncpts != NULL); + + for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS_PER_CONTEXT; i++) { + ctx->syncpt_ids[i] = syncpts[i].syncpt_id; + syncpt_register_out.synpt_ids[i] = syncpts[i].syncpt_id; + } + + dev_mem.iova = syncpts[0].syncpt_iova; + dev_mem.va = 0; + dev_mem.size = ctx->pva->syncpt_offset * PVA_NUM_RW_SYNCPTS_PER_CONTEXT; + dev_mem.pva = ctx->pva; + dev_mem.smmu_ctx_idx = 
PVA_R5_SMMU_CONTEXT_ID; + pva_kmd_mutex_lock(&ctx->resource_table_lock); + err = pva_kmd_add_syncpt_resource(&ctx->ctx_resource_table, &dev_mem, + &resource_id); + pva_kmd_mutex_unlock(&ctx->resource_table_lock); + if (err != PVA_SUCCESS) { + goto err_out; + } + syncpt_register_out.syncpt_rw_res_id = resource_id; + syncpt_register_out.synpt_size = ctx->pva->syncpt_offset; + ctx->ctx_resource_table.syncpt_allocator = &ctx->pva->syncpt_allocator; + update_cmd = + pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*update_cmd)); + ASSERT(update_cmd != NULL); + err = pva_kmd_make_resource_entry(&ctx->ctx_resource_table, resource_id, + &entry); + ASSERT(err == PVA_SUCCESS); + pva_kmd_set_cmd_update_resource_table( + update_cmd, ctx->resource_table_id, resource_id, &entry); + +err_out: + syncpt_register_out.error = err; + write_data(out_buffer, &syncpt_register_out, + sizeof(syncpt_register_out)); + return err; +} + +static enum pva_error pva_kmd_op_queue_create(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_arg, + struct pva_kmd_buffer *out_arg) +{ + struct pva_kmd_queue_create_in_args *queue_create_args; + struct pva_kmd_queue_create_out_args queue_out_args = { 0 }; + uint32_t queue_id = PVA_INVALID_QUEUE_ID; + enum pva_error err = PVA_SUCCESS; + + if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_create_in_args))) { + return PVA_INVAL; + } + + if (!access_ok(out_arg, sizeof(struct pva_kmd_queue_create_out_args))) { + return PVA_INVAL; + } + + queue_create_args = + read_data(in_arg, sizeof(struct pva_kmd_queue_create_in_args)); + queue_out_args.error = + pva_kmd_queue_create(ctx, queue_create_args, &queue_id); + if (queue_out_args.error == PVA_SUCCESS) { + queue_out_args.queue_id = queue_id; + } + + if (queue_id >= PVA_MAX_NUM_QUEUES_PER_CONTEXT) { + pva_kmd_log_err("pva_kmd_op_queue_create invalid queue id"); + err = PVA_INVAL; + goto err_out; + } + pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id], + &queue_out_args.syncpt_fence_counter); + + write_data(out_arg, &queue_out_args, + sizeof(struct pva_kmd_queue_create_out_args)); + +err_out: + return err; +} + +static enum pva_error pva_kmd_op_queue_destroy(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_arg, + struct pva_kmd_buffer *out_arg) +{ + struct pva_kmd_queue_destroy_in_args *queue_destroy_args; + struct pva_kmd_queue_destroy_out_args queue_out_args = { 0 }; + + if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args))) { + return PVA_INVAL; + } + + if (!access_ok(out_arg, + sizeof(struct pva_kmd_queue_destroy_out_args))) { + return PVA_INVAL; + } + + queue_destroy_args = + read_data(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args)); + queue_out_args.error = pva_kmd_queue_destroy(ctx, queue_destroy_args); + + write_data(out_arg, &queue_out_args, + sizeof(struct pva_kmd_queue_destroy_out_args)); + + return PVA_SUCCESS; +} + +static enum pva_error +pva_kmd_op_executable_get_symbols(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_arg, + struct pva_kmd_buffer *out_arg) +{ + struct pva_kmd_executable_get_symbols_in_args *sym_in_args; + struct pva_kmd_executable_get_symbols_out_args sym_out_args = { 0 }; + struct pva_kmd_resource_record *rec; + enum pva_error err = PVA_SUCCESS; + uint32_t table_size = 0; + uint32_t size = 0; + + if (!access_ok(in_arg, + sizeof(struct pva_kmd_executable_get_symbols_in_args))) { + return PVA_INVAL; + } + + if (!access_ok(out_arg, + sizeof(struct pva_kmd_executable_get_symbols_out_args))) { + return PVA_INVAL; + } + + sym_in_args = read_data( + in_arg, 
sizeof(struct pva_kmd_executable_get_symbols_in_args)); + + rec = pva_kmd_use_resource(&ctx->ctx_resource_table, + sym_in_args->exec_resource_id); + if (rec == NULL) { + err = PVA_INVAL; + pva_kmd_log_err("pva_kmd_use_resource failed"); + goto err_out; + } + if (rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) { + err = PVA_INVAL; + pva_kmd_log_err("Not an executable resource"); + goto err_drop; + } + + table_size = safe_mulu32(rec->vpu_bin.symbol_table.n_symbols, + sizeof(struct pva_symbol_info)); + size = safe_addu32( + table_size, + sizeof(struct pva_kmd_executable_get_symbols_out_args)); + if (!access_ok(out_arg, size)) { + err = PVA_INVAL; + goto err_drop; + } + + sym_out_args.error = err; + sym_out_args.num_symbols = rec->vpu_bin.symbol_table.n_symbols; + write_data(out_arg, &sym_out_args, sizeof(sym_out_args)); + write_data(out_arg, rec->vpu_bin.symbol_table.symbols, table_size); + + pva_kmd_drop_resource(&ctx->ctx_resource_table, + sym_in_args->exec_resource_id); + + return PVA_SUCCESS; + +err_drop: + pva_kmd_drop_resource(&ctx->ctx_resource_table, + sym_in_args->exec_resource_id); + +err_out: + sym_out_args.error = err; + write_data(out_arg, &sym_out_args, sizeof(sym_out_args)); + return err; +} + +typedef enum pva_error (*pva_kmd_async_op_func_t)( + struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder); + +static enum pva_error +pva_kmd_op_synced_submit(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_buffer, + struct pva_kmd_buffer *out_buffer, + pva_kmd_async_op_func_t async_op_func) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_cmdbuf_builder cmdbuf_builder; + uint32_t fence_val; + + err = pva_kmd_submitter_prepare(&ctx->submitter, &cmdbuf_builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = async_op_func(ctx, in_buffer, out_buffer, &cmdbuf_builder); + if (err != PVA_SUCCESS) { + goto cancel_submit; + } + + err = pva_kmd_submitter_submit(&ctx->submitter, &cmdbuf_builder, + &fence_val); + /* TODO: handle this error */ + ASSERT(err == PVA_SUCCESS); + + err = pva_kmd_submitter_wait(&ctx->submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + /* TODO: handle this error when FW reboot is supported */ + ASSERT(err == PVA_SUCCESS); + + return PVA_SUCCESS; +cancel_submit: + pva_kmd_cmdbuf_builder_cancel(&cmdbuf_builder); +err_out: + return err; +} + +static enum pva_error pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx, + struct pva_kmd_buffer *in_arg, + struct pva_kmd_buffer *out_arg) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_op_header *header; + + if (!access_ok(in_arg, sizeof(struct pva_kmd_op_header))) { + err = PVA_INVAL; + goto out; + } + + header = read_data(in_arg, sizeof(struct pva_kmd_op_header)); + switch (header->op_type) { + case PVA_KMD_OP_CONTEXT_INIT: + err = pva_kmd_op_context_init(ctx, in_arg, out_arg); + break; + case PVA_KMD_OP_QUEUE_CREATE: + err = pva_kmd_op_queue_create(ctx, in_arg, out_arg); + break; + case PVA_KMD_OP_QUEUE_DESTROY: + err = pva_kmd_op_queue_destroy(ctx, in_arg, out_arg); + break; + case PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS: + err = pva_kmd_op_executable_get_symbols(ctx, in_arg, out_arg); + break; + case PVA_KMD_OP_MEMORY_REGISTER: + err = pva_kmd_op_synced_submit( + ctx, in_arg, out_arg, pva_kmd_op_memory_register_async); + break; + case PVA_KMD_OP_SYNPT_REGISTER: + err = pva_kmd_op_synced_submit( + ctx, in_arg, out_arg, pva_kmd_op_syncpt_register_async); + break; + case 
PVA_KMD_OP_EXECUTABLE_REGISTER: + err = pva_kmd_op_synced_submit( + ctx, in_arg, out_arg, + pva_kmd_op_executable_register_async); + break; + case PVA_KMD_OP_DMA_CONFIG_REGISTER: + err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg, + pva_kmd_op_dma_register_async); + break; + case PVA_KMD_OP_UNREGISTER: + err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg, + pva_kmd_op_unregister_async); + break; + default: + err = PVA_INVAL; + break; + } + +out: + return err; +} + +enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, + void const *ops_buffer, uint32_t ops_size, + void *response, + uint32_t response_buffer_size, + uint32_t *out_response_size) +{ + struct pva_kmd_operations *ops; + struct pva_kmd_buffer in_buffer = { 0 }, out_buffer = { 0 }; + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_response_header *resp_hdr; + + in_buffer.base = ops_buffer; + in_buffer.size = ops_size; + + out_buffer.base = response; + out_buffer.size = response_buffer_size; + + if (!access_ok(&in_buffer, sizeof(struct pva_kmd_operations))) { + err = PVA_INVAL; + goto out; + } + + if (!access_ok(&out_buffer, sizeof(struct pva_kmd_response_header))) { + err = PVA_INVAL; + goto out; + } + + resp_hdr = + read_data(&out_buffer, sizeof(struct pva_kmd_response_header)); + + ops = read_data(&in_buffer, sizeof(struct pva_kmd_operations)); + + if (ops->mode == PVA_KMD_OPS_MODE_SYNC) { + /* Process one sync operation */ + err = pva_kmd_sync_ops_handler(ctx, &in_buffer, &out_buffer); + + } else { + /* Process async operations: + * - memory register + * - executable register + * - DMA configuration registration + * - unregister + */ + err = pva_kmd_async_ops_handler(ctx, &ops->postfence, + &in_buffer, &out_buffer); + } + //Update the size of the responses in the response header. + // This size also include the header size. + resp_hdr->rep_size = out_buffer.offset; +out: + *out_response_size = out_buffer.offset; + return err; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h new file mode 100644 index 00000000..9d5f1c29 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_OP_HANDLER_H +#define PVA_KMD_OP_HANDLER_H + +#include "pva_kmd_context.h" +#include "pva_fw.h" +#include "pva_kmd.h" + +/** @brief Handler for PVA KMD operations. +* +* This function implements the only runtime interface with UMD. Shim layers +* receive the input data from UMD and call this function to execute the +* operations. Then, shim layers send the response back to UMD. +* +* @param ctx The KMD context. +* @param ops Pointer to the input buffer containing the operations to be +* executed. The common layer assumes that this buffer is private to +* KMD and will dereference it directly without making a copy. +* Specifically on Linux, this parameter should point to a private +* kernel space buffer instead of the user space buffer. +* @param ops_size Size of the input buffer. 
+* @param response Pointer to the buffer where the response will be written. +* @param response_buffer_size Size of the response buffer. +* @param out_response_size Pointer to a variable where the actual size of the +* response will be written. +* +* @return pva_error indicating the success or failure of the operation. +*/ +enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, void const *ops, + uint32_t ops_size, void *response, + uint32_t response_buffer_size, + uint32_t *out_response_size); + +#endif // PVA_KMD_OP_HANDLER_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c new file mode 100644 index 00000000..f7faafe0 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.c @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_kmd_utils.h" +#include "pva_fw.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_device.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_context.h" +#include "pva_kmd_block_allocator.h" +#include "pva_utils.h" +#include "pva_kmd_device.h" +#include "pva_kmd_constants.h" +#include "pva_kmd_pm.h" + +enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + enum pva_error err = PVA_SUCCESS; + struct pva_cmd_suspend_fw *fw_suspend; + uint32_t fence_val; + + pva_kmd_mutex_lock(&pva->powercycle_lock); + if (pva->refcount == 0u) { + pva_dbg_printf("PVA: Nothing to prepare for suspend"); + err = PVA_SUCCESS; + goto err_out; + } + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "PVA: Prepare submitter for FW suspend command failed\n"); + goto err_out; + } + + //Build args + fw_suspend = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_suspend)); + if (fw_suspend == NULL) { + pva_kmd_log_err( + "PVA: Memory alloc for FW suspend command failed\n"); + err = PVA_NOMEM; + goto cancel_submit; + } + + pva_kmd_set_cmd_suspend_fw(fw_suspend); + + //Submit + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "PVA: Submission for FW suspend command failed\n"); + goto cancel_submit; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "PVA: Waiting for FW timed out when preparing for suspend state\n"); + goto err_out; + } + +cancel_submit: + pva_kmd_cmdbuf_builder_cancel(&builder); + +err_out: + pva_kmd_mutex_unlock(&pva->powercycle_lock); + return err; +} + +enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + struct pva_cmd_init_resource_table *res_cmd; + struct pva_cmd_init_queue *queue_cmd; + struct pva_cmd_resume_fw *fw_resume; + enum pva_error err; + uint32_t fence_val; + struct pva_kmd_queue *queue; + + pva_kmd_mutex_lock(&pva->powercycle_lock); + 
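/* Nothing to restore when the device has no active users (refcount is zero). */ + 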
if (pva->refcount == 0u) { + pva_dbg_printf( + "PVA : Nothing to check for completion in resume"); + err = PVA_SUCCESS; + goto err_out; + } + + pva_kmd_send_resource_table_info_by_ccq(pva, &pva->dev_resource_table); + pva_kmd_send_queue_info_by_ccq(pva, &pva->dev_queue); + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "PVA: Prepare submitter for FW resume command failed\n"); + goto err_out; + } + + fw_resume = pva_kmd_reserve_cmd_space(&builder, sizeof(*fw_resume)); + if (fw_resume == NULL) { + pva_kmd_log_err( + "PVA: Memory alloc for FW resume command failed\n"); + err = PVA_NOMEM; + goto cancel_builder; + } + + pva_kmd_set_cmd_resume_fw(fw_resume); + + for (uint8_t i = 0; i < pva->max_n_contexts; i++) { + struct pva_kmd_context *ctx = pva_kmd_get_context( + pva, sat_add8(i, PVA_KMD_USER_CONTEXT_ID_BASE)); + if (ctx != NULL) { + /**Initialize resource table */ + res_cmd = pva_kmd_reserve_cmd_space(&builder, + sizeof(*res_cmd)); + if (res_cmd == NULL) { + pva_kmd_log_err( + "PVA: Memory alloc for context registration in FW resume command failed\n"); + err = PVA_NOMEM; + goto cancel_builder; + } + + pva_dbg_printf( + "PVA: Resume init resource table for context %d\n", + ctx->ccq_id); + pva_kmd_set_cmd_init_resource_table( + res_cmd, ctx->resource_table_id, + ctx->ctx_resource_table.table_mem->iova, + ctx->ctx_resource_table.n_entries); + + queue_cmd = pva_kmd_reserve_cmd_space( + &builder, sizeof(*queue_cmd)); + if (queue_cmd == NULL) { + pva_kmd_log_err( + "PVA: Memory alloc for queue registration in FW resume command failed\n"); + err = PVA_NOMEM; + goto cancel_builder; + } + + pva_dbg_printf( + "PVA: Resume priv queue for context %d\n", + ctx->ccq_id); + pva_kmd_set_cmd_init_queue( + queue_cmd, PVA_PRIV_CCQ_ID, + ctx->ccq_id, /* For privileged queues, queue ID == user CCQ ID*/ + ctx->ctx_queue.queue_memory->iova, + ctx->ctx_queue.max_num_submit); + + /**Initialize resource table */ + for (uint32_t j = 0; j < ctx->max_n_queues; j++) { + queue = pva_kmd_get_block(&ctx->queue_allocator, + j); + if (queue != NULL) { + pva_dbg_printf( + "PVA: Resume queue for context %d, queue %d\n", + queue->ccq_id, queue->queue_id); + queue_cmd = pva_kmd_reserve_cmd_space( + &builder, sizeof(*queue_cmd)); + if (queue_cmd == NULL) { + pva_kmd_log_err( + "PVA: Memory alloc for queue registration in FW resume command failed\n"); + err = PVA_NOMEM; + goto cancel_builder; + } + + pva_kmd_set_cmd_init_queue( + queue_cmd, queue->ccq_id, + queue->queue_id, + queue->queue_memory->iova, + queue->max_num_submit); + } + } + } + } + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + // Error is either QUEUE_FULL or TIMEDOUT + pva_kmd_log_err( + "PVA: Submission for FW resume command failed\n"); + goto cancel_builder; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when resuming from suspend state"); + goto err_out; + } + +cancel_builder: + pva_kmd_cmdbuf_builder_cancel(&builder); + +err_out: + pva_kmd_mutex_unlock(&pva->powercycle_lock); + return err; +} \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.h new file mode 100644 index 00000000..d910eba6 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_pm.h @@ -0,0 +1,19 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_PM_H +#define PVA_KMD_PM_H + +struct pva_kmd_device; +enum pva_error pva_kmd_prepare_suspend(struct pva_kmd_device *pva); +enum pva_error pva_kmd_complete_resume(struct pva_kmd_device *pva); + +#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c new file mode 100644 index 00000000..b922b9a6 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_kmd_utils.h" +#include "pva_fw.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_queue.h" +#include "pva_kmd_context.h" +#include "pva_kmd_block_allocator.h" +#include "pva_utils.h" +#include "pva_kmd_device.h" +#include "pva_kmd_constants.h" + +void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva, + uint8_t ccq_id, uint8_t queue_id, + pva_kmd_mutex_t *ccq_lock, + struct pva_kmd_device_memory *queue_memory, + uint32_t max_num_submit) +{ + queue->pva = pva; + queue->queue_memory = queue_memory; + queue->ccq_id = ccq_id; + queue->queue_id = queue_id; + queue->max_num_submit = max_num_submit; + queue->queue_header = queue_memory->va; + queue->ccq_lock = ccq_lock; +} + +uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue) +{ + uint32_t head = queue->queue_header->cb_head; + uint32_t tail = queue->queue_header->cb_tail; + uint32_t size = queue->max_num_submit; + return pva_fw_queue_space(head, tail, size); +} + +enum pva_error +pva_kmd_queue_submit(struct pva_kmd_queue *queue, + struct pva_fw_cmdbuf_submit_info const *submit_info) +{ + uint32_t head = queue->queue_header->cb_head; + uint32_t tail = queue->queue_header->cb_tail; + uint32_t size = queue->max_num_submit; + uint64_t ccq_entry; + enum pva_error err; + struct pva_fw_cmdbuf_submit_info *items = pva_offset_pointer( + queue->queue_header, sizeof(*queue->queue_header)); + + if (pva_fw_queue_space(head, tail, size) == 0) { + return PVA_QUEUE_FULL; + } + + items[tail] = *submit_info; + + /* Update tail */ + tail = wrap_add(tail, 1, size); + ccq_entry = + PVA_INSERT64(PVA_FW_CCQ_OP_UPDATE_TAIL, PVA_FW_CCQ_OPCODE_MSB, + PVA_FW_CCQ_OPCODE_LSB) | + PVA_INSERT64(queue->queue_id, PVA_FW_CCQ_QUEUE_ID_MSB, + PVA_FW_CCQ_QUEUE_ID_LSB) | + PVA_INSERT64(tail, PVA_FW_CCQ_TAIL_MSB, PVA_FW_CCQ_TAIL_LSB); + + pva_kmd_mutex_lock(queue->ccq_lock); + /* TODO: memory write barrier is needed here */ + err = pva_kmd_ccq_push_with_timeout(queue->pva, queue->ccq_id, + ccq_entry, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err == PVA_SUCCESS) { + 
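/* Publish the new tail only if the CCQ push succeeded. */ + 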
queue->queue_header->cb_tail = tail; + } + pva_kmd_mutex_unlock(queue->ccq_lock); + + return err; +} + +void pva_kmd_queue_deinit(struct pva_kmd_queue *queue) +{ + queue->queue_memory = NULL; + queue->ccq_id = PVA_INVALID_QUEUE_ID; + queue->max_num_submit = 0; +} + +static enum pva_error notify_fw_queue_deinit(struct pva_kmd_context *ctx, + struct pva_kmd_queue *queue) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_cmdbuf_builder builder; + struct pva_cmd_deinit_queue *queue_cmd; + uint32_t fence_val; + + err = pva_kmd_submitter_prepare(&ctx->submitter, &builder); + if (err != PVA_SUCCESS) { + goto end; + } + + queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd)); + if (queue_cmd == NULL) { + err = PVA_NOMEM; + goto cancel_submitter; + } + pva_kmd_set_cmd_deinit_queue(queue_cmd, queue->ccq_id, queue->queue_id); + + err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto cancel_submitter; + } + + err = pva_kmd_submitter_wait(&ctx->submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + ASSERT(err == PVA_SUCCESS); + return PVA_SUCCESS; +cancel_submitter: + pva_kmd_cmdbuf_builder_cancel(&builder); +end: + return err; +} + +enum pva_error +pva_kmd_queue_create(struct pva_kmd_context *ctx, + struct pva_kmd_queue_create_in_args *in_args, + uint32_t *queue_id) +{ + struct pva_kmd_device_memory *submission_mem_kmd = NULL; + struct pva_kmd_queue *queue = NULL; + struct pva_kmd_cmdbuf_builder builder; + struct pva_cmd_init_queue *queue_cmd; + uint32_t fence_val; + enum pva_error err, tmperr; + + queue = pva_kmd_zalloc_block(&ctx->queue_allocator, queue_id); + if (queue == NULL) { + err = PVA_NOMEM; + goto err_out; + } + + /* Get handle from mapped memory */ + submission_mem_kmd = pva_kmd_device_memory_acquire( + in_args->queue_memory_handle, in_args->queue_memory_offset, + pva_get_submission_queue_memory_size( + in_args->max_submission_count), + ctx); + if (submission_mem_kmd == NULL) { + err = PVA_INVAL; + goto err_free_queue; + } + + pva_kmd_queue_init(queue, ctx->pva, ctx->ccq_id, *queue_id, + &ctx->ccq_lock, submission_mem_kmd, + in_args->max_submission_count); + + /* Get device mapped IOVA to share with FW */ + err = pva_kmd_device_memory_iova_map(submission_mem_kmd, ctx->pva, + PVA_ACCESS_RW, + PVA_R5_SMMU_CONTEXT_ID); + if (err != PVA_SUCCESS) { + goto err_free_kmd_memory; + } + + if (submission_mem_kmd->iova < FW_SHARED_MEMORY_START) { + pva_kmd_log_err( + "Not able to map memory in the R5 shared region"); + err = PVA_NOMEM; + goto unmap_iova; + } + + err = pva_kmd_submitter_prepare(&ctx->submitter, &builder); + if (err != PVA_SUCCESS) { + goto unmap_iova; + } + + queue_cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*queue_cmd)); + if (queue_cmd == NULL) { + err = PVA_NOMEM; + goto cancel_submitter; + } + ASSERT(queue_cmd != NULL); + pva_kmd_set_cmd_init_queue(queue_cmd, queue->ccq_id, queue->queue_id, + queue->queue_memory->iova, + queue->max_num_submit); + + err = pva_kmd_submitter_submit(&ctx->submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto cancel_submitter; + } + + err = pva_kmd_submitter_wait(&ctx->submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + goto cancel_submitter; + } + + return PVA_SUCCESS; + +cancel_submitter: + pva_kmd_cmdbuf_builder_cancel(&builder); +unmap_iova: + pva_kmd_device_memory_iova_unmap(submission_mem_kmd); +err_free_kmd_memory: + 
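/* queue->queue_memory is submission_mem_kmd at this point; free it and reset the queue state. */ + 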
pva_kmd_device_memory_free(queue->queue_memory); + pva_kmd_queue_deinit(queue); +err_free_queue: + tmperr = pva_kmd_free_block(&ctx->queue_allocator, *queue_id); + ASSERT(tmperr == PVA_SUCCESS); + + *queue_id = PVA_INVALID_QUEUE_ID; +err_out: + return err; +} + +enum pva_error +pva_kmd_queue_destroy(struct pva_kmd_context *ctx, + struct pva_kmd_queue_destroy_in_args *in_args) +{ + struct pva_kmd_queue *queue; + enum pva_error err = PVA_SUCCESS; + + /* + * TODO : + * Send command to FW to stop queue usage. Wait for ack. + * This call needs to be added after syncpoint and ccq functions are ready. + */ + queue = pva_kmd_get_block(&ctx->queue_allocator, in_args->queue_id); + if (queue == NULL) { + return PVA_INVAL; + } + + err = notify_fw_queue_deinit(ctx, queue); + if (err != PVA_SUCCESS) { + return err; + } + + pva_kmd_device_memory_iova_unmap(queue->queue_memory); + + pva_kmd_device_memory_free(queue->queue_memory); + + pva_kmd_queue_deinit(queue); + + err = pva_kmd_free_block(&ctx->queue_allocator, in_args->queue_id); + ASSERT(err == PVA_SUCCESS); + return PVA_SUCCESS; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h new file mode 100644 index 00000000..8804b409 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_QUEUE_H +#define PVA_KMD_QUEUE_H +#include "pva_fw.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_mutex.h" + +struct pva_kmd_queue { + struct pva_kmd_device *pva; + struct pva_kmd_device_memory *queue_memory; + struct pva_fw_submit_queue_header *queue_header; + pva_kmd_mutex_t *ccq_lock; + uint8_t ccq_id; + uint8_t queue_id; + uint32_t max_num_submit; +}; + +void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva, + uint8_t ccq_id, uint8_t queue_id, + pva_kmd_mutex_t *ccq_lock, + struct pva_kmd_device_memory *queue_memory, + uint32_t max_num_submit); +enum pva_error +pva_kmd_queue_create(struct pva_kmd_context *ctx, + struct pva_kmd_queue_create_in_args *in_args, + uint32_t *queue_id); + +enum pva_error +pva_kmd_queue_destroy(struct pva_kmd_context *ctx, + struct pva_kmd_queue_destroy_in_args *in_args); + +enum pva_error +pva_kmd_queue_submit(struct pva_kmd_queue *queue, + struct pva_fw_cmdbuf_submit_info const *submit_info); +uint32_t pva_kmd_queue_space(struct pva_kmd_queue *queue); +void pva_kmd_queue_deinit(struct pva_kmd_queue *queue); + +#endif // PVA_KMD_QUEUE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_regs.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_regs.h new file mode 100644 index 00000000..d4328999 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_regs.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_REGS_H +#define PVA_KMD_REGS_H + +#include "pva_api.h" +#include "pva_constants.h" + +/* Exception vectors */ +#define PVA_REG_EVP_RESET_ADDR 0x20 +#define PVA_REG_EVP_UNDEF_ADDR 0x24 +#define PVA_REG_EVP_SWI_ADDR 0x28 +#define PVA_REG_EVP_PREFETCH_ABORT_ADDR 0x2c +#define PVA_REG_EVP_DATA_ABORT_ADDR 0x30 +#define PVA_REG_EVP_RSVD_ADDR 0x34 +#define PVA_REG_EVP_IRQ_ADDR 0x38 +#define PVA_REG_EVP_FIQ_ADDR 0x3c + +/* R5 */ +#define PVA_REG_PROC_CPUHALT_ADDR 0x30000 + +/* SCRs */ +#define PVA_SEC_SCR_SECEXT_INTR_EVENT 0x28804 +#define PVA_PROC_SCR_PROC 0x30800 + +#define PVA_REG_EVP_SCR_ADDR 0x40 //PVA_EVP_SCR_EVP_0 +#define PVA_CFG_SCR_STATUS_CNTL 0x258000 //PVA_CFG_SCR_STATUS_CNTL_0 +#define PVA_CFG_SCR_PRIV 0x258008 //PVA_CFG_SCR_PRIV_0 +#define PVA_CFG_SCR_CCQ_CNTL 0x258010 //PVA_CFG_SCR_CCQ_CNTL_0 + +/* HSP */ +#define PVA_REG_HSP_COMMON_ADDR 0x160000 +#define PVA_REG_HSP_INT_IE0_ADDR 0x160100 +#define PVA_REG_HSP_INT_IE1_ADDR 0x160104 +#define PVA_REG_HSP_INT_IE2_ADDR 0x160108 +#define PVA_REG_HSP_INT_IE3_ADDR 0x16010c +#define PVA_REG_HSP_INT_IE4_ADDR 0x160110 +#define PVA_REG_HSP_INT_EXTERNAL_ADDR 0x160300 +#define PVA_REG_HSP_INT_INTERNAL_ADDR 0x160304 +#define PVA_REG_HSP_SM0_ADDR 0x170000 +#define PVA_REG_HSP_SM1_ADDR 0x178000 +#define PVA_REG_HSP_SM2_ADDR 0x180000 +#define PVA_REG_HSP_SM3_ADDR 0x188000 +#define PVA_REG_HSP_SM4_ADDR 0x190000 +#define PVA_REG_HSP_SM5_ADDR 0x198000 +#define PVA_REG_HSP_SM6_ADDR 0x1a0000 +#define PVA_REG_HSP_SM7_ADDR 0x1a8000 +#define PVA_REG_HSP_SS0_STATE_ADDR 0x1b0000 +#define PVA_REG_HSP_SS0_SET_ADDR 0x1b0004 +#define PVA_REG_HSP_SS0_CLR_ADDR 0x1b0008 +#define PVA_REG_HSP_SS1_STATE_ADDR 0x1c0000 +#define PVA_REG_HSP_SS1_SET_ADDR 0x1c0004 +#define PVA_REG_HSP_SS1_CLR_ADDR 0x1c0008 +#define PVA_REG_HSP_SS2_STATE_ADDR 0x1d0000 +#define PVA_REG_HSP_SS2_SET_ADDR 0x1d0004 +#define PVA_REG_HSP_SS2_CLR_ADDR 0x1d0008 +#define PVA_REG_HSP_SS3_STATE_ADDR 0x1e0000 +#define PVA_REG_HSP_SS3_SET_ADDR 0x1e0004 +#define PVA_REG_HSP_SS3_CLR_ADDR 0x1e0008 + +/* SEC */ +#define PVA_REG_SEC_ERRSLICE0_MISSIONERR_ENABLE_ADDR 0x20030 +#define PVA_REG_SEC_ERRSLICE1_MISSIONERR_ENABLE_ADDR 0x20060 +#define PVA_REG_SEC_ERRSLICE2_MISSIONERR_ENABLE_ADDR 0x20090 +#define PVA_REG_SEC_ERRSLICE3_MISSIONERR_ENABLE_ADDR 0x200c0 +#define PVA_REG_SEC_ERRSLICE0_LATENTERR_ENABLE_ADDR 0x20040 +#define PVA_REG_SEC_ERRSLICE1_LATENTERR_ENABLE_ADDR 0x20070 +#define PVA_REG_SEC_ERRSLICE2_LATENTERR_ENABLE_ADDR 0x200a0 +#define PVA_REG_SEC_ERRSLICE3_LATENTERR_ENABLE_ADDR 0x200d0 + +/* SEC_LIC_INTR_STATUS */ +#define PVA_REG_SEC_LIC_INTR_H1X_MSB 7 +#define PVA_REG_SEC_LIC_INTR_H1X_LSB 5 +#define PVA_REG_SEC_LIC_INTR_HSP_MSB 4 +#define PVA_REG_SEC_LIC_INTR_HSP_LSB 1 +#define PVA_REG_SEC_LIC_INTR_WDT_MSB 0 +#define PVA_REG_SEC_LIC_INTR_WDT_LSB 0 + +/* CCQ status 2 */ +#define PVA_REG_CCQ_STATUS2_INTR_OVERFLOW_BIT PVA_BIT(28) +#define PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT PVA_BIT(24) +#define PVA_REG_CCQ_STATUS2_INTR_STATUS7_BIT PVA_BIT(20) +#define PVA_REG_CCQ_STATUS2_INTR_ALL_BITS \ + (PVA_REG_CCQ_STATUS2_INTR_OVERFLOW_BIT | \ + PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT | \ + PVA_REG_CCQ_STATUS2_INTR_STATUS7_BIT) +#define PVA_REG_CCQ_STATUS2_NUM_ENTRIES_MSB 4 +#define PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB 0 + +struct pva_kmd_ccq_regspec { + uint32_t status_count; + uint32_t 
status[PVA_CFG_CCQ_STATUS_COUNT]; + uint32_t fifo; +}; + +struct pva_kmd_regspec { + uint32_t sec_lic_intr_enable; + uint32_t sec_lic_intr_status; + uint32_t cfg_r5user_lsegreg; + uint32_t cfg_r5user_usegreg; + uint32_t cfg_priv_ar1_lsegreg; + uint32_t cfg_priv_ar1_usegreg; + uint32_t cfg_priv_ar2_lsegreg; + uint32_t cfg_priv_ar2_usegreg; + uint32_t cfg_priv_ar1_start; + uint32_t cfg_priv_ar1_end; + uint32_t cfg_priv_ar2_start; + uint32_t cfg_priv_ar2_end; + uint32_t cfg_user_sid_base; + uint32_t cfg_priv_sid; + uint32_t cfg_vps_sid; + uint32_t cfg_perf_mon; + + uint32_t cfg_scr_priv_0; + + uint32_t ccq_count; + uint32_t vpu_dbg_instr_reg_offset[PVA_NUM_ENGINES]; + struct pva_kmd_ccq_regspec ccq_regs[PVA_MAX_NUM_CCQ]; +}; + +enum pva_kmd_reg_aperture { + PVA_KMD_APERTURE_PVA_CLUSTER = 0, + PVA_KMD_APERTURE_VPU_DEBUG, + PVA_KMD_APERTURE_COUNT, +}; + +#endif // PVA_KMD_REGS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c new file mode 100644 index 00000000..d3cd9f61 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include "pva_kmd_resource_table.h" +#include "pva_kmd_device.h" +#include "pva_kmd_constants.h" + +static uint32_t get_max_dma_config_size(struct pva_kmd_device *pva) +{ + uint32_t max_num_dyn_slots = PVA_DMA_MAX_NUM_SLOTS; + uint32_t max_num_reloc_infos = + safe_pow2_roundup_u32(max_num_dyn_slots, 2U); + + uint32_t max_dma_cfg_size = + (uint32_t)sizeof(struct pva_dma_config_resource); + + max_dma_cfg_size = safe_addu32( + max_dma_cfg_size, + safe_mulu32(max_num_dyn_slots, + (uint32_t)sizeof(struct pva_fw_dma_slot))); + + max_dma_cfg_size = safe_addu32( + max_dma_cfg_size, + safe_mulu32(max_num_reloc_infos, + (uint32_t)sizeof(struct pva_fw_dma_reloc))); + + max_dma_cfg_size = safe_addu32( + max_dma_cfg_size, + safe_mulu32(pva->hw_consts.n_user_dma_channels, + (uint32_t)sizeof(struct pva_dma_channel))); + + max_dma_cfg_size = safe_addu32( + max_dma_cfg_size, + safe_mulu32(pva->hw_consts.n_dma_descriptors, + (uint32_t)sizeof(struct pva_dma_descriptor))); + + max_dma_cfg_size = safe_addu32(max_dma_cfg_size, + safe_mulu32(pva->hw_consts.n_hwseq_words, + (uint32_t)sizeof(uint32_t))); + + //Must be aligned to 8 to form array + return safe_pow2_roundup_u32(max_dma_cfg_size, + (uint32_t)sizeof(uint64_t)); +} + +enum pva_error +pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table, + struct pva_kmd_device *pva, + uint8_t user_smmu_ctx_id, uint32_t n_entries, + uint32_t max_num_dma_configs) +{ + uint32_t max_dma_config_size = get_max_dma_config_size(pva); + enum pva_error err; + uint64_t size; + + res_table->pva = pva; + res_table->n_entries = n_entries; + res_table->user_smmu_ctx_id = user_smmu_ctx_id; + + size = (uint64_t)safe_mulu32( + n_entries, (uint32_t)sizeof(struct pva_resource_entry)); + res_table->table_mem = pva_kmd_device_memory_alloc_map( + size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + ASSERT(res_table->table_mem != NULL); + + 
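/* The counting semaphore tracks free resource-table slots: each allocation takes one count and each free returns it. */ + 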
pva_kmd_sema_init(&res_table->resource_semaphore, n_entries); + + size = (uint64_t)safe_mulu32(sizeof(struct pva_kmd_resource_record), + n_entries); + res_table->records_mem = pva_kmd_zalloc(size); + + ASSERT(res_table->records_mem != NULL); + + err = pva_kmd_block_allocator_init( + &res_table->resource_record_allocator, res_table->records_mem, + PVA_RESOURCE_ID_BASE, sizeof(struct pva_kmd_resource_record), + n_entries); + ASSERT(err == PVA_SUCCESS); + + size = (uint64_t)safe_mulu32(max_num_dma_configs, max_dma_config_size); + res_table->dma_config_mem = pva_kmd_device_memory_alloc_map( + size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + ASSERT(res_table->dma_config_mem != NULL); + + err = pva_kmd_block_allocator_init(&res_table->dma_config_allocator, + res_table->dma_config_mem->va, 0, + max_dma_config_size, + max_num_dma_configs); + ASSERT(err == PVA_SUCCESS); + + res_table->dma_aux = pva_kmd_zalloc( + safe_mulu32((uint32_t)sizeof(struct pva_kmd_dma_resource_aux), + max_num_dma_configs)); + ASSERT(res_table->dma_aux != NULL); + + return PVA_SUCCESS; +} + +void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table) +{ + pva_kmd_free(res_table->dma_aux); + pva_kmd_block_allocator_deinit(&res_table->dma_config_allocator); + pva_kmd_device_memory_free(res_table->dma_config_mem); + pva_kmd_block_allocator_deinit(&res_table->resource_record_allocator); + pva_kmd_free(res_table->records_mem); + pva_kmd_sema_deinit(&res_table->resource_semaphore); + pva_kmd_device_memory_free(res_table->table_mem); +} + +static struct pva_kmd_resource_record * +pva_kmd_alloc_resource(struct pva_kmd_resource_table *resource_table, + uint32_t *out_resource_id) +{ + enum pva_error err; + struct pva_kmd_resource_record *rec = NULL; + + err = pva_kmd_sema_wait_timeout(&resource_table->resource_semaphore, + PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS); + if (err == PVA_TIMEDOUT) { + pva_kmd_log_err("pva_kmd_alloc_resource Timed out"); + } + + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to wait for resource IDs"); + goto out; + } + + rec = (struct pva_kmd_resource_record *)pva_kmd_alloc_block( + &resource_table->resource_record_allocator, out_resource_id); + ASSERT(rec != NULL); + +out: + return rec; +} + +static void pva_kmd_free_resource(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id) +{ + enum pva_error err; + + err = pva_kmd_free_block(&resource_table->resource_record_allocator, + resource_id); + ASSERT(err == PVA_SUCCESS); + + pva_kmd_sema_post(&resource_table->resource_semaphore); +} + +enum pva_error +pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table, + struct pva_kmd_device_memory *dev_mem, + uint32_t *out_resource_id) +{ + struct pva_kmd_resource_record *rec = + pva_kmd_alloc_resource(resource_table, out_resource_id); + + if (rec == NULL) { + pva_kmd_log_err("No more resource id"); + return PVA_NO_RESOURCE_ID; + } + + if (*out_resource_id > resource_table->curr_max_resource_id) { + resource_table->curr_max_resource_id = *out_resource_id; + } + + rec->type = PVA_RESOURCE_TYPE_DRAM; + rec->dram.mem = dev_mem; + rec->dram.syncpt = true; + rec->ref_count = 1; + + return PVA_SUCCESS; +} + +enum pva_error +pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table, + struct pva_kmd_device_memory *dev_mem, + uint32_t *out_resource_id) +{ + struct pva_kmd_resource_record *rec = + pva_kmd_alloc_resource(resource_table, out_resource_id); + + if (rec == NULL) { + pva_kmd_log_err("No more resource id"); + return PVA_NO_RESOURCE_ID; + } + + 
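/* Track the highest resource ID handed out so pva_kmd_update_fw_resource_table only walks IDs up to that point. */ + 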
if (*out_resource_id > resource_table->curr_max_resource_id) { + resource_table->curr_max_resource_id = *out_resource_id; + } + + rec->type = PVA_RESOURCE_TYPE_DRAM; + rec->dram.mem = dev_mem; + rec->dram.syncpt = false; + rec->ref_count = 1; + + return PVA_SUCCESS; +} + +static struct pva_resource_entry * +get_fw_resource(struct pva_kmd_resource_table *res_table, uint32_t resource_id) +{ + struct pva_resource_entry *entries = res_table->table_mem->va; + uint32_t index; + + ASSERT(resource_id >= PVA_RESOURCE_ID_BASE); + index = safe_subu32(resource_id, PVA_RESOURCE_ID_BASE); + return &entries[index]; +} + +void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table) +{ + uint32_t id; + + for (id = PVA_RESOURCE_ID_BASE; id <= res_table->curr_max_resource_id; + id++) { + struct pva_resource_entry *entry = + get_fw_resource(res_table, id); + struct pva_kmd_resource_record *rec = pva_kmd_get_block( + &res_table->resource_record_allocator, id); + if (rec == NULL) { + continue; + } + + entry->type = rec->type; + switch (rec->type) { + case PVA_RESOURCE_TYPE_DRAM: + entry->addr_lo = iova_lo(rec->dram.mem->iova); + entry->addr_hi = iova_hi(rec->dram.mem->iova); + entry->size_lo = iova_lo(rec->dram.mem->size); + entry->size_hi = iova_hi(rec->dram.mem->size); + entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx; + break; + case PVA_RESOURCE_TYPE_INVALID: + break; + default: + pva_kmd_log_err("Unsupported resource type"); + pva_kmd_fault(); + } + } +} + +struct pva_kmd_resource_record * +pva_kmd_use_resource(struct pva_kmd_resource_table *res_table, + uint32_t resource_id) +{ + struct pva_kmd_resource_record *rec = pva_kmd_get_block( + &res_table->resource_record_allocator, resource_id); + + if (rec == NULL) { + return NULL; + } + + rec->ref_count = safe_addu32(rec->ref_count, 1U); + return rec; +} + +struct pva_kmd_resource_record * +pva_kmd_peek_resource(struct pva_kmd_resource_table *res_table, + uint32_t resource_id) +{ + struct pva_kmd_resource_record *rec = pva_kmd_get_block( + &res_table->resource_record_allocator, resource_id); + + return rec; +} + +void pva_kmd_drop_resource(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id) +{ + struct pva_kmd_resource_record *rec; + + rec = pva_kmd_get_block(&resource_table->resource_record_allocator, + resource_id); + + ASSERT(rec != NULL); + + rec->ref_count = safe_subu32(rec->ref_count, 1U); + if (rec->ref_count == 0) { + pva_dbg_printf("Dropping resource %u of type %u\n", resource_id, + rec->type); + switch (rec->type) { + case PVA_RESOURCE_TYPE_DRAM: + if (rec->dram.syncpt != true) { + pva_kmd_device_memory_free(rec->dram.mem); + } + break; + case PVA_RESOURCE_TYPE_EXEC_BIN: + pva_kmd_unload_executable(&rec->vpu_bin.symbol_table, + rec->vpu_bin.metainfo_mem, + rec->vpu_bin.sections_mem); + break; + case PVA_RESOURCE_TYPE_DMA_CONFIG: { + struct pva_kmd_dma_resource_aux *dma_aux; + dma_aux = + &resource_table + ->dma_aux[rec->dma_config.block_index]; + pva_kmd_unload_dma_config(dma_aux); + pva_kmd_free_block( + &resource_table->dma_config_allocator, + rec->dma_config.block_index); + break; + } + + default: + pva_kmd_log_err("Unsupported resource type"); + pva_kmd_fault(); + } + + pva_kmd_free_resource(resource_table, resource_id); + } +} + +enum pva_error +pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table, + void *executable, uint32_t executable_size, + uint32_t *out_resource_id) +{ + uint32_t res_id; + struct pva_kmd_resource_record *rec = + pva_kmd_alloc_resource(resource_table, 
&res_id); + enum pva_error err; + struct pva_kmd_vpu_bin_resource *vpu_bin; + + if (rec == NULL) { + err = PVA_NO_RESOURCE_ID; + goto err_out; + } + + vpu_bin = &rec->vpu_bin; + err = pva_kmd_load_executable( + executable, executable_size, resource_table->pva, + resource_table->user_smmu_ctx_id, &vpu_bin->symbol_table, + &vpu_bin->metainfo_mem, &vpu_bin->sections_mem); + if (err != PVA_SUCCESS) { + goto free_block; + } + + if (res_id > resource_table->curr_max_resource_id) { + resource_table->curr_max_resource_id = res_id; + } + + rec->type = PVA_RESOURCE_TYPE_EXEC_BIN; + rec->ref_count = 1; + *out_resource_id = res_id; + + return PVA_SUCCESS; +free_block: + pva_kmd_free_resource(resource_table, res_id); +err_out: + return err; +} + +enum pva_error +pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id, + struct pva_resource_entry *entry) +{ + struct pva_kmd_resource_record *rec = + pva_kmd_use_resource(resource_table, resource_id); + if (rec == NULL) { + return PVA_NO_RESOURCE_ID; + } + + switch (rec->type) { + case PVA_RESOURCE_TYPE_DRAM: + entry->type = rec->type; + entry->addr_lo = iova_lo(rec->dram.mem->iova); + entry->addr_hi = iova_hi(rec->dram.mem->iova); + entry->size_lo = iova_lo(rec->dram.mem->size); + entry->size_hi = iova_hi(rec->dram.mem->size); + entry->smmu_context_id = rec->dram.mem->smmu_ctx_idx; + break; + case PVA_RESOURCE_TYPE_EXEC_BIN: + entry->type = rec->type; + entry->addr_lo = iova_lo(rec->vpu_bin.metainfo_mem->iova); + entry->addr_hi = iova_hi(rec->vpu_bin.metainfo_mem->iova); + entry->size_lo = iova_lo(rec->vpu_bin.metainfo_mem->size); + entry->size_hi = iova_hi(rec->vpu_bin.metainfo_mem->size); + entry->smmu_context_id = + rec->vpu_bin.metainfo_mem->smmu_ctx_idx; + break; + case PVA_RESOURCE_TYPE_DMA_CONFIG: + entry->type = rec->type; + entry->addr_lo = iova_lo(rec->dma_config.iova_addr); + entry->addr_hi = iova_hi(rec->dma_config.iova_addr); + entry->size_lo = iova_lo(rec->dma_config.size); + entry->size_hi = iova_hi(rec->dma_config.size); + entry->smmu_context_id = PVA_R5_SMMU_CONTEXT_ID; + break; + default: + pva_kmd_log_err("Unsupported resource type"); + pva_kmd_fault(); + } + + pva_kmd_drop_resource(resource_table, resource_id); + return PVA_SUCCESS; +} + +enum pva_error pva_kmd_add_dma_config_resource( + struct pva_kmd_resource_table *resource_table, void *dma_config_payload, + uint32_t dma_config_size, uint32_t *out_resource_id) +{ + enum pva_error err = PVA_SUCCESS; + uint32_t block_idx, fw_fetch_size; + void *fw_dma_cfg; + struct pva_kmd_dma_resource_aux *dma_aux; + struct pva_kmd_resource_record *rec; + uint32_t res_id; + + fw_dma_cfg = pva_kmd_zalloc_block(&resource_table->dma_config_allocator, + &block_idx); + if (fw_dma_cfg == NULL) { + err = PVA_NOMEM; + goto err_out; + } + + // Must satisfy alignment requirement for converting to struct + // pva_dma_config_resource* + ASSERT(((uintptr_t)fw_dma_cfg) % sizeof(uint64_t) == 0); + + dma_aux = &resource_table->dma_aux[block_idx]; + + err = pva_kmd_load_dma_config(resource_table, dma_config_payload, + dma_config_size, dma_aux, fw_dma_cfg, + &fw_fetch_size); + if (err != PVA_SUCCESS) { + goto free_block; + } + + rec = pva_kmd_alloc_resource(resource_table, &res_id); + if (rec == NULL) { + err = PVA_NO_RESOURCE_ID; + goto unload_dma; + } + + if (res_id > resource_table->curr_max_resource_id) { + resource_table->curr_max_resource_id = res_id; + } + + rec->type = PVA_RESOURCE_TYPE_DMA_CONFIG; + rec->ref_count = 1; + rec->dma_config.block_index = block_idx; + 
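/* FW-visible address: base IOVA of the shared DMA-config pool plus this block's byte offset. */ + 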
rec->dma_config.iova_addr = safe_addu64( + resource_table->dma_config_mem->iova, + (uint64_t)safe_mulu32( + block_idx, + resource_table->dma_config_allocator.block_size)); + rec->dma_config.size = fw_fetch_size; + + *out_resource_id = res_id; + + return PVA_SUCCESS; +unload_dma: + pva_kmd_unload_dma_config(dma_aux); +free_block: + pva_kmd_free_block(&resource_table->dma_config_allocator, block_idx); +err_out: + return err; +} + +void pva_kmd_verify_all_resources_free( + struct pva_kmd_resource_table *resource_table) +{ + enum pva_error err; + for (uint32_t i = 0; i < resource_table->n_entries; i++) { + err = pva_kmd_sema_wait_timeout( + &resource_table->resource_semaphore, + PVA_KMD_TIMEOUT_RESOURCE_SEMA_MS); + ASSERT(err == PVA_SUCCESS); + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h new file mode 100644 index 00000000..26b0b22b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_RESOURCE_TABLE_H +#define PVA_KMD_RESOURCE_TABLE_H +#include "pva_fw.h" +#include "pva_bit.h" +#include "pva_resource.h" +#include "pva_kmd_block_allocator.h" +#include "pva_kmd.h" +#include "pva_kmd_utils.h" +#include "pva_kmd_executable.h" +#include "pva_constants.h" +#include "pva_kmd_dma_cfg.h" +#include "pva_kmd_mutex.h" +#include "pva_kmd_thread_sema.h" + +struct pva_kmd_device; + +struct pva_kmd_dram_resource { + struct pva_kmd_device_memory *mem; + bool syncpt; +}; + +struct pva_kmd_vpu_bin_resource { + struct pva_kmd_device_memory *metainfo_mem; + struct pva_kmd_device_memory *sections_mem; + struct pva_kmd_exec_symbol_table symbol_table; +}; + +struct pva_kmd_dma_config_resource { + uint32_t block_index; + uint64_t size; + uint64_t iova_addr; +}; + +struct pva_kmd_resource_record { + /** + * Possible types: + * PVA_RESOURCE_TYPE_DRAM + * PVA_RESOURCE_TYPE_EXEC_BIN + * PVA_RESOURCE_TYPE_DMA_CONFIG + */ + uint8_t type; + uint32_t ref_count; + union { + struct pva_kmd_dram_resource dram; + struct pva_kmd_vpu_bin_resource vpu_bin; + struct pva_kmd_dma_config_resource dma_config; + }; +}; + +/** + * + */ +struct pva_kmd_resource_table { + /** @brief User smmu context ID. + * + * - DRAM memory, VPU data/text sections will be mapped to this space. + * - VPU metadata, DMA configurations will always be mapped to R5 SMMU + * context. */ + uint8_t user_smmu_ctx_id; + uint32_t n_entries; + /** Maximum resource ID we have seen so far */ + uint32_t curr_max_resource_id; + + /** Semaphore to keep track of resources in use*/ + pva_kmd_sema_t resource_semaphore; + + /** Memory for resource table entries, in R5 segment */ + struct pva_kmd_device_memory *table_mem; + + /** Memory for fw dma configs, in DMA segment */ + struct pva_kmd_device_memory *dma_config_mem; + struct pva_kmd_block_allocator dma_config_allocator; + + /** Memory for tracking resources used by DMA configuration. 
Single + * allocation shared by all DMA configs */ + struct pva_kmd_dma_resource_aux *dma_aux; + + /** Pointer to syncpt_allocator in pva_kmd_device created during kmd boot */ + struct pva_kmd_block_allocator *syncpt_allocator; + + /** Memory for resource records */ + void *records_mem; + struct pva_kmd_block_allocator resource_record_allocator; + struct pva_kmd_device *pva; +}; + +enum pva_error +pva_kmd_resource_table_init(struct pva_kmd_resource_table *res_table, + struct pva_kmd_device *pva, + uint8_t user_smmu_ctx_id, uint32_t n_entries, + uint32_t max_num_dma_configs); +void pva_kmd_resource_table_deinit(struct pva_kmd_resource_table *res_table); + +/** KMD only writes to FW resource table during init time. Once the address of + * the resource table is sent to FW, all updates should be done through commands. + */ +void pva_kmd_update_fw_resource_table(struct pva_kmd_resource_table *res_table); + +enum pva_error +pva_kmd_add_syncpt_resource(struct pva_kmd_resource_table *resource_table, + struct pva_kmd_device_memory *dev_mem, + uint32_t *out_resource_id); + +enum pva_error +pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table, + struct pva_kmd_device_memory *memory, + uint32_t *out_resource_id); + +enum pva_error +pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table, + void *executable, uint32_t executable_size, + uint32_t *out_resource_id); + +enum pva_error +pva_kmd_add_dma_config_resource(struct pva_kmd_resource_table *resource_table, + void *dma_config, uint32_t dma_config_size, + uint32_t *out_resource_id); + +/** + * Increment reference count of the resources + * + * TODO: make use and drop thread safe. + * */ +struct pva_kmd_resource_record * +pva_kmd_use_resource(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id); + +struct pva_kmd_resource_record * +pva_kmd_peek_resource(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id); + +void pva_kmd_drop_resource(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id); + +enum pva_error +pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table, + uint32_t resource_id, + struct pva_resource_entry *entry); + +void pva_kmd_verify_all_resources_free( + struct pva_kmd_resource_table *resource_table); + +#endif // PVA_KMD_RESOURCE_TABLE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c new file mode 100644 index 00000000..b61003f5 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved. 
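For illustration, a minimal sketch of how a KMD-internal caller might use the resource-table API declared above to publish a DRAM buffer to firmware. The wrapper name example_register_dram and its error-handling policy are hypothetical; only the pva_kmd_* calls and types come from this header, and it assumes pva_kmd_drop_resource() releases the reference taken by the add call.

#include "pva_kmd_resource_table.h"

static enum pva_error
example_register_dram(struct pva_kmd_resource_table *tbl,
		      struct pva_kmd_device_memory *mem,
		      struct pva_resource_entry *entry_out)
{
	uint32_t res_id;
	enum pva_error err;

	/* Creates a PVA_RESOURCE_TYPE_DRAM record holding an initial reference */
	err = pva_kmd_add_dram_buffer_resource(tbl, mem, &res_id);
	if (err != PVA_SUCCESS)
		return err;

	/* Fills the firmware-visible pva_resource_entry (IOVA, size, SMMU
	 * context); the helper takes and drops its own reference internally. */
	err = pva_kmd_make_resource_entry(tbl, res_id, entry_out);
	if (err != PVA_SUCCESS)
		pva_kmd_drop_resource(tbl, res_id); /* give back the initial reference */

	return err;
}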
+ */ + +#include "pva_kmd_sha256.h" + +#define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) +#define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b)))) + +#define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define SHA_EP0(x) (ROTRIGHT(x, 2) ^ ROTRIGHT(x, 13) ^ ROTRIGHT(x, 22)) +#define SHA_EP1(x) (ROTRIGHT(x, 6) ^ ROTRIGHT(x, 11) ^ ROTRIGHT(x, 25)) +#define SIG0(x) (ROTRIGHT(x, 7) ^ ROTRIGHT(x, 18) ^ ((x) >> 3)) +#define SIG1(x) (ROTRIGHT(x, 17) ^ ROTRIGHT(x, 19) ^ ((x) >> 10)) + +#define SWAP32(x) __builtin_bswap32(x) +#define SWAP64(x) __builtin_bswap64(x) + +/** + * This variable is used internally by \ref sha256_transform() + */ +static const uint32_t k[64] = { + U32(0x428a2f98U), U32(0x71374491U), U32(0xb5c0fbcfU), U32(0xe9b5dba5U), + U32(0x3956c25bU), U32(0x59f111f1U), U32(0x923f82a4U), U32(0xab1c5ed5U), + U32(0xd807aa98U), U32(0x12835b01U), U32(0x243185beU), U32(0x550c7dc3U), + U32(0x72be5d74U), U32(0x80deb1feU), U32(0x9bdc06a7U), U32(0xc19bf174U), + U32(0xe49b69c1U), U32(0xefbe4786U), U32(0x0fc19dc6U), U32(0x240ca1ccU), + U32(0x2de92c6fU), U32(0x4a7484aaU), U32(0x5cb0a9dcU), U32(0x76f988daU), + U32(0x983e5152U), U32(0xa831c66dU), U32(0xb00327c8U), U32(0xbf597fc7U), + U32(0xc6e00bf3U), U32(0xd5a79147U), U32(0x06ca6351U), U32(0x14292967U), + U32(0x27b70a85U), U32(0x2e1b2138U), U32(0x4d2c6dfcU), U32(0x53380d13U), + U32(0x650a7354U), U32(0x766a0abbU), U32(0x81c2c92eU), U32(0x92722c85U), + U32(0xa2bfe8a1U), U32(0xa81a664bU), U32(0xc24b8b70U), U32(0xc76c51a3U), + U32(0xd192e819U), U32(0xd6990624U), U32(0xf40e3585U), U32(0x106aa070U), + U32(0x19a4c116U), U32(0x1e376c08U), U32(0x2748774cU), U32(0x34b0bcb5U), + U32(0x391c0cb3U), U32(0x4ed8aa4aU), U32(0x5b9cca4fU), U32(0x682e6ff3U), + U32(0x748f82eeU), U32(0x78a5636fU), U32(0x84c87814U), U32(0x8cc70208U), + U32(0x90befffaU), U32(0xa4506cebU), U32(0xbef9a3f7U), U32(0xc67178f2U) +}; + +/** + * \brief + * This function is a helper function used by \ref pva_sha256_update + * to hash 512-bit blocks and forms the core of the algorithm. + * Use \ref sha256_init(), \ref pva_sha256_update(), and + * \ref sha256_finalize() instead of calling sha256_transform() directly. + * \param[in] ctx pointer of struct sha256_ctx context. + * \param[in] data_in pointer to the data block to be hashed. 
+ * \return Void + */ +static void sha256_transform(struct sha256_ctx *ctx, const void *data_in) +{ + uint32_t a, b, c, d, e, f, g, h, t1, t2, m[64]; + const uint32_t *const data = data_in; + size_t i; + + for (i = 0; i < U32(16); i++) { + m[i] = SWAP32(data[i]); + } + for (i = 0; i < U32(64) - U32(16); ++i) { + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] + + SIG0(m[U32(1) + i]) + m[i]; + } + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + + for (i = 0; i < U32(64); ++i) { + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i]; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + t2 = SHA_EP0(a) + MAJ(a, b, c); + h = g; + g = f; + f = e; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + e = d + t1; + d = c; + c = b; + b = a; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + a = t1 + t2; + } + + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[0] += a; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[1] += b; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[2] += c; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[3] += d; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[4] += e; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[5] += f; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[6] += g; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[7] += h; +} + +void sha256_init(struct sha256_ctx *ctx) +{ + ctx->bitlen = 0; + ctx->state[0] = U32(0x6a09e667); + ctx->state[1] = U32(0xbb67ae85); + ctx->state[2] = U32(0x3c6ef372); + ctx->state[3] = U32(0xa54ff53a); + ctx->state[4] = U32(0x510e527f); + ctx->state[5] = U32(0x9b05688c); + ctx->state[6] = U32(0x1f83d9ab); + ctx->state[7] = U32(0x5be0cd19); +} + +void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len) +{ + uint32_t i; + + for (i = 0; i < len; i += U32(64)) { + ctx->bitlen &= U32(0xffffffff); + sha256_transform(ctx, ((const uint8_t *)data) + i); + ctx->bitlen += U32(512); + } +} + +void sha256_copy(const struct sha256_ctx *ctx_in, struct sha256_ctx *ctx_out) +{ + *ctx_out = *ctx_in; +} + +void sha256_finalize(struct sha256_ctx *ctx, const void *input, + size_t input_size, uint32_t out[8]) +{ + uint8_t data[64]; + void *p = data; + uint32_t t; + + input_size &= U32(0xffffffff); + ctx->bitlen &= U32(0xffffffff); + + /* the false of this condition is illegal for this API agreement */ + /* this check is here only for Coverity INT30-C */ + ctx->bitlen += input_size * U32(8); + (void)memcpy(p, input, input_size); + data[input_size] = 0x80; + + if (input_size < U32(56)) { /* can we fit an 8-byte counter? */ + /* Pad whatever data is left in the buffer. */ + (void)memset(data + input_size + U32(1), 0, + U32(56) - input_size - U32(1)); + } else { /* Go into another block. 
We are here only for message hashing */ + if (input_size + U32(1) < U32(64)) { + (void)memset(data + input_size + U32(1), 0, + U32(64) - input_size - U32(1)); + } + sha256_transform(ctx, data); + (void)memset(data, 0, 56); + } + + t = ctx->bitlen_low; + + *(uint32_t *)(void *)(data + 56) = 0; + *(uint32_t *)(void *)(data + 60) = SWAP32(t); + + sha256_transform(ctx, data); + + out[0] = SWAP32(ctx->state[0]); + out[1] = SWAP32(ctx->state[1]); + out[2] = SWAP32(ctx->state[2]); + out[3] = SWAP32(ctx->state[3]); + out[4] = SWAP32(ctx->state[4]); + out[5] = SWAP32(ctx->state[5]); + out[6] = SWAP32(ctx->state[6]); + out[7] = SWAP32(ctx->state[7]); +} \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.h new file mode 100644 index 00000000..da015a84 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_sha256.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved. + */ + +#ifndef PVA_KMD_SHA256_H +#define PVA_KMD_SHA256_H + +#include "pva_api_types.h" +#define U32(x) ((uint32_t)(x)) + +struct sha256_ctx { + /* + * On bitlen: + * + * While we don't exceed 2^32 bit (2^29 byte) length for the input buffer, + * size_t is more efficient at least on RISC-V. This particular + * structure is needed to make Coverity happy. + */ + union { + size_t bitlen; + uint32_t bitlen_low; + }; + uint32_t state[8]; +}; + +/** + * Initializes struct sha256_ctx + * + * \param[in] ctx pointer of struct sha256_ctx context + * + * \return void + */ +void sha256_init(struct sha256_ctx *ctx); + +/** + * \brief + * Hash full blocks, in units of 64 bytes + * can be called repeatedly with chunks of the message + * to be hashed (len bytes at data). + * + * \param[in] ctx pointer of struct sha256_ctx context + * \param[in] data pointer to the data block to be hashed + * \param[in] len length (in units of 64 bytes) of the data to be hashed. + * + * \return void + */ +void sha256_update(struct sha256_ctx *ctx, const void *data, size_t len); + +/** + * \brief + * Finalize the hash and keep the calcualted hash in out. + * Required: input_size < 64. Call pva_sha256_update() first otherwise. + * + * \param[in] ctx pointer of struct sha256_ctx context + * \param[in] input pointer to the data block + * (left over from \ref pva_sha256_update) to be hashed + * \param[in] input_size size of the data block to hashed + * (left over from \ref pva_sha256_update to be hashed) + * \param[out] out places the calcuated sha256 key in out. + * + * \return void + */ +void sha256_finalize(struct sha256_ctx *ctx, const void *input, + size_t input_size, uint32_t out[8]); + +/** + * \brief + * copy state information to ctx_out from ctx_in + * \param[in] ctx_in input struct sha256_ctx + * \param[out] ctx_out output struct sha256_ctx + * \return void + */ +void sha256_copy(const struct sha256_ctx *ctx_in, struct sha256_ctx *ctx_out); + +#endif /* PVA_SHA256_H */ diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c new file mode 100644 index 00000000..e526e6bb --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. 
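As a usage illustration of the SHA-256 API above, a minimal sketch that hashes an arbitrary buffer: whole 64-byte blocks go through sha256_update(), and the sub-block tail, as the header requires, goes to sha256_finalize(). The wrapper name example_sha256 is hypothetical.

#include "pva_kmd_sha256.h"

static void example_sha256(const uint8_t *buf, size_t len, uint32_t digest[8])
{
	struct sha256_ctx ctx;
	size_t full = (len / 64U) * 64U;	/* bytes covered by whole blocks */

	sha256_init(&ctx);
	if (full > 0U)
		sha256_update(&ctx, buf, full);
	/* Tail is < 64 bytes, so finalize() can append padding and the length */
	sha256_finalize(&ctx, buf + full, len - full, digest);
}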
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_device.h" +#include "pva_fw_address_map.h" +#include "pva_fw_hyp.h" +#include "pva_kmd_thread_sema.h" +#include "pva_kmd_constants.h" +#include "pva_kmd_silicon_isr.h" +#include "pva_kmd_silicon_boot.h" +#include "pva_kmd_shim_silicon.h" + +static inline void pva_kmd_set_sema(struct pva_kmd_device *pva, + uint32_t sema_idx, uint32_t val) +{ + uint32_t gap = PVA_REG_HSP_SS1_SET_ADDR - PVA_REG_HSP_SS0_SET_ADDR; + gap = safe_mulu32(gap, sema_idx); + pva_kmd_write(pva, safe_addu32(PVA_REG_HSP_SS0_SET_ADDR, gap), val); +} + +static void init_fw_print_buffer(struct pva_kmd_fw_print_buffer *print_buffer, + void *debug_buffer_va) +{ + print_buffer->buffer_info = pva_offset_pointer( + debug_buffer_va, + FW_TRACE_BUFFER_SIZE + FW_CODE_COVERAGE_BUFFER_SIZE); + print_buffer->size = + FW_DEBUG_LOG_BUFFER_SIZE - sizeof(*print_buffer->buffer_info); + print_buffer->head = 0; + print_buffer->content = pva_offset_pointer( + print_buffer->buffer_info, sizeof(*print_buffer->buffer_info)); +} + +static void disable_sec_mission_error_reporting(struct pva_kmd_device *pva) +{ + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE0_MISSIONERR_ENABLE_ADDR, 0U); + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE1_MISSIONERR_ENABLE_ADDR, 0U); + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE2_MISSIONERR_ENABLE_ADDR, 0U); + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE3_MISSIONERR_ENABLE_ADDR, 0U); +} + +static void disable_sec_latent_error_reporting(struct pva_kmd_device *pva) +{ + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE0_LATENTERR_ENABLE_ADDR, 0U); + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE1_LATENTERR_ENABLE_ADDR, 0U); + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE2_LATENTERR_ENABLE_ADDR, 0U); + pva_kmd_write(pva, PVA_REG_SEC_ERRSLICE3_LATENTERR_ENABLE_ADDR, 0U); +} + +void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva) +{ + uint64_t seg_reg_value; + /* EVP */ + pva_kmd_write(pva, PVA_REG_EVP_RESET_ADDR, EVP_RESET_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_UNDEF_ADDR, + EVP_UNDEFINED_INSTRUCTION_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_SWI_ADDR, EVP_SVC_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_PREFETCH_ABORT_ADDR, + EVP_PREFETCH_ABORT_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_DATA_ABORT_ADDR, EVP_DATA_ABORT_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_RSVD_ADDR, EVP_RESERVED_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_IRQ_ADDR, EVP_IRQ_VECTOR); + pva_kmd_write(pva, PVA_REG_EVP_FIQ_ADDR, EVP_FIQ_VECTOR); + /* R5 regions are defined as: + * - PRIV1 region for firmware code and data. + * - PRIV2 region for debug printf data. + * - Remaining region for resource table, queues, etc. + */ + pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_start, + FW_CODE_DATA_START_ADDR); + pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_end, + FW_CODE_DATA_END_ADDR); + pva_kmd_write(pva, pva->regspec.cfg_priv_ar2_start, + FW_DEBUG_DATA_START_ADDR); + pva_kmd_write(pva, pva->regspec.cfg_priv_ar2_end, + FW_DEBUG_DATA_END_ADDR); + /* Firmware expects R5 virtual address FW_CODE_DATA_START_ADDR to be + * mapped to the beginning of firmware binary. Therefore, we adjust + * segment registers accordingly + * + * */ + if (pva->load_from_gsc) { + if (pva->is_hv_mode) { + /* Loading from GSC with HV (i.e AV+L or AV+Q case). 
+ * This will be trapped by HV + */ + pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg, + 0xFFFFFFFFU); + pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg, + 0xFFFFFFFFU); + } else { + /* Loading from GSC without HV i.e L4T case. + * TODO: Program Segment regsites using the GSC Careveout + * fetched from DT file. Till then, ASSERT here. + */ + ASSERT(false); + } + } else { + /* Loading from file. + * In HV case, traps should be bypassed in HV + */ + seg_reg_value = + pva->fw_bin_mem->iova - + FW_CODE_DATA_START_ADDR; /* underflow is totally OK */ + pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg, + iova_lo(seg_reg_value)); + pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg, + iova_hi(seg_reg_value)); + } +} + +void pva_kmd_config_scr_regs(struct pva_kmd_device *pva) +{ + pva_kmd_write(pva, PVA_REG_EVP_SCR_ADDR, PVA_EVP_SCR_VAL); + pva_kmd_write(pva, PVA_CFG_SCR_STATUS_CNTL, PVA_STATUS_CTL_SCR_VAL); + pva_kmd_write(pva, PVA_CFG_SCR_PRIV, PVA_PRIV_SCR_VAL); + pva_kmd_write(pva, PVA_CFG_SCR_CCQ_CNTL, PVA_CCQ_SCR_VAL); +} + +void pva_kmd_config_sid(struct pva_kmd_device *pva) +{ + uint32_t addr; + uint32_t i; + uint32_t offset; + uint8_t priv1_sid; + uint8_t priv_sid; + priv_sid = pva->stream_ids[PVA_R5_SMMU_CONTEXT_ID] & 0xFF; + priv1_sid = pva->stream_ids[pva->r5_image_smmu_context_id] & 0xFF; + /* Priv SIDs */ + if (pva->load_from_gsc) { + pva_kmd_write(pva, pva->regspec.cfg_priv_sid, + PVA_INSERT(priv_sid, 7, 0) | + PVA_INSERT(priv1_sid, 15, 8) | + PVA_INSERT(priv_sid, 23, 16)); + } else { + pva_kmd_write(pva, pva->regspec.cfg_priv_sid, + PVA_INSERT(priv_sid, 7, 0) | + PVA_INSERT(priv_sid, 15, 8) | + PVA_INSERT(priv_sid, 23, 16)); + } + /* VPS SIDs */ + if ((pva->hw_consts.hw_gen == PVA_HW_GEN3) && pva->load_from_gsc) { + pva_kmd_write(pva, pva->regspec.cfg_vps_sid, + PVA_INSERT(priv1_sid, 7, 0) | + PVA_INSERT(priv1_sid, 15, 8)); + } else { + pva_kmd_write(pva, pva->regspec.cfg_vps_sid, + PVA_INSERT(priv_sid, 7, 0) | + PVA_INSERT(priv_sid, 15, 8)); + } + /* User SIDs */ + offset = 0; + for (i = 1; i < pva->hw_consts.n_smmu_contexts - 1; i++) { + addr = safe_addu32(pva->regspec.cfg_user_sid_base, offset); + pva_kmd_write(pva, addr, pva->stream_ids[i]); + offset = safe_addu32(offset, 4U); + } +} + +enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva) +{ + uint64_t seg_reg_value; + uint32_t debug_data_size; + uint32_t boot_sema = 0; + enum pva_error err = PVA_SUCCESS; + + /* Load firmware */ + if (!pva->load_from_gsc) { + err = pva_kmd_read_fw_bin(pva); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to read firmware from filesystem"); + goto out; + } + } + + debug_data_size = (uint32_t)safe_pow2_roundup_u32( + FW_DEBUG_DATA_TOTAL_SIZE, SIZE_4KB); + pva->fw_debug_mem = pva_kmd_device_memory_alloc_map( + debug_data_size, pva, PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + if (pva->fw_debug_mem == NULL) { + err = PVA_NOMEM; + goto free_fw_mem; + } + init_fw_print_buffer(&pva->fw_print_buffer, pva->fw_debug_mem->va); + + /* Program SCRs */ + pva_kmd_write(pva, PVA_SEC_SCR_SECEXT_INTR_EVENT, + PVA_SEC_SCR_SECEXT_INTR_EVENT_VAL); + pva_kmd_write(pva, PVA_PROC_SCR_PROC, PVA_PROC_SCR_PROC_VAL); + + pva_kmd_config_evp_seg_scr_regs(pva); + + /* Write IOVA address of debug buffer to mailbox and FW will program + * PRIV2 segment register properly such that the debug buffer is located + * at R5 virtual address FW_DEBUG_DATA_START_ADDR */ + seg_reg_value = pva->fw_debug_mem->iova; + + /* When GSC is enabled, KMD cannot write directly to segment registers, + * therefore we write 
to mailbox registers and FW will program by + * itself. + * pva_kmd_writel(pva, pva->regspec.cfg_priv_ar2_lsegreg, + * iova_lo(seg_reg_value)); + * pva_kmd_writel(pva, pva->regspec.cfg_priv_ar2_usegreg, + * iova_hi(seg_reg_value)); + */ + pva_kmd_write_mailbox(pva, PVA_MBOXID_PRIV2SEG_L, + iova_lo(seg_reg_value)); + pva_kmd_write_mailbox(pva, PVA_MBOXID_PRIV2SEG_H, + iova_hi(seg_reg_value)); + + /* Write shared memory allocation start address to mailbox and FW will + * program user segment register accordingly so that virtual address + * PVA_SHARED_MEMORY_START will point to the allocation start address. + * + * We deliberately also choose PVA_SHARED_MEMORY_START as the allocation + * start address so that the net result is that user segment register + * will be programmed to 0. + */ + seg_reg_value = FW_SHARED_MEMORY_START; + pva_kmd_write_mailbox(pva, PVA_MBOXID_USERSEG_L, + iova_lo(seg_reg_value)); + pva_kmd_write_mailbox(pva, PVA_MBOXID_USERSEG_H, + iova_hi(seg_reg_value)); + + /* Boot parameters */ + if (pva->bl_sector_pack_format == PVA_BL_XBAR_RAW) { + boot_sema = PVA_BOOT_SEMA_USE_XBAR_RAW; + } + pva_kmd_set_sema(pva, PVA_BOOT_SEMA, boot_sema); + + pva_kmd_write(pva, PVA_REG_HSP_SS2_SET_ADDR, + pva_kmd_get_syncpt_ro_offset(pva)); + pva_kmd_write(pva, PVA_REG_HSP_SS3_SET_ADDR, + pva_kmd_get_syncpt_rw_offset(pva)); + + pva_kmd_config_sid_regs(pva); + + /* Enable LIC INTR line for HSP1 and WDT */ + pva_kmd_write(pva, pva->regspec.sec_lic_intr_enable, + PVA_BIT(0) /*Watchdog*/ + | PVA_INSERT(0x1, 4, 1) /* HSP1 */ + | PVA_INSERT(0x7, 7, 5) /* All H1X errors */); + + /* Bind interrupts */ + err = pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_SEC_LIC, + pva_kmd_hyp_isr, pva); + if (err != PVA_SUCCESS) { + goto free_fw_debug_mem; + } + err = pva_kmd_bind_intr_handler(pva, PVA_KMD_INTR_LINE_CCQ0, + pva_kmd_isr, pva); + if (err != PVA_SUCCESS) { + goto free_sec_lic; + } + + /* Take R5 out of reset */ + pva_kmd_write(pva, PVA_REG_PROC_CPUHALT_ADDR, 0x1); + + /* Wait until fw boots */ + err = pva_kmd_sema_wait_timeout(&pva->fw_boot_sema, + PVA_KMD_FW_BOOT_TIMEOUT_MS); + + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Waiting for FW boot timed out."); + goto free_ccq0; + } + + return err; + +free_ccq0: + pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_CCQ0); +free_sec_lic: + pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); +free_fw_debug_mem: + pva_kmd_drain_fw_print(&pva->fw_print_buffer); + pva_kmd_device_memory_free(pva->fw_debug_mem); +free_fw_mem: + if (!pva->load_from_gsc) { + pva_kmd_device_memory_free(pva->fw_bin_mem); + } +out: + return err; +} + +void pva_kmd_deinit_fw(struct pva_kmd_device *pva) +{ + pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_CCQ0); + pva_kmd_free_intr(pva, PVA_KMD_INTR_LINE_SEC_LIC); + pva_kmd_drain_fw_print(&pva->fw_print_buffer); + + /* + * Before powering off PVA, disable SEC error reporting. + * While powering off, PVA might generate (unexplained) error interrupts + * This causes HSM to read some PVA SEC registers. However, since PVA might + * already be powergated by this time, access to PVA SEC registers from HSM + * fails. This was discussed in Bug 3785498. 
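To make the segment arithmetic in the boot path above concrete, a small sketch restating the rule used for the PRIV1 window in pva_kmd_config_evp_seg_regs(); the function name and the example addresses in the comment are hypothetical.

static inline uint64_t example_priv1_seg_value(uint64_t fw_code_iova)
{
	/* R5 VA FW_CODE_DATA_START_ADDR + segment value == fw_code_iova, so the
	 * firmware image appears at its linked address. Unsigned wrap-around is
	 * intentional, exactly as in the caller above. With made-up numbers, an
	 * IOVA of 0x840000000 and a window base of 0x70000000 give a segment
	 * value of 0x7d0000000, split into registers via iova_lo()/iova_hi(). */
	return fw_code_iova - FW_CODE_DATA_START_ADDR;
}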
+ * + * Note: we do not explicity enable these errors during power on since + * 'enable' is their reset value + */ + disable_sec_mission_error_reporting(pva); + disable_sec_latent_error_reporting(pva); + + pva_kmd_device_memory_free(pva->fw_debug_mem); + if (!pva->load_from_gsc) { + pva_kmd_device_memory_free(pva->fw_bin_mem); + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.h new file mode 100644 index 00000000..589b6363 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_SILICON_BOOT_H +#define PVA_KMD_SILICON_BOOT_H + +#include "pva_kmd_device.h" + +/** + * @brief Configure EVP and Segment config registers + * + * This function configures the EVP and Segment config registers. + * + * @param pva Pointer to the PVA device. + */ +void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva); + +/** + * @brief Configure SCR registers. + * + * This function configures the SCR registers. + * + * @param pva Pointer to the PVA device. + */ +void pva_kmd_config_scr_regs(struct pva_kmd_device *pva); + +/** + * @brief Configure SID registers. + * + * This function configures the SID registers. + * + * @param pva Pointer to the PVA device. + */ +void pva_kmd_config_sid(struct pva_kmd_device *pva); + +#endif /* PVA_KMD_SILICON_BOOT_H */ diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.c new file mode 100644 index 00000000..2e3efe3b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.c @@ -0,0 +1,414 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_silicon_elf_parser.h" +#include "pva_kmd_utils.h" + +#ifndef max +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) +#endif + +#ifndef UINT8_MAX +#define UINT8_MAX 0xFF +#endif + +// CERT complains about casts from const uint8_t*, so do intermediate cast to void* +static inline const void *uint_8_to_void(const uint8_t *const p) +{ + return (const void *)p; +} + +bool elf_header_check(const elf_ct e) +{ + const elfFileHeader *efh = (const elfFileHeader *)e; + if ((ELFCLASS32 == efh->oclass) && + (ELFMAGIC_LSB == *(const elfWord *)e)) { + return true; + } + return false; +} + +/** + * @brief Return pointer to ELF file header + * + * Cast the elf image data to \ref elfFileHeader* + * + * @param [in] e pointer to elf image data + * @return + * - Valid poniter to ELF file header + * - NULL if \a e is NULL or correct elf magic ID is not present + * in first 4 bytes of elf file pointed by \a e. + * + */ +static const elfFileHeader *elf_file_header(const elf_ct e) +{ + return (const elfFileHeader *)e; +} + +/** + * @brief Get start address of the section table. + * + * @param[in] e pointer to elf image + * @return const elfSectionHeader* + * - Valid address of section header. + * - NULL if \a e is NULL or Header in ELF file is NULL. + */ +static inline const elfSectionHeader *elf_section_table(const elf_parser_ctx e) +{ + const elfFileHeader *efh = elf_file_header(e.elf_file); + const char *p = (const char *)e.elf_file; + + if (efh->shoff > e.size) { + pva_kmd_log_err("Invalid Section header Offset"); + return NULL; + } + p = &p[efh->shoff]; + // proper ELF should always have offsets be aligned, + // but add check just in case. + return (const elfSectionHeader *)(const void *)(p); +} + +/** + * @brief Get the size of ELF section + * + * @param esh pointer to ELF section header + * @return elfWord + * - size of the corresponding section header. + * - 0, if \a esh is NULL. 
+ * + */ +static elfWord elf_section_size(const elfSectionHeader *esh) +{ + if (NULL == esh) { + return UZERO; + } + return (elfWord)esh->size; +} + +elfWord elf_shnum(const elf_parser_ctx e) +{ + const elfFileHeader *efh = elf_file_header(e.elf_file); + if (NULL == efh) { + return UZERO; + } + if (UZERO == efh->shnum) { + /* get value from size of first (empty) section */ + /* to avoid recursion, don't call elf_section_header(0) */ + const elfSectionHeader *esh = elf_section_table(e); + // if esh is somehow NULL, section_size will return UZERO + elfWord size = elf_section_size(esh); + if (size > e.size) { // make sure we don't lose precision + return UZERO; + } else { + return size; + } + } else { + return (elfWord)efh->shnum; + } +} + +const elfSectionHeader *elf_section_header(const elf_parser_ctx e, + unsigned int index) +{ + const elfSectionHeader *esh = elf_section_table(e); + if (NULL == esh) { + return NULL; + } + if (index >= elf_shnum(e)) { + return NULL; + } + + esh = &esh[index]; + return esh; +} + +static inline elfOff get_table_end(elfWord num, elfHalf entsize, elfOff off) +{ + elfOff end; + elfWord tablesize = 0; + /** + * Guaranteed to be less than UINT32_MAX and not overflow + * num if set as efh->shnum is UINT16_MAX + * num if set as section_header->size is file size of ELF which + * is bound to 2 MB + */ + tablesize = safe_mulu32(num, (uint32_t)entsize); + + end = off + tablesize; + if (end < off) { + return UZERO; //Wrap around error + } + return end; +} + +bool elf_has_valid_sections(const elf_parser_ctx e) +{ + elfOff max_size = UZERO; + uint32_t i; + elfOff ph_end, sh_end; + const elfFileHeader *efh = elf_file_header(e.elf_file); + if (efh == NULL) { + return false; + } + ph_end = get_table_end(efh->phnum, efh->phentsize, efh->phoff); + sh_end = get_table_end(elf_shnum(e), efh->shentsize, efh->shoff); + max_size = max(ph_end, sh_end); + if ((max_size == UZERO) || (max_size > e.size)) { + return false; + } + for (i = UZERO; i < elf_shnum(e); ++i) { + elfOff esh_end; + const elfSectionHeader *esh = elf_section_header(e, i); + /*We have already validated the whole section header array is within the file*/ + ASSERT(esh != NULL); + esh_end = esh->offset + esh->size; + if (esh_end < esh->offset) { + return false; //WRAP around error; + } + if ((esh->type != SHT_NOBITS) && (esh_end > e.size)) { + return false; + } + } + return true; +} + +/** + * @brief Get section header index + * get elf_file_header and check it's not null, + * get value from link field of first (empty) section + * if esh is somehow NULL, return esh link + * + * @param[in] e elf context + * + * @return section header index + */ +static elfWord elf_shstrndx(const elf_parser_ctx e) +{ + const elfFileHeader *efh = elf_file_header(e.elf_file); + if (NULL == efh) { + return UZERO; + } + if (efh->shstrndx == SHN_XINDEX) { + /* get value from link field of first (empty) section */ + /* to avoid recursion, don't call elf_section_header(0) */ + const elfSectionHeader *esh = elf_section_table(e); + if (NULL == esh) { + return UZERO; + } + return esh->link; + } + return efh->shstrndx; +} + +/** + * @brief Get name of string from strtab section + * check elf context and section header not null, + * check from section header for type and size are not null. 
+ * Get strtab section, check that stroffset doesn't wrap + * + * @param[in] e elf context + * @param[in] eshstr pointer to elf Section header + * @param[in] offset offset in integer + * Valid range: 0 to eshstr->size + * + * @return name of string from strtab section "eshstr" at "offset" + */ +static const char *elf_string_at_offset(const elf_parser_ctx e, + const elfSectionHeader *eshstr, + unsigned int offset) +{ + const char *strtab; + elfOff stroffset; + + if (SHT_STRTAB != eshstr->type) { + return NULL; + } + if (offset >= eshstr->size) { + return NULL; + } + strtab = (const char *)e.elf_file; + stroffset = eshstr->offset + offset; + if (stroffset < eshstr->offset) { // check that stroffset doesn't wrap + return NULL; + } + strtab = &strtab[stroffset]; + return strtab; +} + +const char *elf_section_name(const elf_parser_ctx e, + const elfSectionHeader *esh) +{ + const char *name; + const elfSectionHeader *eshstr; + elfWord shstrndx; + + /* get section header string table */ + shstrndx = elf_shstrndx(e); + if (shstrndx == UZERO) { + return NULL; + } + eshstr = elf_section_header(e, shstrndx); + if ((NULL == esh) || (NULL == eshstr)) { + return NULL; + } + name = elf_string_at_offset(e, eshstr, esh->name); + return name; +} + +const elfSectionHeader *elf_named_section_header(const elf_parser_ctx e, + const char *name) +{ + const elfSectionHeader *esh; + unsigned int i; + if (NULL == name) { + return NULL; + } + esh = elf_section_table(e); + if (NULL == esh) { + return NULL; + } + + /* iterate through sections till find matching name */ + for (i = UZERO; i < elf_shnum(e); ++i) { + const char *secname = elf_section_name(e, esh); + if (NULL != secname) { + size_t seclen = strlen(secname); + + // use strncmp to avoid problem with input not being null-terminated, + // but then need to check for false partial match + if ((ZERO == strncmp(secname, name, seclen)) && + (UZERO == (uint8_t)name[seclen])) { + return esh; + } + } + ++esh; + } + return NULL; +} + +/** + * @brief Get section header + * Get elf_section_table pointer and check it and + * iterate through sections till find matching type + * + * @param[in] e elf context + * @param[in] type type in word size + * + * @return elf section header with given "type" + */ +static const elfSectionHeader *elf_typed_section_header(const elf_parser_ctx e, + elfWord type) +{ + unsigned int i; + const elfSectionHeader *esh = elf_section_table(e); + if (NULL == esh) { + return NULL; + } + + /* iterate through sections till find matching type */ + for (i = UZERO; i < elf_shnum(e); ++i) { + if (esh->type == type) { + return esh; + } + ++esh; + } + return NULL; +} + +const elfByte *elf_section_contents(const elf_parser_ctx e, + const elfSectionHeader *esh) +{ + const elfByte *p; + if ((NULL == e.elf_file) || (NULL == esh)) { + return NULL; + } + p = (const elfByte *)e.elf_file; + if ((esh->offset > e.size) || + ((uint64_t)((uint64_t)esh->offset + (uint64_t)esh->size) > + e.size)) { + return NULL; + } + return &p[esh->offset]; +} + +const elfSymbol *elf_symbol(const elf_parser_ctx e, unsigned int index) +{ + const elfSectionHeader *esh; + const elfSymbol *esymtab; + const uint8_t *p = e.elf_file; + uint8_t align = 0; + /* get symbol table */ + esh = elf_typed_section_header(e, SHT_SYMTAB); + if ((NULL == esh) || (UZERO == esh->entsize)) { + return NULL; + } + if (index >= (esh->size / esh->entsize)) { + return NULL; + } + if (esh->addralign <= (uint8_t)UINT8_MAX) { + align = (uint8_t)esh->addralign; + } else { + return NULL; + } + if 
((uint64_t)((uint64_t)esh->size + (uint64_t)esh->offset) > e.size) { + return NULL; + } + p = &p[esh->offset]; + esymtab = (const elfSymbol *)uint_8_to_void(p); + if ((align != 0U) && ((((uintptr_t)(esymtab) % align) != UZERO))) { + return NULL; + } + + return &esymtab[index]; +} + +const char *elf_symbol_name(const elf_parser_ctx e, const elfSectionHeader *esh, + unsigned int index) +{ + const elfSectionHeader *eshstr; + const elfSymbol *esymtab; + const elfSymbol *esym; + const char *name; + const char *p; + uint8_t align = 0; + + if ((NULL == esh) || (UZERO == esh->entsize)) { + return NULL; + } + if (SHT_SYMTAB != esh->type) { + return NULL; + } + if (index >= (esh->size / esh->entsize)) { + return NULL; + } + /* get string table */ + eshstr = elf_section_header(e, esh->link); + if (NULL == eshstr) { + return NULL; + } + p = (const char *)e.elf_file; + if (esh->addralign <= (uint8_t)UINT8_MAX) { + align = (uint8_t)esh->addralign; + } else { + return NULL; + } + if (esh->offset > e.size) { + return NULL; + } + p = &p[esh->offset]; + esymtab = (const elfSymbol *)(const void *)(p); + if ((align != 0U) && ((((uintptr_t)(esymtab) % align) != UZERO))) { + return NULL; + } + esym = &esymtab[index]; + name = elf_string_at_offset(e, eshstr, esym->name); + return name; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.h new file mode 100644 index 00000000..d1e24b0b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_elf_parser.h @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SILICON_ELF_PARSER_H +#define PVA_KMD_SILICON_ELF_PARSER_H +#include "pva_api.h" + +#define ZERO 0 +#define UZERO 0U +#define ULLZERO 0ULL + +/* + * Define mapping from VPU data, rodata and program sections into + * corresponding segment types. + */ +typedef const void *elf_ct; /* points to const image of elf file */ + +/** + * Struct containing the ELF Buffer and size of the buffer. 
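As an illustration of the symbol API implemented above, a minimal sketch that walks every entry of .symtab; the function name example_walk_symbols is hypothetical, and the loop mirrors the pattern used later in this patch by count_symbols() and fill_symbol_table().

static void example_walk_symbols(const elf_parser_ctx e)
{
	const elfSectionHeader *symtab = elf_named_section_header(e, ".symtab");
	uint32_t i, count;

	if ((symtab == NULL) || (symtab->entsize == 0U))
		return;		/* no symbol table in this image */

	count = symtab->size / symtab->entsize;
	for (i = 0U; i < count; i++) {
		const elfSymbol *sym = elf_symbol(e, i);
		const char *name = elf_symbol_name(e, symtab, i);

		if ((sym == NULL) || (name == NULL))
			continue;	/* malformed or unnamed entry */
		/* sym->value, sym->size, ELF_ST_BIND(sym) and ELF_ST_TYPE(sym)
		 * are now safe to inspect. */
	}
}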
+ */ +typedef struct { + /** Pointer to buffer containing ELF File */ + elf_ct elf_file; + /** Size of the buffer containing ELF File */ + uint64_t size; +} elf_parser_ctx; + +/*--------------------------------- Types ----------------------------------*/ +/** unsinged 8-bit data type */ +typedef uint8_t elfByte; +/** unsinged 16-bit data type */ +typedef uint16_t elfHalf; +/** unsinged 32-bit data type */ +typedef uint32_t elfWord; +/** unsinged 32-bit data type */ +typedef uint32_t elfAddr; +/** unsinged 32-bit data type */ +typedef uint32_t elfOff; + +/** + * @brief ELF File Header + * + */ +typedef struct { + /** ELF magic number : 0x7f,0x45,0x4c,0x46 */ + elfWord magic; + /** Object file class */ + elfByte oclass; + /** Data encoding */ + elfByte data; + /** Object format version */ + elfByte formatVersion; + /** OS application binary interface */ + elfByte abi; + /** Version of abi */ + elfByte abiVersion; + /** Elf ident padding */ + elfByte padd[7]; + /** Object file type */ + elfHalf type; + /** Architecture */ + elfHalf machine; + /** Object file version */ + elfWord version; + /** Entry point virtual address */ + elfAddr entry; + /** Program header table file offset */ + elfOff phoff; + /** Section header table file offset */ + elfOff shoff; + /** Processor-specific flags */ + elfWord flags; + /** ELF header size in bytes */ + elfHalf ehsize; + /** Program header table entry size */ + elfHalf phentsize; + /** Program header table entry count */ + elfHalf phnum; + /** Section header table entry size */ + elfHalf shentsize; + /** Section header table entry count */ + elfHalf shnum; + /** Section header string table index */ + elfHalf shstrndx; +} elfFileHeader; + +/** ELF magic number in big endian */ +#define ELFMAGIC 0x7f454c46U +#define ELFMAGIC_LSB 0x464c457fU // ELF magic number in little endian +#define ELFCLASS32 1U // 32 bit object file + +#define EV_NONE 0 // Invalid version +#define EV_CURRENT 1 // Current version + +/** + * @brief ELF Section Header + * + */ +typedef struct { + /** Section name, string table index */ + elfWord name; + /** Type of section */ + elfWord type; + /** Miscellaneous section attributes */ + elfWord flags; + /** Section virtual addr at execution */ + elfAddr addr; + /** Section file offset */ + elfOff offset; + /** Size of section in bytes */ + elfWord size; + /** Index of another section */ + elfWord link; + /** Additional section information */ + elfWord info; + /** Section alignment */ + elfWord addralign; + /** Entry size if section holds table */ + elfWord entsize; +} elfSectionHeader; + +/* +* Section Header Type +*/ +#define SHT_NULL 0x00U /// NULL section (entry unused) +#define SHT_PROGBITS 0x01U /// Loadable program data +#define SHT_SYMTAB 0x02U /// Symbol table +#define SHT_STRTAB 0x03U /// String table +#define SHT_RELA 0x04U /// Relocation table with addents +#define SHT_HASH 0x05U /// Hash table +#define SHT_DYNAMIC 0x06U /// Information for dynamic linking +#define SHT_NOTE 0x07U /// Information that marks file +#define SHT_NOBITS 0x08U /// Section does not have data in file +#define SHT_REL 0x09U /// Relocation table without addents +#define SHT_SHLIB 0x0aU /// Reserved +#define SHT_DYNSYM 0x0bU /// Dynamic linker symbol table +#define SHT_INIT_ARRAY 0x0eU /// Array of pointers to init funcs +#define SHT_FINI_ARRAY 0x0fU /// Array of function to finish funcs +#define SHT_PREINIT_ARRAY 0x10U /// Array of pointers to pre-init functions +#define SHT_GROUP 0x11U /// Section group +#define SHT_SYMTAB_SHNDX 0x12U /// Table of 32bit 
symtab shndx +#define SHT_LOOS 0x60000000U /// Start OS-specific. +#define SHT_HIOS 0x6fffffffU /// End OS-specific type +#define SHT_LOPROC 0x70000000U /// Start of processor-specific +#define SHT_HIPROC 0x7fffffffU /// End of processor-specific +#define SHT_LOUSER 0x80000000U /// Start of application-specific +#define SHT_HIUSER 0x8fffffffU /// End of application-specific + +/* +* Special section index +*/ +#define SHN_UNDEF 0U // Undefined section +#define SHN_LORESERVE 0xff00U // lower bound of reserved indexes +#define SHN_ABS 0xfff1U // Associated symbol is absolute +#define SHN_COMMON 0xfff2U // Associated symbol is common +#define SHN_XINDEX 0xffffU // Index is in symtab_shndx + +/* +* Special section names +*/ +#define SHNAME_SHSTRTAB ".shstrtab" /// section string table +#define SHNAME_STRTAB ".strtab" /// string table +#define SHNAME_SYMTAB ".symtab" /// symbol table +#define SHNAME_SYMTAB_SHNDX ".symtab_shndx" /// symbol table shndx array +#define SHNAME_TEXT ".text." /// suffix with entry name + +/** + * @brief Symbol's information + * + */ +typedef struct { + /** Symbol name, index in string tbl */ + elfWord name; + /** Value of the symbol */ + elfAddr value; + /** Associated symbol size */ + elfWord size; + /** Type and binding attributes */ + elfByte info; + /** Extra flags */ + elfByte other; + /** Associated section index */ + elfHalf shndx; +} elfSymbol; + +/** Get the \a binding info of the symbol */ +#define ELF_ST_BIND(s) ((elfWord)((s)->info) >> 4) +/** Get the \a type info of the symbol */ +#define ELF_ST_TYPE(s) ((elfWord)((s)->info) & 0xfU) + +/* +* ELF symbol type +*/ +#define STT_NOTYPE 0U // No type known +#define STT_OBJECT 1U // Data symbol +#define STT_FUNC 2U // Code symbol +#define STT_SECTION 3U // Section +#define STT_FILE 4U // File +#define STT_COMMON 5U // Common symbol +#define STT_LOOS 10U // Start of OS-specific + +/* +* ELF symbol scope (binding) +*/ +#define STB_LOCAL 0U /// Symbol not visible outside object +#define STB_GLOBAL 1U /// Symbol visible outside object +#define STB_WEAK 2U /// Weak symbol + +/* + * The following routines that return file/program/section headers + * all return NULL when not found. + */ + +/* + * Typical elf readers create a table of information that is passed + * to the different routines. For simplicity, we're going to just + * keep the image of the whole file and pass that around. Later, if we see + * a need to speed this up, we could consider changing elf_parser_ctx to be something + * more complicated. + */ + +/** + * @brief Checks if the file stored in \a e is a 32-bit elf file + * and if the first 4 bytes contain elf magic ID. + * + * @param[in] e elf context containing complete ELF in a const buffer + * + * @return + * - TRUE if valid 32-bit elf file and correct elf magic ID present + * in first 4 bytes of elf file + * - FALSE if either of the above condition is not met + */ +bool elf_header_check(const elf_ct e); + +/** + * @brief Provide number of sections in sections header table + * get elf_file_header and check it's not null, + * get value from size of first (empty) section + * if esh is NULL, section_size will return zero + * + * @param[in] e elf context containing complete ELF in a const buffer + * + * @return section header number + */ +elfWord elf_shnum(const elf_parser_ctx e); + +/** + * @brief This function checks all sections in the elf to be valid + * + * The function validates all sections as follows: + * - Valid section offset i.e. within file bounds. + * - Valid section size i.e. 
non-zero section size + * and offset + section size is within file bounds + * + * @param[in]e elf context containing completeELF in a const buffer + * + * @return + * - TRUE if all sections are valid + * - FALSE if any invalid section found + */ +bool elf_has_valid_sections(const elf_parser_ctx e); + +/** + * @brief This function traverses the elf and + * returns a valid \ref elfSectionHeader if present + * at the index provided + * + * @param[in] e elf context containing complete ELF in a const buffer + * @param[in] index The index of the elfSectionHeader that is requested + * Valid range : 0 to elf_shnum(e) + * + * @return + * - valid elfSectionHeader from elf if index is valid and if sectionHeader is present + * - NULL if invalid or out of bounds index + */ +const elfSectionHeader *elf_section_header(const elf_parser_ctx e, + unsigned int index); + +/** + * @brief This function obtains the name of the \ref elfSectionHeader + * by going to the index specified by elfSectionHeader->name in the string table + * of the elf + * + * @param[in] e elf context + * + * @param[in] esh Valid \ref elfSectionHeader whose name is requested + * + * @return + * - Non NULL character array containing name of the elfSectionHeader + * if found in elf String Table + * - NULL if invalid elfSectionHeader or invalid index in elfSectionHeader->name + * going out of bounds of string table of elf + */ +const char *elf_section_name(const elf_parser_ctx e, + const elfSectionHeader *esh); + +/** + * @brief Provide elf section header with given "name". + * check elf context not a null, get elf_section_table and + * then iterate through sections till find matching name + * + * @param[in] e elf context + * @param[in] name name of section + * + * @return + * - elf section header with given "name" + * - NULL if @a name is NULL or invalid elfSectionHeader is found + */ +const elfSectionHeader *elf_named_section_header(const elf_parser_ctx e, + const char *name); + +/** + * @brief Provide contents of section. + * check elf context and section header not a null, + * return byte pointer of section header offset of elf context + * @param[in] e elf context + * @param[in] esh section header + * + i* @return Bytepointer of elf (NULL if e or esh == NULL) + */ +const elfByte *elf_section_contents(const elf_parser_ctx e, + const elfSectionHeader *esh); + +/** + * @brief Get ELF symbol + * get elf_typed_section_header section header, + * check header or it's entsize not null. + * check index is not crossing section header & table size + * Also make sure it is address aligned and get symbol table. + * + * @param[in] e elf context + * @param[in] index unsigned index + * Valid range: 0 to number of entries in SHT_SYMTAB of e + * + * @return elf symbol at given index (NULL if not found). + */ +const elfSymbol *elf_symbol(const elf_parser_ctx e, unsigned int index); + +/** + * @brief Get symbol table section + * check section header or it's entsize not null. + * check index is not crossing section header & table size + * get elf_section_header and Also make sure it is address + * aligned and get symbol table. + * + * @param[in] e elf context + * @param[in] esh pointer to structure elfSectionHeader + * @param[in] index unsigned index + * Valid range: 0 to number of entries in SHT_SYMTAB of e + * + * @return name of symbol from symtab section "esh" at "index". 
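The section-lookup helpers declared above compose naturally; a minimal sketch follows. The wrapper name and the section name ".text.my_kernel" are hypothetical.

static const elfByte *example_section_bytes(const elf_parser_ctx e,
					    elfWord *size_out)
{
	const elfSectionHeader *esh =
		elf_named_section_header(e, ".text.my_kernel");

	if (esh == NULL)
		return NULL;		/* no section with that name */

	*size_out = esh->size;
	/* Returns NULL when offset/size would run past the mapped ELF image */
	return elf_section_contents(e, esh);
}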
+ */ +const char *elf_symbol_name(const elf_parser_ctx e, const elfSectionHeader *esh, + unsigned int index); + +#endif // PVA_KMD_SILICON_ELF_PARSER_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c new file mode 100644 index 00000000..783e295d --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c @@ -0,0 +1,920 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_executable.h" +#include "pva_kmd_silicon_elf_parser.h" +#include "pva_kmd_utils.h" +#include "pva_resource.h" +#include "pva_kmd_device.h" +#include "pva_api_types.h" +#include "pva_kmd_t23x.h" +#include "pva_kmd_t26x.h" +#include "pva_math_utils.h" + +/** + * enum to identify different segments of VPU ELF + */ +enum pva_elf_seg_type { + /** Code segment in VPU ELF */ + PVA_SEG_VPU_CODE = 0U, + /** DATA segment in VPU ELF */ + PVA_SEG_VPU_DATA, + /** DATA segment in VPU ELF containing symbol information*/ + PVA_SEG_VPU_IN_PARAMS, + /** Not a valid segment in VPU ELF */ + PVA_SEG_VPU_MAX_TYPE +}; + +/** Maximum number of characters in symbol name */ +#define ELF_MAXIMUM_SYMBOL_LENGTH 64U + +/** Maximum number of characters in section name */ +#define ELF_MAXIMUM_SECTION_NAME 64 + +/** Section name of EXPORTS section */ +#define ELF_EXPORTS_SECTION "EXPORTS" + +/** Section name of EXPORTS section name length */ +#define ELF_EXPORTS_SECTION_NAME_LENGTH 7 + +/** Alignment needed for Data section of ELFs */ +#define DATA_SECTION_ALIGNMENT 32U + +/** Alignment needed for Text section of ELFs */ +#define TEXT_SECTION_ALIGNMENT 128U + +/** VPU icache size: 16KB */ +#define VPU_ICACHE_SIZE (16U * 1024U) + +/** This value indicates the that current symbol can be ignored in the VPU ELF */ +#define SYM_IGNORE 1 + +#define SIZE_EXPORTS_TABLE_ENTRY (3U * sizeof(uint32_t)) + +static uint32_t change_byte_order(uint32_t word) +{ + uint32_t out_word = 0U; + out_word = PVA_INSERT(PVA_EXTRACT(word, 31, 24, uint32_t), 7, 0); + out_word |= PVA_INSERT(PVA_EXTRACT(word, 23, 16, uint32_t), 15, 8); + out_word |= PVA_INSERT(PVA_EXTRACT(word, 15, 8, uint32_t), 23, 16); + out_word |= PVA_INSERT(PVA_EXTRACT(word, 7, 0, uint32_t), 31, 24); + return out_word; +} + +/* + * Define mapping from VPU data, rodata and program sections into + * corresponding segment types. + */ +static const struct pack_rule { + const char *elf_sec_name; + int32_t pva_type; +} pack_rules[] = { { + .elf_sec_name = ".data", + .pva_type = (int32_t)PVA_SEG_VPU_DATA, + }, + { + .elf_sec_name = ".rodata", + .pva_type = (int32_t)PVA_SEG_VPU_DATA, + }, + { + .elf_sec_name = ".text", + .pva_type = (int32_t)PVA_SEG_VPU_CODE, + } }; + +/** +* \brief Compares the \a section_name with all +* vpu elf section names until it finds a match and +* then return corresponding segment type. +* If the segment type is \ref PVA_SEG_VPU_DATA, then it further +* checks if its PVA_SEG_VPU_IN_PARAMS. 
+* \param[in] section_name Name of the section to be searched for, in VPU ELF +* \return returns corresponding value from enum pva_elf_seg_type. +*/ +static int32_t find_pva_ucode_segment_type(const char *section_name) +{ + uint32_t i; + int32_t ret = (int32_t)PVA_SEG_VPU_MAX_TYPE; + + for (i = 0; i < PVA_ARRAY_SIZE(pack_rules); i += 1U) { + /* Ignore the suffix of the section name */ + if (strncmp(section_name, pack_rules[i].elf_sec_name, + strlen(pack_rules[i].elf_sec_name)) == 0) { + ret = pack_rules[i].pva_type; + break; + } + } + if (ret == (int32_t)PVA_SEG_VPU_DATA) { + uint64_t section_name_len = + strnlen(section_name, ELF_MAXIMUM_SECTION_NAME); + uint64_t exports_section_name_len = + ELF_EXPORTS_SECTION_NAME_LENGTH; + // Check Export section present in DATA segment. Only support export sections. + if ((section_name_len >= exports_section_name_len) && + (strncmp((section_name + + (section_name_len - exports_section_name_len)), + ELF_EXPORTS_SECTION, + (size_t)exports_section_name_len)) == 0) { + ret = (int32_t)PVA_SEG_VPU_IN_PARAMS; + } + } + + return ret; +} + +static enum pva_error validate_elf(const elf_parser_ctx elf) +{ + enum pva_error err = PVA_SUCCESS; + + if (!elf_header_check(elf.elf_file)) { + pva_kmd_log_err("Invalid 32 bit VPU ELF"); + err = PVA_INVAL; + goto done; + } + + if (!elf_has_valid_sections(elf)) { + pva_kmd_log_err("ELF has invalid sections"); + err = PVA_INVAL; + } +done: + return err; +} + +static int32_t validate_symbol(elf_parser_ctx elf, uint32_t symbol_entry_id, + const elfSymbol **sym) +{ + const elfSectionHeader *sym_scn; + const char *section_name = NULL; + int32_t section_type = (int32_t)PVA_SEG_VPU_MAX_TYPE; + int32_t err = 0; + + *sym = elf_symbol(elf, symbol_entry_id); + if ((*sym == NULL) || ((*sym)->size == 0U) || + (ELF_ST_BIND(*sym) != STB_GLOBAL) || + (ELF_ST_TYPE(*sym) == STT_FUNC)) { + err = SYM_IGNORE; + goto end; + } + + sym_scn = elf_section_header(elf, (*sym)->shndx); + section_name = elf_section_name(elf, sym_scn); + if (section_name == NULL) { + err = SYM_IGNORE; + goto end; + } + section_type = find_pva_ucode_segment_type(section_name); + if (section_type != (int32_t)PVA_SEG_VPU_IN_PARAMS) { + err = SYM_IGNORE; + goto end; + } + err = 0; +end: + if (err != 0) { + *sym = NULL; + } + return err; +} + +static enum pva_error count_symbols(const elf_parser_ctx elf, + uint32_t *out_num_symbols) +{ + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + const elfSectionHeader *section_header; + uint32_t i, ent_count; + const elfSymbol *sym; + int32_t ret; + uint32_t num_symbols = 0; + + section_header = elf_named_section_header(elf, ".symtab"); + + if (section_header == NULL) { + err = PVA_INVAL; + pva_kmd_log_err("No symbol table found"); + goto done; + } + + ent_count = section_header->size / section_header->entsize; + for (i = 0; i < ent_count; i++) { + ret = validate_symbol(elf, i, &sym); + if (ret < 0) { + err = PVA_INVAL; + pva_kmd_log_err("Validation of symbol failed"); + goto done; + } + if (ret == SYM_IGNORE) { + continue; + } + num_symbols = addu32(num_symbols, 1U, &math_err); + } + if (math_err != MATH_OP_SUCCESS) { + err = PVA_ERR_MATH_OP; + pva_kmd_log_err("count_symbols math error"); + goto done; + } + + *out_num_symbols = num_symbols; +done: + return err; +} + +/** + * @brief updates symbol information (type, addr and size) from + * VPU ELF PVA_SEG_VPU_IN_PARAMS segment. + * + * Data about symbol information in EXPORTS section of ELF is present as follows. 
+ * typedef struct { + * uint32_t type; From VMEM_TYPE enums + * uint32_t addr_offset; Offset from VMEM base + * uint32_t size; Size of VMEM region in bytes + * }; + * @param[in] elf pointer to const image of elf file. + * @param[in] section_header pointer to VPU ELF PVA_SEG_VPU_IN_PARAMS section header + * @param[in, out] symbol_info pointer to ELF image symbol which needs to be updated. +*/ +static enum pva_error +update_exports_symbol(elf_parser_ctx elf, + const elfSectionHeader *section_header, + struct pva_symbol_info *symbol_info) +{ + const elfByte *data; + uint32_t symOffset = 0U; + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + + if ((section_header == NULL) || + (symbol_info->vmem_addr < section_header->addr) || + (addu32(symbol_info->vmem_addr, (uint32_t)SIZE_EXPORTS_TABLE_ENTRY, + &math_err) > + addu32(section_header->addr, section_header->size, &math_err))) { + err = PVA_INVAL; + goto done; + } else { + symOffset = subu32(symbol_info->vmem_addr, section_header->addr, + &math_err); + } + data = elf_section_contents(elf, section_header); + if (data == NULL) { + pva_kmd_log_err("Export section in ELF is NULL"); + err = PVA_INVAL; + goto done; + } + symbol_info->symbol_type = *(uint8_t *)((uintptr_t)&data[symOffset]); + if ((symbol_info->symbol_type == (uint8_t)PVA_SYM_TYPE_INVALID) || + (symbol_info->symbol_type >= (uint8_t)PVA_SYM_TYPE_MAX)) { + pva_kmd_log_err("Invalid symbol type found"); + err = PVA_INVAL; + goto done; + } + symbol_info->vmem_addr = + *(uint32_t *)((uintptr_t)&data[symOffset + sizeof(uint32_t)]); + symbol_info->size = *(uint32_t *)(( + uintptr_t)&data[symOffset + (2UL * sizeof(uint32_t))]); + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("update_exports_symbol math error"); + err = PVA_ERR_MATH_OP; + goto done; + } +done: + return err; +} + +static bool validate_vmem_offset(const uint32_t vmem_offset, + const uint32_t size, + const uint8_t vmem_region_count, + const struct vmem_region *vmem_regions_tab) +{ + bool valid = false; + uint32_t i = 0U; + uint32_t prev_idx; + pva_math_error math_err = MATH_OP_SUCCESS; + + for (i = vmem_region_count; i > 0U; i--) { + prev_idx = subu32(i, 1U, &math_err); + if (vmem_offset >= vmem_regions_tab[prev_idx].start) { + break; + } + } + + if ((i > 0U) && (addu32(vmem_offset, size, &math_err) <= + vmem_regions_tab[prev_idx].end)) { + valid = true; + } + + return (math_err != MATH_OP_SUCCESS) ? 
false : valid; +} + +static enum pva_error copy_symbol(elf_parser_ctx elf, const elfSymbol *sym, + const char *symname, + struct pva_symbol_info *symbol_info, + const uint8_t vmem_region_count, + const struct vmem_region *vmem_regions_tab) +{ + const elfSectionHeader *sym_scn; + enum pva_error err = PVA_SUCCESS; + + size_t symname_len = strnlen(symname, PVA_MAX_SYMBOL_NAME_LEN); + if (symname_len > 0U) { + (void)memcpy(symbol_info->name, symname, symname_len); + } + symbol_info->name[PVA_MAX_SYMBOL_NAME_LEN] = '\0'; + + symbol_info->size = sym->size; + symbol_info->vmem_addr = sym->value; + + sym_scn = elf_section_header(elf, sym->shndx); + err = update_exports_symbol(elf, sym_scn, symbol_info); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Updating symbol from EXPORTS table failed"); + goto out; + } + + if (!validate_vmem_offset(symbol_info->vmem_addr, symbol_info->size, + vmem_region_count, vmem_regions_tab)) { + pva_kmd_log_err("Invalid symbol vmem offset in ELF"); + err = PVA_INVAL; + goto out; + } + +out: + return err; +} + +static enum pva_error +fill_symbol_table(const elf_parser_ctx elf, + struct pva_kmd_exec_symbol_table *sym_table, + const uint8_t vmem_region_count, + const struct vmem_region *vmem_regions_tab) +{ + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + const elfSectionHeader *section_header; + uint32_t i, ent_count; + const elfSymbol *sym; + const char *symname; + int32_t ret; + uint32_t export_sym_idx = 0; + + section_header = elf_named_section_header(elf, ".symtab"); + + if (section_header == NULL) { + err = PVA_INVAL; + pva_kmd_log_err("No symbol table found"); + goto done; + } + + ent_count = section_header->size / section_header->entsize; + for (i = 0; i < ent_count; i++) { + struct pva_symbol_info *symbol_info; + + ret = validate_symbol(elf, i, &sym); + if (ret < 0) { + err = PVA_INVAL; + pva_kmd_log_err("Validation of symbol failed"); + goto done; + } + if (ret == SYM_IGNORE) { + continue; + } + + symbol_info = &sym_table->symbols[export_sym_idx]; + ASSERT(symbol_info != NULL); + symname = elf_symbol_name(elf, section_header, i); + if (symname == NULL) { + err = PVA_INVAL; + pva_kmd_log_err("elf_symbol_name failed"); + goto done; + } + err = copy_symbol(elf, sym, symname, symbol_info, + vmem_region_count, vmem_regions_tab); + if (err != PVA_SUCCESS) { + goto done; + } + symbol_info->symbol_id = + addu32(export_sym_idx, PVA_SYMBOL_ID_BASE, &math_err); + export_sym_idx = addu32(export_sym_idx, 1U, &math_err); + if (math_err != MATH_OP_SUCCESS) { + err = PVA_ERR_MATH_OP; + pva_kmd_log_err("fill_symbol_table math error"); + goto done; + } + } +done: + return err; +} + +/** + * The simplify caller's life: the input ptr should always be considered freed + * after this call. The returned new ptr should always be considered a new + * allocation and it needs to be freed if not NULL. 
+ */ +static void *pva_realloc(void *ptr, uint32_t old_size, uint32_t new_size) +{ + void *new_buffer; + + if (ptr == NULL) { + return pva_kmd_zalloc(new_size); + } + + if (new_size <= old_size) { + return ptr; + } + + new_buffer = pva_kmd_zalloc(new_size); + if (new_buffer == NULL) { + goto out; + } + + memcpy(new_buffer, ptr, old_size); + +out: + pva_kmd_free(ptr); + return new_buffer; +} + +static void *copy_text_section(const elf_parser_ctx elf, + const elfSectionHeader *section_header, + void *out_buffer, uint32_t *buffer_size) +{ + const elfByte *elf_data; + uint32_t const *word; + uint32_t *dst_word; + uint32_t wi; + /* The load address in section header is in words (uint32_t) */ + uint32_t load_addr_bytes = + safe_mulu32(section_header->addr, (uint32_t)sizeof(uint32_t)); + uint32_t needed_size = + safe_addu32(load_addr_bytes, section_header->size); + + // Align required text section size + needed_size = + safe_pow2_roundup_u32(needed_size, TEXT_SECTION_ALIGNMENT); + + if (needed_size > *buffer_size) { + out_buffer = pva_realloc(out_buffer, *buffer_size, needed_size); + *buffer_size = needed_size; + } + + if (out_buffer == NULL) { + return NULL; + } + + elf_data = elf_section_contents(elf, section_header); + if (elf_data == NULL) { + pva_kmd_log_err("copy_text_section elf_data error"); + return NULL; + } + + word = (uint32_t const *)elf_data; + + dst_word = (uint32_t *)((uintptr_t)out_buffer + load_addr_bytes); + for (wi = 0; wi < (section_header->size / sizeof(uint32_t)); wi++) { + dst_word[wi] = change_byte_order(word[wi]); + } + + return out_buffer; +} + +/** + * @brief Aggregate all text sections into a single, dynamically + * allocated buffer. + * + * The placement of text sections needs to take into account of the loading + * addresses. + * + * The endianness of text section needs to be changed. + * + * Caller is responsible for freeing the returned buffer. 
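+ *
+ * Placement sketch (illustrative): a text section whose header reports
+ * addr = A (counted in 32-bit words) and size = S bytes is copied to byte
+ * offset A * 4 of the returned buffer, word by word through
+ * change_byte_order(), after growing the buffer to
+ * safe_pow2_roundup_u32((A * 4) + S, TEXT_SECTION_ALIGNMENT) if needed;
+ * e.g. A = 0x100 and S = 0x80 place the section at byte offset 0x400.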
+ */ +static void *aggregate_text_sections(const elf_parser_ctx elf, + uint32_t *out_size) +{ + const elfSectionHeader *section_header; + uint32_t index = 0; + const char *section_name; + const elfWord sectionCount = elf_shnum(elf); + void *sections_content = NULL; + uint32_t sections_size = 0; + + for (index = 0; index < sectionCount; index++) { + int32_t segment_type; + + section_header = elf_section_header(elf, index); + if (section_header == NULL) { + pva_kmd_log_err( + "aggregate_text_sections elf_section_header error"); + goto out; + } + + section_name = elf_section_name(elf, section_header); + if (section_name == NULL) { + pva_kmd_log_err( + "aggregate_text_sections elf_section_name error"); + goto out; + } + segment_type = find_pva_ucode_segment_type(section_name); + if ((section_header->type == SHT_PROGBITS) && + (segment_type == (int32_t)PVA_SEG_VPU_CODE)) { + sections_content = + copy_text_section(elf, section_header, + sections_content, + &sections_size); + if (sections_content == NULL) { + pva_kmd_log_err( + "aggregate_text_sections copy_text_section error"); + goto out; + } + } + } +out: + *out_size = sections_size; + return sections_content; +} + +static void copy_data_section(const elf_parser_ctx elf, + const elfSectionHeader *section_header, + void *out_buffer, uint32_t *buffer_offset, + uint32_t buffer_size) +{ + const elfByte *elf_data; + void *dst; + uint32_t aligned_size = safe_pow2_roundup_u32(section_header->size, + DATA_SECTION_ALIGNMENT); + uint32_t size = safe_addu32(*buffer_offset, aligned_size); + ASSERT(size <= buffer_size); + + dst = pva_offset_pointer(out_buffer, *buffer_offset); + + elf_data = elf_section_contents(elf, section_header); + + ASSERT(elf_data != NULL); + + memcpy(dst, elf_data, section_header->size); + + *buffer_offset = safe_addu32(*buffer_offset, aligned_size); +} + +static enum pva_error count_data_sections(const elf_parser_ctx elf, + uint32_t *out_n_data_sections, + uint32_t *out_total_size) +{ + const elfSectionHeader *section_header; + uint32_t index = 0; + const char *section_name; + const elfWord sectionCount = elf_shnum(elf); + uint32_t n_data_sections = 0; + uint32_t total_size = 0; + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + + for (index = 0; index < sectionCount; index++) { + int32_t segment_type; + + section_header = elf_section_header(elf, index); + if (section_header == NULL) { + err = PVA_INVAL; + goto out; + } + + section_name = elf_section_name(elf, section_header); + if (section_name == NULL) { + err = PVA_INVAL; + goto out; + } + segment_type = find_pva_ucode_segment_type(section_name); + if ((section_header->type == SHT_PROGBITS) && + (segment_type == (int32_t)PVA_SEG_VPU_DATA)) { + n_data_sections = + addu32(n_data_sections, 1U, &math_err); + total_size += safe_pow2_roundup_u32( + section_header->size, DATA_SECTION_ALIGNMENT); + } + } + if (math_err != MATH_OP_SUCCESS) { + err = PVA_ERR_MATH_OP; + pva_kmd_log_err("count_data_sections math error"); + goto out; + } + *out_n_data_sections = n_data_sections; + *out_total_size = total_size; +out: + return err; +} + +/** + * @brief Aggregate all data sections into a single, dynamically + * allocated buffer. + * + * The offset of each data section must be aligned to DATA_SECTION_ALIGNMENT. + * + * The caller must free the returned data buffer and out_section_infos.
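+ *
+ * Packing sketch (illustrative, assuming DATA_SECTION_ALIGNMENT = 0x40 purely
+ * for this example): two data sections of sizes 0x30 and 0x100 are placed at
+ * data_buf_off 0x0 and 0x40 respectively, and each out_section_infos entry
+ * records that data_buf_off together with the section's vmem_addr and size.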
+ * + */ +static void * +aggregate_data_sections(const elf_parser_ctx elf, uint32_t n_data_sections, + uint32_t total_sections_size, + struct pva_fw_data_section_info **out_section_infos) +{ + const elfSectionHeader *section_header; + uint32_t index = 0; + const char *section_name; + const elfWord sectionCount = elf_shnum(elf); + void *sections_content = NULL; + struct pva_fw_data_section_info *section_infos; + uint32_t buffer_offset = 0; + uint32_t sec_idx = 0; + + sections_content = pva_kmd_zalloc(total_sections_size); + if (sections_content == NULL) { + goto err_out; + } + section_infos = + pva_kmd_zalloc(sizeof(*section_infos) * n_data_sections); + if (section_infos == NULL) { + goto free_content; + } + + for (index = 0; index < sectionCount; index++) { + int32_t segment_type; + + section_header = elf_section_header(elf, index); + /* Already checked when count data sections */ + ASSERT(section_header != NULL); + + section_name = elf_section_name(elf, section_header); + ASSERT(section_name != NULL); + segment_type = find_pva_ucode_segment_type(section_name); + if ((section_header->type == SHT_PROGBITS) && + (segment_type == (int32_t)PVA_SEG_VPU_DATA)) { + section_infos[sec_idx].data_buf_off = buffer_offset; + section_infos[sec_idx].vmem_addr = section_header->addr; + section_infos[sec_idx].size = section_header->size; + sec_idx = safe_addu32(sec_idx, 1U); + + copy_data_section(elf, section_header, sections_content, + &buffer_offset, total_sections_size); + } + } + + *out_section_infos = section_infos; + return sections_content; +free_content: + pva_kmd_free(sections_content); +err_out: + return NULL; +} + +/** + * @brief layout text and data sections in a single continuous buffer that is + * mapped to PVA IOVA space (user SID). + * + * We need to pad text size by an entire VPU icache size to avoid SMMU fault + * when prefetching. + */ +static struct pva_kmd_device_memory * +load_sections(struct pva_kmd_device *pva, uint8_t smmu_id, + const void *text_section_buf, uint32_t text_size, + const void *data_section_buf, uint32_t data_size, + uint32_t *out_data_begin_offset) +{ + uint32_t size = safe_addu32(text_size, (uint32_t)VPU_ICACHE_SIZE); + uint32_t alloc_size = safe_addu32(size, data_size); + uint32_t data_begin = safe_addu32(text_size, (uint32_t)VPU_ICACHE_SIZE); + struct pva_kmd_device_memory *dev_mem; + + ASSERT(TEXT_SECTION_ALIGNMENT >= DATA_SECTION_ALIGNMENT); + /* This is guaranteed to be true as TEXT_SECTION_ALIGNMENT is more strict */ + ASSERT(data_begin % DATA_SECTION_ALIGNMENT == 0); + + /* Map it as read-only. TODO: when VPU debugger is supported, we may + * need to map text as READ_WRITE conditionally. 
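+ *
+ * Resulting IOVA layout (sketch): [ text | VPU_ICACHE_SIZE guard pad | data ],
+ * i.e. *out_data_begin_offset = text_size + VPU_ICACHE_SIZE and the total
+ * allocation size is text_size + VPU_ICACHE_SIZE + data_size.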
*/ + dev_mem = pva_kmd_device_memory_alloc_map(alloc_size, pva, + PVA_ACCESS_RO, smmu_id); + if (dev_mem == NULL) { + goto out; + } + + memcpy(dev_mem->va, text_section_buf, text_size); + memcpy(pva_offset_pointer(dev_mem->va, data_begin), data_section_buf, + data_size); + + *out_data_begin_offset = data_begin; +out: + return dev_mem; +} + +static struct pva_kmd_device_memory * +load_metainfo(struct pva_kmd_device *pva, uint64_t section_iova, + uint32_t text_size, uint32_t data_begin_off, uint32_t data_size, + struct pva_fw_data_section_info const *section_infos, + uint32_t n_data_sections, struct pva_symbol_info *symbol_table, + uint32_t n_symbols) +{ + struct pva_kmd_device_memory *dev_mem; + struct pva_exec_bin_resource *metainfo; + struct pva_fw_vmem_buffer *vmem_buffers_mem; + struct pva_fw_data_section_info *data_sections_mem; + uint32_t i; + uint32_t alloc_size = (uint32_t)sizeof(struct pva_exec_bin_resource); + pva_math_error math_err = MATH_OP_SUCCESS; + + alloc_size = + addu32(alloc_size, + mulu32(n_data_sections, + (uint32_t)sizeof(struct pva_fw_data_section_info), + &math_err), + &math_err); + + alloc_size = addu32(alloc_size, + mulu32(n_symbols, + (uint32_t)sizeof(struct pva_fw_vmem_buffer), + &math_err), + &math_err); + + dev_mem = pva_kmd_device_memory_alloc_map( + alloc_size, pva, PVA_ACCESS_RO, PVA_R5_SMMU_CONTEXT_ID); + if (dev_mem == NULL) { + goto out; + } + + metainfo = dev_mem->va; + metainfo->code_addr_hi = iova_hi(section_iova); + metainfo->code_addr_lo = iova_lo(section_iova); + metainfo->code_size = text_size; + metainfo->data_section_addr_hi = + iova_hi(addu64(section_iova, data_begin_off, &math_err)); + metainfo->data_section_addr_lo = + iova_lo(addu64(section_iova, data_begin_off, &math_err)); + metainfo->num_data_sections = n_data_sections; + metainfo->num_vmem_buffers = n_symbols; + + data_sections_mem = pva_offset_pointer(metainfo, sizeof(*metainfo)); + memcpy(data_sections_mem, section_infos, + mulu32(n_data_sections, (uint32_t)sizeof(*section_infos), + &math_err)); + + vmem_buffers_mem = pva_offset_pointer( + data_sections_mem, + mulu32(n_data_sections, (uint32_t)sizeof(*section_infos), + &math_err)); + if (math_err != MATH_OP_SUCCESS) { + dev_mem = NULL; + goto out; + } + + for (i = 0; i < n_symbols; i++) { + vmem_buffers_mem[i].addr = + PVA_INSERT(symbol_table[i].vmem_addr, + PVA_FW_VMEM_ADDR_MSB, PVA_FW_VMEM_ADDR_LSB) | + PVA_INSERT((uint32_t)symbol_table[i].symbol_type, + PVA_FW_SYM_TYPE_MSB, PVA_FW_SYM_TYPE_LSB); + vmem_buffers_mem[i].size = symbol_table[i].size; + } + +out: + return dev_mem; +} + +enum pva_error +pva_kmd_load_executable(void *executable_data, uint32_t executable_size, + struct pva_kmd_device *pva, uint8_t dma_smmu_id, + struct pva_kmd_exec_symbol_table *out_symbol_table, + struct pva_kmd_device_memory **out_metainfo, + struct pva_kmd_device_memory **out_sections) +{ + enum pva_error err = PVA_SUCCESS; + pva_math_error math_err = MATH_OP_SUCCESS; + elf_parser_ctx elf = { 0 }; + uint32_t num_symbols = 0; + uint32_t n_data_sections; + uint32_t total_data_section_size = 0; + struct pva_fw_data_section_info *section_infos = NULL; + void *data_section_buf = NULL; + void *text_section_buf = NULL; + uint32_t total_text_section_size = 0; + struct pva_kmd_device_memory *metainfo_mem = NULL; + struct pva_kmd_device_memory *sections_mem = NULL; + uint32_t data_begin_off; + + elf.elf_file = executable_data; + elf.size = executable_size; + err = validate_elf(elf); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = count_symbols(elf, 
&num_symbols); + if (err != PVA_SUCCESS) { + goto err_out; + } + + out_symbol_table->n_symbols = num_symbols; + if (num_symbols > 0) { + out_symbol_table->symbols = pva_kmd_zalloc( + mulu32((uint32_t)sizeof(struct pva_symbol_info), + num_symbols, &math_err)); + if (out_symbol_table->symbols == NULL) { + err = PVA_NOMEM; + goto err_out; + } + if (math_err != MATH_OP_SUCCESS) { + err = PVA_ERR_MATH_OP; + pva_kmd_log_err("pva_kmd_load_executable math error"); + goto err_out; + } + } + + err = fill_symbol_table(elf, out_symbol_table, + pva->hw_consts.n_vmem_regions, + pva->vmem_regions_tab); + if (err != PVA_SUCCESS) { + goto free_syms; + } + + text_section_buf = + aggregate_text_sections(elf, &total_text_section_size); + /* Must have text sections */ + if (text_section_buf == NULL) { + pva_kmd_log_err( + "pva_kmd_load_executable aggregate_text_sections error"); + err = PVA_INVAL; + goto free_syms; + } + + err = count_data_sections(elf, &n_data_sections, + &total_data_section_size); + if (err != PVA_SUCCESS) { + goto free_text_buf; + } + + /* It's OK to not have data sections */ + if (total_data_section_size != 0) { + data_section_buf = + aggregate_data_sections(elf, n_data_sections, + total_data_section_size, + &section_infos); + ASSERT(data_section_buf != NULL); + } + + sections_mem = load_sections(pva, dma_smmu_id, text_section_buf, + total_text_section_size, data_section_buf, + total_data_section_size, &data_begin_off); + if (sections_mem == NULL) { + err = PVA_NOMEM; + goto free_data_buf; + } + + metainfo_mem = + load_metainfo(pva, sections_mem->iova, total_text_section_size, + data_begin_off, total_data_section_size, + section_infos, n_data_sections, + out_symbol_table->symbols, num_symbols); + if (metainfo_mem == NULL) { + err = PVA_NOMEM; + goto free_sec_mem; + } + /* Success. Now clean up temporary allocations */ + if (data_section_buf != NULL) { + pva_kmd_free(data_section_buf); + } + if (section_infos != NULL) { + pva_kmd_free(section_infos); + } + pva_kmd_free(text_section_buf); + + *out_metainfo = metainfo_mem; + *out_sections = sections_mem; + + return PVA_SUCCESS; +free_sec_mem: + pva_kmd_device_memory_free(sections_mem); +free_data_buf: + if (data_section_buf != NULL) { + pva_kmd_free(data_section_buf); + } + if (section_infos != NULL) { + pva_kmd_free(section_infos); + } +free_text_buf: + pva_kmd_free(text_section_buf); +free_syms: + pva_kmd_free(out_symbol_table->symbols); +err_out: + return err; +} + +void pva_kmd_unload_executable(struct pva_kmd_exec_symbol_table *symbol_table, + struct pva_kmd_device_memory *metainfo, + struct pva_kmd_device_memory *sections) +{ + pva_kmd_device_memory_free(metainfo); + pva_kmd_device_memory_free(sections); + if (symbol_table->symbols != NULL) { + pva_kmd_free(symbol_table->symbols); + symbol_table->symbols = NULL; + } +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c new file mode 100644 index 00000000..44105316 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.c @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited.
+ */ +#include "pva_kmd_device.h" +#include "pva_kmd_silicon_hwpm.h" +#include "pva_kmd_silicon_utils.h" + +#ifndef TEGRA_SOC_HWPM_IP_REG_OP_READ +#define TEGRA_SOC_HWPM_IP_REG_OP_READ 0x1 +#endif +#ifndef TEGRA_SOC_HWPM_IP_REG_OP_WRITE +#define TEGRA_SOC_HWPM_IP_REG_OP_WRITE 0x2 +#endif +int pva_kmd_hwpm_ip_reg_op(void *ip_dev, uint32_t reg_op, + uint32_t inst_element_index, uint64_t reg_offset, + uint32_t *reg_data) +{ + struct pva_kmd_device *pva = ip_dev; + + if (reg_offset > UINT32_MAX) + return PVA_INVAL; + + switch (reg_op) { + case TEGRA_SOC_HWPM_IP_REG_OP_READ: + *reg_data = + pva_kmd_read(pva, safe_addu32(pva->regspec.cfg_perf_mon, + reg_offset)); + break; + case TEGRA_SOC_HWPM_IP_REG_OP_WRITE: + pva_kmd_write( + pva, safe_addu32(pva->regspec.cfg_perf_mon, reg_offset), + *reg_data); + break; + default: + pva_kmd_log_err("Invalid HWPM operation"); + return PVA_INVAL; + } + + return PVA_SUCCESS; +} + +int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable) +{ + struct pva_kmd_device *dev = ip_dev; + enum pva_error err = PVA_SUCCESS; + + if (disable) { + err = pva_kmd_device_busy(dev); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to busy"); + } + } else { + pva_kmd_device_idle(dev); + } + return err; +} \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.h new file mode 100644 index 00000000..b423eca5 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_hwpm.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SILICON_HWPM_H +#define PVA_KMD_SILICON_HWPM_H +#include "pva_kmd.h" +#include "pva_kmd_shim_debugfs.h" + +/** +* @brief pva_hwpm_ip_pm +* +* This function called from Tegra HWPM driver to +* poweron/off pva device. +* +* @param ip_dev Pointer to PVA device +* @param disable disable/enable power management. PVA is +* powered on when false. +* @param reg_offset offset of register relative to PVA HWP base +* @return 0 on Success or negative error code +* +*/ +int pva_kmd_hwpm_ip_pm(void *ip_dev, bool disable); + +/** +* @brief pva_hwpm_ip_reg_op +* +* This function called from Tegra HWPM driver to +* access PVA HWPM registers. +* +* @param ip_dev Pointer to PVA device +* @param reg_op access operation and can be one of +* TEGRA_SOC_HWPM_IP_REG_OP_READ +* TEGRA_SOC_HWPM_IP_REG_OP_WRITE +* @param inst_element_index element index within PVA instance +* @param reg_offset offset of register relative to PVA HWP base +* @param reg_data pointer to where data is to be placed or read. 
+* @return 0 on Success or negative error code +* +*/ +int pva_kmd_hwpm_ip_reg_op(void *ip_dev, uint32_t reg_op, + uint32_t inst_element_index, uint64_t reg_offset, + uint32_t *reg_data); +#endif //PVA_KMD_SILICON_HWPM_H \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c new file mode 100644 index 00000000..61d1b0b2 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_silicon_isr.h" +#include "pva_kmd_device.h" +#include "pva_fw_hyp.h" +#include "pva_kmd_msg.h" + +struct pva_fw_msg { + uint8_t len; + uint32_t data[PVA_FW_MSG_MAX_LEN]; +}; + +static void read_hyp_msg(struct pva_kmd_device *pva, struct pva_fw_msg *msg) +{ + uint32_t i; + + msg->data[0] = pva_kmd_read_mailbox(pva, PVA_FW_MBOX_TO_HYP_LAST); + msg->len = PVA_EXTRACT(msg->data[0], PVA_FW_MSG_LEN_MSB, + PVA_FW_MSG_LEN_LSB, uint8_t); + ASSERT(msg->len <= PVA_ARRAY_SIZE(msg->data)); + for (i = 1; i < msg->len; i++) { + msg->data[i] = pva_kmd_read_mailbox( + pva, PVA_FW_MBOX_TO_HYP_BASE + i - 1); + } +} + +void pva_kmd_hyp_isr(void *data) +{ + struct pva_kmd_device *pva = data; + uint32_t intr_status; + uint32_t wdt_val, hsp_val, h1x_val; + + intr_status = pva_kmd_read(pva, pva->regspec.sec_lic_intr_status); + + wdt_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_WDT_MSB, + PVA_REG_SEC_LIC_INTR_WDT_LSB, uint32_t); + hsp_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_HSP_MSB, + PVA_REG_SEC_LIC_INTR_HSP_LSB, uint32_t); + h1x_val = PVA_EXTRACT(intr_status, PVA_REG_SEC_LIC_INTR_H1X_MSB, + PVA_REG_SEC_LIC_INTR_H1X_LSB, uint32_t); + + if (wdt_val != 0) { + /* Clear interrupt status */ + pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, + intr_status & + PVA_MASK(PVA_REG_SEC_LIC_INTR_WDT_MSB, + PVA_REG_SEC_LIC_INTR_WDT_LSB)); + /* TODO: reboot firmware when we can */ + FAULT("PVA watchdog timeout!"); + } + + if (h1x_val != 0) { + pva_kmd_log_err_u64("Host1x errors", h1x_val); + /* Clear interrupt status */ + pva_kmd_write(pva, pva->regspec.sec_lic_intr_status, + intr_status & + PVA_MASK(PVA_REG_SEC_LIC_INTR_H1X_MSB, + PVA_REG_SEC_LIC_INTR_H1X_LSB)); + } + + if (hsp_val != 0) { + struct pva_fw_msg msg = { 0 }; + + read_hyp_msg(pva, &msg); + + pva_kmd_handle_hyp_msg(pva, &msg.data[0], msg.len); + + msg.data[0] &= ~PVA_FW_MBOX_FULL_BIT; + /* Clear interrupt bit in mailbox */ + pva_kmd_write_mailbox(pva, PVA_FW_MBOX_TO_HYP_LAST, + msg.data[0]); + } +} + +static uint32_t read_ccq0_status(struct pva_kmd_device *pva, uint8_t status_id) +{ + return pva_kmd_read(pva, pva->regspec.ccq_regs[0].status[status_id]); +} + +static void write_ccq0_status(struct pva_kmd_device *pva, uint8_t status_id, + uint32_t value) +{ + pva_kmd_write(pva, pva->regspec.ccq_regs[0].status[status_id], value); +} + +static void read_ccq_msg(struct pva_kmd_device *pva, struct pva_fw_msg *msg) +{ + uint32_t i; + + msg->data[0] = read_ccq0_status(pva, PVA_FW_MSG_STATUS_LAST); + msg->len = PVA_EXTRACT(msg->data[0], 
PVA_FW_MSG_LEN_MSB, + PVA_FW_MSG_LEN_LSB, uint8_t); + ASSERT(msg->len <= PVA_ARRAY_SIZE(msg->data)); + for (i = 1; i < msg->len; i++) { + msg->data[i] = + read_ccq0_status(pva, PVA_FW_MSG_STATUS_BASE + i - 1); + } +} + +/* Handle interrupt from CCQ0 */ +void pva_kmd_isr(void *data) +{ + struct pva_kmd_device *pva = data; + uint32_t intr_status; + + intr_status = + read_ccq0_status(pva, 2) & PVA_REG_CCQ_STATUS2_INTR_ALL_BITS; + pva_dbg_printf("CCQ0_INTR_STATUS 0x%x\n", intr_status); + /* Clear interupt status This must be done prior to ack CCQ messages + * otherwise we risk losing CCQ messages. + */ + write_ccq0_status(pva, 2, intr_status); + + if (intr_status & PVA_REG_CCQ_STATUS2_INTR_STATUS8_BIT) { + struct pva_fw_msg msg; + + read_ccq_msg(pva, &msg); + + pva_kmd_handle_msg(pva, &msg.data[0], msg.len); + + /* Ack through status1 write. */ + write_ccq0_status(pva, 1, 0 /* Value doesn't matter for now */); + } + + /* We don't care about Status7 or CCQ overflow interrupt */ +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.h new file mode 100644 index 00000000..9fb7e604 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_isr.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SILICON_ISR_H +#define PVA_KMD_SILICON_ISR_H +#include "pva_kmd_silicon_utils.h" +#include "pva_kmd_device.h" + +void pva_kmd_hyp_isr(void *data); + +void pva_kmd_isr(void *data); + +#endif // PVA_KMD_SILICON_ISR_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c new file mode 100644 index 00000000..bfe98378 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_misc.c @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#include "pva_kmd_silicon_utils.h" +#include "pva_kmd_device.h" +#include "pva_math_utils.h" + +void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id, + uint64_t ccq_entry) +{ + pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo, + PVA_EXTRACT64(ccq_entry, 31, 0, uint32_t)); + pva_kmd_write(pva, pva->regspec.ccq_regs[ccq_id].fifo, + PVA_EXTRACT64(ccq_entry, 63, 32, uint32_t)); +} + +uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id) +{ + uint32_t status2 = + pva_kmd_read(pva, pva->regspec.ccq_regs[ccq_id].status[2]); + uint32_t len = + PVA_EXTRACT(status2, PVA_REG_CCQ_STATUS2_NUM_ENTRIES_MSB, + PVA_REG_CCQ_STATUS2_NUM_ENTRIES_LSB, uint32_t); + return safe_subu32((uint32_t)PVA_CCQ_DEPTH, len) / 2U; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_utils.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_utils.h new file mode 100644 index 00000000..e1e99dc8 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_utils.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_SILICON_UTILS_H +#define PVA_KMD_SILICON_UTILS_H +#include "pva_utils.h" +#include "pva_kmd_regs.h" +#include "pva_kmd_shim_silicon.h" +#include "pva_math_utils.h" + +static inline void pva_kmd_write(struct pva_kmd_device *pva, uint32_t addr, + uint32_t val) +{ + pva_dbg_printf("pva_kmd_write: addr=0x%x, val=0x%x\n", addr, val); + pva_kmd_aperture_write(pva, PVA_KMD_APERTURE_PVA_CLUSTER, addr, val); +} + +static inline uint32_t pva_kmd_read(struct pva_kmd_device *pva, uint32_t addr) +{ + uint32_t val; + + val = pva_kmd_aperture_read(pva, PVA_KMD_APERTURE_PVA_CLUSTER, addr); + return val; +} + +static inline void pva_kmd_write_mailbox(struct pva_kmd_device *pva, + uint32_t mailbox_idx, uint32_t val) +{ + uint32_t gap = PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR; + uint32_t offset = safe_mulu32(gap, mailbox_idx); + uint32_t addr = safe_addu32(PVA_REG_HSP_SM0_ADDR, offset); + pva_kmd_write(pva, addr, val); +} + +static inline uint32_t pva_kmd_read_mailbox(struct pva_kmd_device *pva, + uint32_t mailbox_idx) +{ + uint32_t gap = PVA_REG_HSP_SM1_ADDR - PVA_REG_HSP_SM0_ADDR; + uint32_t offset = safe_mulu32(gap, mailbox_idx); + uint32_t addr = safe_addu32(PVA_REG_HSP_SM0_ADDR, offset); + return pva_kmd_read(pva, addr); +} + +#endif // PVA_KMD_SILICON_UTILS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c new file mode 100644 index 00000000..93fc5e33 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.c @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_submitter.h" +#include "pva_kmd_utils.h" + +void pva_kmd_submitter_init(struct pva_kmd_submitter *submitter, + struct pva_kmd_queue *queue, + pva_kmd_mutex_t *submit_lock, + struct pva_kmd_cmdbuf_chunk_pool *chunk_pool, + pva_kmd_mutex_t *chunk_pool_lock, + uint32_t *post_fence_va, + struct pva_fw_postfence const *post_fence) +{ + submitter->queue = queue; + submitter->submit_lock = submit_lock; + submitter->post_fence_va = post_fence_va; + submitter->post_fence = *post_fence; + submitter->fence_future_value = 0; + submitter->chunk_pool = chunk_pool; + submitter->chunk_pool_lock = chunk_pool_lock; + + *submitter->post_fence_va = submitter->fence_future_value; +} + +enum pva_error pva_kmd_submitter_prepare(struct pva_kmd_submitter *submitter, + struct pva_kmd_cmdbuf_builder *builder) +{ + enum pva_error err; + + err = pva_kmd_cmdbuf_builder_init(builder, submitter->chunk_pool); + if (err != PVA_SUCCESS) { + goto err_out; + } + + return PVA_SUCCESS; +err_out: + return err; +} + +enum pva_error +pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter, + struct pva_kmd_cmdbuf_builder *builder, + struct pva_fw_postfence *fence) +{ + enum pva_error err; + uint32_t first_chunk_id; + uint16_t first_chunk_size; + uint64_t first_chunk_offset; + struct pva_fw_cmdbuf_submit_info submit_info = { 0 }; + struct pva_fw_postfence free_notifier_fence; + + pva_kmd_cmdbuf_builder_finalize(builder, &first_chunk_id, + &first_chunk_size); + + pva_kmd_get_free_notifier_fence(submitter->chunk_pool, first_chunk_id, + &free_notifier_fence); + first_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset( + submitter->chunk_pool, first_chunk_id); + + submit_info.postfences[0] = free_notifier_fence; + submit_info.num_postfence = 1; + if (fence->resource_id != PVA_RESOURCE_ID_INVALID) { + submit_info.postfences[1] = *fence; + submit_info.num_postfence = 2; + } + submit_info.first_chunk_resource_id = + submitter->chunk_pool->mem_resource_id; + submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset); + submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset); + submit_info.first_chunk_size = first_chunk_size; + + pva_kmd_mutex_lock(submitter->submit_lock); + err = pva_kmd_queue_submit(submitter->queue, &submit_info); + if (err != PVA_SUCCESS) { + pva_kmd_cmdbuf_builder_cancel(builder); + } + pva_kmd_mutex_unlock(submitter->submit_lock); + + return err; +} + +enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter, + struct pva_kmd_cmdbuf_builder *builder, + uint32_t *out_fence_val) +{ + enum pva_error err; + uint32_t first_chunk_id; + uint16_t first_chunk_size; + uint64_t first_chunk_offset; + struct pva_fw_cmdbuf_submit_info submit_info = { 0 }; + struct pva_fw_postfence free_notifier_fence; + + pva_kmd_cmdbuf_builder_finalize(builder, &first_chunk_id, + &first_chunk_size); + + pva_kmd_get_free_notifier_fence(submitter->chunk_pool, first_chunk_id, + &free_notifier_fence); + first_chunk_offset = pva_kmd_get_cmdbuf_chunk_res_offset( + submitter->chunk_pool, first_chunk_id); + + submit_info.num_postfence = 2; + submit_info.postfences[0] = submitter->post_fence; + submit_info.postfences[1] = free_notifier_fence; + submit_info.first_chunk_resource_id = + submitter->chunk_pool->mem_resource_id; + submit_info.first_chunk_offset_lo = iova_lo(first_chunk_offset); + 
submit_info.first_chunk_offset_hi = iova_hi(first_chunk_offset); + submit_info.first_chunk_size = first_chunk_size; + /* TODO: remove these flags after FW execute command buffer with no engines. */ + submit_info.flags = + PVA_INSERT8(0x3, PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_MSB, + PVA_CMDBUF_FLAGS_ENGINE_AFFINITY_LSB); + + pva_kmd_mutex_lock(submitter->submit_lock); + submitter->fence_future_value += 1U; + submit_info.postfences[0].value = submitter->fence_future_value; + err = pva_kmd_queue_submit(submitter->queue, &submit_info); + if (err == PVA_SUCCESS) { + *out_fence_val = submitter->fence_future_value; + } else { + submitter->fence_future_value -= 1U; + pva_kmd_cmdbuf_builder_cancel(builder); + } + pva_kmd_mutex_unlock(submitter->submit_lock); + + return err; +} + +enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter, + uint32_t fence_val, + uint32_t poll_interval_us, + uint32_t timeout_us) +{ + uint32_t volatile *fence_addr = submitter->post_fence_va; + uint32_t time_spent = 0; + + while (*fence_addr < fence_val) { + pva_kmd_sleep_us(poll_interval_us); + time_spent = safe_addu32(time_spent, poll_interval_us); + if (time_spent >= timeout_us) { + pva_kmd_log_err("pva_kmd_submitter_wait Timed out"); + return PVA_TIMEDOUT; + } + } + + return PVA_SUCCESS; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h new file mode 100644 index 00000000..3aace3a3 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_submitter.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#ifndef PVA_KMD_SUBMITTER_H +#define PVA_KMD_SUBMITTER_H +#include "pva_kmd_cmdbuf.h" +#include "pva_kmd_mutex.h" +#include "pva_kmd_queue.h" + +/** A thread-safe submitter utility */ +struct pva_kmd_submitter { + /** The lock protects the submission to the queue, including + * incrementing the post fence */ + pva_kmd_mutex_t *submit_lock; + struct pva_kmd_queue *queue; + uint32_t *post_fence_va; + struct pva_fw_postfence post_fence; + uint32_t fence_future_value; + + /** This lock protects the use of the chunk_pool*/ + pva_kmd_mutex_t *chunk_pool_lock; + struct pva_kmd_cmdbuf_chunk_pool *chunk_pool; +}; + +void pva_kmd_submitter_init(struct pva_kmd_submitter *submitter, + struct pva_kmd_queue *queue, + pva_kmd_mutex_t *submit_lock, + struct pva_kmd_cmdbuf_chunk_pool *chunk_pool, + pva_kmd_mutex_t *chunk_pool_lock, + uint32_t *post_fence_va, + struct pva_fw_postfence const *post_fence); + +enum pva_error +pva_kmd_submitter_prepare(struct pva_kmd_submitter *submitter, + struct pva_kmd_cmdbuf_builder *builder); + +enum pva_error pva_kmd_submitter_submit(struct pva_kmd_submitter *submitter, + struct pva_kmd_cmdbuf_builder *builder, + uint32_t *out_fence_val); +enum pva_error pva_kmd_submitter_wait(struct pva_kmd_submitter *submitter, + uint32_t fence_val, + uint32_t poll_interval_ms, + uint32_t timeout_ms); +enum pva_error +pva_kmd_submitter_submit_with_fence(struct pva_kmd_submitter *submitter, + struct pva_kmd_cmdbuf_builder *builder, + struct pva_fw_postfence *fence); + +/* prepare submission */ +/* add cmd */ +/* add cmd */ +/* do submit -> fence value */ +/* wait for fence */ + +/* prepare submission */ +/* add cmd */ +/* add cmd */ +/* do submit with fence (provide a fence) */ + +#endif // PVA_KMD_SUBMITTER_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.c new file mode 100644 index 00000000..743effbd --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.c @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_t23x.h" +#include "pva_kmd_constants.h" + +struct vmem_region vmem_regions_tab_t23x[PVA_VMEM_REGION_COUNT_T23X] = { + { .start = T23x_VMEM0_START, .end = T23x_VMEM0_END }, + { .start = T23x_VMEM1_START, .end = T23x_VMEM1_END }, + { .start = T23x_VMEM2_START, .end = T23x_VMEM2_END }, +}; + +void pva_kmd_device_init_t23x(struct pva_kmd_device *pva) +{ + uint32_t ccq; + uint32_t st_idx; + + pva->hw_consts.hw_gen = PVA_HW_GEN2; + pva->hw_consts.n_smmu_contexts = PVA_NUM_SMMU_CONTEXTS_T23X; + pva->r5_image_smmu_context_id = PVA_NUM_SMMU_CONTEXTS_T23X - 1; + pva->hw_consts.n_dma_descriptors = PVA_NUM_DMA_DESC_T23X; + pva->hw_consts.n_user_dma_channels = PVA_DMA_NUM_CHANNELS_T23X - 1U; + pva->hw_consts.n_hwseq_words = PVA_NUM_HWSEQ_WORDS_T23X; + pva->hw_consts.n_dynamic_adb_buffs = PVA_NUM_DYNAMIC_ADB_BUFFS_T23X; + pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T23X; + pva->support_hwseq_frame_linking = false; + pva->vmem_regions_tab = vmem_regions_tab_t23x; + pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] = + PVA_KMD_PVA0_T23x_REG_BASE; + pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] = + PVA_KMD_PVA0_T23x_REG_SIZE; + pva->reg_phy_base[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_BASE; + pva->reg_size[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_SIZE; + + pva->regspec.sec_lic_intr_enable = 0x28064; + pva->regspec.sec_lic_intr_status = 0x2806C; + + pva->regspec.cfg_user_sid_base = 0x240000; + pva->regspec.cfg_priv_sid = 0x240020; + pva->regspec.cfg_vps_sid = 0x240024; + pva->regspec.cfg_r5user_lsegreg = 0x250008; + pva->regspec.cfg_r5user_usegreg = 0x25001c; + pva->regspec.cfg_priv_ar1_lsegreg = 0x25000c; + pva->regspec.cfg_priv_ar1_usegreg = 0x250020; + pva->regspec.cfg_priv_ar2_lsegreg = 0x250010; + pva->regspec.cfg_priv_ar2_usegreg = 0x250024; + pva->regspec.cfg_priv_ar1_start = 0x250028; + pva->regspec.cfg_priv_ar1_end = 0x25002c; + pva->regspec.cfg_priv_ar2_start = 0x250030; + pva->regspec.cfg_priv_ar2_end = 0x250034; + + pva->regspec.cfg_scr_priv_0 = 0x18004; + pva->regspec.cfg_perf_mon = 0x200000; + + pva->regspec.ccq_count = 8U; + /* For VPU 0*/ + pva->regspec.vpu_dbg_instr_reg_offset[0] = 0x50000U; + /* For VPU 1*/ + pva->regspec.vpu_dbg_instr_reg_offset[1] = 0x70000U; + for (ccq = 0; ccq < pva->regspec.ccq_count; ccq++) { + uint32_t n_st = PVA_CFG_CCQ_STATUS_COUNT; + uint32_t ccq_base = safe_addu32( + (uint32_t)0x260000, + safe_mulu32((uint32_t)PVA_CFG_CCQ_BLOCK_SIZE, ccq)); + pva->regspec.ccq_regs[ccq].status_count = n_st; + pva->regspec.ccq_regs[ccq].fifo = ccq_base; + for (st_idx = 0; st_idx < n_st; st_idx++) { + pva->regspec.ccq_regs[ccq].status[st_idx] = safe_addu32( + ccq_base, + safe_addu32((uint32_t)0x4U, + safe_mulu32((uint32_t)0x4U, + st_idx))); + } + } + +#if PVA_SUPPORT_XBAR_RAW == 1 + pva->bl_sector_pack_format = PVA_BL_XBAR_RAW; +#else + pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW; +#endif +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.h new file mode 100644 index 00000000..aa9d045d --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t23x.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_T23X_H +#define PVA_KMD_T23X_H +#include "pva_kmd_device.h" + +/** Number of VMEM regions */ +#define PVA_VMEM_REGION_COUNT_T23X 3U + +/** Start Address of VMEM0 Bank in T23X */ +#define T23x_VMEM0_START 0x40U +/** End Address of VMEM0 Bank in T23X */ +#define T23x_VMEM0_END 0x20000U +/** Start Address of VMEM1 Bank in T23X */ +#define T23x_VMEM1_START 0x40000U +/** End Address of VMEM1 Bank in T23X */ +#define T23x_VMEM1_END 0x60000U +/** Start Address of VMEM2 Bank in T23X */ +#define T23x_VMEM2_START 0x80000U +/** End Address of VMEM2 Bank in T23X */ +#define T23x_VMEM2_END 0xA0000U + +/** @brief Base address for PVA0 VPU Debug Register space (CSITE_PVA0VPU) */ +#define TEGRA_PVA0_VPU_DBG_BASE 0x24740000U +/** @brief Size (in bytes) of the PVA0 VPU Debug Register space (CSITE_PVA0VPU) */ +#define TEGRA_PVA0_VPU_DBG_SIZE 0x40000U + +void pva_kmd_device_init_t23x(struct pva_kmd_device *pva); + +#endif // PVA_KMD_T23X_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.c new file mode 100644 index 00000000..549e14b8 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.c @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_t26x.h" +#include "pva_kmd_constants.h" + +struct vmem_region vmem_regions_tab_t26x[PVA_VMEM_REGION_COUNT_T26X] = { + { .start = T26x_VMEM0_START, .end = T26x_VMEM0_END }, + { .start = T26x_VMEM1_START, .end = T26x_VMEM1_END }, + { .start = T26x_VMEM2_START, .end = T26x_VMEM2_END }, + { .start = T26x_VMEM3_START, .end = T26x_VMEM3_END }, +}; + +void pva_kmd_device_init_t26x(struct pva_kmd_device *pva) +{ + uint32_t ccq; + uint32_t st_idx; + + pva->hw_consts.hw_gen = PVA_HW_GEN3; + pva->hw_consts.n_smmu_contexts = PVA_NUM_SMMU_CONTEXTS_T26X; + pva->r5_image_smmu_context_id = PVA_NUM_SMMU_CONTEXTS_T26X - 1; + pva->hw_consts.n_dma_descriptors = PVA_NUM_DMA_DESC_T26X; + pva->hw_consts.n_user_dma_channels = PVA_DMA_NUM_CHANNELS_T26X - 1U; + pva->hw_consts.n_hwseq_words = PVA_NUM_HWSEQ_WORDS_T26X; + pva->hw_consts.n_dynamic_adb_buffs = PVA_NUM_DYNAMIC_ADB_BUFFS_T26X; + pva->hw_consts.n_vmem_regions = PVA_VMEM_REGION_COUNT_T26X; + pva->vmem_regions_tab = vmem_regions_tab_t26x; + pva->support_hwseq_frame_linking = true; + pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER] = + PVA_KMD_PVA0_T26x_REG_BASE; + pva->reg_size[PVA_KMD_APERTURE_PVA_CLUSTER] = + PVA_KMD_PVA0_T26x_REG_SIZE; + pva->reg_phy_base[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_BASE; + pva->reg_size[PVA_KMD_APERTURE_VPU_DEBUG] = TEGRA_PVA0_VPU_DBG_SIZE; + + pva->regspec.sec_lic_intr_enable = 0x28064; + pva->regspec.sec_lic_intr_status = 0x2806C; + + pva->regspec.cfg_user_sid_base = 0x240000; + pva->regspec.cfg_priv_sid = 0x240020; + pva->regspec.cfg_vps_sid = 0x240024; + pva->regspec.cfg_r5user_lsegreg = 0x250008; + pva->regspec.cfg_r5user_usegreg = 0x25001c; + pva->regspec.cfg_priv_ar1_lsegreg = 0x25000c; + pva->regspec.cfg_priv_ar1_usegreg = 0x250020; + pva->regspec.cfg_priv_ar2_lsegreg = 0x250010; + pva->regspec.cfg_priv_ar2_usegreg = 0x250024; + pva->regspec.cfg_priv_ar1_start = 0x250028; + pva->regspec.cfg_priv_ar1_end = 0x25002c; + pva->regspec.cfg_priv_ar2_start = 0x250030; + pva->regspec.cfg_priv_ar2_end = 0x250034; + + pva->regspec.cfg_scr_priv_0 = 0x18004; + pva->regspec.cfg_perf_mon = 0x200000; + + pva->regspec.ccq_count = 8U; + /* For VPU 0*/ + pva->regspec.vpu_dbg_instr_reg_offset[0] = 0x50000U; + /* For VPU 1*/ + pva->regspec.vpu_dbg_instr_reg_offset[1] = 0x70000U; + for (ccq = 0; ccq < pva->regspec.ccq_count; ccq++) { + uint32_t n_st = PVA_CFG_CCQ_STATUS_COUNT; + uint32_t ccq_base = safe_addu32( + (uint32_t)0x260000, + safe_mulu32((uint32_t)PVA_CFG_CCQ_BLOCK_SIZE, ccq)); + pva->regspec.ccq_regs[ccq].status_count = n_st; + pva->regspec.ccq_regs[ccq].fifo = ccq_base; + for (st_idx = 0; st_idx < n_st; st_idx++) { + pva->regspec.ccq_regs[ccq].status[st_idx] = safe_addu32( + ccq_base, + safe_addu32((uint32_t)0x4U, + safe_mulu32((uint32_t)0x4U, + st_idx))); + } + } + pva->bl_sector_pack_format = PVA_BL_TEGRA_RAW; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.h new file mode 100644 index 00000000..8da1233c --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_t26x.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_T26X_H +#define PVA_KMD_T26X_H +#include "pva_kmd_device.h" + +#define PVA_KMD_PVA0_T26x_REG_BASE 0x818c000000 +#define PVA_KMD_PVA0_T26x_REG_SIZE 0x900000 + +/** Number of VMEM regions in T26X */ +#define PVA_VMEM_REGION_COUNT_T26X 4U + +/** Start Address of VMEM0 Bank in T26X */ +#define T26x_VMEM0_START 0x40U +/** End Address of VMEM0 Bank in T26X */ +#define T26x_VMEM0_END 0x20000U +/** Start Address of VMEM1 Bank in T26X */ +#define T26x_VMEM1_START 0x40000U +/** End Address of VMEM1 Bank in T26X */ +#define T26x_VMEM1_END 0x60000U +/** Start Address of VMEM2 Bank in T26X */ +#define T26x_VMEM2_START 0x80000U +/** End Address of VMEM2 Bank in T26X */ +#define T26x_VMEM2_END 0xA0000U +/** Start Address of VMEM3 Bank in T26X */ +#define T26x_VMEM3_START 0xC0000U +/** End Address of VMEM3 Bank in T26X */ +#define T26x_VMEM3_END 0xE0000U + +/** @brief Base address for PVA0 VPU Debug Register space (CSITE_PVA0VPU) */ +#define TEGRA_PVA0_VPU_DBG_BASE 0x24740000U +/** @brief Size (in bytes) of the PVA0 VPU Debug Register space (CSITE_PVA0VPU) */ +#define TEGRA_PVA0_VPU_DBG_SIZE 0x40000U + +void pva_kmd_device_init_t26x(struct pva_kmd_device *pva); + +#endif // PVA_KMD_T26X_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c new file mode 100644 index 00000000..af2d9bc6 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited.
+ */ +#include "pva_api_cmdbuf.h" +#include "pva_api_types.h" +#include "pva_bit.h" +#include "pva_fw.h" +#include "pva_kmd_cmdbuf.h" +#include "pva_kmd_device.h" +#include "pva_kmd_constants.h" +#include "pva_utils.h" +#include "pva_kmd_tegra_stats.h" + +void pva_kmd_device_init_tegra_stats(struct pva_kmd_device *pva) +{ + enum pva_error err = PVA_SUCCESS; + + pva->tegra_stats_buf_size = sizeof(struct pva_kmd_fw_tegrastats); + + pva->tegra_stats_memory = + pva_kmd_device_memory_alloc_map(pva->tegra_stats_buf_size, pva, + PVA_ACCESS_RW, + PVA_R5_SMMU_CONTEXT_ID); + ASSERT(pva->tegra_stats_memory != NULL); + + err = pva_kmd_add_dram_buffer_resource(&pva->dev_resource_table, + pva->tegra_stats_memory, + &pva->tegra_stats_resource_id); + ASSERT(err == PVA_SUCCESS); + pva_kmd_update_fw_resource_table(&pva->dev_resource_table); +} + +void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva) +{ + pva_kmd_drop_resource(&pva->dev_resource_table, + pva->tegra_stats_resource_id); +} + +enum pva_error +pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva, + struct pva_kmd_tegrastats *kmd_tegra_stats) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + struct pva_cmd_get_tegra_stats *cmd; + uint64_t buffer_offset = 0U; + uint32_t fence_val; + enum pva_error err; + struct pva_kmd_fw_tegrastats *fw_tegra_stats; + bool stats_enabled = pva->debugfs_context.stats_enable; + uint64_t duration = 0U; + + /* Power on PVA if not already */ + err = pva_kmd_device_busy(pva); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "pva_kmd_device_busy failed when submitting tegra stats cmd"); + return err; + } + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); + ASSERT(cmd != NULL); + + pva_kmd_set_cmd_get_tegra_stats(cmd, pva->tegra_stats_resource_id, + pva->tegra_stats_buf_size, + buffer_offset, stats_enabled); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("tegra stats cmd submission failed"); + goto cancel_builder; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when getting tegra stats"); + goto err_out; + } + + if (stats_enabled == false) + goto err_out; + + fw_tegra_stats = + (struct pva_kmd_fw_tegrastats *)(pva->tegra_stats_memory->va); + + duration = safe_subu64(fw_tegra_stats->window_end_time, + fw_tegra_stats->window_start_time); + if (duration == 0) { + pva_kmd_print_str("VPU Stats: Duration is zero"); + goto err_out; + } + + pva_kmd_print_str("VPU Stats"); + pva_kmd_print_str_u64("Window Start Time", + fw_tegra_stats->window_start_time); + pva_kmd_print_str_u64("Window End Time", + fw_tegra_stats->window_end_time); + pva_kmd_print_str_u64("Total utilization VPU 0", + fw_tegra_stats->total_utilization[0]); + pva_kmd_print_str_u64("Total utilization VPU 1", + fw_tegra_stats->total_utilization[1]); + pva_kmd_print_str_u64( + "VPU 0 percent utilization", + safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) / + duration); + pva_kmd_print_str_u64( + "VPU 1 percent utilization", + safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) / + duration); + + kmd_tegra_stats->average_vpu_utilization[0] = + safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) / + duration; + 
kmd_tegra_stats->average_vpu_utilization[1] = + safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) / + duration; + kmd_tegra_stats->window_start_time = fw_tegra_stats->window_start_time; + kmd_tegra_stats->window_end_time = fw_tegra_stats->window_end_time; + + err = PVA_SUCCESS; + +cancel_builder: + pva_kmd_cmdbuf_builder_cancel(&builder); +err_out: + pva_kmd_device_idle(pva); + return err; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.h new file mode 100644 index 00000000..4691d48d --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_TEGRA_STATS_H +#define PVA_KMD_TEGRA_STATS_H +#include "pva_kmd_device.h" + +/** + * @brief Structure which holds vpu stats information + */ +struct pva_kmd_tegrastats { + /** Holds vpu utilization as a percentage for each VPU in the PVA */ + uint64_t average_vpu_utilization[PVA_NUM_PVE]; + /** Current state of pva_kmd_tegrastats */ + uint64_t window_start_time; + uint64_t window_end_time; +}; + +void pva_kmd_device_init_tegra_stats(struct pva_kmd_device *pva); + +void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva); + +enum pva_error +pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva, + struct pva_kmd_tegrastats *kmd_tegra_stats); + +#endif diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c new file mode 100644 index 00000000..f68688b9 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_mutex.h" +#include "pva_kmd_utils.h" +#include "pva_kmd_thread_sema.h" +#include "pva_kmd_device_memory.h" +#include +#include +#include +#include +#include + +enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m) +{ + int ret = pthread_mutex_init(m, NULL); + ASSERT(ret == 0); + + return PVA_SUCCESS; +} + +void pva_kmd_mutex_lock(pva_kmd_mutex_t *m) +{ + int ret = pthread_mutex_lock(m); + ASSERT(ret == 0); +} + +void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m) +{ + int ret = pthread_mutex_unlock(m); + ASSERT(ret == 0); +} + +void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m) +{ + int ret = pthread_mutex_destroy(m); + ASSERT(ret == 0); +} + +void *pva_kmd_zalloc(uint64_t size) +{ + return calloc(1, size); +} + +void pva_kmd_free(void *ptr) +{ + free(ptr); +} + +void pva_kmd_fault(void) +{ + abort(); +} + +void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val) +{ + int ret; + + ret = sem_init(sem, 0 /* Only sharing in threads */, val); + ASSERT(ret == 0); +} + +enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem, + uint32_t timeout_ms) +{ + struct timespec ts; + int ret; + ret = clock_gettime(CLOCK_REALTIME, &ts); + ASSERT(ret == 0); + + /* Add timeout (specified in milliseconds) to the current time */ + ts.tv_sec += timeout_ms / 1000; + ts.tv_nsec += (timeout_ms % 1000) * 1000000; + + /* Handle case where nanoseconds exceed 1 second */ + if (ts.tv_nsec >= 1000000000) { + ts.tv_nsec -= 1000000000; + ts.tv_sec += 1; + } + +wait_again: + ret = sem_timedwait(sem, &ts); + if (ret != 0) { + if (errno == ETIMEDOUT) { + pva_kmd_log_err("pva_kmd_sema_wait_timeout Timed out"); + return PVA_TIMEDOUT; + } else if (errno == EINTR) { + goto wait_again; + } else { + FAULT("Unexpected sem_timedwait error"); + } + } + + return PVA_SUCCESS; +} + +void pva_kmd_sema_deinit(pva_kmd_sema_t *sem) +{ + int ret = sem_destroy(sem); + ASSERT(ret == 0); +} + +void pva_kmd_sema_post(pva_kmd_sema_t *sem) +{ + int ret = sem_post(sem); + ASSERT(ret == 0); +} + +struct pva_kmd_device_memory * +pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva, + uint32_t iova_access_flags, + uint32_t smmu_ctx_idx) +{ + struct pva_kmd_device_memory *mem; + enum pva_error err; + + mem = pva_kmd_device_memory_alloc(size); + + if (mem == NULL) { + goto err_out; + } + + err = pva_kmd_device_memory_iova_map(mem, pva, iova_access_flags, + smmu_ctx_idx); + if (err != PVA_SUCCESS) { + goto free_mem; + } + + err = pva_kmd_device_memory_cpu_map(mem); + if (err != PVA_SUCCESS) { + goto iova_unmap; + } + + return mem; +iova_unmap: + pva_kmd_device_memory_iova_unmap(mem); +free_mem: + pva_kmd_device_memory_free(mem); +err_out: + return NULL; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.c new file mode 100644 index 00000000..4cb8396f --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#include "pva_kmd_utils.h" + +void *pva_kmd_zalloc_nofail(uint64_t size) +{ + void *ptr = pva_kmd_zalloc(size); + ASSERT(ptr != NULL); + return ptr; +} + +void pva_kmd_log_err(const char *msg) +{ + pva_kmd_print_str(msg); +} + +void pva_kmd_log_err_u64(const char *msg, uint64_t val) +{ + pva_kmd_print_str_u64(msg, val); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.h new file mode 100644 index 00000000..d5bec4c2 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_utils.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_UTILS_H +#define PVA_KMD_UTILS_H +#include "pva_kmd.h" +#include "pva_api.h" +#include "pva_kmd_shim_utils.h" +#include "pva_bit.h" +#include "pva_utils.h" +#include "pva_plat_faults.h" +#include "pva_math_utils.h" + +#define SIZE_4KB (4 * 1024) + +void pva_kmd_log_err(const char *msg); +void pva_kmd_log_err_u64(const char *msg, uint64_t val); +void *pva_kmd_zalloc_nofail(uint64_t size); + +#endif // PVA_KMD_UTILS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c new file mode 100644 index 00000000..1354ac8b --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_api_types.h" +#include "pva_kmd_vpu_app_auth.h" +#include "pva_kmd_device.h" +#include "pva_kmd_sha256.h" +#include "pva_kmd_utils.h" + +enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena) +{ + enum pva_error err = PVA_SUCCESS; + const char *default_path = pva_kmd_get_default_allowlist(); + size_t default_path_len; + struct pva_vpu_auth *pva_auth = pva_kmd_zalloc(sizeof(*pva_auth)); + if (pva_auth == NULL) { + pva_kmd_log_err("Unable to allocate memory"); + return PVA_NOMEM; + } + + pva->pva_auth = pva_auth; + ASSERT(pva_auth != NULL); + + pva_auth->vpu_hash_keys = NULL; + pva_auth->pva_auth_allow_list_parsed = false; + /**TODO - This will be disabled by default. Authentication will be enabled based on 2 things + * 1. Debug FS (For non production) + * 2. Device tree property (For production) + * Either of the 2 conditions if satisfied will enable authentication + */ + pva_auth->pva_auth_enable = ena; + default_path_len = strnlen(default_path, ALLOWLIST_FILE_LEN); + if (default_path_len > 0U) { + (void)memcpy(pva_auth->pva_auth_allowlist_path, default_path, + default_path_len); + } + + return err; +} + +/** + * \brief + * is_key_match calculates the sha256 key of ELF and checks if it matches with key. 
+ * \param[in] dataptr Pointer to the data for which the sha256 is to be calculated.
+ * \param[in] size Length in bytes of the data for which the sha256 is to be calculated.
+ * \param[in] key The key against which the calculated key is compared for a match.
+ * \return The completion status of the operation. Possible values are:
+ * \ref PVA_SUCCESS Success. The passed-in key matches the calculated key.
+ * \ref PVA_EACCES The passed-in key does not match the calculated key.
+ */
+static enum pva_error is_key_match(uint8_t *dataptr, size_t size,
+				   struct shakey key)
+{
+	enum pva_error err = PVA_SUCCESS;
+	int32_t status = 0;
+	uint32_t calc_key[8];
+	size_t off;
+	struct sha256_ctx ctx1;
+	struct sha256_ctx ctx2;
+
+	sha256_init(&ctx1);
+	off = (size / 64U) * 64U;
+	if (off > 0U) {
+		sha256_update(&ctx1, dataptr, off);
+	}
+
+	/* clone */
+	sha256_copy(&ctx1, &ctx2);
+
+	/* finalize with leftover, if any */
+	sha256_finalize(&ctx2, dataptr + off, size % 64U, calc_key);
+
+	status = memcmp((void *)&(key.sha_key), (void *)calc_key,
+			NVPVA_SHA256_DIGEST_SIZE);
+	if (status != 0) {
+		err = PVA_EACCES;
+	}
+
+	return err;
+}
+
+/**
+ * \brief
+ * Keeps checking all the keys associated with match_hash
+ * against the calculated sha256 key for dataptr, until it finds a match.
+ * \param[in] pallkeys Pointer to array of SHA keys \ref shakey
+ * \param[in] dataptr Pointer to ELF data
+ * \param[in] size Length (in bytes) of ELF data
+ * \param[in] match_hash Pointer to the matching hash structure, \ref struct vpu_hash_vector.
+ * \return Matching status of the calculated key
+ * against the keys associated with match_hash. Possible values:
+ * - PVA_SUCCESS if one of the keys associated with match_hash
+ * matches the calculated sha256 key.
+ * - PVA_EACCES if no match is found.
+ */
+static enum pva_error
+check_all_keys_for_match(struct shakey *pallkeys, uint8_t *dataptr, size_t size,
+			 const struct vpu_hash_vector *match_hash)
+{
+	enum pva_error err = PVA_SUCCESS;
+	uint32_t idx;
+	uint32_t count;
+	uint32_t end;
+	struct shakey key;
+	uint32_t i;
+
+	idx = match_hash->index;
+	count = match_hash->count;
+	end = idx + count;
+	if (end < idx) {
+		err = PVA_ERANGE;
+		goto fail;
+	}
+
+	for (i = 0; i < count; i++) {
+		key = pallkeys[idx + i];
+		err = is_key_match(dataptr, size, key);
+		if (err == PVA_SUCCESS) {
+			break;
+		}
+	}
+fail:
+	return err;
+}
+
+/**
+ * @brief
+ * Helper function for \ref binary_search.
+ * Uses a specific field in @ref pkey to compare with the same field in @ref pbase.
+ * @param[in] pkey Pointer to the object that needs to be compared.
+ * @param[in] pbase Pointer to the starting element of the array.
+ * @retval
+ * - -1 when @ref pkey is less than the starting element of the array pointed to by @ref pbase.
+ * - 1 when @ref pkey is greater than the starting element of the array pointed to by @ref pbase.
+ * - 0 when @ref pkey is equal to the starting element of the array pointed to by @ref pbase.
+ */
+static int32_t compare_hash_value(const struct vpu_hash_vector *pkey,
+				  const struct vpu_hash_vector *pbase)
+{
+	int32_t ret;
+
+	if (pkey->crc32_hash < pbase->crc32_hash) {
+		ret = -1;
+	} else if (pkey->crc32_hash > pbase->crc32_hash) {
+		ret = 1;
+	} else {
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/**
+ * @brief
+ * Calculates a CRC32 checksum.
+ * @param[in] crc Initial CRC value, usually 0.
+ * @param[in] buf Pointer to the buffer whose CRC32 is to be calculated.
+ * @param[in] len Length (in bytes) of data at @ref buf.
+ * @retval The calculated CRC32 value.
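+ *
+ * For reference, this is the common reflected CRC-32 (polynomial 0xEDB88320)
+ * with input and output inversion, so pva_crc32(0, (uint8_t *)"123456789", 9)
+ * should produce the well-known check value 0xCBF43926.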
+ */ +static uint32_t pva_crc32(uint32_t crc, uint8_t *buf, size_t len) +{ + int32_t k; + size_t count; + + count = len; + crc = ~crc; + while (count != 0U) { + crc ^= *buf++; + for (k = 0; k < 8; k++) { + crc = ((crc & 1U) == 1U) ? (crc >> 1U) ^ 0xedb88320U : + crc >> 1U; + } + + count--; + } + + return ~crc; +} + +static const struct vpu_hash_vector * +binary_search(const struct vpu_hash_vector *key, + const struct vpu_hash_vector *base, size_t num_elems, + int32_t (*compare)(const struct vpu_hash_vector *pkey, + const struct vpu_hash_vector *pbase)) +{ + size_t low = 0U; + size_t high; + + if (num_elems == 0U) { + return NULL; + } + + high = num_elems - 1U; + for (;;) { + const struct vpu_hash_vector *mid_elem; + int32_t r; + size_t mid = low + ((high - low) / 2U); + + mid_elem = &(base[mid]); + r = compare(key, mid_elem); + + if (r < 0) { + if (mid == 0U) { + return NULL; + } + + high = mid - 1U; + } else if (r > 0) { + low = mid + 1U; + if (low < mid || low > high) { + return NULL; + } + } else { + return mid_elem; + } + } +} + +static enum pva_error +pva_kmd_vpu_check_sha256_key(struct vpu_hash_key_pair *vpu_hash_keys, + uint8_t *dataptr, size_t size) +{ + enum pva_error err = PVA_SUCCESS; + struct vpu_hash_vector cal_Hash; + const struct vpu_hash_vector *match_Hash; + + cal_Hash.crc32_hash = pva_crc32(0L, dataptr, size); + + match_Hash = (const struct vpu_hash_vector *)binary_search( + &cal_Hash, vpu_hash_keys->pvpu_hash_vector, + vpu_hash_keys->num_hashes, compare_hash_value); + if (match_Hash == NULL) { + pva_kmd_log_err("No Hash Match Found"); + err = PVA_EACCES; + goto fail; + } + + err = check_all_keys_for_match(vpu_hash_keys->psha_key, dataptr, size, + match_Hash); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Match key not found"); + } +fail: + return err; +} + +enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva, + uint8_t *dataptr, size_t size) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_vpu_auth *pva_auth; + + ASSERT(pva != NULL); + ASSERT(dataptr != NULL); + pva_auth = pva->pva_auth; + ASSERT(pva_auth != NULL); + + pva_kmd_mutex_lock(&pva_auth->allow_list_lock); + if (pva_auth->pva_auth_enable) { + pva_dbg_printf("App authentication enabled"); + if (pva_auth->pva_auth_allow_list_parsed == false) { + err = pva_kmd_allowlist_parse(pva); + if (err == PVA_SUCCESS) { + pva_dbg_printf( + "App authentication allowlist parsing successfull"); + } else { + pva_dbg_printf( + "App authentication allowlist parsing failed"); + } + } + + if (err == PVA_SUCCESS) { + err = pva_kmd_vpu_check_sha256_key( + pva_auth->vpu_hash_keys, (uint8_t *)dataptr, + size); + if (err == PVA_SUCCESS) { + pva_dbg_printf( + "App authentication successfull"); + } else { + pva_dbg_printf("App authentication failed : %d", + err); + } + } + } else { + pva_dbg_printf("App authentication disabled"); + } + + pva_kmd_mutex_unlock(&pva_auth->allow_list_lock); + + return err; +} + +static void pva_kmd_allowlist_destroy(struct pva_vpu_auth *pva_auth) +{ + if (pva_auth->vpu_hash_keys != NULL) { + pva_kmd_free(pva_auth->vpu_hash_keys->ptr_file_data); + pva_kmd_free(pva_auth->vpu_hash_keys); + pva_auth->vpu_hash_keys = NULL; + } +} + +enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva) +{ + struct pva_vpu_auth *pva_auth = pva->pva_auth; + enum pva_error err = PVA_SUCCESS; + uint8_t *data = NULL; + uint64_t size = 0; + struct vpu_hash_key_pair *vhashk; + size_t vkey_size = 0; + size_t vhash_size = 0; + + ASSERT(pva_auth != NULL); + + //Destroy previously parsed allowlist data + 
pva_kmd_allowlist_destroy(pva_auth); + + err = pva_kmd_auth_allowlist_load( + pva, pva_auth->pva_auth_allowlist_path, &data, &size); + if (err != PVA_SUCCESS) { + if (data != NULL) { + pva_kmd_free(data); + } + goto fail; + } + vhashk = (struct vpu_hash_key_pair *)pva_kmd_zalloc( + sizeof(struct vpu_hash_key_pair)); + if (vhashk == NULL) { + pva_kmd_log_err("Unable to allocate memory"); + pva_kmd_free(data); + err = PVA_NOMEM; + goto fail; + } + + vhashk->ptr_file_data = data; + vhashk->num_keys = ((uint32_t *)(uintptr_t)data)[0]; + vhashk->psha_key = + (struct shakey *)(uintptr_t)(data + sizeof(uint32_t)); + vkey_size = sizeof(struct shakey) * (vhashk->num_keys); + vhashk->num_hashes = ((uint32_t *)(uintptr_t)((char *)vhashk->psha_key + + vkey_size))[0]; + vhashk->pvpu_hash_vector = + (struct vpu_hash_vector + *)(uintptr_t)((char *)(vhashk->psha_key) + vkey_size + + sizeof(uint32_t)); + vhash_size = sizeof(struct vpu_hash_vector) * (vhashk->num_hashes); + if ((sizeof(uint32_t) + sizeof(uint32_t) + vkey_size + vhash_size) != + size) { + pva_kmd_free(data); + pva_kmd_free(vhashk); + err = PVA_EACCES; + goto fail; + } + + pva_auth->pva_auth_allow_list_parsed = true; + pva_auth->vpu_hash_keys = vhashk; + +fail: + return err; +} \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h new file mode 100644 index 00000000..967a57fd --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef PVA_KMD_VPU_APP_AUTH_H +#define PVA_KMD_VPU_APP_AUTH_H + +#include "pva_kmd_shim_vpu_app_auth.h" +#include "pva_kmd_mutex.h" + +/** + * Maximum length of allowlist file path + */ +#define ALLOWLIST_FILE_LEN 128U + +/** + * Size of sha256 keys in bytes. + */ +#define NVPVA_SHA256_DIGEST_SIZE 32U + +struct pva_kmd_device; +/** + * Array of all VPU Hash'es + */ +struct vpu_hash_vector { + /*! Number of Keys for this crc32_hash */ + uint32_t count; + /*! Starting Index into Keys Array */ + uint32_t index; + /*! CRC32 hash value */ + uint32_t crc32_hash; +}; + +/** + * Stores sha256 key + */ +struct shakey { + /** 256-bit (32 Bytes) SHA Key */ + uint8_t sha_key[NVPVA_SHA256_DIGEST_SIZE]; +}; + +/** + * Stores Hash Vector and Keys vector + */ +struct vpu_hash_key_pair { + /*! Total number of Keys in binary file */ + uint32_t num_keys; + /*! pointer to SHA keys Array. */ + struct shakey *psha_key; + /*! Total number of Hashes in binary file */ + uint32_t num_hashes; + /*! pointer to Array of Hash'es */ + struct vpu_hash_vector *pvpu_hash_vector; + /*! pointer to data loaded from file (QNX Specific)*/ + uint8_t *ptr_file_data; +}; + +/** + * Stores all the information related to pva vpu elf authentication. + */ +struct pva_vpu_auth { + /** Stores crc32-sha256 of ELFs */ + struct vpu_hash_key_pair *vpu_hash_keys; + pva_kmd_mutex_t allow_list_lock; + /** Flag to check if allowlist is enabled */ + bool pva_auth_enable; + /** Flag to track if the allow list is already parsed */ + bool pva_auth_allow_list_parsed; + /** Stores the path to allowlist binary file. 
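+ * The file at this path is expected to contain, in order: a uint32_t key
+ * count, an array of struct shakey, a uint32_t hash count, and an array of
+ * struct vpu_hash_vector (see pva_kmd_allowlist_parse()). The buffer is
+ * sized for ALLOWLIST_FILE_LEN characters plus a NUL terminator.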
*/ + char pva_auth_allowlist_path[ALLOWLIST_FILE_LEN + 1U]; +}; + +enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena); + +enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva, + uint8_t *dataptr, size_t size); + +enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva); + +#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.c new file mode 100644 index 00000000..8a2c06f4 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "pva_kmd_device.h" +#include "pva_math_utils.h" +#include "pva_kmd_vpu_ocd.h" +#include "pva_kmd_silicon_utils.h" + +#define PVA_DEBUG_APERTURE_INDEX 1U + +int pva_kmd_vpu_ocd_open(struct pva_kmd_device *dev) +{ + int retval = 0; + enum pva_error err; + err = pva_kmd_device_busy(dev); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "pva_kmd_vpu_ocd_open pva_kmd_device_busy failed"); + retval = -1; + goto out; + } +out: + return retval; +} + +int pva_kmd_vpu_ocd_release(struct pva_kmd_device *dev) +{ + pva_kmd_device_idle(dev); + return 0; +} + +int64_t pva_kmd_vpu_ocd_write(struct pva_kmd_device *dev, void *file_data, + const uint8_t *data, uint64_t offset, + uint64_t size) +{ + struct pva_vpu_ocd_write_param write_param; + uint32_t i; + unsigned long retval; + uint32_t reg_offset; + uint32_t const *vpu_ocd_offset = (uint32_t *)file_data; + + retval = pva_kmd_copy_data_from_user(&write_param, data, + sizeof(write_param)); + if (retval != 0u) { + pva_kmd_log_err("Failed to copy write buffer from user"); + return -1; + } + + if (write_param.n_write > VPU_OCD_MAX_NUM_DATA_ACCESS) { + pva_kmd_log_err_u64("pva: too many vpu dbg reg write", + write_param.n_write); + return -1; + } + + /* Write instruction first */ + pva_kmd_aperture_write(dev, PVA_DEBUG_APERTURE_INDEX, *vpu_ocd_offset, + write_param.instr); + + /* + * Write data + * if there's 1 word, write to addr 0x4, + * if there's 2 words, write to addr 2 * 0x4, + * ... 
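+ * For example (illustration only), n_write == 3 places data[0..2] at the
+ * single data register located at *vpu_ocd_offset + 3 * 0x4; the loop below
+ * writes all words to that same offset.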
+ */ + reg_offset = safe_addu32((uint32_t)*vpu_ocd_offset, + safe_mulu32(write_param.n_write, + (uint32_t)sizeof(uint32_t))); + for (i = 0u; i < write_param.n_write; i++) { + pva_kmd_aperture_write(dev, PVA_DEBUG_APERTURE_INDEX, + reg_offset, write_param.data[i]); + } + + return 0; +} + +int64_t pva_kmd_vpu_ocd_read(struct pva_kmd_device *dev, void *file_data, + uint8_t *data, uint64_t offset, uint64_t size) +{ + struct pva_vpu_ocd_read_param read_param; + unsigned long retval; + uint32_t i; + uint32_t reg_offset; + uint32_t const *vpu_ocd_offset = (uint32_t *)file_data; + + retval = pva_kmd_copy_data_from_user(&read_param, data, + sizeof(read_param)); + if (retval != 0u) { + pva_kmd_log_err("failed to copy read buffer from user"); + return -1; + } + + if (read_param.n_read > VPU_OCD_MAX_NUM_DATA_ACCESS) { + pva_kmd_log_err_u64("pva: too many vpu dbg reg read", + read_param.n_read); + return -1; + } + + /* + * Read data + * if there's 1 word, read from addr 0x4, + * if there's 2 words, read from addr 2 * 0x4, + * ... + */ + reg_offset = safe_addu32((uint32_t)*vpu_ocd_offset, + safe_mulu32(read_param.n_read, + (uint32_t)sizeof(uint32_t))); + for (i = 0; i < read_param.n_read; i++) { + read_param.data[i] = pva_kmd_aperture_read( + dev, PVA_DEBUG_APERTURE_INDEX, reg_offset); + } + + retval = pva_kmd_copy_data_to_user(data, &read_param, + sizeof(read_param)); + if (retval != 0u) { + pva_kmd_log_err("failed to copy read buffer to user"); + return -1; + } + + return 0; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.h new file mode 100644 index 00000000..d202adcf --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_ocd.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_VPU_OCD_H +#define PVA_KMD_VPU_OCD_H + +#define VPU_OCD_MAX_NUM_DATA_ACCESS 7U + +struct pva_vpu_ocd_write_param { + uint32_t instr; + uint32_t n_write; + uint32_t data[VPU_OCD_MAX_NUM_DATA_ACCESS]; +}; + +struct pva_vpu_ocd_read_param { + uint32_t n_read; + uint32_t data[VPU_OCD_MAX_NUM_DATA_ACCESS]; +}; + +int64_t pva_kmd_vpu_ocd_read(struct pva_kmd_device *dev, void *file_data, + uint8_t *data, uint64_t offset, uint64_t size); +int64_t pva_kmd_vpu_ocd_write(struct pva_kmd_device *dev, void *file_data, + const uint8_t *data, uint64_t offset, + uint64_t size); +int pva_kmd_vpu_ocd_open(struct pva_kmd_device *dev); +int pva_kmd_vpu_ocd_release(struct pva_kmd_device *dev); + +#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_plat_faults.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_plat_faults.h new file mode 100644 index 00000000..6e879d51 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_plat_faults.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */
+
+#ifndef PVA_PLAT_FAULTS_H
+#define PVA_PLAT_FAULTS_H
+
+#include "pva_kmd_shim_utils.h"
+
+#define ASSERT(x)                                                      \
+	if (!(x)) {                                                    \
+		pva_kmd_print_str_u64("PVA KMD ASSERT at " __FILE__,   \
+				      __LINE__);                       \
+		pva_kmd_fault();                                       \
+	}
+
+#define FAULT(msg)                                                             \
+	do {                                                                   \
+		pva_kmd_print_str_u64("PVA KMD FAULT at " __FILE__, __LINE__); \
+		pva_kmd_print_str(msg);                                        \
+		pva_kmd_fault();                                               \
+	} while (0)
+
+#define ASSERT_WITH_LOC(x, err_file, err_line)                         \
+	if (!(x)) {                                                    \
+		pva_kmd_print_str_u64("Error at line", err_line);      \
+		pva_kmd_print_str(err_file);                           \
+		pva_kmd_print_str("PVA KMD ASSERT");                   \
+		pva_kmd_fault();                                       \
+	}
+
+#endif
\ No newline at end of file
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h
new file mode 100644
index 00000000..20dc3b72
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_device_memory.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved.
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and
+ * proprietary rights in and to this software and related documentation. Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */
+
+#ifndef PVA_KMD_DEVICE_MEMORY_H
+#define PVA_KMD_DEVICE_MEMORY_H
+#include "pva_kmd.h"
+#include "pva_api.h"
+struct pva_kmd_context;
+
+/**
+ * @brief KMD device memory structure.
+ *
+ * This structure is essentially a base object. More information is needed to
+ * manage memory allocations, but the required information is platform
+ * dependent. Therefore, each platform will have a derived implementation and
+ * this structure is just part of it.
+ */
+struct pva_kmd_device_memory {
+	uint64_t iova; /**< IOVA address if mapped. Otherwise 0. */
+	void *va; /**< CPU address if mapped. Otherwise 0. */
+	uint64_t size; /**< Size of the mapping. */
+	struct pva_kmd_device *pva; /**< The PVA this memory is mapped to. */
+	uint32_t smmu_ctx_idx; /**< The SMMU context this memory is mapped to. */
+};
+
+/**
+ * This API is not available in Linux and should not be used by the common code.
+ */
+struct pva_kmd_device_memory *pva_kmd_device_memory_alloc(uint64_t size);
+
+/**
+ * Allocate memory and map it to both IOVA space and CPU space.
+ *
+ * @note We cannot allocate without mapping, nor map to only one of the two
+ * spaces. This restriction comes from the Linux dma_alloc_coherent API, which
+ * allocates and maps at the same time.
+ *
+ * @note iova_access_flags is only supported by the QNX implementation.
+ *
+ * @param size Size of the allocation
+ * @param pva The PVA device to map to
+ * @param iova_access_flags Access flags for IOVA space. PVA_ACCESS_RO or
+ *                          PVA_ACCESS_RW. For CPU space, it's always
+ *                          read and write.
+ * @param smmu_ctx_idx The SMMU context to map to
+ */
+struct pva_kmd_device_memory *
+pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva,
+				uint32_t iova_access_flags,
+				uint32_t smmu_ctx_idx);
+/** @brief Acquire memory shared from UMD.
+ * + * This function takes a shared ownership of the memory allocation so that KMD + * can keep the allocation alive even after UMD closed the memory handle. + * + * @param memory_handle Memory handle passed from user space. On Linux, this is + * a file descriptor associated with dma_buf object. On + * QNX, this is NvRM import ID. + * @param offset Offset into the allocation. This affects the mapped address. + * @param size Size of the mapping, which can be smaller than the size of the + * allocation. + * @param ctx The user from whom we are importing the memory. + */ +struct pva_kmd_device_memory * +pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset, + uint64_t size, struct pva_kmd_context *ctx); +/** + * @brief Release the memory. + * + * This function frees memory allocated from acquire or alloc_map. If there are + * active CPU mapping or IOVA mapping, this function will unmap them. + * + * @param memory Pointer to the memory to release. + */ +void pva_kmd_device_memory_free(struct pva_kmd_device_memory *memory); + +/** + * @brief Map the memory to CPU space. + */ +enum pva_error +pva_kmd_device_memory_cpu_map(struct pva_kmd_device_memory *memory); + +/** + * @brief Unmap the memory from CPU space. + * + * Unmap a not mapped memory will trigger abort. + */ +void pva_kmd_device_memory_cpu_unmap(struct pva_kmd_device_memory *memory); + +/** + * @brief Map the memory to IOVA space. + */ +enum pva_error +pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory, + struct pva_kmd_device *pva, + uint32_t access_flags, uint32_t smmu_ctx_idx); +/** + * @brief Unmap the memory from IOVA space. + * + * Unmap a not mapped memory will trigger abort. + */ +void pva_kmd_device_memory_iova_unmap(struct pva_kmd_device_memory *memory); + +#endif // PVA_KMD_DEVICE_MEMORY_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_ccq.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_ccq.h new file mode 100644 index 00000000..c2f47953 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_ccq.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SHIM_CCQ_H +#define PVA_KMD_SHIM_CCQ_H +#include "pva_api.h" +struct pva_kmd_device; + +/** + * @brief Push a 64 bit entry to CCQ FIFO. + * + * Push low 32 bits first and then high 32 bits. + * + * @note The caller is responsible for checking if CCQ has enough spaces. + * + */ +void pva_kmd_ccq_push(struct pva_kmd_device *pva, uint8_t ccq_id, + uint64_t ccq_entry); +/** + * @brief Get the number of available spaces in the CCQ. + * + * One CCQ entry is 64 bits. One CCQ can hold up to 4 entries. Therefore, this + * function returns values from 0 to 4. 
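+ *
+ * A typical caller (hypothetical sketch) checks the space before pushing:
+ *
+ *   if (pva_kmd_get_ccq_space(pva, ccq_id) > 0U) {
+ *       pva_kmd_ccq_push(pva, ccq_id, ccq_entry);
+ *   }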
+ */ +uint32_t pva_kmd_get_ccq_space(struct pva_kmd_device *pva, uint8_t ccq_id); + +#endif // PVA_KMD_SHIM_CCQ_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h new file mode 100644 index 00000000..4a23922d --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_debugfs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SHIM_DEBUGFS_H +#define PVA_KMD_SHIM_DEBUGFS_H +#include "pva_api.h" +#include "pva_kmd_tegra_stats.h" + +void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name, + bool *val); +void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name, + uint32_t *val); +void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name, + struct pva_kmd_file_ops *fops); +void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva); +unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src, + uint64_t size); +unsigned long pva_kmd_copy_data_to_user(void *to, const void *from, + unsigned long size); +unsigned long pva_kmd_strtol(const char *str, int base); + +#endif //PVA_KMD_SHIM_DEBUGFS_H \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h new file mode 100644 index 00000000..de69bf55 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_init.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SHIM_INIT_H +#define PVA_KMD_SHIM_INIT_H +#include "pva_api.h" +struct pva_kmd_device; +struct pva_kmd_file_ops; + +/* TODO: remove plat_init APIs. We should just pass in plat_data directly to + * pva_kmd_device_create. */ +void pva_kmd_device_plat_init(struct pva_kmd_device *pva); +void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva); + +void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id, + uint32_t *syncpt_value); + +void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id, + uint64_t *syncpt_iova); + +void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva); + +/** + * @brief Power on PVA cluster. + */ +enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva); + +/** + * @brief Power off PVA cluster. + */ +void pva_kmd_power_off(struct pva_kmd_device *pva); + +/** + * @brief Initialize firmware. + * + * This function initializes firmware. On silicon, this includes + * - power on R5, + * - load firmware, + * - bind interrupts, + * - and wait for firmware boot to complete. 
+ * + * @param pva pointer to the PVA device to initialize + */ +enum pva_error pva_kmd_init_fw(struct pva_kmd_device *pva); + +/** + * @brief De-init firmware. + * + * This function de-initializes firmware. On silicon, this includes + * - free interrupts, + * - power off R5, + * - and free firmware memories. + * + * @param pva pointer to the PVA device to de-initialize + */ +void pva_kmd_deinit_fw(struct pva_kmd_device *pva); +#endif // PVA_KMD_SHIM_INIT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h new file mode 100644 index 00000000..6a18ba92 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_silicon.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#ifndef PVA_KMD_SHIM_SILICON_H +#define PVA_KMD_SHIM_SILICON_H +#include "pva_api.h" +#include "pva_kmd_regs.h" +struct pva_kmd_device; + +/** + * @file This file defines silicon APIs. + * + * Silicon APIs are only implemented by platforms that closely resemble the + * silicon PVA, a.k.a Linux, QNX and SIM platforms. Silicon APIs are used to + * implement message APIs and some init APIs. + * + * On native platform, message APIs are implemented differently. Therefore, + * native platform does not need to implement silicon APIs. + */ + +/** + * @brief Write to a register in a MMIO region. + * + * @param pva pointer the PVA cluser. + * @param aperture the MMIO region. + * @param addr the register offset in the MMIO region. + * @param val value to write. + */ +void pva_kmd_aperture_write(struct pva_kmd_device *pva, + enum pva_kmd_reg_aperture aperture, uint32_t addr, + uint32_t val); +/** + * @brief Read from a register in a MMIO region. + * + * @param pva pointer the PVA cluser. + * @param aperture the MMIO region. + * @param addr the register offset in the MMIO region. + * + * @return the value of the register. + */ +uint32_t pva_kmd_aperture_read(struct pva_kmd_device *pva, + enum pva_kmd_reg_aperture aperture, + uint32_t addr); + +/** + * @brief PVA's interrupt lines. + */ +enum pva_kmd_intr_line { + /** Interrupt line from SEC block. We receive mailbox interrupts from + * this line. */ + PVA_KMD_INTR_LINE_SEC_LIC = 0, + PVA_KMD_INTR_LINE_CCQ0, + PVA_KMD_INTR_LINE_CCQ1, + PVA_KMD_INTR_LINE_CCQ2, + PVA_KMD_INTR_LINE_CCQ3, + PVA_KMD_INTR_LINE_CCQ4, + PVA_KMD_INTR_LINE_CCQ5, + PVA_KMD_INTR_LINE_CCQ6, + PVA_KMD_INTR_LINE_CCQ7, + PVA_KMD_INTR_LINE_COUNT, +}; + +/** + * @brief Interrupt handler function prototype. + */ +typedef void (*pva_kmd_intr_handler_t)(void *data); + +/** + * @brief Bind an interrupt handler to an interrupt line. + * + * Interrupt will be enabled after binding. + */ +enum pva_error pva_kmd_bind_intr_handler(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line, + pva_kmd_intr_handler_t handler, + void *data); +/** + * @brief Enable an interrupt line. + */ +void pva_kmd_enable_intr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line); + +/** + * @brief Disable an interrupt line. 
+ */ +void pva_kmd_disable_intr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line); + +/** + * @brief Free an interrupt line. + * + * This will disable the interrupt line and unbind the handler. + */ +void pva_kmd_free_intr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line); + +/** + * @brief Read firmware binary from file system. + * + * Firmware binary is loaded into pva->fw_bin_mem, which is directly accessible + * by R5. + * + * KMD will free pva->fw_bin_mem during firmware deinit. + */ +enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva); + +/** + * @brief Get base address of read only syncpoints. + */ +uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva); + +/** + * @brief Get base address of read write syncpoints. + */ +uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva); + +/** + * @brief Configure EVP, Segment config registers and SCR registers. + * + * This function configures the EVP, Segment config registers and SCR registers. + * + * @param pva Pointer to the PVA device. + */ +void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva); + +/** + * @brief Configure SID registers. + * + * This function configures the SID registers. + * + * @param pva Pointer to the PVA device. + */ +void pva_kmd_config_sid_regs(struct pva_kmd_device *pva); + +#endif // PVA_KMD_SHIM_SILICON_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h new file mode 100644 index 00000000..a02adb27 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_SHIM_UTILS_H +#define PVA_KMD_SHIM_UTILS_H +#include "pva_api.h" + +/** + * @brief Allocate memory for KMD's private use. + * + * Memory will be zero initialized. + */ +void *pva_kmd_zalloc(uint64_t size); + +/** + * @brief Free memory allocated by pva_kmd_zalloc. + */ +void pva_kmd_free(void *ptr); + +/** + * @brief Print a string. + * + * This function is used for logging errors, enabled even in safety environment. + * For debug print, use pva_dbg_printf. + * + * @param str The string to print. + */ +void pva_kmd_print_str(const char *str); + +/** + * @brief Print a string followed by a 64-bit unsigned number. + * + * This function is used for logging errors, enabled even in safety environment. + * For debug print, use pva_dbg_printf. + * + * @param str The string to print. + * @param n The number to print. + */ +void pva_kmd_print_str_u64(const char *str, uint64_t n); + +/** + * @brief Fault KMD. + * + * Abort KMD due to critical unrecoverable error. + */ +void pva_kmd_fault(void) __attribute__((noreturn)); + +/** + * @brief Sleep for some microseconds. + * + * @param us The number of microseconds to sleep. + */ +void pva_kmd_sleep_us(uint64_t us); + +#if defined(__KERNEL__) +#include +#else +static inline uint32_t array_index_nospec(uint32_t index, uint32_t size) +{ + return index < size ? 
index : 0; +} +#endif + +#endif // PVA_KMD_SHIM_UTILS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h new file mode 100644 index 00000000..9ea61cd2 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021-2023, NVIDIA Corporation. All rights reserved. + */ + +#ifndef PVA_KMD_SHIM_VPU_APP_AUTH_H +#define PVA_KMD_SHIM_VPU_APP_AUTH_H + +#include "pva_api_types.h" +struct pva_kmd_device; +const char *pva_kmd_get_default_allowlist(void); +enum pva_error pva_kmd_auth_allowlist_load(struct pva_kmd_device *pva, + const char *file_name, + uint8_t **hash_keys_data, + uint64_t *psize); + +#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_thread_sema.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_thread_sema.h new file mode 100644 index 00000000..abdd91da --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_thread_sema.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_THREAD_SEMA_H +#define PVA_KMD_THREAD_SEMA_H + +#include "pva_api.h" + +#if defined(__KERNEL__) /* For Linux */ + +#include +typedef struct semaphore pva_kmd_sema_t; + +#else /* For user space code, including QNX KMD */ + +#include +/* Mutex */ +typedef sem_t pva_kmd_sema_t; + +#endif + +/** + * @brief Initialize a semaphore. + * + * @param sem Pointer to the semaphore. + * @param val Initial value of the semaphore. + */ +void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val); + +/** + * @brief Wait on a semaphore. + * + * Decrement the semaphore count. If the count is zero, the caller will block + * until the semaphore is posted or the timeout expires. + * + * @param sem Pointer to the semaphore. + * @param timeout_ms Timeout in milliseconds. + * + * @retval PVA_SUCCESS if the semaphore was successfully acquired. + * @retval PVA_TIMEDOUT if the semaphore was not acquired within the timeout. + */ +enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem, + uint32_t timeout_ms); + +/** + * @brief Signal a semaphore. + * + * Increment the semaphore count. + * + * @param sem Pointer to the semaphore. + */ +void pva_kmd_sema_post(pva_kmd_sema_t *sem); + +/** + * @brief Deinitialize a semaphore. + * + * @param sem Pointer to the semaphore. + */ +void pva_kmd_sema_deinit(pva_kmd_sema_t *sem); + +#endif // PVA_KMD_THREAD_SEMA_H diff --git a/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h b/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h new file mode 100644 index 00000000..a37e5e08 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. 
Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_H +#define PVA_KMD_H +#include "pva_api.h" +#include "pva_fw.h" +#include "pva_constants.h" +#include "pva_math_utils.h" + +/* KMD API: context init */ +struct pva_kmd_context_init_in_args { + uint32_t resource_table_capacity; +}; + +struct pva_kmd_context_init_out_args { + enum pva_error error; + uint64_t ccq_shm_hdl; +}; + +struct pva_kmd_syncpt_register_out_args { + enum pva_error error; + uint32_t syncpt_ro_res_id; + uint32_t syncpt_rw_res_id; + uint32_t synpt_size; + uint32_t synpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT]; + uint32_t num_ro_syncpoints; +}; + +/** + * Calculates the total memory size required for a PVA submission queue. + * This includes the size of the queue header and the combined size of all command buffer submission info structures. + * + * @param x The number of command buffer submission info structures. + * @return The total memory size in bytes. + */ +static inline uint32_t pva_get_submission_queue_memory_size(uint32_t x) +{ + uint32_t submit_info_size = + (uint32_t)sizeof(struct pva_fw_cmdbuf_submit_info); + uint32_t num_submit_infos = safe_mulu32(x, submit_info_size); + uint32_t header_size = + (uint32_t)sizeof(struct pva_fw_submit_queue_header); + return safe_addu32(header_size, num_submit_infos); +} + +/* KMD API: queue create */ +struct pva_kmd_queue_create_in_args { + uint32_t max_submission_count; + uint64_t queue_memory_handle; + uint64_t queue_memory_offset; +}; + +struct pva_kmd_queue_create_out_args { + enum pva_error error; + uint32_t queue_id; + uint32_t syncpt_fence_counter; +}; + +/* KMD API: queue destroy */ +struct pva_kmd_queue_destroy_in_args { + uint32_t queue_id; +}; + +struct pva_kmd_queue_destroy_out_args { + enum pva_error error; +}; + +struct pva_kmd_memory_register_in_args { + enum pva_memory_segment segment; + uint32_t access_flags; + uint64_t memory_handle; + uint64_t offset; + uint64_t size; +}; + +/* KMD API: executable */ +struct pva_kmd_executable_register_in_args { + uint32_t size; +}; + +struct pva_kmd_executable_get_symbols_in_args { + uint32_t exec_resource_id; +}; + +struct pva_kmd_executable_get_symbols_out_args { + enum pva_error error; + uint32_t num_symbols; + /* Followed by of struct pva_symbol_info */ +}; + +/* KMD API: DMA config */ +struct pva_kmd_dma_config_register_in_args { + struct pva_dma_config_header dma_config_header; + /* Followed by hwseq words, channels, descriptors, etc. 
*/ +}; + +struct pva_kmd_register_out_args { + enum pva_error error; + uint32_t resource_id; +}; + +struct pva_kmd_exec_register_out_args { + enum pva_error error; + uint32_t resource_id; + uint32_t num_symbols; +}; + +struct pva_kmd_unregister_in_args { + uint32_t resource_id; +}; + +enum pva_kmd_op_type { + PVA_KMD_OP_CONTEXT_INIT, + PVA_KMD_OP_QUEUE_CREATE, + PVA_KMD_OP_QUEUE_DESTROY, + PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS, + PVA_KMD_OP_MEMORY_REGISTER, + PVA_KMD_OP_SYNPT_REGISTER, + PVA_KMD_OP_EXECUTABLE_REGISTER, + PVA_KMD_OP_DMA_CONFIG_REGISTER, + PVA_KMD_OP_UNREGISTER, + PVA_KMD_OP_MAX, +}; + +/** + * The header of a KMD operation + */ +struct pva_kmd_op_header { + enum pva_kmd_op_type op_type; /**< Type of the KMD operation */ +}; + +/** + * The header of a KMD response + */ +struct pva_kmd_response_header { + uint32_t rep_size; /** Size of the response, including the header */ +}; + +enum pva_kmd_ops_mode { + /** + * Only one operation is allowed. The + * operation will be done synchronously. + * KMD will wait for the fence if + * necessary. */ + PVA_KMD_OPS_MODE_SYNC, + /** + * A list of registration operations are allowed. These operations will + * trigger a post fence. KMD will not wait for the fence. + */ + PVA_KMD_OPS_MODE_ASYNC, +}; + +/** + * A buffer contains a list of KMD operations and a post fence. + * + * In general, the list of KMD operations contain jobs that need to be done by + * the KMD and FW. KMD will first perform its part and then submit a privileged + * command buffer to FW. FW will trigger the provided post fence when done. + * + * NOTE: Starting address of every struct/array in the buffer must be aligned to + * 8 bytes. + */ +struct pva_kmd_operations { + enum pva_kmd_ops_mode mode; + struct pva_fw_postfence postfence; + /** Followed by a list of KMD operation(s) */ +}; + +/* Max op buffer sizer is 8 MB */ +#define PVA_KMD_MAX_OP_BUFFER_SIZE (8 * 1024 * 1024) + +/* Max respone size is 8 KB */ +#define PVA_KMD_MAX_RESP_BUFFER_SIZE (8 * 1024) + +#endif // PVA_KMD_H diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild b/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild new file mode 100644 index 00000000..8372206f --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild @@ -0,0 +1,19 @@ +################################### tell Emacs this is a -*- makefile-gmake -*- +# +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +############################################################################### + +obj-m := pva_kmd_linux.o + +pva_kmd_linux-objs += ${PVA_KMD_LINUX_SRC} + +ccflags-y += ${PVA_KMD_LINUX_INC} +ccflags-y += ${PVA_KMD_LINUX_DEF} +ccflags-y += ${PVA_KMD_LINUX_CFLAGS} +ccflags-y += -std=gnu11 diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h b/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h new file mode 100644 index 00000000..4141447e --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. 
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_KMD_LINUX_H +#define PVA_KMD_LINUX_H + +#include "pva_kmd.h" + +#define PVA_LINUX_DEV_PATH_PREFIX "/dev/nvhost-ctrl-pva" + +#define NVPVA_IOCTL_MAGIC 'Q' + +#define PVA_KMD_IOCTL_GENERIC \ + _IOWR(NVPVA_IOCTL_MAGIC, 1, struct pva_kmd_linux_ioctl_header) + +#define NVPVA_IOCTL_MAX_SIZE 256 //Temp value which can be updated later + +struct nvpva_ioctl_part { + void *addr; + uint64_t size; +}; + +/** + * The header of request to KMD + */ +struct pva_kmd_linux_ioctl_header { + struct nvpva_ioctl_part request; + struct nvpva_ioctl_part response; +}; + +#endif // PVA_KMD_LINUX_H \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c new file mode 100644 index 00000000..6cfdb153 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "pva_kmd_linux.h" +#include "pva_kmd_linux_device.h" +#include "pva_kmd_debugfs.h" + +static int pva_handle_fops(struct seq_file *s, void *data) +{ + return 0; +} + +static int debugfs_node_open(struct inode *inode, struct file *file) +{ + int retval; + struct pva_kmd_file_ops *fops = file_inode(file)->i_private; + retval = single_open(file, pva_handle_fops, inode->i_private); + if (retval != 0) { + pva_kmd_log_err("debugfs_node_open single_open failed"); + goto out; + } + + if (fops->open != NULL) { + retval = fops->open(fops->pdev); + } + +out: + return retval; +} + +static int debugfs_node_release(struct inode *inode, struct file *file) +{ + int retval; + struct pva_kmd_file_ops *fops = file_inode(file)->i_private; + + if (fops->release != NULL) { + retval = fops->release(fops->pdev); + if (retval != 0) { + pva_kmd_log_err("debugfs_node_release release failed"); + goto out; + } + } + + retval = single_release(inode, file); + +out: + return retval; +} + +static long int debugfs_node_read(struct file *file, char *data, + long unsigned int size, long long int *offset) +{ + int64_t retval; + struct pva_kmd_file_ops *fops = file_inode(file)->i_private; + retval = fops->read(fops->pdev, fops->file_data, data, *offset, size); + + return retval; +} + +static long int debugfs_node_write(struct file *file, const char *data, + long unsigned int size, + long long int *offset) +{ + long int retval; + struct pva_kmd_file_ops *fops = file_inode(file)->i_private; + retval = fops->write(fops->pdev, fops->file_data, data, *offset, size); + + return retval; +} + +static const struct file_operations pva_linux_debugfs_fops = { + .open = debugfs_node_open, + .read = 
debugfs_node_read, + .write = debugfs_node_write, + .release = debugfs_node_release, +}; + +void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name, + bool *pdata) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + struct dentry *de = props->debugfs; + + debugfs_create_bool(name, 0644, de, pdata); +} +void pva_kmd_debugfs_create_u32(struct pva_kmd_device *pva, const char *name, + uint32_t *pdata) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + struct dentry *de = props->debugfs; + + debugfs_create_u32(name, 0644, de, pdata); +} + +void pva_kmd_debugfs_create_file(struct pva_kmd_device *pva, const char *name, + struct pva_kmd_file_ops *pvafops) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + struct dentry *de = props->debugfs; + struct file_operations *fops = + (struct file_operations *)&pva_linux_debugfs_fops; + struct dentry *file; + + file = debugfs_create_file(name, 0644, de, pvafops, fops); + ASSERT(file != NULL); +} + +void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + struct dentry *de = props->debugfs; + + debugfs_lookup_and_remove("stats_enable", de); + debugfs_lookup_and_remove("vpu_debug", de); + debugfs_lookup_and_remove("profile_level", de); + debugfs_lookup_and_remove("vpu_stats", de); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c new file mode 100644 index 00000000..d64fa1fc --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pva_kmd_device.h" +#include "pva_kmd_linux_device.h" +#include "pva_kmd_device_memory.h" +#include "pva_kmd_constants.h" +#include "pva_kmd_silicon_utils.h" +#include "pva_kmd_silicon_boot.h" + +struct nvhost_device_data * +pva_kmd_linux_device_get_properties(struct platform_device *pdev) +{ + struct nvhost_device_data *props = platform_get_drvdata(pdev); + return props; +} + +struct pva_kmd_linux_device_data * +pva_kmd_linux_device_get_data(struct pva_kmd_device *device) +{ + return (struct pva_kmd_linux_device_data *)device->plat_data; +} + +void pva_kmd_linux_device_set_data(struct pva_kmd_device *device, + struct pva_kmd_linux_device_data *data) +{ + device->plat_data = (void *)data; +} + +void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id, + uint32_t *syncpt_value) +{ + int err = 0; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + err = nvhost_syncpt_read_ext_check(props->pdev, syncpt_id, + syncpt_value); + if (err < 0) { + FAULT("Failed to read syncpoint value\n"); + } +} + +void pva_kmd_get_syncpt_iova(struct pva_kmd_device *pva, uint32_t syncpt_id, + uint64_t *syncpt_iova) +{ + uint32_t offset = 0; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + + struct platform_device *host_pdev = + to_platform_device(props->pdev->dev.parent); + + offset = nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev, + syncpt_id); + *syncpt_iova = safe_addu64(pva->syncpt_ro_iova, (uint64_t)offset); +} + +void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva) +{ + phys_addr_t base; + size_t size; + int err = 0; + uint32_t syncpt_page_size; + uint32_t syncpt_offset[PVA_NUM_RW_SYNCPTS]; + dma_addr_t sp_start; + struct platform_device *host_pdev; + struct device *dev; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + nvhost_syncpt_unit_interface_init(props->pdev); + + host_pdev = to_platform_device(props->pdev->dev.parent); + err = nvhost_syncpt_unit_interface_get_aperture(host_pdev, &base, + &size); + if (err < 0) { + FAULT("Failed to get syncpt aperture\n"); + } + /** Get page size of a syncpoint */ + syncpt_page_size = + nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev, 1); + dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; + if (iommu_get_domain_for_dev(dev)) { + sp_start = dma_map_resource(dev, base, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, sp_start)) { + FAULT("Failed to pin RO syncpoints\n"); + } + } else { + FAULT("Failed to pin RO syncpoints\n"); + } + pva->syncpt_ro_iova = sp_start; + pva->syncpt_offset = syncpt_page_size; + pva->num_syncpts = (size / syncpt_page_size); + + for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { + pva->syncpt_rw[i].syncpt_id = nvhost_get_syncpt_client_managed( + props->pdev, "pva_syncpt"); + if (pva->syncpt_rw[i].syncpt_id == 0) { + FAULT("Failed to get syncpt\n"); + } + syncpt_offset[i] = + nvhost_syncpt_unit_interface_get_byte_offset_ext( + host_pdev, pva->syncpt_rw[i].syncpt_id); + err = nvhost_syncpt_read_ext_check( + props->pdev, pva->syncpt_rw[i].syncpt_id, + 
&pva->syncpt_rw[i].syncpt_value); + if (err < 0) { + FAULT("Failed to read syncpoint value\n"); + } + } + + pva->syncpt_rw_iova = + dma_map_resource(dev, + safe_addu64(base, (uint64_t)syncpt_offset[0]), + safe_mulu64((uint64_t)pva->syncpt_offset, + (uint64_t)PVA_NUM_RW_SYNCPTS), + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, pva->syncpt_rw_iova)) { + FAULT("Failed to pin RW syncpoints\n"); + } + pva->syncpt_rw[0].syncpt_iova = pva->syncpt_rw_iova; + for (uint32_t i = 1; i < PVA_NUM_RW_SYNCPTS; i++) { + if (safe_addu32(syncpt_offset[i - 1], pva->syncpt_offset) != + syncpt_offset[i]) { + FAULT("RW syncpts are not contiguous\n"); + } + pva->syncpt_rw[i].syncpt_iova = safe_addu64( + pva->syncpt_rw_iova, + safe_mulu64((uint64_t)pva->syncpt_offset, (uint64_t)i)); + } +} + +void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva) +{ +} + +void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva) +{ + int err = 0; + phys_addr_t base; + size_t size; + struct device *dev; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + struct platform_device *host_pdev = + to_platform_device(props->pdev->dev.parent); + + err = nvhost_syncpt_unit_interface_get_aperture(host_pdev, &base, + &size); + if (err < 0) { + FAULT("Failed to get syncpt aperture\n"); + } + + dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; + if (iommu_get_domain_for_dev(dev)) { + dma_unmap_resource(dev, pva->syncpt_ro_iova, size, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_resource(dev, pva->syncpt_rw_iova, + safe_mulu64((uint64_t)pva->syncpt_offset, + (uint64_t)PVA_NUM_RW_SYNCPTS), + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + } else { + FAULT("Failed to unmap syncpts\n"); + } + for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { + nvhost_syncpt_put_ref_ext(props->pdev, + pva->syncpt_rw[i].syncpt_id); + pva->syncpt_rw[i].syncpt_id = 0; + pva->syncpt_rw[i].syncpt_iova = 0; + pva->syncpt_rw[i].syncpt_value = 0; + } + pva->syncpt_ro_iova = 0; + pva->syncpt_rw_iova = 0; + pva->syncpt_offset = 0; + nvhost_syncpt_unit_interface_deinit(props->pdev); +} + +void pva_kmd_device_plat_init(struct pva_kmd_device *pva) +{ + struct pva_kmd_linux_device_data *plat_data = + pva_kmd_zalloc_nofail(sizeof(struct pva_kmd_linux_device_data)); + + pva_kmd_linux_device_set_data(pva, plat_data); + + /* Get SMMU context devices that were probed earlier and their SIDs */ + pva_kmd_linux_device_smmu_contexts_init(pva); +} + +void pva_kmd_device_plat_deinit(struct pva_kmd_device *pva) +{ + pva_kmd_linux_host1x_deinit(pva); + pva_kmd_free(pva_kmd_linux_device_get_data(pva)); +} + +enum pva_error pva_kmd_power_on(struct pva_kmd_device *pva) +{ + int err = 0; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + + err = pm_runtime_get_sync(&props->pdev->dev); + if (err < 0) { + pm_runtime_put_noidle(&props->pdev->dev); + goto out; + } + + /* Power management operation is asynchronous. PVA may not be power + * cycled between power_off -> power_on call. Therefore, we need to + * reset it here to make sure it is in a clean state. 
*/ + reset_control_acquire(props->reset_control); + reset_control_reset(props->reset_control); + reset_control_release(props->reset_control); + +out: + return kernel_err2pva_err(err); +} + +void pva_kmd_power_off(struct pva_kmd_device *pva) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *props = device_data->pva_device_properties; + + pm_runtime_mark_last_busy(&props->pdev->dev); + pm_runtime_put(&props->pdev->dev); + + /* Power management operation is asynchronous. We don't control when PVA + * will really be powered down. However, we need to free memories after + * this call. Therefore, we assert the reset line to stop PVA from any + * further activity. */ + reset_control_acquire(props->reset_control); + reset_control_assert(props->reset_control); + reset_control_release(props->reset_control); +} + +uint32_t pva_kmd_get_syncpt_ro_offset(struct pva_kmd_device *pva) +{ + return safe_subu64(pva->syncpt_ro_iova, FW_SHARED_MEMORY_START); +} +uint32_t pva_kmd_get_syncpt_rw_offset(struct pva_kmd_device *pva) +{ + return safe_subu64(pva->syncpt_rw_iova, FW_SHARED_MEMORY_START); +} + +enum pva_error pva_kmd_read_fw_bin(struct pva_kmd_device *pva) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *device_props = + device_data->pva_device_properties; + struct pva_kmd_device_memory *fw_bin_mem; + + const struct firmware *fw_ucode; + int kerr = request_firmware(&fw_ucode, device_props->firmware_name, + &device_props->pdev->dev); + if (kerr < 0) { + err = kernel_err2pva_err(kerr); + goto out; + } + + fw_bin_mem = pva_kmd_device_memory_alloc_map( + safe_pow2_roundup_u64(fw_ucode->size, SIZE_4KB), pva, + PVA_ACCESS_RW, PVA_R5_SMMU_CONTEXT_ID); + if (fw_bin_mem == NULL) { + err = PVA_NOMEM; + goto release; + } + + memcpy(fw_bin_mem->va, fw_ucode->data, fw_ucode->size); + + pva->fw_bin_mem = fw_bin_mem; +release: + release_firmware(fw_ucode); +out: + return err; +} + +void pva_kmd_aperture_write(struct pva_kmd_device *pva, + enum pva_kmd_reg_aperture aperture, uint32_t reg, + uint32_t val) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *device_props = + device_data->pva_device_properties; + + void __iomem *addr = device_props->aperture[aperture] + reg; + + writel(val, addr); +} + +uint32_t pva_kmd_aperture_read(struct pva_kmd_device *pva, + enum pva_kmd_reg_aperture aperture, uint32_t reg) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *device_props = + device_data->pva_device_properties; + + void __iomem *addr = device_props->aperture[aperture] + reg; + + return readl(addr); +} + +enum pva_error kernel_err2pva_err(int err) +{ + if (err >= 0) { + return PVA_SUCCESS; + } + + switch (err) { + case -EINVAL: + return PVA_INVAL; + case -EINTR: + return PVA_EINTR; + default: + return PVA_UNKNOWN_ERROR; + } +} + +unsigned long pva_kmd_copy_data_from_user(void *dst, const void *src, + uint64_t size) +{ + return copy_from_user(dst, src, size); +} + +unsigned long pva_kmd_copy_data_to_user(void __user *to, const void *from, + unsigned long size) +{ + return copy_to_user(to, from, size); +} + +unsigned long pva_kmd_strtol(const char *str, int base) +{ + unsigned long val; + int ret; + + ret = kstrtoul(str, base, &val); + if (ret < 0) + return 0; + + return val; +} + +/* TODO: Enable HVC call once HVC 
fix is available on dev-main */ +//static void pva_kmd_config_regs(void) +//{ +//bool hv_err = true; +//hv_err = hyp_pva_config_regs(); +//ASSERT(hv_err == true); +//ASSERT(false); +//} + +void pva_kmd_config_evp_seg_scr_regs(struct pva_kmd_device *pva) +{ + pva_kmd_config_evp_seg_regs(pva); + pva_kmd_config_scr_regs(pva); +} + +void pva_kmd_config_sid_regs(struct pva_kmd_device *pva) +{ + pva_kmd_config_sid(pva); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h new file mode 100644 index 00000000..7ccc82f3 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include +#include +#include + +#include "pva_api.h" +#include "pva_kmd_device.h" +#include "pva_kmd_linux_isr.h" + +#define NV_PVA0_CLASS_ID 0xF1 +#define PVA_KMD_LINUX_MAX_IORESOURCE_MEM 5 + +extern const struct file_operations tegra_pva_ctrl_ops; + +struct pva_kmd_linux_device_data { + /* + * Always keep nvhost_device_data at the top of this struct + * APIs access this data using platform_get_drvdata + */ + struct nvhost_device_data *pva_device_properties; + + /* Global states required by a PVA device instance go here */ + struct platform_device *smmu_contexts[PVA_MAX_NUM_SMMU_CONTEXTS]; + struct pva_kmd_isr_data isr[PVA_KMD_INTR_LINE_COUNT]; +}; + +struct pva_kmd_linux_device_data * +pva_kmd_linux_device_get_data(struct pva_kmd_device *device); + +void pva_kmd_linux_device_set_data(struct pva_kmd_device *device, + struct pva_kmd_linux_device_data *data); + +void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva); +void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva); + +struct nvhost_device_data * +pva_kmd_linux_device_get_properties(struct platform_device *pdev); + +void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device); + +bool pva_kmd_linux_smmu_contexts_initialized(enum pva_chip_id chip_id); + +enum pva_error kernel_err2pva_err(int err); diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c new file mode 100644 index 00000000..bc31582e --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +#include "pva_kmd_device_memory.h" +#include "pva_kmd_utils.h" +#include "pva_kmd_linux_device.h" +#include +#include +#include +#include + +static struct device *get_context_device(struct pva_kmd_device *pva_device, + uint32_t smmu_ctx_idx) +{ + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva_device); + + ASSERT(smmu_ctx_idx < pva_device->hw_consts.n_smmu_contexts); + + return &device_data->smmu_contexts[smmu_ctx_idx]->dev; +} + +struct pva_kmd_device_memory_impl { + struct pva_kmd_device_memory dev_mem; + struct dma_buf *dmabuf; + struct iosys_map iosysmap; + struct dma_buf_attachment *dmabuf_attch; + struct sg_table *sgt; + uint64_t offset; +}; + +struct pva_kmd_device_memory * +pva_kmd_device_memory_alloc_map(uint64_t size, struct pva_kmd_device *pva, + uint32_t iova_access_flags, + uint32_t smmu_ctx_idx) +{ + struct device *dev = get_context_device(pva, smmu_ctx_idx); + dma_addr_t pa = 0U; + void *va = NULL; + + struct pva_kmd_device_memory_impl *mem_impl = + pva_kmd_zalloc(sizeof(struct pva_kmd_device_memory_impl)); + va = dma_alloc_coherent(dev, size, &pa, GFP_KERNEL); + if (va == NULL) { + pva_kmd_log_err("dma_alloc_coherent failed"); + goto free_mem; + } + mem_impl->dev_mem.iova = pa; + mem_impl->dev_mem.va = va; + mem_impl->dev_mem.size = size; + mem_impl->dev_mem.pva = pva; + mem_impl->dev_mem.smmu_ctx_idx = smmu_ctx_idx; + mem_impl->dmabuf = NULL; + + return &mem_impl->dev_mem; + +free_mem: + pva_kmd_free(mem_impl); + return NULL; +} + +/** + * memory_handle is dma fd in Linux, NvRM import_id in QNX, shard memory fd in + * sim and native. + */ +struct pva_kmd_device_memory * +pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset, + uint64_t size, struct pva_kmd_context *ctx) +{ + struct pva_kmd_device_memory_impl *mem_impl = + (struct pva_kmd_device_memory_impl *)pva_kmd_zalloc( + sizeof(struct pva_kmd_device_memory_impl)); + + struct dma_buf *dma_buf; + dma_buf = dma_buf_get(memory_handle); + if (dma_buf == NULL) { + pva_kmd_log_err("Failed to acquire memory"); + goto free_mem; + } + + if (size > dma_buf->size) { + pva_kmd_log_err( + "Trying to register device memory with wrong size"); + goto put_dmabuf; + } + + mem_impl->dmabuf = dma_buf; + mem_impl->dev_mem.size = size; + mem_impl->offset = offset; + close_fd(memory_handle); + return &mem_impl->dev_mem; + +put_dmabuf: + dma_buf_put(dma_buf); +free_mem: + pva_kmd_free(mem_impl); + return NULL; +} + +void pva_kmd_device_memory_free(struct pva_kmd_device_memory *mem) +{ + struct pva_kmd_device_memory_impl *mem_impl = + container_of(mem, struct pva_kmd_device_memory_impl, dev_mem); + struct device *dev; + + if (mem_impl->dmabuf != NULL) { + /* This memory comes from dma_buf_get */ + if (mem->iova != 0U) { + pva_kmd_device_memory_iova_unmap(mem); + } + + if (mem->va != NULL) { + pva_kmd_device_memory_cpu_unmap(mem); + } + + dma_buf_put(mem_impl->dmabuf); + } else { + /* This memory comes from dma_alloc_coherent */ + dev = get_context_device(mem_impl->dev_mem.pva, + mem_impl->dev_mem.smmu_ctx_idx); + dma_free_coherent(dev, mem->size, mem->va, mem->iova); + mem->iova = 0U; + } + pva_kmd_free(mem_impl); +} + +enum pva_error +pva_kmd_device_memory_cpu_map(struct pva_kmd_device_memory *memory) +{ + struct pva_kmd_device_memory_impl *mem_impl = container_of( + memory, struct pva_kmd_device_memory_impl, dev_mem); + int ret; + + ret = dma_buf_vmap(mem_impl->dmabuf, &mem_impl->iosysmap); + if (ret != 0) { + pva_kmd_log_err("CPU map failed\n"); + return PVA_NOMEM; + } + + memory->va 
= + pva_offset_pointer(mem_impl->iosysmap.vaddr, mem_impl->offset); + return PVA_SUCCESS; +} + +void pva_kmd_device_memory_cpu_unmap(struct pva_kmd_device_memory *memory) +{ + struct pva_kmd_device_memory_impl *mem_impl = container_of( + memory, struct pva_kmd_device_memory_impl, dev_mem); + + ASSERT(mem_impl->dmabuf != NULL); + + dma_buf_vunmap(mem_impl->dmabuf, &mem_impl->iosysmap); + memory->va = NULL; +} + +enum pva_error +pva_kmd_device_memory_iova_map(struct pva_kmd_device_memory *memory, + struct pva_kmd_device *pva, + uint32_t access_flags, uint32_t smmu_ctx_idx) +{ + pva_math_error math_err = MATH_OP_SUCCESS; + struct pva_kmd_device_memory_impl *mem_impl = container_of( + memory, struct pva_kmd_device_memory_impl, dev_mem); + + // struct pva_kmd_linux_device_plat_data *plat_data = + // pva_kmd_linux_device_get_plat_data(pva); + // struct device *dev = plat_data->dev[smmu_ctx_idx]; + struct device *dev = get_context_device(pva, smmu_ctx_idx); + struct dma_buf_attachment *attach; + struct sg_table *sgt; + enum pva_error err = PVA_SUCCESS; + + attach = dma_buf_attach(mem_impl->dmabuf, dev); + if (IS_ERR_OR_NULL(attach)) { + err = PVA_INVAL; + pva_kmd_log_err("Failed to attach dma_buf\n"); + goto err_out; + } + + mem_impl->dmabuf_attch = attach; + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR_OR_NULL(sgt)) { + err = PVA_INVAL; + pva_kmd_log_err("Failed to map attachment\n"); + goto detach; + } + mem_impl->sgt = sgt; + mem_impl->dev_mem.iova = + addu64(sg_dma_address(sgt->sgl), mem_impl->offset, &math_err); + if (math_err != MATH_OP_SUCCESS) { + err = PVA_INVAL; + pva_kmd_log_err( + "pva_kmd_device_memory_iova_map Invalid DMA address\n"); + goto detach; + } + mem_impl->dev_mem.pva = pva; + mem_impl->dev_mem.smmu_ctx_idx = smmu_ctx_idx; + return PVA_SUCCESS; + +detach: + dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attch); +err_out: + return err; +} + +void pva_kmd_device_memory_iova_unmap(struct pva_kmd_device_memory *memory) +{ + struct pva_kmd_device_memory_impl *mem_impl = container_of( + memory, struct pva_kmd_device_memory_impl, dev_mem); + + ASSERT(mem_impl->dmabuf != NULL); + + dma_buf_unmap_attachment(mem_impl->dmabuf_attch, mem_impl->sgt, + DMA_BIDIRECTIONAL); + dma_buf_detach(mem_impl->dmabuf, mem_impl->dmabuf_attch); + memory->iova = 0; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c new file mode 100644 index 00000000..1d72c193 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +/* Auto-detected configuration depending kernel version */ +#include + +/* Linux headers */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif + +/* PVA headers */ +#include "pva_api.h" +#include "pva_kmd_device.h" +#include "pva_kmd_utils.h" +#include "pva_kmd_linux_device.h" +#include "pva_kmd_debugfs.h" +#include "pva_kmd_silicon_hwpm.h" +#include "pva_kmd_pm.h" + +#define PVA_KMD_LINUX_DRIVER_NAME "pva_kmd" +#if PVA_DEV_MAIN_COMPATIBLE == 1 +#define PVA_KMD_LINUX_T23X_FIRMWARE_NAME "nvpva_020.fw" +#define PVA_KMD_LINUX_T26X_FIRMWARE_NAME "nvpva_030.fw" +#else +#define PVA_KMD_LINUX_T23X_FIRMWARE_NAME "pvafw_t23x.fw" +#define PVA_KMD_LINUX_T26X_FIRMWARE_NAME "pvafw_t26x.fw" +#endif + +extern struct platform_driver pva_kmd_linux_smmu_context_driver; +extern atomic_t g_num_smmu_ctxs; +static bool load_from_gsc = PVA_KMD_LOAD_FROM_GSC_DEFAULT; +static bool app_authenticate = PVA_KMD_APP_AUTH_DEFAULT; + +module_param(load_from_gsc, bool, 0); +MODULE_PARM_DESC(load_from_gsc, "Load V3 FW from GSC"); + +module_param(app_authenticate, bool, 0); +MODULE_PARM_DESC(app_authenticate, "Enable app authentication"); + +struct nvhost_device_data t23x_pva0_props = { + .version = PVA_CHIP_T23X, + .ctrl_ops = &tegra_pva_ctrl_ops, + .class = NV_PVA0_CLASS_ID, + /* We should not enable autosuspend here as this logic is handled in + * common code. When poweroff is called, common code expects PVA to be + * _really_ powered off. If we enable autosuspend, PVA will stay on for + * a while. */ + .autosuspend_delay = 0, + .firmware_name = PVA_KMD_LINUX_T23X_FIRMWARE_NAME +}; + +struct nvhost_device_data t26x_pva0_props = { + .version = PVA_CHIP_T26X, + .ctrl_ops = &tegra_pva_ctrl_ops, + .class = NV_PVA0_CLASS_ID, + /* We should not enable autosuspend here as this logic is handled in + * common code. When poweroff is called, common code expects PVA to be + * _really_ powered off. If we enable autosuspend, PVA will stay on for + * a while. 
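+ * Power-down is instead enforced explicitly: pva_kmd_power_off() drops the
+ * runtime PM reference and asserts the PVA reset line so the unit is
+ * quiesced before KMD frees its memory.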
*/ + .autosuspend_delay = 0, + .firmware_name = PVA_KMD_LINUX_T26X_FIRMWARE_NAME +}; + +/* Map PVA-A and PVA-B to respective configuration items in nvhost */ +static struct of_device_id tegra_pva_of_match[] = { + { .name = "pva0", + .compatible = "nvidia,tegra234-pva", + .data = (struct nvhost_device_data *)&t23x_pva0_props }, + { .name = "pva0", + .compatible = "nvidia,tegra234-pva-hv", + .data = (struct nvhost_device_data *)&t23x_pva0_props }, + { .name = "pva0", + .compatible = "nvidia,tegra264-pva", + .data = (struct nvhost_device_data *)&t26x_pva0_props }, + {}, +}; + +MODULE_DEVICE_TABLE(of, tegra_pva_of_match); + +static int pva_get_gsc_priv_hwid(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) + struct iommu_fwspec *fwspec = dev->iommu_fwspec; +#else + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); +#endif + if (!fwspec) { + return -EINVAL; + } + return fwspec->ids[0] & 0xffff; +} + +static void pva_kmd_linux_register_hwpm(struct pva_kmd_device *pva) +{ + struct tegra_soc_hwpm_ip_ops *hwpm_ip_ops = + pva_kmd_zalloc(sizeof(*hwpm_ip_ops)); + + hwpm_ip_ops->ip_dev = pva; + hwpm_ip_ops->ip_base_address = safe_addu64( + pva->reg_phy_base[0], (uint64_t)pva->regspec.cfg_perf_mon); + hwpm_ip_ops->resource_enum = TEGRA_SOC_HWPM_RESOURCE_PVA; + hwpm_ip_ops->hwpm_ip_pm = &pva_kmd_hwpm_ip_pm; + hwpm_ip_ops->hwpm_ip_reg_op = &pva_kmd_hwpm_ip_reg_op; + tegra_soc_hwpm_ip_register(hwpm_ip_ops); + pva->debugfs_context.data_hwpm = hwpm_ip_ops; +} + +static void pva_kmd_linux_unregister_hwpm(struct pva_kmd_device *pva) +{ + struct tegra_soc_hwpm_ip_ops *hwpm_ip_ops = + (struct tegra_soc_hwpm_ip_ops *)pva->debugfs_context.data_hwpm; + tegra_soc_hwpm_ip_unregister(hwpm_ip_ops); + pva_kmd_free(hwpm_ip_ops); +} + +static ssize_t clk_cap_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct nvhost_device_data *pdata = + container_of(kobj, struct nvhost_device_data, clk_cap_kobj); + /* i is indeed 'index' here after type conversion */ + int ret, i = attr - pdata->clk_cap_attrs; + struct clk_bulk_data *clks = &pdata->clks[i]; + struct clk *clk = clks->clk; + unsigned long freq_cap; + long freq_cap_signed; + + ret = kstrtoul(buf, 0, &freq_cap); + if (ret) + return -EINVAL; + /* Remove previous freq cap to get correct rounted rate for new cap */ + ret = clk_set_max_rate(clk, UINT_MAX); + if (ret < 0) + return ret; + + freq_cap_signed = clk_round_rate(clk, freq_cap); + if (freq_cap_signed < 0) + return -EINVAL; + freq_cap = (unsigned long)freq_cap_signed; + /* Apply new freq cap */ + ret = clk_set_max_rate(clk, freq_cap); + if (ret < 0) + return ret; + + /* Update the clock rate */ + clk_set_rate(clks->clk, freq_cap); + if (ret < 0) + return ret; + + return count; +} + +static ssize_t clk_cap_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nvhost_device_data *pdata = + container_of(kobj, struct nvhost_device_data, clk_cap_kobj); + /* i is indeed 'index' here after type conversion */ + int i = attr - pdata->clk_cap_attrs; + struct clk_bulk_data *clks = &pdata->clks[i]; + struct clk *clk = clks->clk; + long max_rate; + + max_rate = clk_round_rate(clk, UINT_MAX); + if (max_rate < 0) + return max_rate; + + return snprintf(buf, PAGE_SIZE, "%ld\n", max_rate); +} + +static struct kobj_type nvpva_kobj_ktype = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +static int pva_probe(struct platform_device *pdev) +{ + int err = 0U; + struct device *dev = &pdev->dev; + struct 
pva_kmd_linux_device_data *pva_device_data; + struct nvhost_device_data *pva_props; + const struct of_device_id *device_id; + struct pva_kmd_device *pva_device; + struct kobj_attribute *attr = NULL; + int j = 0; + struct clk_bulk_data *clks; + struct clk *c; + + device_id = of_match_device(tegra_pva_of_match, dev); + if (!device_id) { + dev_err(dev, "no match for pva dev\n"); + return -ENODATA; + } + + pva_props = (struct nvhost_device_data *)device_id->data; + WARN_ON(!pva_props); + if (!pva_props) { + dev_info(dev, "no platform data\n"); + return -ENODATA; + } + + /* Create devices for child nodes of this device */ + of_platform_default_populate(dev->of_node, NULL, dev); + + /* Before probing PVA device, all of PVA's logical context devices + * must have been probed + */ + if (!pva_kmd_linux_smmu_contexts_initialized(pva_props->version)) { + dev_warn(dev, + "nvpva cntxt was not initialized, deferring probe."); + return -EPROBE_DEFER; + } + + pva_props->pdev = pdev; + mutex_init(&pva_props->lock); + pva_device = + pva_kmd_device_create(pva_props->version, 0, app_authenticate); + + pva_device->is_hv_mode = is_tegra_hypervisor_mode(); + + /*Force to always boot from file in case of L4T*/ + if (!pva_device->is_hv_mode) { + load_from_gsc = false; + } + + pva_device->load_from_gsc = load_from_gsc; + pva_device->stream_ids[pva_device->r5_image_smmu_context_id] = + pva_get_gsc_priv_hwid(pdev); + + pva_props->private_data = pva_device; + platform_set_drvdata(pdev, pva_props); + + /* + * pva_kmd_device_create allocates space for the platform data + * of this device. Update its property field to point to the platform + * data read using of_* APIs + */ + pva_device_data = pva_device->plat_data; + pva_device_data->pva_device_properties = pva_props; + + /* Map MMIO range to kernel space */ + err = nvhost_client_device_get_resources(pdev); + if (err < 0) { + dev_err(dev, "nvhost_client_device_get_resources failed\n"); + goto err_get_resources; + } + + /* Get clocks */ + err = nvhost_module_init(pdev); + if (err < 0) { + dev_err(dev, "nvhost_module_init failed\n"); + goto err_get_car; + } + + /* + * Add this to nvhost device list, initialize scaling, + * setup memory management for the device, create dev nodes + */ + err = nvhost_client_device_init(pdev); + if (err < 0) { + dev_err(dev, "nvhost_client_device_init failed\n"); + goto err_cdev_init; + } + + pva_kmd_linux_host1x_init(pva_device); + + pva_kmd_debugfs_create_nodes(pva_device); + pva_kmd_linux_register_hwpm(pva_device); + + if (pva_props->num_clks > 0) { + err = kobject_init_and_add(&pva_props->clk_cap_kobj, + &nvpva_kobj_ktype, &pdev->dev.kobj, + "%s", "clk_cap"); + if (err) { + dev_err(dev, "Could not add dir 'clk_cap'\n"); + goto err_cdev_init; + } + + pva_props->clk_cap_attrs = devm_kcalloc( + dev, pva_props->num_clks, sizeof(*attr), GFP_KERNEL); + if (!pva_props->clk_cap_attrs) + goto err_cleanup_sysfs; + + for (j = 0; j < pva_props->num_clks; ++j) { + clks = &pva_props->clks[j]; + c = clks->clk; + if (!c) + continue; + + attr = &pva_props->clk_cap_attrs[j]; + attr->attr.name = __clk_get_name(c); + /* octal permission is preferred nowadays */ + attr->attr.mode = 0644; + attr->show = clk_cap_show; + attr->store = clk_cap_store; + sysfs_attr_init(&attr->attr); + if (sysfs_create_file(&pva_props->clk_cap_kobj, + &attr->attr)) { + dev_err(dev, + "Could not create sysfs attribute %s\n", + __clk_get_name(c)); + err = -EIO; + goto err_cleanup_sysfs; + } + } + } + /* return 0 as we would have jumped over this if an error was seen */ + return 0; + 
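+ /*
+ * Error unwind: the labels below fall through in reverse order of
+ * acquisition: sysfs clk_cap kobject, nvhost client device, nvhost
+ * module/clocks, and finally the pva_kmd_device instance itself.
+ */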
+err_cleanup_sysfs: + /* kobj of nvpva_kobj_ktype cleans up sysfs entries automatically */ + kobject_put(&pva_props->clk_cap_kobj); +err_cdev_init: + nvhost_client_device_release(pdev); +err_get_car: + nvhost_module_deinit(pdev); +err_get_resources: + pva_kmd_device_destroy(pva_device); + + return err; +} + +static int __exit pva_remove(struct platform_device *pdev) +{ + struct nvhost_device_data *pva_props = platform_get_drvdata(pdev); + struct pva_kmd_device *pva_device = pva_props->private_data; + struct kobj_attribute *attr = NULL; + int i; + + if (pva_props->clk_cap_attrs) { + for (i = 0; i < pva_props->num_clks; i++) { + attr = &pva_props->clk_cap_attrs[i]; + sysfs_remove_file(&pva_props->clk_cap_kobj, + &attr->attr); + } + + kobject_put(&pva_props->clk_cap_kobj); + } + + nvhost_client_device_release(pdev); + pva_kmd_debugfs_destroy_nodes(pva_device); + pva_kmd_linux_unregister_hwpm(pva_device); + nvhost_module_deinit(pdev); + pva_kmd_device_destroy(pva_device); + + return 0; +} + +static int pva_kmd_linux_device_runtime_resume(struct device *dev) +{ + int err; + struct nvhost_device_data *props = dev_get_drvdata(dev); + + dev_info(dev, "PVA: Calling runtime resume"); + reset_control_acquire(props->reset_control); + + err = clk_bulk_prepare_enable(props->num_clks, props->clks); + if (err < 0) { + reset_control_release(props->reset_control); + dev_err(dev, "failed to enabled clocks: %d\n", err); + return err; + } + + reset_control_reset(props->reset_control); + reset_control_release(props->reset_control); + + return 0; +} + +static int pva_kmd_linux_device_runtime_suspend(struct device *dev) +{ + struct nvhost_device_data *props = dev_get_drvdata(dev); + + dev_info(dev, "PVA: Calling runtime suspend"); + + reset_control_acquire(props->reset_control); + reset_control_assert(props->reset_control); + + clk_bulk_disable_unprepare(props->num_clks, props->clks); + + reset_control_release(props->reset_control); + + return 0; +} +#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID) /* Linux v6.11 */ +static void __exit pva_remove_wrapper(struct platform_device *pdev) +{ + pva_remove(pdev); +} +#else +static int __exit pva_remove_wrapper(struct platform_device *pdev) +{ + pva_remove(pdev); + return 0; +} +#endif + +static int pva_kmd_linux_device_resume(struct device *dev) +{ + enum pva_error status = PVA_SUCCESS; + int err = 0; + struct nvhost_device_data *props = dev_get_drvdata(dev); + struct pva_kmd_device *pva_device = props->private_data; + + if (pva_device->is_suspended == false) { + dev_warn(dev, "PVA is not in suspend state.\n"); + goto fail_not_in_suspend; + } + + dev_info(dev, "PVA: Calling resume"); + err = pm_runtime_force_resume(dev); + + if (err != 0) { + goto fail_runtime_resume; + } + + if (pva_device->refcount != 0u) { + status = pva_kmd_init_fw(pva_device); + } + + if (status != PVA_SUCCESS) { + err = -EINVAL; + goto fail_init_fw; + } + +fail_init_fw: +fail_runtime_resume: +fail_not_in_suspend: + return err; +} + +static int pva_kmd_linux_device_suspend(struct device *dev) +{ + int err = 0; + struct nvhost_device_data *props = dev_get_drvdata(dev); + struct pva_kmd_device *pva_device = props->private_data; + + if (pva_device->refcount != 0u) { + pva_kmd_deinit_fw(pva_device); + } + + dev_info(dev, "PVA: Calling suspend"); + err = pm_runtime_force_suspend(dev); + if (err != 0) { + dev_err(dev, "(FAIL) PM suspend\n"); + goto fail_nvhost_module_suspend; + } + + pva_device->is_suspended = true; + +fail_nvhost_module_suspend: + return err; +} + +static int 
pva_kmd_linux_device_prepare_suspend(struct device *dev) +{ + struct nvhost_device_data *props = dev_get_drvdata(dev); + struct pva_kmd_device *pva_device = props->private_data; + enum pva_error status = PVA_SUCCESS; + int err = 0; + + dev_info(dev, "PVA: Preparing to suspend"); + if (pva_device->is_suspended == true) { + dev_info(dev, "PVA device already suspended"); + goto fail_already_in_suspend; + } + + status = pva_kmd_prepare_suspend(pva_device); + if (status != PVA_SUCCESS) { + dev_info(dev, "PVA: Suspend FAIL"); + err = -EBUSY; + goto fail; + } + +fail_already_in_suspend: +fail: + return err; +} + +static void pva_kmd_linux_device_complete_resume(struct device *dev) +{ + enum pva_error status = PVA_SUCCESS; + struct nvhost_device_data *props = dev_get_drvdata(dev); + struct pva_kmd_device *pva_device = props->private_data; + + dev_info(dev, "PVA: Completing resume"); + if (pva_device->is_suspended == false) { + dev_info(dev, "PVA device not in suspend state"); + goto done; + } + + status = pva_kmd_complete_resume(pva_device); + if (status != PVA_SUCCESS) { + dev_err(dev, "PVA: Resume failed"); + goto done; + } + + dev_info(dev, "PVA: Resume complete"); + +done: + pva_device->is_suspended = false; + return; +} + +static const struct dev_pm_ops pva_kmd_linux_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pva_kmd_linux_device_suspend, + pva_kmd_linux_device_resume) + SET_RUNTIME_PM_OPS(pva_kmd_linux_device_runtime_suspend, + pva_kmd_linux_device_runtime_resume, NULL) + .prepare = pva_kmd_linux_device_prepare_suspend, + .complete = pva_kmd_linux_device_complete_resume +}; + +static struct platform_driver pva_platform_driver = { + .probe = pva_probe, + .remove = pva_remove_wrapper, + .driver = { + .name = PVA_KMD_LINUX_DRIVER_NAME, + .owner = THIS_MODULE, +#ifdef CONFIG_OF + .of_match_table = tegra_pva_of_match, +#endif + .pm = &pva_kmd_linux_pm_ops, + }, +}; + +static struct host1x_driver host1x_nvpva_driver = { + .driver = { + .name = "host1x-nvpva", + }, + .subdevs = tegra_pva_of_match, +}; + +static int __init nvpva_init(void) +{ + int err; + + atomic_set(&g_num_smmu_ctxs, 0); + + err = host1x_driver_register(&host1x_nvpva_driver); + if (err < 0) + goto err_out; + + err = platform_driver_register(&pva_kmd_linux_smmu_context_driver); + if (err < 0) + goto unreg_host1x_drv; + + err = platform_driver_register(&pva_platform_driver); + if (err < 0) + goto unreg_smmu_drv; + + printk(KERN_INFO "nvpva_init completed: %d. GSC boot: %d", err, + load_from_gsc); + + return err; + +unreg_smmu_drv: + platform_driver_unregister(&pva_kmd_linux_smmu_context_driver); +unreg_host1x_drv: + host1x_driver_unregister(&host1x_nvpva_driver); +err_out: + return err; +} + +static void __exit nvpva_exit(void) +{ + platform_driver_unregister(&pva_platform_driver); + platform_driver_unregister(&pva_kmd_linux_smmu_context_driver); + host1x_driver_unregister(&host1x_nvpva_driver); + printk(KERN_INFO "nvpva_exit completed"); +} + +module_init(nvpva_init); +module_exit(nvpva_exit); + +#if defined(NV_MODULE_IMPORT_NS_CALLS_STRINGIFY) +MODULE_IMPORT_NS(DMA_BUF); +#else +MODULE_IMPORT_NS("DMA_BUF"); +#endif +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c new file mode 100644 index 00000000..64b893a6 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. 
All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +/* Auto-detected configuration depending kernel version */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pva_kmd_linux.h" +#include "pva_kmd_linux_device.h" +#include "pva_kmd_op_handler.h" + +/** + * Struct to hold context pertaining to open/close/ioctl calls +*/ +struct pva_kmd_linux_ocb { + struct pva_kmd_context + *kmd_ctx; /* Stores pva_kmd_context to be assigned per client*/ + u8 req_buffer[PVA_KMD_MAX_OP_BUFFER_SIZE]; /* Buffer to copy request op from user */ + u8 resp_buffer + [PVA_KMD_MAX_OP_BUFFER_SIZE]; /* Buffer to copy response from kernel to user */ +}; + +static inline bool +is_ioctl_header_valid(const struct pva_kmd_linux_ioctl_header *hdr) +{ + return ((hdr->request.addr != NULL) && (hdr->response.addr != NULL) && + (hdr->request.size != 0U) && (hdr->response.size != 0U) && + (hdr->request.size <= PVA_KMD_MAX_OP_BUFFER_SIZE) && + (hdr->response.size <= PVA_KMD_MAX_OP_BUFFER_SIZE)); +} + +static long pva_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pva_kmd_linux_ocb *ocb = file->private_data; + u8 buf[NVPVA_IOCTL_MAX_SIZE] __aligned(sizeof(u64)); + u32 resp_size = 0; + enum pva_error op_err = PVA_SUCCESS; + int err = 0; + int ret_err = 0; + u8 cmd_size = _IOC_SIZE(cmd); + struct pva_kmd_linux_ioctl_header *hdr = NULL; + int req_ok, resp_ok; + + if ((cmd != PVA_KMD_IOCTL_GENERIC) || (cmd_size > sizeof(buf))) { + return -ENOIOCTLCMD; + } + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, cmd_size)) { + return -EFAULT; + } + } + + hdr = (struct pva_kmd_linux_ioctl_header *)(void *)buf; + if (!is_ioctl_header_valid(hdr)) { + return -EINVAL; + } + + req_ok = access_ok((void __user *)hdr->request.addr, + (unsigned long)hdr->request.size); + resp_ok = access_ok((void __user *)hdr->response.addr, + (unsigned long)hdr->response.size); + + if ((req_ok != 1) || (resp_ok != 1)) { + return -EFAULT; + } + + err = copy_from_user(ocb->req_buffer, (void __user *)hdr->request.addr, + hdr->request.size); + if (err) { + return err; + } + + op_err = pva_kmd_ops_handler(ocb->kmd_ctx, ocb->req_buffer, + hdr->request.size, ocb->resp_buffer, + hdr->response.size, &resp_size); + + if (op_err != PVA_SUCCESS) { + if (op_err == PVA_NO_RESOURCE_ID || op_err == PVA_NOMEM) { + err = -ENOMEM; + } else { + err = -EFAULT; + } + } + + if (_IOC_DIR(cmd) & _IOC_READ) { + ret_err = copy_to_user((void __user *)hdr->response.addr, + ocb->resp_buffer, resp_size); + } + + err = (err == 0) ? 
ret_err : err; + return err; +} + +static int pva_open(struct inode *inode, struct file *file) +{ + int err = 0; + + struct nvhost_device_data *props = container_of( + inode->i_cdev, struct nvhost_device_data, ctrl_cdev); + struct pva_kmd_device *kmd_device = props->private_data; + struct pva_kmd_linux_ocb *ocb = NULL; + + ocb = pva_kmd_zalloc(sizeof(*ocb)); + if (ocb == NULL) { + pva_kmd_log_err("Failed to allocate memory for PVA context"); + err = -ENOMEM; + goto out; + } + + ocb->kmd_ctx = pva_kmd_context_create(kmd_device); + if (ocb->kmd_ctx == NULL) { + err = -ENOMEM; + pva_kmd_log_err("Failed to create PVA context"); + goto free_mem; + } + file->private_data = ocb; + return 0; + +free_mem: + pva_kmd_free(ocb); +out: + return err; +} + +static int pva_release(struct inode *inode, struct file *file) +{ + struct pva_kmd_linux_ocb *ocb = file->private_data; + pva_kmd_context_destroy(ocb->kmd_ctx); + pva_kmd_free(ocb); + return 0; +} + +static int pva_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long size = safe_subu64(vma->vm_end, vma->vm_start); + struct pva_kmd_linux_ocb *ocb = filp->private_data; + struct pva_kmd_device *pva = ocb->kmd_ctx->pva; + uint64_t user_ccq_base = + safe_addu64(pva->reg_phy_base[PVA_KMD_APERTURE_PVA_CLUSTER], + pva->regspec.ccq_regs[ocb->kmd_ctx->ccq_id].fifo); + unsigned long user_ccq_pfn = user_ccq_base >> PAGE_SHIFT; + + if (size != PVA_CFG_CCQ_BLOCK_SIZE) { + pva_kmd_log_err("Unexpected CCQ map size"); + return -EINVAL; + } + +// TODO: use NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS instead of kernel versions +// when the syncpoint change is merged. +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) /* Linux v6.3 */ + vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP); +#else + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; +#endif + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + if (remap_pfn_range(vma, vma->vm_start, user_ccq_pfn, size, + vma->vm_page_prot)) { + pva_kmd_log_err("CCQ map failed"); + return -EAGAIN; + } + + return 0; +} + +const struct file_operations tegra_pva_ctrl_ops = { + .owner = THIS_MODULE, +#if defined(NV_NO_LLSEEK_PRESENT) + .llseek = no_llseek, +#endif + .unlocked_ioctl = pva_ioctl, + .mmap = pva_mmap, +#ifdef CONFIG_COMPAT + .compat_ioctl = pva_ioctl, +#endif + .open = pva_open, + .release = pva_release, +}; diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c new file mode 100644 index 00000000..2fa0e583 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.c @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#include +#include "pva_kmd_device.h" +#include "pva_kmd_linux_device.h" +#include "pva_kmd_linux_isr.h" + +static struct pva_kmd_isr_data *get_isr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line) +{ + struct pva_kmd_linux_device_data *plat_data = + pva_kmd_linux_device_get_data(pva); + struct pva_kmd_isr_data *isr_data; + ASSERT(intr_line < PVA_KMD_INTR_LINE_COUNT); + isr_data = &plat_data->isr[intr_line]; + ASSERT(isr_data->binded); + return isr_data; +} +static irqreturn_t pva_isr(int irq, void *dev_id) +{ + struct pva_kmd_isr_data *isr_data = (struct pva_kmd_isr_data *)dev_id; + + isr_data->handler(isr_data->handler_data); + return IRQ_HANDLED; +} + +enum pva_error pva_kmd_bind_intr_handler(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line, + pva_kmd_intr_handler_t handler, + void *data) +{ + int err = 0; + struct pva_kmd_linux_device_data *plat_data = + pva_kmd_linux_device_get_data(pva); + struct pva_kmd_isr_data *isr_data = &plat_data->isr[intr_line]; + struct nvhost_device_data *props = plat_data->pva_device_properties; + + isr_data->irq = platform_get_irq(props->pdev, intr_line); + isr_data->handler = handler; + isr_data->handler_data = data; + isr_data->binded = true; + err = request_threaded_irq(isr_data->irq, NULL, pva_isr, IRQF_ONESHOT, + "pva-isr", isr_data); + + if (err != 0) { + pva_kmd_log_err("Failed to bind interrupt handler"); + } + + return kernel_err2pva_err(err); +} + +void pva_kmd_enable_intr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line) +{ + struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line); + enable_irq(isr_data->irq); +} + +void pva_kmd_disable_intr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line) +{ + struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line); + disable_irq(isr_data->irq); +} + +void pva_kmd_free_intr(struct pva_kmd_device *pva, + enum pva_kmd_intr_line intr_line) +{ + struct pva_kmd_isr_data *isr_data = get_isr(pva, intr_line); + free_irq(isr_data->irq, isr_data); + isr_data->binded = false; +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.h b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.h new file mode 100644 index 00000000..60d8235e --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_isr.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#ifndef PVA_KMD_LINUX_ISR_H +#define PVA_KMD_LINUX_ISR_H +#include +#include +#include +#include +#include "pva_kmd_shim_silicon.h" + +struct pva_kmd_device; + +struct pva_kmd_isr_data { + struct pva_kmd_device *pva; + bool binded; + int irq; /*< Hardware IRQ number */ + + pva_kmd_intr_handler_t handler; + void *handler_data; +}; + +#endif //PVA_KMD_LINUX_ISR_H diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c new file mode 100644 index 00000000..2157fe2e --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ +#include +#include +#include +#include +#include "pva_kmd_mutex.h" +#include "pva_kmd_thread_sema.h" +#include "pva_kmd_utils.h" + +void *pva_kmd_zalloc(uint64_t size) +{ + return kvzalloc(size, GFP_KERNEL); +} + +void pva_kmd_free(void *ptr) +{ + kvfree(ptr); +} + +void pva_kmd_print_str(const char *str) +{ + printk(KERN_INFO "%s", str); +} + +void pva_kmd_print_str_u64(const char *str, uint64_t n) +{ + printk(KERN_INFO "%s:%llu", str, n); +} + +enum pva_error pva_kmd_mutex_init(pva_kmd_mutex_t *m) +{ + mutex_init(m); + return PVA_SUCCESS; +} + +void pva_kmd_mutex_lock(pva_kmd_mutex_t *m) +{ + mutex_lock(m); +} + +void pva_kmd_mutex_unlock(pva_kmd_mutex_t *m) +{ + mutex_unlock(m); +} + +void pva_kmd_mutex_deinit(pva_kmd_mutex_t *m) +{ + mutex_destroy(m); +} + +void pva_kmd_fault() +{ + BUG(); +} + +void pva_kmd_sleep_us(uint64_t us) +{ + usleep_range(us, safe_mulu64(2, us)); +} + +void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val) +{ + sema_init(sem, val); +} + +enum pva_error pva_kmd_sema_wait_timeout(pva_kmd_sema_t *sem, + uint32_t timeout_ms) +{ + long timeout_jiffies = usecs_to_jiffies(safe_mulu64(timeout_ms, 1000u)); + int err = down_timeout(sem, timeout_jiffies); + if (err == -ETIME) { + return PVA_TIMEDOUT; + } else if (err == -EINTR) { + return PVA_AGAIN; + } else { + return PVA_SUCCESS; + } +} + +void pva_kmd_sema_deinit(pva_kmd_sema_t *sem) +{ +} + +void pva_kmd_sema_post(pva_kmd_sema_t *sem) +{ + up(sem); +} diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c new file mode 100644 index 00000000..25eb6850 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ +/* Auto-detected configuration depending kernel version */ +#include + +/* Linux headers */ +#include +#include +#include +#include + +/* PVA headers */ +#include "pva_kmd_linux_device.h" + +struct pva_kmd_linux_smmu_ctx { + struct platform_device *pdev; + uint32_t sid; +}; + +static const struct of_device_id pva_kmd_linux_smmu_context_of_match_table[] = { + { .compatible = "nvidia,pva-tegra186-iommu-context" }, + { .compatible = "nvidia,pva-tegra264-iommu-context" }, + {}, + +}; + +/* + * SMMU contexts available to PVA SW to support user applications + * + * Note that we reserve one SMMU context for use by PVA KMD to load FW from GSC + * Probing of the reserved SMMU context is not handled in this file + */ +static struct pva_kmd_linux_smmu_ctx g_smmu_ctxs[PVA_MAX_NUM_SMMU_CONTEXTS]; +atomic_t g_num_smmu_ctxs = ATOMIC_INIT(0); +atomic_t g_num_smmu_probing_done = ATOMIC_INIT(0); +bool g_smmu_probing_done = false; + +static uint32_t pva_kmd_device_get_sid(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + ASSERT(fwspec != NULL); + ASSERT(fwspec->num_ids != 0); + + return fwspec->ids[0] & 0xffff; +} + +static int pva_kmd_linux_device_smmu_context_probe(struct platform_device *pdev) +{ + int idx; + int new_idx; + + if (!iommu_get_domain_for_dev(&pdev->dev)) { + dev_err(&pdev->dev, + "iommu is not enabled for context device. aborting."); + return -ENOSYS; + } + + /* + * Probers for multiple PVA SMMU context devices might be executing + * this routine at once. To avoid race conditions, every prober must + * must first increment the number of contexts probed atomically. This + * way, each prober will find a unique place in the g_smmu_ctxs to store + * its SMMU context information. + */ + new_idx = atomic_add_return(1, &g_num_smmu_ctxs) - 1; + if (new_idx < 0 || new_idx >= PVA_MAX_NUM_SMMU_CONTEXTS) { + atomic_dec(&g_num_smmu_ctxs); + dev_err(&pdev->dev, "Invalid number of SMMU contexts: %d", + new_idx); + return -EINVAL; + } + idx = new_idx; + + g_smmu_ctxs[idx].pdev = pdev; + g_smmu_ctxs[idx].sid = pva_kmd_device_get_sid(pdev); + + atomic_add(1, &g_num_smmu_probing_done); + + dev_info(&pdev->dev, "initialized (streamid=%d)", + pva_kmd_device_get_sid(pdev)); + return 0; +} + +#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID) /* Linux v6.11 */ +static void __exit +pva_kmd_linux_smmu_context_remove_wrapper(struct platform_device *pdev) +{ +} +#else +static int __exit +pva_kmd_linux_smmu_context_remove_wrapper(struct platform_device *pdev) +{ + return 0; +} +#endif + +bool pva_kmd_linux_smmu_contexts_initialized(enum pva_chip_id chip_id) +{ + /* Note: Instead of checking if g_num_smmu_ctxs has reached its maximum + * value here, we do it in the probe function. This is because + * when PVA device probe calls this method, the prober of the last + * SMMU context device might have incremented g_num_smmu_ctxs but + * might still not have updated g_smmu_ctxs. + */ + int max_num_smmu_ctx = (chip_id == PVA_CHIP_T26X) ? + PVA_NUM_SMMU_CONTEXTS_T26X : + PVA_NUM_SMMU_CONTEXTS_T23X; + + (void)chip_id; + // TODO: When multiple VMs are running, each VM will have less than + // PVA_MAX_NUM_SMMU_CONTEXTS contexts. Hence the following logic + // would be incorrect for such cases. Should be fixed. 
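+ // Note: one SMMU context is reserved for KMD itself (FW load from GSC)
+ // and is probed outside this driver, so only (max_num_smmu_ctx - 1)
+ // user context probes are counted here.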
+ if (atomic_read(&g_num_smmu_probing_done) == (max_num_smmu_ctx - 1)) + g_smmu_probing_done = true; + + return g_smmu_probing_done; +} + +void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device) +{ + uint32_t sid_idx; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva_device); + + if (!g_smmu_probing_done) + FAULT("SMMU contexts init called before all contexts were probed"); + + /* Configure SMMU contexts for unprivileged operations */ + /* PVA cluster will access these regions only using DMA */ + for (sid_idx = 0U; + sid_idx < safe_subu32(pva_device->hw_consts.n_smmu_contexts, 2U); + sid_idx++) { + uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U); + pva_device->stream_ids[smmu_ctx_idx] = g_smmu_ctxs[sid_idx].sid; + device_data->smmu_contexts[smmu_ctx_idx] = + g_smmu_ctxs[sid_idx].pdev; + dma_set_mask_and_coherent( + &device_data->smmu_contexts[smmu_ctx_idx]->dev, + DMA_BIT_MASK(39)); + } + + /* Configure SMMU contexts for privileged operations */ + /* PVA cluster may access this region directly (without DMA) */ + /* The last one is PRIV SID */ + // TODO - if context devices are not enumerated in the order + // in which they appear in the device tree, the last + // ctx in g_smmu_ctxs may not be the last SID assigned to PVA + // Question: Is it necessary that priv SID is the last one? + pva_device->stream_ids[0] = g_smmu_ctxs[sid_idx].sid; + device_data->smmu_contexts[0] = g_smmu_ctxs[sid_idx].pdev; + dma_set_mask_and_coherent(&device_data->smmu_contexts[0]->dev, + DMA_BIT_MASK(32)); +} + +struct platform_driver pva_kmd_linux_smmu_context_driver = { + .probe = pva_kmd_linux_device_smmu_context_probe, + .remove = __exit_p(pva_kmd_linux_smmu_context_remove_wrapper), + .driver = { + .owner = THIS_MODULE, + .name = "pva_iommu_context_dev", +#ifdef CONFIG_OF + .of_match_table = pva_kmd_linux_smmu_context_of_match_table, +#endif + }, +}; diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c new file mode 100644 index 00000000..3df94c2d --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#include "pva_kmd_shim_vpu_app_auth.h" +#include "pva_kmd_linux_device.h" +#include +#include + +/** + * Default path (including filename) of pva vpu elf authentication allowlist file + */ +#define PVA_AUTH_ALLOW_LIST_DEFAULT "pva_auth_allowlist" + +/** + * @brief Loads allowlist into memory + * + * Reads the content of allowlist file using request_firmware API + * Allocate memory of same size using \ref pva_kmd_zalloc, + * Copies the data content to this allocated memory + * It is responsibility of caller to free this memory using pva_kmd_free + * + * @param[in] pva KMD device structure pointer + * @param[in] file_name Allow list file name + * @param[out] hash_keys_data pointer to pointer pointing to data, where read data to be copied + * @param[out] psize pointer, where size of the data to be updated + * @return + * - PVA_SUCCESS On success + * - PVA_NOENT if unable to read allowlist file + * - PVA_NOMEM if allocation fails + */ +enum pva_error pva_kmd_auth_allowlist_load(struct pva_kmd_device *pva, + const char *file_name, + uint8_t **hash_keys_data, + uint64_t *psize) +{ + enum pva_error err = PVA_SUCCESS; + int32_t kerr = 0; + struct pva_kmd_linux_device_data *device_data = + pva_kmd_linux_device_get_data(pva); + struct nvhost_device_data *device_props = + device_data->pva_device_properties; + + const struct firmware *pallow_list; + + ASSERT(file_name != NULL); + + kerr = request_firmware(&pallow_list, file_name, + &device_props->pdev->dev); + + if (kerr < 0) { + pva_kmd_log_err("Failed to load the allow list\n"); + err = PVA_NOENT; + goto out; + } + + *psize = (uint64_t)pallow_list->size; + *hash_keys_data = pva_kmd_zalloc((size_t)pallow_list->size); + if (*hash_keys_data == NULL) { + pva_kmd_log_err("Unable to allocate memory"); + err = PVA_NOMEM; + goto release; + } + + (void)memcpy(*hash_keys_data, pallow_list->data, pallow_list->size); + +release: + release_firmware(pallow_list); + +out: + return err; +} + +const char *pva_kmd_get_default_allowlist(void) +{ + return PVA_AUTH_ALLOW_LIST_DEFAULT; +} diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_bit.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_bit.h new file mode 100644 index 00000000..fcab1c68 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_bit.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_BIT_H +#define PVA_BIT_H +#include "pva_api.h" +/* + * Bit manipulation macros + */ + +/** + * @brief Number of bits per byte. + */ +#define PVA_BITS_PER_BYTE (8UL) + +/** + * @defgroup PVA_BIT8_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint8_t. + * Parameter that convey the bit position should be in the range + * of 0 to 7 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 7 inclusive with MSB greater than LSB. + * @{ + */ +/** + * @brief Macro to set a given bit position in a variable of type uint8_t. + */ +#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu)) + +//! 
@cond DISABLE_DOCUMENTATION +/** + * @brief Macro used to generate a bit-mask from MSB to LSB in a uint8_t variable. + * This macro sets all the bits from MSB to LSB. + */ +#define PVA_MASK8(_msb_, _lsb_) \ + ((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \ + ~(PVA_BIT8(_lsb_) - 1U)) & \ + 0xffu)) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Macro to extract bits from a 8 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT8(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK8((_msb_), (_lsb_))) >> (_lsb_))) + +#define PVA_INSERT8(_x_, _msb_, _lsb_) \ + ((((uint8_t)(_x_)) << (_lsb_)) & PVA_MASK8((_msb_), (_lsb_))) +//! @endcond + +//! @endcond +/** @} */ + +/** + * @defgroup PVA_BIT16_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint16_t. + * Parameter that convey the bit position should be in the range + * of 0 to 15 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 15 inclusive with MSB greater than LSB. + * @{ + */ +/** + * @brief Macro to set a given bit position in a 16 bit number. + */ +#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu)) + +/** + * @brief Macro to mask a range(MSB to LSB) of bit positions in a 16 bit number. + * This will set all the bit positions in specified range. + */ +#define PVA_MASK16(_msb_, _lsb_) \ + ((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \ + ~(PVA_BIT16(_lsb_) - 1U)) & \ + 0xffffu)) + +//! @cond DISABLE_DOCUMENTATION +/** + * @brief Macro to extract bits from a 16 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_))) +//! @endcond + +/** + * @brief Macro used to generate a bit-mask from MSB to LSB in a uint16_t variable. + * This macro sets all the bits from MSB to LSB. + */ +#define PVA_INSERT16(_x_, _msb_, _lsb_) \ + ((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_))) +/** @} */ + +/** + * @defgroup PVA_BIT32_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint32_t. + * Parameter that convey the bit position should be in the range + * of 0 to 31 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 31 inclusive with MSB greater than LSB. + * @{ + */ + +/** + * @brief Macro to set a given bit position in a 32 bit number. + */ +#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffu)) + +/** + * @brief Macro to mask a range(MSB to LSB) of bit positions in a 32 bit number. + * This will set all the bit positions in specified range. + */ +#define PVA_MASK(_msb_, _lsb_) \ + (((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U)) + +/** + * @brief Macro to extract bits from a 32 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_))) + +/** + * @brief Macro to insert a range of bits from a given 32 bit number. + * Range of bits are derived from the number passed as argument. 
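+ *
+ * Worked example: PVA_MASK(7U, 4U) is 0xF0U, so PVA_INSERT(0x5U, 7U, 4U)
+ * yields 0x50U and PVA_EXTRACT(0x50U, 7U, 4U, uint8_t) recovers 0x5U.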
+ */ +#define PVA_INSERT(_x_, _msb_, _lsb_) \ + ((((uint32_t)(_x_)) << (_lsb_)) & PVA_MASK((_msb_), (_lsb_))) +/** @} */ + +/** + * @defgroup PVA_BIT64_HELPER + * + * @brief Bit Manipulation macros for number which is of type uint64_t. + * Parameter that convey the bit position should be in the range + * of 0 to 63 inclusive. + * Parameter with respect to MSB and LSB should satisfy the conditions + * of both being in the range of 0 to 63 inclusive with MSB greater than LSB. + * @{ + */ +/** + * @brief Macro to set a given bit position in a 64 bit number. + */ +#define PVA_BIT64(_b_) \ + ((uint64_t)(((uint64_t)1UL << (_b_)) & 0xffffffffffffffffu)) + +/** + * @brief Macro used to generate a bit-mask from (MSB to LSB) in a uint64_t variable. + * This macro sets all the bits from MSB to LSB. + */ +#define PVA_MASK64(_msb_, _lsb_) \ + (((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \ + ~(PVA_BIT64(_lsb_) - (uint64_t)1U)) + +/** + * @brief Macro to extract bits from a 64 bit number. + * The bits are extracted from the range provided and the extracted + * number is finally type-casted to the type provided as argument. + */ +#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_))) + +/** + * @brief Macro to insert a range of bits into a 64 bit number. + * The bits are derived from the number passed as argument. + */ +#define PVA_INSERT64(_x_, _msb_, _lsb_) \ + ((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_))) + +/** + * @brief Macro to pack a 64 bit number. + * A 64 bit number is generated that has first 32 MSB derived from + * upper 32 bits of passed argument and has lower 32MSB derived from + * lower 32 bits of another passed argument. + */ +#define PVA_PACK64(_l_, _h_) \ + (PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U)) + +/** + * @brief Macro to extract the higher 32 bits from a 64 bit number. + */ +#define PVA_HI32(_x_) ((uint32_t)(((_x_) >> 32U) & 0xFFFFFFFFU)) + +/** + * @brief Macro to extract the lower 32 bits from a 64 bit number. + */ +#define PVA_LOW32(_x_) ((uint32_t)((_x_)&0xFFFFFFFFU)) +/** @} */ + +#endif // PVA_BIT_H diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h new file mode 100644 index 00000000..45f8c6d1 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_constants.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */ + +#ifndef PVA_CONSTANTS_H +#define PVA_CONSTANTS_H + +#define PVA_NUM_ENGINES 2U +#define PVA_MAX_NUM_CCQ 8 +#define PVA_CCQ_DEPTH 8U +#define PVA_USER_CCQ_BASE 1 +#define PVA_INVALID_CCQ_ID 0xFF +#define PVA_INVALID_ENGINE_ID 0xFFU + +#define PVA_CFG_CCQ_STATUS_COUNT 9 +#define PVA_CFG_CCQ_BLOCK_SIZE 0x10000 + +// Max of T26X and T23X +#define PVA_MAX_NUM_SMMU_CONTEXTS 10 + +#define PVA_NUM_SMMU_CONTEXTS_T23X 9 +#define PVA_NUM_SMMU_CONTEXTS_T26X 10 + +#define PVA_MAX_NUM_USER_CONTEXTS 7u + +#define PVA_R5_SMMU_CONTEXT_ID 0 + +#define TIME_MS_TO_NS 1000000U +#define TIME_MS_TO_US 1000U +#define TIME_US_TO_NS 1000U +#define TIME_US_TO_MS 1000U + +#define PVA_PRIV_CCQ_ID 0 +#define PVA_INVALID_QUEUE_ID 0xFF + +#define PVA_MAX_NUM_CHUNKS_PER_CONTEXT 128 +#define PVA_MAX_NUM_SUBMIT 256 + +#define PVA_MAX_NUM_PVAS 1 +#define PVA_NUM_PVE 2 + +#define PVA_MAX_NUM_RESETS 1 +#define PVA_MAX_NUM_CLOCKS 3 + +#define PVA_MAX_NUM_QUEUES_PER_CONTEXT 8u +#define PVA_NUM_RW_SYNCPTS_PER_CONTEXT PVA_MAX_NUM_QUEUES_PER_CONTEXT + +#define PVA_MAX_NUM_DMA_CONFIGS_PER_CONTEXT 1024 + +#define PVA_MAX_NUM_SEMAPHORES 1024 + +#define MAX_BYTES_PER_PIXEL 0x3U + +#define PVA_NUM_DMA_TRIGGERS 9 + +#define PVA_NUM_RESERVED_CHANNELS (1U) + +#define PVA_RESERVED_DMA_CHANNEL 0U + +#define PVA_NUM_RESERVED_DESCRIPTORS (4U) + +#define PVA_RESERVED_DESCRIPTORS_START 60U + +#define PVA_RESERVED_DESCRIPTORS_END \ + ((PVA_RESERVED_DESCRIPTORS_START) + (PVA_NUM_RESERVED_DESCRIPTORS)-1U) + +//T26X defines +#define PVA_NUM_HWSEQ_WORDS_T26X (512U) + +#define PVA_NUM_DMA_DESC_T26X (96U) + +#define PVA_DMA_NUM_CHANNELS_T26X (16U) + +#define PVA_NUM_DMA_ADB_BUFFS_T26X (304U) + +#define PVA_NUM_RESERVED_ADB_BUFFERS_T26X (16U) + +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T26X \ + (PVA_NUM_DMA_ADB_BUFFS_T26X - PVA_NUM_RESERVED_ADB_BUFFERS_T26X) + +#define PVA_NUM_VMEM_REGIONS_T26X (4U) +//T23X defines +#define PVA_NUM_HWSEQ_WORDS_T23X 256U + +#define PVA_NUM_DMA_DESC_T23X (64U) + +#define PVA_DMA_NUM_CHANNELS_T23X (16U) + +#define PVA_NUM_DMA_ADB_BUFFS_T23X (272U) + +#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X (16U) + +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X \ + (PVA_NUM_DMA_ADB_BUFFS_T23X - PVA_NUM_RESERVED_ADB_BUFFERS_T23X) + +#define PVA_NUM_DMA_VDB_BUFFS (128U) + +#define PVA_NUM_RESERVED_VDB_BUFFERS (0U) + +#define PVA_NUM_DYNAMIC_VDB_BUFFS \ + (PVA_NUM_DMA_VDB_BUFFS - PVA_NUM_RESERVED_VDB_BUFFERS) + +#define PVA_NUM_VMEM_REGIONS_T23X (3U) + +#define PVA_MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +#define PVA_MAX_NUM_DYNAMIC_ADB_BUFFS \ + PVA_MAX(PVA_NUM_DYNAMIC_ADB_BUFFS_T26X, PVA_NUM_DYNAMIC_ADB_BUFFS_T23X) + +#define PVA_MAX_NUM_ADB_BUFFS \ + PVA_MAX(PVA_NUM_DMA_ADB_BUFFS_T26X, PVA_NUM_DMA_ADB_BUFFS_T23X) + +#define PVA_MAX_NUM_HWSEQ_WORDS \ + PVA_MAX(PVA_NUM_HWSEQ_WORDS_T26X, PVA_NUM_HWSEQ_WORDS_T23X) + +#define PVA_MAX_NUM_DMA_CHANNELS \ + PVA_MAX(PVA_DMA_NUM_CHANNELS_T26X, PVA_DMA_NUM_CHANNELS_T23X) + +#define PVA_MAX_NUM_DMA_DESC \ + PVA_MAX(PVA_NUM_DMA_DESC_T26X, PVA_NUM_DMA_DESC_T23X) + +#define PVA_MAX_NUM_USER_DMA_DESC \ + MAX(PVA_NUM_USER_DMA_DESC_T26X, PVA_NUM_USER_DMA_DESC_T23X) + +#define PVA_NUM_RESERVED_VDB_BUFFERS_L2SRAM_CLEAR (32U) + +#define PVA_RESERVED_L2SRAM_CLEAR_VDB_BUFF_START (0u) + +#define PVA_KMD_MAX_NUM_DMA_SLOTS 256U +#define PVA_KMD_MAX_NUM_DMA_RELOCS 256U +#define PVA_KMD_MAX_NUM_DMA_DRAM_SLOTS 256U + +#define PVA_BL_XBAR_RAW 1 +#define PVA_BL_TEGRA_RAW 0 +#define PVA_BL_SECTOR_PACK_BIT_SHIFT 39 + +#define PVA_KMD_CHIP_ID_ENV_VAR "PVA_GEN" +#define PVA_KMD_CHIP_ID_T23X "GEN2" +#define PVA_KMD_CHIP_ID_T26X "GEN3" +#define PVA_KMD_CHIP_ID_DEFAULT PVA_KMD_CHIP_ID_T23X + +#endif // PVA_CONSTANTS_H diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h new file mode 100644 index 00000000..124f4291 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h @@ -0,0 +1,851 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef PVA_MATH_UTILS_H +#define PVA_MATH_UTILS_H +#include "pva_plat_faults.h" + +typedef enum { + MATH_OP_SUCCESS, + MATH_OP_ERROR, +} pva_math_error; + +#define MAX_UINT32 4294967295u /*(0xFFFFFFFFu)*/ +#define MAX_UINT16 65535u +#define MAX_UINT8 255u +#define MAX_INT32 0x7FFFFFFF +#define MIN_INT32 (-0x7FFFFFFF - 1LL) +#define MAX_INT64 0x7FFFFFFFFFFFFFFF +#define MIN_INT64 (-0x7FFFFFFFFFFFFFFF - 1LL) +#define UINT64_MAX_SHIFT_BITS 0x3FU + +#define safe_pow2_roundup_u64(val, align) \ + pva_safe_roundup_u64((val), (align), __FILE__, __LINE__) + +#define safe_pow2_roundup_u32(val, align) \ + pva_safe_roundup_u32((val), (align), __FILE__, __LINE__) + +#define safe_pow2_roundup_u16(val, align) \ + pva_safe_roundup_u16((val), (align), __FILE__, __LINE__) + +#define safe_pow2_roundup_u8(val, align) \ + pva_safe_roundup_u8((val), (align), __FILE__, __LINE__) + +/** + * @brief Rounds up a uint64_t value to the nearest multiple of a power-of-two align. + * + * @param val The value to round up. + * @param align The alignment factor (must be a power of two). + * @param file The source file (for assertion messages). + * @param line The line number (for assertion messages). + * @return uint64_t The rounded-up value. + */ +static inline uint64_t pva_safe_roundup_u64(uint64_t val, uint64_t align, + const char *file, uint32_t line) +{ + uint64_t rounded; + // Ensure align is a power of two and at least 1 + ASSERT_WITH_LOC((align != 0U) && ((align & (align - 1U)) == 0U), file, + line); + + // Check for addition overflow + ASSERT_WITH_LOC(val <= UINT64_MAX - (align - 1U), file, line); + + rounded = (val + (align - 1U)) & ~(align - 1U); + + return rounded; +} + +/** + * @brief Rounds up a uint32_t value to the nearest multiple of a power-of-two align. + * + * @param val The value to round up. + * @param align The alignment factor (must be a power of two). + * @param file The source file (for assertion messages). + * @param line The line number (for assertion messages). + * @return uint32_t The rounded-up value. 
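+ *
+ * Example: through the safe_pow2_roundup_u32() wrapper, rounding 100U up to
+ * a 64U alignment returns 128U; an already aligned value is returned
+ * unchanged.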
+ */ +static inline uint32_t pva_safe_roundup_u32(uint32_t val, uint32_t align, + const char *file, uint32_t line) +{ + uint64_t temp; + uint32_t rounded; + // Ensure align is a power of two and at least 1 + ASSERT_WITH_LOC((align != 0U) && ((align & (align - 1U)) == 0U), file, + line); + + temp = (uint64_t)val + ((uint64_t)align - (uint64_t)1U); + ASSERT_WITH_LOC(temp <= MAX_UINT32, file, line); + rounded = (uint32_t)temp; + rounded = (uint32_t)(rounded & ~(align - 1U)); + + return rounded; +} + +/** + * @brief Rounds up a uint16_t value to the nearest multiple of a power-of-two align. + * + * @param val The value to round up. + * @param align The alignment factor (must be a power of two). + * @param file The source file (for assertion messages). + * @param line The line number (for assertion messages). + * @return uint16_t The rounded-up value. + */ +static inline uint16_t pva_safe_roundup_u16(uint16_t val, uint16_t align, + const char *file, uint32_t line) +{ + uint32_t temp; + uint16_t rounded; + // Ensure align is a power of two and at least 1 + ASSERT_WITH_LOC((align != 0U) && ((align & (align - 1U)) == 0U), file, + line); + + temp = (uint32_t)val + ((uint32_t)align - (uint32_t)1U); + //Overflow check + ASSERT_WITH_LOC(temp <= MAX_UINT16, file, line); + rounded = (uint16_t)temp; + rounded = (uint16_t)(rounded & ~(align - 1U)); + + return rounded; +} + +/** + * @brief Rounds up a uint8_t value to the nearest multiple of a power-of-two align. + * + * @param val The value to round up. + * @param align The alignment factor (must be a power of two). + * @param file The source file (for assertion messages). + * @param line The line number (for assertion messages). + * @return uint8_t The rounded-up value. + */ +static inline uint8_t pva_safe_roundup_u8(uint8_t val, uint8_t align, + const char *file, uint32_t line) +{ + uint32_t temp; + uint8_t rounded; + // Ensure align is a power of two and at least 1 + ASSERT_WITH_LOC((align != 0U) && ((align & (align - 1U)) == 0U), file, + line); + + temp = (uint32_t)val + ((uint32_t)align - (uint32_t)1U); + ASSERT_WITH_LOC(temp <= MAX_UINT8, file, line); + rounded = (uint8_t)temp; + rounded = (uint8_t)(rounded & ~(align - 1U)); + + return rounded; +} + +static inline uint32_t safe_abs_int32(int32_t value, const char *file, + uint32_t line) +{ + ASSERT_WITH_LOC((value != MIN_INT32), file, line); + return (uint32_t)((value < 0) ? -value : value); +} + +static inline uint64_t safe_abs_int64(int64_t value, const char *file, + uint32_t line) +{ + ASSERT_WITH_LOC((value != MIN_INT64), file, line); + return (uint64_t)((value < 0) ? 
-value : value); +} + +#define safe_absint32(value) safe_abs_int32((value), __FILE__, __LINE__) + +#define safe_absint64(value) safe_abs_int64((value), __FILE__, __LINE__) + +static inline uint64_t safe_add_u64(uint64_t addend1, uint64_t addend2, + const char *file, uint32_t line) +{ + uint64_t sum; + + sum = addend1 + addend2; + ASSERT_WITH_LOC(sum >= addend1, file, line); + return sum; +} + +static inline uint32_t safe_add_u32(uint32_t addend1, uint32_t addend2, + const char *file, uint32_t line) +{ + uint32_t sum; + + sum = addend1 + addend2; + ASSERT_WITH_LOC(sum >= addend1, file, line); + return sum; +} + +static inline uint16_t safe_add_u16(uint16_t addend1, uint16_t addend2, + const char *file, uint32_t line) +{ + uint32_t sum; + + sum = (uint32_t)addend1 + (uint32_t)addend2; + ASSERT_WITH_LOC(sum <= (uint32_t)MAX_UINT16, file, line); + return (uint16_t)sum; +} + +static inline uint8_t safe_add_u8(uint8_t addend1, uint8_t addend2, + const char *file, uint32_t line) +{ + uint32_t sum; + + sum = (uint32_t)addend1 + (uint32_t)addend2; + ASSERT_WITH_LOC(sum <= (uint32_t)MAX_UINT8, file, line); + return (uint8_t)sum; +} + +#define safe_addu64(addend1, addend2) \ + safe_add_u64((addend1), (addend2), __FILE__, __LINE__) + +#define safe_addu32(addend1, addend2) \ + safe_add_u32((addend1), (addend2), __FILE__, __LINE__) + +#define safe_addu16(addend1, addend2) \ + safe_add_u16((addend1), (addend2), __FILE__, __LINE__) + +#define safe_addu8(addend1, addend2) \ + safe_add_u8((addend1), (addend2), __FILE__, __LINE__) + +static inline uint64_t safe_sub_u64(uint64_t minuend, uint64_t subtrahend, + const char *file, uint32_t line) +{ + uint64_t difference; + + ASSERT_WITH_LOC((minuend) >= (subtrahend), file, line); + difference = ((minuend) - (subtrahend)); + return difference; +} + +static inline uint32_t safe_sub_u32(uint32_t minuend, uint32_t subtrahend, + const char *file, uint32_t line) +{ + uint32_t difference; + + ASSERT_WITH_LOC((minuend) >= (subtrahend), file, line); + difference = ((minuend) - (subtrahend)); + return difference; +} + +static inline uint16_t safe_sub_u16(uint16_t minuend, uint16_t subtrahend, + const char *file, uint32_t line) +{ + uint16_t difference; + + ASSERT_WITH_LOC((minuend) >= (subtrahend), file, line); + difference = ((minuend) - (subtrahend)); + return difference; +} + +static inline uint8_t safe_sub_u8(uint8_t minuend, uint8_t subtrahend, + const char *file, uint32_t line) +{ + uint8_t difference; + + ASSERT_WITH_LOC((minuend) >= (subtrahend), file, line); + difference = ((minuend) - (subtrahend)); + return difference; +} + +#define safe_subu64(minuend, subtrahend) \ + safe_sub_u64((minuend), (subtrahend), __FILE__, __LINE__) + +#define safe_subu32(minuend, subtrahend) \ + safe_sub_u32((minuend), (subtrahend), __FILE__, __LINE__) + +#define safe_subu16(minuend, subtrahend) \ + safe_sub_u16((minuend), (subtrahend), __FILE__, __LINE__) + +#define safe_subu8(minuend, subtrahend) \ + safe_sub_u8((minuend), (subtrahend), __FILE__, __LINE__) + +static inline uint64_t safe_mul_u64(uint64_t operand1, uint64_t operand2, + const char *file, uint32_t line) +{ + uint64_t product; + if ((operand1 == 0u) || (operand2 == 0u)) { + product = 0u; + } else { + ASSERT_WITH_LOC((operand1) <= (UINT64_MAX / operand2), file, + line); + product = (operand1 * operand2); + } + + return product; +} + +static inline uint32_t safe_mul_u32(uint32_t operand1, uint32_t operand2, + const char *file, uint32_t line) +{ + uint64_t product; + + product = ((uint64_t)operand1 * (uint64_t)operand2); + 
ASSERT_WITH_LOC(product <= UINT32_MAX, file, line); + return (uint32_t)(product); +} + +static inline uint16_t safe_mul_u16(uint16_t operand1, uint16_t operand2, + const char *file, uint32_t line) +{ + uint32_t product; + + product = ((uint32_t)operand1 * (uint32_t)operand2); + ASSERT_WITH_LOC(product <= MAX_UINT16, file, line); + return (uint16_t)(product); +} + +static inline uint8_t safe_mul_u8(uint8_t operand1, uint8_t operand2, + const char *file, uint32_t line) +{ + uint32_t product; + + product = ((uint32_t)operand1 * (uint32_t)operand2); + ASSERT_WITH_LOC(product <= MAX_UINT8, file, line); + return (uint8_t)(product); +} + +#define safe_mulu64(operand1, operand2) \ + safe_mul_u64((operand1), (operand2), __FILE__, __LINE__) + +#define safe_mulu32(operand1, operand2) \ + safe_mul_u32((operand1), (operand2), __FILE__, __LINE__) + +#define safe_mulu16(operand1, operand2) \ + safe_mul_u16((operand1), (operand2), __FILE__, __LINE__) + +#define safe_mulu8(operand1, operand2) \ + safe_mul_u8((operand1), (operand2), __FILE__, __LINE__) + +static inline int64_t safe_get_signed_s64(uint64_t value, const char *file, + uint32_t line) +{ + ASSERT_WITH_LOC((value <= (uint64_t)MAX_INT64), file, line); + return (int64_t)value; +} + +static inline int32_t safe_get_signed_s32(uint32_t value, const char *file, + uint32_t line) +{ + ASSERT_WITH_LOC((value <= (uint32_t)MAX_INT32), file, line); + return (int32_t)value; +} +#define convert_to_signed_s64(value) \ + safe_get_signed_s64((value), __FILE__, __LINE__) + +#define convert_to_signed_s32(value) \ + safe_get_signed_s32((value), __FILE__, __LINE__) + +static inline uint64_t addu64(uint64_t addend1, uint64_t addend2, + pva_math_error *math_flag) +{ + uint64_t sum; + + sum = addend1 + addend2; + if (sum < addend1) { + *math_flag = MATH_OP_ERROR; + sum = 0u; + } + + return sum; +} + +static inline uint32_t addu32(uint32_t addend1, uint32_t addend2, + pva_math_error *math_flag) +{ + uint32_t sum; + + sum = addend1 + addend2; + if (sum < addend1) { + *math_flag = MATH_OP_ERROR; + sum = 0u; + } + + return sum; +} + +static inline uint16_t addu16(uint16_t addend1, uint16_t addend2, + pva_math_error *math_flag) +{ + uint32_t sum; + + sum = (uint32_t)addend1 + (uint32_t)addend2; + if (sum > MAX_UINT16) { + *math_flag = MATH_OP_ERROR; + sum = 0u; + } + + return (uint16_t)sum; +} + +static inline uint8_t addu8(uint8_t addend1, uint8_t addend2, + pva_math_error *math_flag) +{ + uint32_t sum; + + sum = (uint32_t)addend1 + (uint32_t)addend2; + if (sum > MAX_UINT8) { + *math_flag = MATH_OP_ERROR; + sum = 0u; + } + + return (uint8_t)sum; +} + +static inline int64_t adds64(int64_t addend1, int64_t addend2, + pva_math_error *math_flag) +{ + int64_t sum = addend1 + addend2; + + /* Check for overflow when both numbers are positive */ + if (((sum < 0) && ((addend2 > 0) && (addend1 > 0))) || + ((sum > 0) && ((addend2 < 0) && (addend1 < 0)))) { + *math_flag = MATH_OP_ERROR; + sum = 0; + } + return sum; +} + +static inline uint64_t subu64(uint64_t minuend, uint64_t subtrahend, + pva_math_error *math_flag) +{ + uint64_t difference; + + if (minuend < subtrahend) { + *math_flag = MATH_OP_ERROR; + difference = 0u; + } else { + difference = ((minuend) - (subtrahend)); + } + + return difference; +} + +static inline uint32_t subu32(uint32_t minuend, uint32_t subtrahend, + pva_math_error *math_flag) +{ + uint32_t difference; + + if (minuend < subtrahend) { + *math_flag = MATH_OP_ERROR; + difference = 0u; + } else { + difference = ((minuend) - (subtrahend)); + } + + return 
difference; +} + +static inline uint16_t subu16(uint16_t minuend, uint16_t subtrahend, + pva_math_error *math_flag) +{ + uint16_t difference; + + if (minuend < subtrahend) { + *math_flag = MATH_OP_ERROR; + difference = 0u; + } else { + difference = ((minuend) - (subtrahend)); + } + + return difference; +} + +static inline uint8_t subu8(uint8_t minuend, uint8_t subtrahend, + pva_math_error *math_flag) +{ + uint8_t difference; + + if (minuend < subtrahend) { + *math_flag = MATH_OP_ERROR; + difference = 0u; + } else { + difference = ((minuend) - (subtrahend)); + } + + return difference; +} + +static inline int64_t subs64(int64_t minuend, int64_t subtrahend, + pva_math_error *math_flag) +{ + int64_t difference; + + /* Check for overflow/underflow */ + if (subtrahend > 0) { + /* Subtracting a positive number - check for underflow */ + if (minuend < (MIN_INT64 + subtrahend)) { + *math_flag = MATH_OP_ERROR; + return 0; + } + } else { + /* Subtracting a negative number - check for overflow */ + if (minuend > (MAX_INT64 + subtrahend)) { + *math_flag = MATH_OP_ERROR; + return 0; + } + } + + difference = minuend - subtrahend; + return difference; +} + +static inline int32_t subs32(int32_t minuend, int32_t subtrahend, + pva_math_error *math_flag) +{ + int64_t difference; + + difference = (int64_t)minuend - (int64_t)subtrahend; + if ((difference > MAX_INT32) || (difference < MIN_INT32)) { + *math_flag = MATH_OP_ERROR; + return 0; + } + + return (int32_t)difference; +} + +static inline uint64_t mulu64(uint64_t operand1, uint64_t operand2, + pva_math_error *math_flag) +{ + uint64_t product; + + if ((operand1 == 0u) || (operand2 == 0u)) { + product = 0u; + } else { + if ((operand1) > (UINT64_MAX / operand2)) { + *math_flag = MATH_OP_ERROR; + product = 0u; + } else { + product = (operand1 * operand2); + } + } + + return product; +} + +static inline uint32_t mulu32(uint32_t operand1, uint32_t operand2, + pva_math_error *math_flag) +{ + uint64_t product; + + product = ((uint64_t)operand1 * (uint64_t)operand2); + if (product > UINT32_MAX) { + *math_flag = MATH_OP_ERROR; + product = 0u; + } + + return (uint32_t)(product); +} + +static inline uint16_t mulu16(uint16_t operand1, uint16_t operand2, + pva_math_error *math_flag) +{ + uint32_t product; + + product = ((uint32_t)operand1 * (uint32_t)operand2); + if (product > MAX_UINT16) { + *math_flag = MATH_OP_ERROR; + product = 0u; + } + + return (uint16_t)(product); +} + +static inline uint8_t mulu8(uint8_t operand1, uint8_t operand2, + pva_math_error *math_flag) +{ + uint32_t product; + + product = ((uint32_t)operand1 * (uint32_t)operand2); + if (product > MAX_UINT8) { + *math_flag = MATH_OP_ERROR; + product = 0u; + } + + return (uint8_t)(product); +} + +static inline int64_t muls64(int64_t operand1, int64_t operand2, + pva_math_error *math_flag) +{ + /* Handle special cases first */ + if ((operand1 == 0) || (operand2 == 0)) { + return 0; + } + + /* Check for overflow/underflow */ + if ((operand1 > 0 && operand2 > 0 && + operand1 > (MAX_INT64 / operand2)) || + (operand1 < 0 && operand2 < 0 && + operand1 < (MAX_INT64 / operand2)) || + (operand1 < 0 && operand2 > 0 && + operand1 < (MIN_INT64 / operand2)) || + (operand1 > 0 && operand2 < 0 && + operand2 < (MIN_INT64 / operand1))) { + *math_flag = MATH_OP_ERROR; + return 0; + } + + // Special case for MIN_INT64 + if (operand1 == MIN_INT64 && operand2 == -1) { + *math_flag = MATH_OP_ERROR; + return 0; + } + + return ((int64_t)(operand1) * (int64_t)(operand2)); +} + +static inline int32_t muls32(int32_t operand1, 
int32_t operand2, + pva_math_error *math_flag) +{ + int64_t product; + + product = (int64_t)(operand1) * (int64_t)(operand2); + if ((product > MAX_INT32) || (product < MIN_INT32)) { + *math_flag = MATH_OP_ERROR; + return 0; + } + + return (int32_t)(product); +} + +static inline uint32_t wrap_add(uint32_t a, uint32_t b, uint32_t size) +{ + uint32_t result = a + b; + if (result >= size) { + result -= size; + } + return result; +} + +static inline uint8_t wrap_add_u8(uint8_t a, uint8_t b, uint8_t size) +{ + uint32_t result = (uint32_t)a + (uint32_t)b; + if (result >= (uint32_t)size) { + result -= (uint32_t)size; + } + return (uint8_t)result; +} + +/* size must be 2^n */ +static inline uint8_t wrap_add_pow2(uint8_t a, uint8_t b, uint8_t size) +{ + return (a + b) & (size - 1); +} + +#define SAT_ADD_DEFINE(a, b, name, type) \ + static inline type sat_add##name(type a, type b) \ + { \ + type result; \ + result = (a) + (b); \ + if ((result) < (a)) { \ + result = (type)-1; \ + } \ + return result; \ + } +#define SAT_ADD_DEFINE_CUSTOM(a, b, name, type) \ + static inline type sat_add##name(type a, type b) \ + { \ + uint32_t result; \ + result = (uint32_t)(a) + (uint32_t)(b); \ + if ((result) > ((type)-1)) { \ + result = (type)-1; \ + } \ + return (type)result; \ + } +SAT_ADD_DEFINE_CUSTOM(a, b, 8, uint8_t) +SAT_ADD_DEFINE_CUSTOM(a, b, 16, uint16_t) +SAT_ADD_DEFINE(a, b, 32, uint32_t) +SAT_ADD_DEFINE(a, b, 64, uint64_t) + +#define SAT_SUB_DEFINE(a, b, name, type) \ + static inline type sat_sub##name(type a, type b) \ + { \ + if ((a) >= (b)) { \ + return (a) - (b); \ + } else { \ + return 0; \ + } \ + } + +SAT_SUB_DEFINE(a, b, 8, uint8_t) +SAT_SUB_DEFINE(a, b, 16, uint16_t) +SAT_SUB_DEFINE(a, b, 32, uint32_t) +SAT_SUB_DEFINE(a, b, 64, uint64_t) + +#define MIN_DEFINE(a, b, name, type) \ + static inline type min##name(type a, type b) \ + { \ + return ((a) < (b) ? (a) : (b)); \ + } + +#define MAX_DEFINE(a, b, name, type) \ + static inline type max##name(type a, type b) \ + { \ + return ((a) > (b) ? (a) : (b)); \ + } + +MIN_DEFINE(a, b, u32, uint32_t) +MIN_DEFINE(a, b, u64, uint64_t) +MIN_DEFINE(a, b, s32, int32_t) +MIN_DEFINE(a, b, s64, int64_t) + +MAX_DEFINE(a, b, u32, uint32_t) +MAX_DEFINE(a, b, u64, uint64_t) +MAX_DEFINE(a, b, s32, int32_t) +MAX_DEFINE(a, b, s64, int64_t) + +static inline uint64_t tsc_to_ns(uint64_t tsc) +{ + return safe_mulu64(tsc, 32); +} + +static inline uint64_t tsc_to_us(uint64_t tsc) +{ + return tsc_to_ns(tsc) / 1000; +} + +static inline uint64_t ns_to_tsc(uint64_t ns) +{ + return ns / 32; +} + +static inline uint64_t us_to_tsc(uint64_t us) +{ + return ns_to_tsc(safe_mulu64(us, 1000)); +} + +/** + * @brief Generates a 64-bit mask based on the specified start position, count, and density. + * + * This function computes a mask from 'lsb' to 'msb' by grouping elements together based on 'density'. + * Each bit in the mask will represent 'density' number of elements. For example, if density is 4 + * and count is 12, a total of 3 bits will be set in the produced mask starting at index 'start / 4'. + * + + * @param start The starting bit position for the mask. + * @param count The number of bits to include in the mask starting from the start position. + * @param density The density factor, assumed to be a power of 2, represents group size. + * + * @return A 64-bit integer representing the mask with bits set between the calculated msb and lsb. 
+ */ + +static inline uint64_t pva_mask64(uint16_t start, uint16_t count, + uint16_t density) +{ + int shift; + uint32_t lsb, msb; + uint64_t lower_mask, upper_mask; + + if (count == 0U) { + return 0U; + } + + shift = __builtin_ctz(density); + lsb = (((uint32_t)start >> shift) & UINT64_MAX_SHIFT_BITS); + msb = ((((uint32_t)start + (uint32_t)count - 1U) >> shift) & + UINT64_MAX_SHIFT_BITS); + + lower_mask = ~safe_subu64((uint64_t)((1ULL << lsb)), 1U); + upper_mask = (safe_subu64(((uint64_t)(1ULL << msb)), 1U) | + ((uint64_t)(1ULL << msb))); + return (lower_mask & upper_mask); +} + +/** + * The size of a block linear surface must be a multiple of RoB (row of blocks). + * Therefore, the maximum block linear surface size that a buffer can store + * needs to be rounded down accordingly. + */ +static inline uint64_t pva_max_bl_surface_size(uint64_t buffer_size, + uint8_t log2_block_height, + uint32_t line_pitch, + pva_math_error *math_error) +{ + uint64_t max_bl_surface_size = 0u; + uint64_t alignment = + mulu64(((uint64_t)1U << (uint64_t)log2_block_height), + (uint64_t)line_pitch, math_error); + + if (alignment != 0u) { + max_bl_surface_size = mulu64((buffer_size / alignment), + alignment, math_error); + } + + return max_bl_surface_size; +} + +static inline uint64_t pva_get_goboffset(uint32_t const x, uint32_t const y, + pva_math_error *math_error) +{ + uint32_t const BL_GOBW = 64; + uint32_t const BL_GOB_PACK_MASK = BL_GOBW >> 1; + uint32_t const BL_GOB_PACK_STRIDE = 8; + uint32_t const BL_GOB_SUBPACK_VER_MASK = 6; + uint32_t const BL_GOB_SECW = 16; // GOB sector width + uint32_t const BL_GOB_SECH = 2; // GOB sector height + uint32_t const BL_GOB_SEC_SZ = BL_GOB_SECW * BL_GOB_SECH; + uint32_t const BL_GOB_SUBPACK_HOR_MASK = BL_GOB_SEC_SZ >> 1; + uint32_t const BL_GOB_SUBPACK_VER_STRIDE = 32; + uint32_t const BL_GOB_SUBPACK_HOR_STRIDE = 2; + uint32_t const BL_GOB_SEC_VER_MASK = BL_GOB_SECH - 1; + uint32_t const BL_GOB_SEC_HOR_MASK = BL_GOB_SECW - 1; + uint32_t const BL_GOB_SEC_VER_STRIDE = 16; + + uint32_t const maskedXPack = (x & BL_GOB_PACK_MASK); + uint32_t const packOffset = + mulu32(maskedXPack, BL_GOB_PACK_STRIDE, math_error); + + uint32_t const maskedYSubpack = (y & BL_GOB_SUBPACK_VER_MASK); + uint32_t const maskedXSubpack = (x & BL_GOB_SUBPACK_HOR_MASK); + uint32_t const subpackOffsetY = + mulu32(maskedYSubpack, BL_GOB_SUBPACK_VER_STRIDE, math_error); + uint32_t const subpackOffsetX = + mulu32(maskedXSubpack, BL_GOB_SUBPACK_HOR_STRIDE, math_error); + + uint32_t const maskedYSec = (y & BL_GOB_SEC_VER_MASK); + uint32_t const maskedXSec = (x & BL_GOB_SEC_HOR_MASK); + uint32_t const secOffset = + addu32(mulu32(maskedYSec, BL_GOB_SEC_VER_STRIDE, math_error), + maskedXSec, math_error); + + uint64_t gobOffset = addu64((uint64_t)packOffset, + (uint64_t)subpackOffsetX, math_error); + gobOffset = addu64(gobOffset, (uint64_t)subpackOffsetY, math_error); + gobOffset = addu64(gobOffset, (uint64_t)secOffset, math_error); + + return gobOffset; +} + +/** Convert pitch linear offset to block linear offset + * + * @param pl_offset Pitch linear offset in bytes + * @param line_pitch Surface line pitch in bytes + * @param log2_block_height Log2 of block height + * */ +static inline uint64_t pva_pl_to_bl_offset(uint64_t pl_offset, + uint32_t line_pitch, + uint32_t log2_block_height, + pva_math_error *math_error) +{ + uint32_t const x = pl_offset % line_pitch; + uint32_t const y = (uint32_t)(pl_offset / line_pitch); + uint32_t const BL_GOBW_LOG2 = 6; + uint32_t const BL_GOBH = 8; + uint32_t const BL_GOBH_LOG2 
= 3; + uint32_t const BL_GOB_SZ_LOG2 = BL_GOBW_LOG2 + BL_GOBH_LOG2; + uint32_t const widthInGobs = line_pitch >> BL_GOBW_LOG2; + uint32_t const blockSizeLog2 = + addu32(BL_GOB_SZ_LOG2, log2_block_height, math_error); + uint32_t const linesPerBlock = BL_GOBH << log2_block_height; + uint32_t const linesPerBlockLog2 = + addu32(BL_GOBH_LOG2, log2_block_height, math_error); + uint32_t const maskedY = y & subu32(linesPerBlock, 1, math_error); + uint32_t const gobRowbase = (maskedY >> BL_GOBH_LOG2) << BL_GOB_SZ_LOG2; + uint32_t const gobX = (x >> BL_GOBW_LOG2) << blockSizeLog2; + uint32_t const gobY = (y >> linesPerBlockLog2) << blockSizeLog2; + uint64_t gobOffset = pva_get_goboffset(x, y, math_error); + uint32_t gobBase = mulu32(gobY, widthInGobs, math_error); + gobBase = addu32(gobBase, gobRowbase, math_error); + gobBase = addu32(gobBase, gobX, math_error); + + return addu64((uint64_t)gobBase, gobOffset, math_error); +} + +#endif diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_utils.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_utils.h new file mode 100644 index 00000000..8f61d366 --- /dev/null +++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_utils.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#ifndef PVA_UTILS_H +#define PVA_UTILS_H +#include "pva_constants.h" +#include "pva_api.h" +#include "pva_bit.h" + +#define PVA_ROUND_UP(val, align) ((((val) + ((align)-1U)) / (align)) * (align)) +#define PVA_ALIGN4(n) PVA_ROUND_UP(n, 4) +#define PVA_ALIGN8(n) PVA_ROUND_UP(n, 8) + +static inline uint64_t assemble_addr(uint8_t hi, uint32_t lo) +{ + return ((uint64_t)hi << 32) | (uint64_t)lo; +} + +static inline uint32_t iova_lo(uint64_t iova) +{ + return PVA_EXTRACT64(iova, 31, 0, uint32_t); +} + +static inline uint8_t iova_hi(uint64_t iova) +{ + return PVA_EXTRACT64(iova, 39, 32, uint8_t); +} + +static inline void *pva_offset_pointer(void *ptr, uintptr_t offset) +{ + return (void *)((uintptr_t)ptr + offset); +} + +static inline void const *pva_offset_const_ptr(void const *ptr, + uintptr_t offset) +{ + return (void const *)((uintptr_t)ptr + offset); +} + +static inline uint64_t pack64(uint32_t hi, uint32_t lo) +{ + uint64_t val = ((uint64_t)hi) << 32; + val |= (uint64_t)lo; + return val; +} + +static inline bool pva_is_64B_aligned(uint64_t addr) +{ + return (addr & 0x3f) == 0; +} + +static inline bool pva_is_512B_aligned(uint64_t addr) +{ + return (addr & 0x1ff) == 0; +} + +static inline bool pva_is_reserved_desc(uint8_t desc_id) +{ + return ((desc_id >= (PVA_RESERVED_DESCRIPTORS_START)) && + (desc_id <= (PVA_RESERVED_DESCRIPTORS_END))); +} + +#define PVA_ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +// clang-format off +/* + * pva_dbg_printf is only available in the following two environments: + * - Linux kernel (via printk) + * - User space with c runtime (via printf) + */ +#if PVA_IS_DEBUG == 1 + /* For debug build, we allow printf */ + #if defined(__KERNEL__) + /* Linux kernel */ + #define pva_dbg_printf printk + #elif (PVA_BUILD_MODE == PVA_BUILD_MODE_BAREMETAL) + #include "pva_fw_bm_utils.h" + /* Firmware in silicon */ + 
#define pva_dbg_printf pva_fw_printf
+	#else
+		/* User space with C runtime */
+		#include <stdio.h>
+		#define pva_dbg_printf printf
+	#endif
+#else
+	#if !(defined(__KERNEL__) || (PVA_BUILD_MODE == PVA_BUILD_MODE_BAREMETAL))
+		#include <stdio.h>
+	#endif
+	/* For release build */
+	#define pva_dbg_printf(...)
+#endif
+// clang-format on
+
+#endif // PVA_UTILS_H
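
To show how the overflow-checked arithmetic above is intended to be used, here is a minimal sketch contrasting the two error-handling styles in pva_math_utils.h: the safe_* wrappers trap via ASSERT_WITH_LOC (presumably provided by pva_plat_faults.h), while the plain addu64/mulu64 variants return 0 and latch a pva_math_error flag. The function names (example_slot_offset, example_total_size) are hypothetical.

#include "pva_math_utils.h"

/*
 * Flag-style: chain several operations and check pva_math_error once at the
 * end; each helper returns 0 and sets the flag to MATH_OP_ERROR on overflow.
 */
static uint64_t example_slot_offset(uint32_t slot, uint32_t slot_size,
				    uint64_t base, pva_math_error *err)
{
	uint64_t off = mulu64((uint64_t)slot, (uint64_t)slot_size, err);

	return addu64(base, off, err);
}

/* Assert-style: overflow traps via ASSERT_WITH_LOC instead of being reported. */
static uint32_t example_total_size(uint32_t count, uint32_t elem_size)
{
	return safe_mulu32(count, elem_size);
}

Because the helpers only ever set the flag to MATH_OP_ERROR, a caller can initialize it to MATH_OP_SUCCESS, chain several operations, and test it once at the end.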
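
A worked example for pva_mask64(), matching the density-4, count-12 case described in its comment block; a hosted environment with <assert.h> is assumed and example_mask64 is a hypothetical test helper.

#include <assert.h>
#include "pva_math_utils.h"

static void example_mask64(void)
{
	/*
	 * start = 8, count = 12, density = 4:
	 * lsb = 8 / 4 = 2, msb = (8 + 12 - 1) / 4 = 4,
	 * so bits 2..4 are set (three bits, starting at index start / density).
	 */
	assert(pva_mask64(8U, 12U, 4U) == 0x1CULL);

	/* A zero count always produces an empty mask. */
	assert(pva_mask64(8U, 0U, 4U) == 0ULL);
}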
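
A small sketch of the alignment helpers, assuming pva_utils.h's own includes (pva_api.h, pva_bit.h) resolve on the include path; the numeric values are arbitrary and example_alignment is hypothetical. safe_pow2_roundup_* asserts that the alignment is a power of two, whereas PVA_ROUND_UP is division-based and accepts any non-zero alignment.

#include <assert.h>
#include "pva_math_utils.h"
#include "pva_utils.h"

static void example_alignment(void)
{
	/* Power-of-two round-up: 100 rounded up to a multiple of 64. */
	assert(safe_pow2_roundup_u32(100U, 64U) == 128U);

	/* Division-based round-up works for non-power-of-two alignments too. */
	assert(PVA_ROUND_UP(100U, 24U) == 120U);
	assert(PVA_ALIGN8(13U) == 16U);

	/* Alignment predicates used for address checks. */
	assert(pva_is_64B_aligned(0x1000ULL));
	assert(!pva_is_512B_aligned(0x1040ULL));
}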
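
A sketch of the wrapping and saturating helpers, using PVA_CCQ_DEPTH from pva_constants.h as the ring size purely for illustration (this is not the actual CCQ bookkeeping); example_ring_and_saturation is hypothetical.

#include <assert.h>
#include "pva_constants.h"
#include "pva_math_utils.h"

static void example_ring_and_saturation(void)
{
	/*
	 * Advance a ring index modulo the CCQ depth (8). wrap_add subtracts
	 * the size at most once, so the step must stay below the depth.
	 */
	uint32_t head = wrap_add(6U, 5U, PVA_CCQ_DEPTH);

	assert(head == 3U);

	/* PVA_CCQ_DEPTH is a power of two, so the masked variant agrees. */
	assert(wrap_add_pow2(6U, 5U, (uint8_t)PVA_CCQ_DEPTH) == 3U);

	/* Saturating helpers clamp instead of wrapping. */
	assert(sat_add8(250U, 10U) == 255U);
	assert(sat_sub32(3U, 7U) == 0U);
}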
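
A call-only sketch of the block-linear helpers under assumed surface parameters (line pitch 256 bytes, block height of two GOBs); example_bl_remap is hypothetical and the return value only demonstrates the call sequence and the shared math_error flag.

#include "pva_math_utils.h"

static uint64_t example_bl_remap(uint64_t buffer_size, pva_math_error *err)
{
	uint32_t const line_pitch = 256U;     /* multiple of the 64-byte GOB width */
	uint8_t const log2_block_height = 1U; /* block = 2 GOBs = 16 lines */

	/* Largest block-linear surface (a whole number of RoBs) that fits. */
	uint64_t max_size = pva_max_bl_surface_size(buffer_size,
						    log2_block_height,
						    line_pitch, err);

	/* Remap the pitch-linear byte offset of (x = 80, y = 3). */
	uint64_t pl_offset = (uint64_t)3U * line_pitch + 80U;
	uint64_t bl_offset = pva_pl_to_bl_offset(pl_offset, line_pitch,
						 log2_block_height, err);

	return (max_size != 0U) ? bl_offset : 0U;
}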