From 76961fd57b9fe8a05c2cbdc44370fd490fda3ab5 Mon Sep 17 00:00:00 2001
From: omar
Date: Fri, 12 May 2023 21:14:06 +0000
Subject: [PATCH] drivers: add pva driver to nvidia-oot

- copy nvpva driver and headers to nvidia-oot directory.
- remove the file copy operation as part of the build process.

Bug 4097111

Change-Id: If040773833405f3941505cb8a2ec3440e0a84c92
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2903052
Reviewed-by: Omar Nemri
GVS: Gerrit_Virtual_Submit
Tested-by: Omar Nemri
---
 drivers/video/tegra/host/pva/Kconfig | 10 +
 drivers/video/tegra/host/pva/Makefile | 65 +-
 .../video/tegra/host/pva/elf_include_fix.h | 65 +
 .../tegra/host/pva/fw_include/fw_config.h | 127 ++
 .../video/tegra/host/pva/fw_include/pva-bit.h | 103 ++
 .../tegra/host/pva/fw_include/pva-errors.h | 208 +++
 .../host/pva/fw_include/pva-fw-address-map.h | 129 ++
 .../host/pva/fw_include/pva-fw-version.h | 36 +
 .../tegra/host/pva/fw_include/pva-interface.h | 481 +++++
 .../tegra/host/pva/fw_include/pva-packed.h | 24 +
 .../tegra/host/pva/fw_include/pva-sys-dma.h | 336 ++++
 .../host/pva/fw_include/pva-sys-params.h | 119 ++
 .../tegra/host/pva/fw_include/pva-task.h | 421 +++++
 .../tegra/host/pva/fw_include/pva-types.h | 98 ++
 .../pva/fw_include/pva-ucode-header-types.h | 49 +
 .../host/pva/fw_include/pva-ucode-header.h | 105 ++
 .../tegra/host/pva/fw_include/pva-version.h | 32 +
 drivers/video/tegra/host/pva/hw_cfg_pva_v1.h | 125 ++
 drivers/video/tegra/host/pva/hw_cfg_pva_v2.h | 136 ++
 drivers/video/tegra/host/pva/hw_dma_ch_pva.h | 77 +
 .../video/tegra/host/pva/hw_dma_desc_pva.h | 117 ++
 drivers/video/tegra/host/pva/hw_evp_pva.h | 85 +
 drivers/video/tegra/host/pva/hw_hsp_pva.h | 165 ++
 drivers/video/tegra/host/pva/hw_proc_pva.h | 69 +
 drivers/video/tegra/host/pva/hw_sec_pva_v1.h | 85 +
 drivers/video/tegra/host/pva/hw_sec_pva_v2.h | 65 +
 drivers/video/tegra/host/pva/hw_vmem_pva.h | 36 +
 drivers/video/tegra/host/pva/nvpva_buffer.c | 607 +++++++
 drivers/video/tegra/host/pva/nvpva_buffer.h | 224 +++
 drivers/video/tegra/host/pva/nvpva_client.c | 218 +++
 drivers/video/tegra/host/pva/nvpva_client.h | 61 +
 .../video/tegra/host/pva/nvpva_elf_parser.c | 445 +++++
 .../video/tegra/host/pva/nvpva_elf_parser.h | 210 +++
 drivers/video/tegra/host/pva/nvpva_queue.c | 597 +++++++
 drivers/video/tegra/host/pva/nvpva_queue.h | 304 ++++
 drivers/video/tegra/host/pva/nvpva_syncpt.c | 300 ++++
 drivers/video/tegra/host/pva/nvpva_syncpt.h | 39 +
 drivers/video/tegra/host/pva/pva-vpu-perf.h | 31 +
 drivers/video/tegra/host/pva/pva.c | 1484 ++++++++++++++++
 drivers/video/tegra/host/pva/pva.h | 615 +++++++
 drivers/video/tegra/host/pva/pva_abort.c | 105 ++
 .../video/tegra/host/pva/pva_bit_helpers.h | 100 ++
 drivers/video/tegra/host/pva/pva_ccq_t19x.c | 87 +
 drivers/video/tegra/host/pva/pva_ccq_t19x.h | 29 +
 drivers/video/tegra/host/pva/pva_ccq_t23x.c | 234 +++
 drivers/video/tegra/host/pva/pva_ccq_t23x.h | 40 +
 drivers/video/tegra/host/pva/pva_debug.c | 490 ++++++
 drivers/video/tegra/host/pva/pva_dma.c | 1264 +++++++++++++
 drivers/video/tegra/host/pva/pva_dma.h | 96 +
 .../video/tegra/host/pva/pva_fw_carveout.c | 80 +
 .../video/tegra/host/pva/pva_fw_carveout.h | 34 +
 drivers/video/tegra/host/pva/pva_hwseq.h | 68 +
 .../tegra/host/pva/pva_interface_regs_t19x.c | 66 +
 .../tegra/host/pva/pva_interface_regs_t19x.h | 35 +
 .../tegra/host/pva/pva_interface_regs_t23x.c | 141 ++
 .../tegra/host/pva/pva_interface_regs_t23x.h | 74 +
 drivers/video/tegra/host/pva/pva_ioctl.c | 1115 ++++++++++++
 .../tegra/host/pva/pva_iommu_context_dev.c | 237 +++
 .../tegra/host/pva/pva_iommu_context_dev.h | 28 +
 drivers/video/tegra/host/pva/pva_isr.c | 155 ++
 drivers/video/tegra/host/pva/pva_isr_t23x.c | 109 ++
 drivers/video/tegra/host/pva/pva_isr_t23x.h | 25 +
 drivers/video/tegra/host/pva/pva_mailbox.c | 207 +++
 drivers/video/tegra/host/pva/pva_mailbox.h | 134 ++
 .../video/tegra/host/pva/pva_mailbox_t19x.c | 80 +
 .../video/tegra/host/pva/pva_mailbox_t19x.h | 54 +
 .../video/tegra/host/pva/pva_mailbox_t23x.c | 56 +
 .../video/tegra/host/pva/pva_mailbox_t23x.h | 54 +
 drivers/video/tegra/host/pva/pva_nvhost.h | 91 +
 drivers/video/tegra/host/pva/pva_queue.c | 1567 +++++++++++++++++
 drivers/video/tegra/host/pva/pva_queue.h | 287 +++
 drivers/video/tegra/host/pva/pva_regs.h | 205 +++
 drivers/video/tegra/host/pva/pva_sec_ec.c | 65 +
 drivers/video/tegra/host/pva/pva_sec_ec.h | 23 +
 drivers/video/tegra/host/pva/pva_sha256.c | 214 +++
 drivers/video/tegra/host/pva/pva_sha256.h | 93 +
 .../video/tegra/host/pva/pva_status_regs.h | 44 +
 .../tegra/host/pva/pva_system_allow_list.c | 40 +
 .../tegra/host/pva/pva_system_allow_list.h | 19 +
 drivers/video/tegra/host/pva/pva_trace.c | 107 ++
 drivers/video/tegra/host/pva/pva_trace.h | 57 +
 .../tegra/host/pva/pva_version_config_t19x.c | 47 +
 .../tegra/host/pva/pva_version_config_t19x.h | 24 +
 .../tegra/host/pva/pva_version_config_t23x.c | 29 +
 .../tegra/host/pva/pva_version_config_t23x.h | 25 +
 .../video/tegra/host/pva/pva_vpu_app_auth.c | 418 +++++
 .../video/tegra/host/pva/pva_vpu_app_auth.h | 195 ++
 drivers/video/tegra/host/pva/pva_vpu_exe.c | 1172 ++++++++++++
 drivers/video/tegra/host/pva/pva_vpu_exe.h | 354 ++++
 drivers/video/tegra/host/pva/pva_vpu_ocd.c | 104 ++
 drivers/video/tegra/host/pva/pva_vpu_ocd.h | 26 +
 include/trace/events/nvhost_pva.h | 254 +++
 include/uapi/linux/nvpva_ioctl.h | 611 +++++++
 kernel-src-files-copy-list.txt | 4 -
 94 files changed, 19763 insertions(+), 12 deletions(-)
 create mode 100644 drivers/video/tegra/host/pva/Kconfig
 create mode 100644 drivers/video/tegra/host/pva/elf_include_fix.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/fw_config.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-bit.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-errors.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-fw-version.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-interface.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-packed.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-sys-params.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-task.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-types.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h
 create mode 100644 drivers/video/tegra/host/pva/fw_include/pva-version.h
 create mode 100644 drivers/video/tegra/host/pva/hw_cfg_pva_v1.h
 create mode 100644 drivers/video/tegra/host/pva/hw_cfg_pva_v2.h
 create mode 100644 drivers/video/tegra/host/pva/hw_dma_ch_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_dma_desc_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_evp_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_hsp_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_proc_pva.h
 create mode 100644 drivers/video/tegra/host/pva/hw_sec_pva_v1.h
 create mode 100644 drivers/video/tegra/host/pva/hw_sec_pva_v2.h
 create mode 100644 drivers/video/tegra/host/pva/hw_vmem_pva.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_buffer.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_buffer.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_client.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_client.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_elf_parser.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_elf_parser.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_queue.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_queue.h
 create mode 100644 drivers/video/tegra/host/pva/nvpva_syncpt.c
 create mode 100644 drivers/video/tegra/host/pva/nvpva_syncpt.h
 create mode 100644 drivers/video/tegra/host/pva/pva-vpu-perf.h
 create mode 100644 drivers/video/tegra/host/pva/pva.c
 create mode 100644 drivers/video/tegra/host/pva/pva.h
 create mode 100644 drivers/video/tegra/host/pva/pva_abort.c
 create mode 100644 drivers/video/tegra/host/pva/pva_bit_helpers.h
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_ccq_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_debug.c
 create mode 100644 drivers/video/tegra/host/pva/pva_dma.c
 create mode 100644 drivers/video/tegra/host/pva/pva_dma.h
 create mode 100644 drivers/video/tegra/host/pva/pva_fw_carveout.c
 create mode 100644 drivers/video/tegra/host/pva/pva_fw_carveout.h
 create mode 100644 drivers/video/tegra/host/pva/pva_hwseq.h
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_interface_regs_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_ioctl.c
 create mode 100644 drivers/video/tegra/host/pva/pva_iommu_context_dev.c
 create mode 100644 drivers/video/tegra/host/pva/pva_iommu_context_dev.h
 create mode 100644 drivers/video/tegra/host/pva/pva_isr.c
 create mode 100644 drivers/video/tegra/host/pva/pva_isr_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_isr_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox.c
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox.h
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_mailbox_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_nvhost.h
 create mode 100644 drivers/video/tegra/host/pva/pva_queue.c
 create mode 100644 drivers/video/tegra/host/pva/pva_queue.h
 create mode 100644 drivers/video/tegra/host/pva/pva_regs.h
 create mode 100644 drivers/video/tegra/host/pva/pva_sec_ec.c
 create mode 100644 drivers/video/tegra/host/pva/pva_sec_ec.h
 create mode 100644 drivers/video/tegra/host/pva/pva_sha256.c
 create mode 100644 drivers/video/tegra/host/pva/pva_sha256.h
 create mode 100644 drivers/video/tegra/host/pva/pva_status_regs.h
 create mode 100644 drivers/video/tegra/host/pva/pva_system_allow_list.c
 create mode 100644 drivers/video/tegra/host/pva/pva_system_allow_list.h
 create mode 100644 drivers/video/tegra/host/pva/pva_trace.c
 create mode 100644 drivers/video/tegra/host/pva/pva_trace.h
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t19x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t19x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t23x.c
 create mode 100644 drivers/video/tegra/host/pva/pva_version_config_t23x.h
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_app_auth.c
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_app_auth.h
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_exe.c
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_exe.h
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_ocd.c
 create mode 100644 drivers/video/tegra/host/pva/pva_vpu_ocd.h
 create mode 100644 include/trace/events/nvhost_pva.h
 create mode 100644 include/uapi/linux/nvpva_ioctl.h

diff --git a/drivers/video/tegra/host/pva/Kconfig b/drivers/video/tegra/host/pva/Kconfig
new file mode 100644
index 00000000..a02dcbea
--- /dev/null
+++ b/drivers/video/tegra/host/pva/Kconfig
@@ -0,0 +1,10 @@
+if ARCH_TEGRA
+
+config PVA_CO_DISABLED
+	bool "Tegra PVA FW CO disabled"
+	depends on TEGRA_GRHOST
+	default n
+	help
+	  Allow PVA FW to be booted from file
+	  Say n here if not sure.
+endif
diff --git a/drivers/video/tegra/host/pva/Makefile b/drivers/video/tegra/host/pva/Makefile
index 3aab963e..cb1ea5ec 100644
--- a/drivers/video/tegra/host/pva/Makefile
+++ b/drivers/video/tegra/host/pva/Makefile
@@ -1,9 +1,58 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ifeq ($(CONFIG_TEGRA_OOT_MODULE),m)
+NVPVA_OOT = y
+NVPVA_OBJ = m
+else
+NVPVA_OBJ = $(CONFIG_TEGRA_GRHOST_PVA)
+endif
+
+GCOV_PROFILE := y
+ccflags-y += -I$(srctree.nvidia)/drivers/video/tegra/host/pva
+ccflags-y += -I$(srctree.nvidia)/drivers/video/tegra/host/pva/fw_include
+ccflags-y += -I$(srctree.nvidia)/include
+ccflags-y += -I$(srctree.nvidia)/include/linux
+ccflags-y += -Werror
+
+# When using the upstream host1x driver, the Makefile must define the
+# srctree.host1x path in order to find the necessary header files for
+# the upstream host1x driver.
+ccflags-$(NVPVA_OOT) += -I$(srctree.host1x)/include
+ccflags-$(NVPVA_OOT) += -DCONFIG_TEGRA_HOST1X
+ccflags-$(NVPVA_OOT) += -DCONFIG_TEGRA_FUSE_UPSTREAM
+ccflags-$(NVPVA_OOT) += -DTEGRA_OOT_MODULE
+
+
+nvhost-pva-objs = \
+	pva.o \
+	pva_ioctl.o \
+	pva_mailbox.o \
+	pva_interface_regs_t19x.o \
+	pva_version_config_t19x.o \
+	pva_mailbox_t19x.o \
+	pva_isr.o \
+	pva_queue.o \
+	pva_debug.o \
+	pva_trace.o \
+	pva_abort.o \
+	pva_ccq_t19x.o \
+	nvpva_elf_parser.o \
+	pva_vpu_exe.o \
+	nvpva_client.o \
+	nvpva_queue.o \
+	pva_dma.o \
+	nvpva_buffer.o \
+	pva_vpu_ocd.o \
+	pva_sha256.o \
+	pva_system_allow_list.o \
+	pva_vpu_app_auth.o \
+	pva_iommu_context_dev.o \
+	nvpva_syncpt.o \
+	pva_fw_carveout.o \
+	pva_isr_t23x.o \
+	pva_mailbox_t23x.o \
+	pva_interface_regs_t23x.o \
+	pva_version_config_t23x.o \
+	pva_ccq_t23x.o \
+	pva_sec_ec.o
+
+obj-$(NVPVA_OBJ) += nvhost-pva.o
 
-# NOTE: Do not change or add anything in this makefile.
-# The source code and makefile rules are copied from the
-# kernel/nvidia/drivers/video/tegra/host/pva. This file is
-# just place-holder for empty makefile to avoid any build
-# issue when copy is not done from command line and building
-# the tree independent of source copy.
diff --git a/drivers/video/tegra/host/pva/elf_include_fix.h b/drivers/video/tegra/host/pva/elf_include_fix.h new file mode 100644 index 00000000..b4e4ad9d --- /dev/null +++ b/drivers/video/tegra/host/pva/elf_include_fix.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef ELF_INCLUDE_FIX_H +#undef ELF_INCLUDE_FIX_H + +#include + +#undef SHT_NULL +#undef SHT_PROGBITS +#undef SHT_SYMTAB +#undef SHT_STRTAB +#undef SHT_RELA +#undef SHT_HASH +#undef SHT_DYNAMIC +#undef SHT_NOTE +#undef ELFCLASS32 + +#undef SHT_NOBITS +#undef SHT_REL +#undef SHT_SHLIB +#undef SHT_DYNSYM + +#undef SHT_LOPROC +#undef SHT_HIPROC +#undef SHT_LOUSER +#undef SHT_HIUSER + +#undef SHN_UNDEF + +#undef ELF_ST_BIND +#undef ELF_ST_TYPE + +#undef STT_NOTYPE +#undef STT_OBJECT +#undef STT_FUNC +#undef STT_SECTION +#undef STT_FILE +#undef STT_COMMON + +#undef STB_LOCAL +#undef STB_GLOBAL +#undef STB_WEAK + + +#undef SHN_LORESERVE +#undef SHN_ABS +#undef SHN_COMMON + +#endif // ELF_INCLUDE_FIX_H diff --git a/drivers/video/tegra/host/pva/fw_include/fw_config.h b/drivers/video/tegra/host/pva/fw_include/fw_config.h new file mode 100644 index 00000000..9545c17a --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/fw_config.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_CONFIG_H +#define PVA_CONFIG_H + +/** + * @brief Number of DMA channels for T19x or Xavier. + */ +#define PVA_NUM_DMA_CHANNELS_T19X 14U + +/** + * @brief Number of DMA descriptors. + */ +#define PVA_NUM_DMA_DESCS 64U + +/** + * @brief Number of reserved DMA channels. These channels + * are reserved per DMA for R5 transfers. These channels + * will be used by R5 to transfer data which it needs. + */ +#define PVA_NUM_RESERVED_CHANNELS 1U + +/** + * @brief Number of reserved DMA descriptors. These descriptors + * are reserved per DMA for R5 transfers. These descriptors along + * with channels will be used by R5 to transfer data which it needs. + */ + +#define PVA_NUM_RESERVED_DESCRIPTORS 4U +/** + * @brief Number of dynamic DMA descriptors. These descriptors can be + * used by the VPU application transfer data. These exclude + * the reserved descriptors from total available ones. 
+ */ +#define PVA_NUM_DYNAMIC_DESCS (PVA_NUM_DMA_DESCS - \ + PVA_NUM_RESERVED_DESCRIPTORS) + +/** + * @brief Number of reserved AXI data buffers for T19x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T19X 8U + +/** + * @brief Number of reserved VMEM data buffers. + */ +#define PVA_NUM_RESERVED_VDB_BUFFERS 0U + +/** + * @brief Total number of VMEM data buffers. + */ +#define PVA_NUM_DMA_VDB_BUFFS 128U + +/** + * @brief Total number of AXI data buffers for T19x. + */ +#define PVA_NUM_DMA_ADB_BUFFS_T19X 256U + +/** + * @brief Number of dynamic AXI data buffers for T19x. + * These exclude the reserved AXI data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T19X (PVA_NUM_DMA_ADB_BUFFS_T19X - \ + PVA_NUM_RESERVED_ADB_BUFFERS_T19X) + +/** + * @brief Number of dynamic VMEM data buffers for T19x. + * These exclude the reserved VMEM data buffers from total available ones. + */ +#define PVA_NUM_DYNAMIC_VDB_BUFFS (PVA_NUM_DMA_VDB_BUFFS - \ + PVA_NUM_RESERVED_VDB_BUFFERS) + +/** + * @brief The first Reserved DMA descriptor. This is used as a + * starting point to iterate over reserved DMA descriptors. + */ +#define PVA_RESERVED_DESC_START PVA_NUM_DYNAMIC_DESCS + +/** + * @brief The first Reserved AXI data buffers. This is used as a + * starting point to iterate over reserved AXI data buffers. + */ +#define PVA_RESERVED_ADB_BUFF_START PVA_NUM_DYNAMIC_ADB_BUFFS + +/** + * @brief The first Reserved VMEM data buffers. This is used as a + * starting point to iterate over reserved VMEM data buffers. + */ +#define PVA_RESERVED_VDB_BUFF_START PVA_NUM_DYNAMIC_VDB_BUFFS +/** + * @brief Maximum number of DMA channels for T23x. + */ + +#define PVA_NUM_DMA_CHANNELS_T23X 16U + +/** + * @brief Number of reserved AXI data buffers for T23x. + */ +#define PVA_NUM_RESERVED_ADB_BUFFERS_T23X 16U + +/** + * @brief Total number of AXI data buffers for T23x. + */ +#define PVA_NUM_DMA_ADB_BUFFS_T23X 272U + +/** + * @brief Number of dynamic AXI data buffers for T23x. + * These exclude the reserved AXI data buffers from total available ones. + */ + +#define PVA_NUM_DYNAMIC_ADB_BUFFS_T23X (PVA_NUM_DMA_ADB_BUFFS_T23X - \ + PVA_NUM_RESERVED_ADB_BUFFERS_T23X) +/** @} */ +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-bit.h b/drivers/video/tegra/host/pva/fw_include/pva-bit.h new file mode 100644 index 00000000..bf24551b --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-bit.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_BIT_H +#define PVA_BIT_H + +/* + * Bit manipulation macros + */ + +#define PVA_BITS_PER_BYTE 8UL +/* + * 8-bits + */ +#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu)) + +/* + * 8-bits + */ +#define PVA_BIT8(_b_) ((uint8_t)(((uint8_t)1U << (_b_)) & 0xffu)) +#define PVA_MASK8(_msb_, _lsb_) \ + ((uint8_t)((((PVA_BIT8(_msb_) - 1U) | PVA_BIT8(_msb_)) & \ + ~(PVA_BIT8(_lsb_) - 1U)) & \ + 0xff)) +#define PVA_EXTRACT8(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK8((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_EXTRACT8_RANGE(_x_, _name_, _type_) \ + PVA_EXTRACT8(_x_, (_name_##_MSB), (_name_##_LSB), _type_) +#define PVA_INSERT8(_x_, _msb_, _lsb_) \ + ((((uint8_t)(_x_)) << (_lsb_)) & PVA_MASK8((_msb_), (_lsb_))) + +/* + * 16-bits + */ +#define PVA_BIT16(_b_) ((uint16_t)(((uint16_t)1U << (_b_)) & 0xffffu)) +#define PVA_MASK16(_msb_, _lsb_) \ + ((uint16_t)((((PVA_BIT16(_msb_) - 1U) | PVA_BIT16(_msb_)) & \ + ~(PVA_BIT16(_lsb_) - 1U)) & \ + 0xffff)) +#define PVA_EXTRACT16(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK16((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_INSERT16(_x_, _msb_, _lsb_) \ + ((((uint16_t)(_x_)) << (_lsb_)) & PVA_MASK16((_msb_), (_lsb_))) + +/* + * 32-bits + */ +#define PVA_BIT(_b_) ((uint32_t)(((uint32_t)1U << (_b_)) & 0xffffffffUL)) +#define PVA_MASK(_msb_, _lsb_) \ + (((PVA_BIT(_msb_) - 1U) | PVA_BIT(_msb_)) & ~(PVA_BIT(_lsb_) - 1U)) +#define PVA_MASK_RANGE(_name_) PVA_MASK((_name_##_MSB), (_name_##_LSB)) +#define PVA_EXTRACT(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_EXTRACT_RANGE(_x_, _name_, _type_) \ + PVA_EXTRACT(_x_, (_name_##_MSB), (_name_##_LSB), _type_) +#define PVA_INSERT(_x_, _msb_, _lsb_) \ + ((((uint32_t)(_x_)) << (_lsb_)) & (uint32_t)PVA_MASK((_msb_), (_lsb_))) +#define PVA_INSERT_RANGE(_x_, _name_) \ + PVA_INSERT(_x_, (_name_##_MSB), (_name_##_LSB)) + +/* + * 64-bits + */ +#define PVA_BIT64(_b_) \ + ((uint64_t)(((uint64_t)1UL << (_b_)) & ((uint64_t)(0U) - 1U))) +#define PVA_MASK64(_msb_, _lsb_) \ + (((PVA_BIT64(_msb_) - (uint64_t)1U) | PVA_BIT64(_msb_)) & \ + ~(PVA_BIT64(_lsb_) - (uint64_t)1U)) +#define PVA_MASK64_RANGE(_name_) PVA_MASK64((_name_##_MSB), (_name_##_LSB)) +#define PVA_EXTRACT64(_x_, _msb_, _lsb_, _type_) \ + ((_type_)(((_x_)&PVA_MASK64((_msb_), (_lsb_))) >> (_lsb_))) +#define PVA_EXTRACT64_RANGE(_x_, _name_, _type_) \ + PVA_EXTRACT64(_x_, (_name_##_MSB), (_name_##_LSB), _type_) +#define PVA_INSERT64(_x_, _msb_, _lsb_) \ + ((((uint64_t)(_x_)) << (_lsb_)) & PVA_MASK64((_msb_), (_lsb_))) +#define PVA_INSERT64_RANGE(_x_, _name_) \ + PVA_INSERT64(_x_, (_name_##_MSB), (_name_##_LSB)) + +#define PVA_PACK64(_l_, _h_) \ + (PVA_INSERT64((_h_), 63U, 32U) | PVA_INSERT64((_l_), 31U, 0U)) + +#define PVA_HI32(_x_) PVA_EXTRACT64((_x_), 63U, 32U, uint32_t) +#define PVA_LOW32(_x_) PVA_EXTRACT64((_x_), 31U, 0U, uint32_t) + +#define PVA_RANGE_LOW(_name_) (_name_##_LSB) +#define PVA_RANGE_HIGH(_name_) (_name_##_MSB) +#define PVA_NUM_IN_RANGE(_n_, _name_) \ + ((PVA_RANGE_LOW(_name_) <= (_n_)) && ((_n_) <= PVA_RANGE_HIGH(_name_))) + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-errors.h b/drivers/video/tegra/host/pva/fw_include/pva-errors.h new file mode 100644 index 00000000..e8ea7aed --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-errors.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_ERRORS_H +#define PVA_ERRORS_H + +#include +/* + * PVA Error codes that will be read from PVA_CCQ_STATUS3 + */ +typedef uint16_t pva_errors_t; + +/* + * General and interface errors + */ +#define PVA_ERR_NO_ERROR 0x0U +#define PVA_ERR_BAD_CMD 0x1U +#define PVA_ERR_BAD_STATUS_ID 0x2U +#define PVA_ERR_BAD_QUEUE_ID 0x3U +#define PVA_ERR_BAD_PVE_ID 0x4U +#define PVA_ERR_BUFF_TOO_SMALL 0x5U +#define PVA_ERR_FEATURE_NOT_SUPPORTED 0x6U +#define PVA_ERR_QUEUE_NOT_SUSPENDED 0x7U +#define PVA_ERR_QUEUE_SUSPENDED 0x8U +#define PVA_ERR_BAD_ADDRESS 0x9U +#define PVA_ERR_BAD_THRESHOLD_ID 0xaU +#define PVA_ERR_BAD_ATTR_ID 0xbU +#define PVA_ERR_BAD_VMEM_ID 0xcU +#define PVA_ERR_BAD_TIME_VALUE 0xdU +#define PVA_ERR_BAD_SCHEDULER_ID 0xeU +#define PVA_ERR_BAD_SCHEDULER_ATTR 0xfU +#define PVA_ERR_BAD_STATUS_REG 0x10U +#define PVA_ERR_BAD_REGION_ID 0x11U +#define PVA_ERR_BAD_RESET_ID 0x12U +#define PVA_ERR_BAD_STAT_ID 0x13U +#define PVA_ERR_BAD_INSTANCE 0x14U +#define PVA_ERR_BAD_TASK 0x15U +#define PVA_ERR_BAD_TASK_ACTION_LIST 0x16U +#define PVA_ERR_BAD_TASK_STATE 0x17U +#define PVA_ERR_TASK_STATUS_MISMATCH 0x18U +#define PVA_ERR_BAD_TASK_OFFSET 0x19U +#define PVA_ERR_BAD_PARAMETERS 0x1aU +#define PVA_ERR_VALUE_MISMATCH 0x1bU +#define PVA_ERR_NO_VPU_HEADER 0x1cU +#define PVA_ERR_BAD_SURFACE_ARRAY 0x1dU +#define PVA_ERR_UNKNOWN_VPU_OP 0x1eU +#define PVA_ERR_BAD_VPU_OP_VER 0x1fU +#define PVA_ERR_UNKNOWN_R5_APP 0x20U +#define PVA_ERR_NO_R5_DATA 0x21U +#define PVA_ERR_NO_R5_HEADER 0x22U +#define PVA_ERR_PVE_TIMEOUT 0x23U +#define PVA_ERR_VPU_RESET 0x24U +#define PVA_ERR_VPU_ERROR_HALT 0x25U +#define PVA_ERR_VPU_ILLEGAL_INSTR 0x26U +#define PVA_ERR_VPU_DIVIDE_BY_0 0x27U +#define PVA_ERR_VPU_BAD_STATE 0x28U +#define PVA_ERR_VPU_DEBUG 0x29U +#define PVA_ERR_VPU_EXIT_ERROR 0x2aU +#define PVA_ERR_PPE_EXIT_ERROR 0x2bU +#define PVA_ERR_PVE_ABORT 0x2dU +#define PVA_ERR_BAD_OVERLAY_SEG 0x2eU +#define PVA_ERR_BAD_SEG_START 0x2fU +#define PVA_ERR_SEGMENTS_OVERLAP 0x30U +#define PVA_ERR_NO_VPU_DATA 0x31U +#define PVA_ERR_VPU_FP_NAN 0x32U +#define PVA_ERR_PPE_ILLEGAL_INSTR 0x33U +#define PVA_ERR_PPE_DIVIDE_BY_0 0x34U +#define PVA_ERR_PPE_FP_NAN 0x35U +#define PVA_ERR_PPE_ILLEGAL_DEBUG 0x36U +#define PVA_ERR_PPE_ILLEGAL_INSTR_ALIGN 0x37U +#define PVA_ERR_BAD_CACHED_DRAM_SEG 0x3aU +#define PVA_ERR_BAD_UNCACHED_DRAM_SEG 0x3bU +#define PVA_ERR_BAD_DRAM_IOVA 0x3cU +#define PVA_ERR_REG_MISMATCH 0x3dU +#define PVA_ERR_UNSUPPORTED_TNSR_TYPE 0x3eU +#define PVA_ERR_AISR_QUEUE_EMPTY 0x3fU +#define PVA_ERR_AISR_QUEUE_FULL 0x40U +#define PVA_ERR_BAD_L2SRAM_PARAMS 0x41U +#define PVA_ERR_BAD_TASK_PARAMS 0x42U + +/* + * DMA errors + */ +#define PVA_ERR_DMA_NO_BPP 0x200U +#define PVA_ERR_DMA_INVALID_WIDTH 0x201U +#define PVA_ERR_DMA_DATA_TOO_LARGE 0x202U +#define PVA_ERR_DMA_BPP_MISMATCH 0x203U +#define PVA_ERR_DMA_TRANSFER_TYPE_INVALID 0x204U +#define PVA_ERR_DMA_TILE_SIZE_MISMATCH 0x205U +#define PVA_ERR_DMA_SIZE_MISMATCH 0x206U +#define 
PVA_ERR_DMA_CHANNEL_TRANSFER 0x207U +#define PVA_ERR_BAD_DMA_DESC_ID 0x208U +#define PVA_ERR_BAD_DMA_CHANNEL_ID 0x209U +#define PVA_ERR_DMA_TOO_MANY_BUFFERS 0x20aU +#define PVA_ERR_DMA_TIMEOUT 0x20bU +#define PVA_ERR_DMA_INSUFFICIENT_SPACE 0x20cU +#define PVA_ERR_DMA_BAD_BLOCK_HEIGHT 0x20dU +#define PVA_ERR_DMA_BAD_LAYOUT 0x20eU +#define PVA_ERR_DMA_BAD_MEMORY 0x20fU +#define PVA_ERR_DMA_UNALIGNED_ADDR 0x210U +#define PVA_ERR_DMA_PRIV_ACCESS 0x211U +#define PVA_ERR_DMA_BAD_CALLBACK 0x212U +#define PVA_ERR_DMA_CALLBACK_REGISTERED 0x213U +#define PVA_ERR_DMA_CHAN_NOT_IN_USE 0x214U +#define PVA_ERR_DMA_INVALID_VDESC_FLAGS 0x215U +#define PVA_ERR_DMA_HWSEQ_BAD_PROGRAM 0x216U +#define PVA_ERR_DMA_HWSEQ_PROGRAM_TOO_LONG 0x217U +#define PVA_ERR_DMA_HWSEQ_FIELD_OVERFLOW 0x218U +#define PVA_ERR_DMA_HWSEQ_BAD_INDEX 0x219U +#define PVA_ERR_DMA_INVALID_CONFIG 0x220U +#define PVA_ERR_DMA_ERROR 0x221U +/* + * MISR errors + */ +#define PVA_ERR_MISR_NOT_RUN 0x280U +#define PVA_ERR_MISR_NOT_DONE 0x281U +#define PVA_ERR_MISR_TIMEOUT 0x282U +#define PVA_ERR_MISR_ADDR 0x283U +#define PVA_ERR_MISR_DATA 0x284U +#define PVA_ERR_MISR_ADDR_DATA 0x285U + +/* + * VPU Errors + */ +#define PVA_ERR_VPU_DMA_TIMEOUT 0x300U +#define PVA_ERR_VPU_PARAMETER_MISMATCH 0x301U +#define PVA_ERR_VPU_BAD_VALUE 0x302U +#define PVA_ERR_VPU_DLUT_CFG 0x303U +#define PVA_ERR_VPU_DLUT_MISS 0x304U +#define PVA_ERR_VPU_CP_ACCESS 0x305U + +/* + * Fast reset errors + */ + +#define PVA_ERR_FAST_RESET_R5_DMA_TIMEOUT 0x400U +#define PVA_ERR_FAST_RESET_TIMEOUT_VPU 0x401U +#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE1 0x402U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH0 0x403U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH1 0x404U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH2 0x405U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH3 0x406U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH4 0x407U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH5 0x408U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH6 0x409U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH7 0x410U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH8 0x411U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH9 0x412U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH10 0x413U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH11 0x414U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH12 0x415U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH13 0x416U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH14 0x417U +#define PVA_ERR_FAST_RESET_TIMEOUT_CH15 0x418U +#define PVA_ERR_FAST_RESET_TIMEOUT_ICACHE2 0x419U + +/* + * R5 Application Errors + */ +#define PVA_ERR_R5_APP_ARGS 0x800U +#define PVA_ERR_R5_APP_COPY_NULL 0x801U +#define PVA_ERR_BAD_QUEUE_HANDLE 0x802U +#define PVA_ERR_QUEUE_EMPTY 0x803U +#define PVA_ERR_SYS_QUEUE_ERROR 0x804U +#define PVA_ERR_APP_WAKE_BREAK 0x805U +#define PVA_ERR_TASK_QUEUE_FULL 0x806U +#define PVA_ERR_APP_BAD_CALLBACK 0x807U +#define PVA_ERR_TASK_QUEUE_EMPTY 0x808U +#define PVA_ERR_VPU_RUNNING 0x809U +#define PVA_ERR_VPU_NOT_STARTED 0x80aU +#define PVA_ERR_VPU_BAD_CALLBACK 0x80bU +#define PVA_ERR_APP_ABORT 0x80cU +#define PVA_ERR_APP_ASSERT 0x80dU +#define PVA_ERR_APP_BAD_CONTEXT 0x80eU +#define PVA_ERR_INSUFFICIENT_MEMORY 0x80fU +#define PVA_ERR_INSUFFICIENT_FAST_MEMORY 0x810U +#define PVA_ERR_PARAMETER_MISMATCH 0x811U +#define PVA_ERR_ALLOC_FAILED 0x812U +#define PVA_ERR_FREE_FAILED 0x813U +#define PVA_ERR_SMMU_NOT_WORKING 0x814U + +/* + * Informational errors + */ +#define PVA_ERR_NO_PARM_ARRAY 0x995U +#define PVA_ERR_NOT_FOUND 0x996U +#define PVA_ERR_NO_TASK 0x997U +#define PVA_ERR_MINIMUM_LENGTH 0x998U +#define PVA_ERR_LENGTH_PROVIDED 0x999U +#define PVA_ERR_TRY_AGAIN 0x99AU + +/* Never used */ +#define 
PVA_ERR_MAX_ERR 0xFFFFU + +#endif /* _PVA_ERRORS_H_ */ diff --git a/drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h b/drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h new file mode 100644 index 00000000..1ebde4dd --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-fw-address-map.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_FW_ADDRESS_MAP_H +#define PVA_FW_ADDRESS_MAP_H + +/** + * @brief Starting R5 address where FW code and data is placed. + * This address is expected to be programmed in PVA_CFG_AR1PRIV_START by KMD. + * This address is also expected to be used as offset where + * PVA_CFG_R5PRIV_LSEGREG1 and PVA_CFG_R5PRIV_USEGREG1 registers would point. + */ +#define FW_CODE_DATA_START_ADDR 1610612736 //0x60000000 + +/** + * @brief R5 address where FW code and data is expected to end. + * This address is expected to be programmed in PVA_CFG_AR1PRIV_END by KMD. + */ +#define FW_CODE_DATA_END_ADDR 1612840960 //0x60220000 + +/** + * @defgroup PVA_EXCEPTION_VECTORS + * + * @brief Following macros define R5 addresses that are expected to be + * programmed by KMD in EVP registers as is. + * @{ + */ +/** + * @brief R5 address of reset exception vector + */ +#define EVP_RESET_VECTOR (1610877952) //0x60040C00 +/** + * @brief R5 address of undefined instruction exception vector + */ +#define EVP_UNDEFINED_INSTRUCTION_VECTOR (1610878976) //0x60041000 +/** + * @brief R5 address of svc exception vector + */ +#define EVP_SVC_VECTOR (1610880000) //0x60041400 +/** + * @brief R5 address of prefetch abort exception vector + */ +#define EVP_PREFETCH_ABORT_VECTOR (1610881024) //0x60041800 +/** + * @brief R5 address of data abort exception vector + */ +#define EVP_DATA_ABORT_VECTOR (1610882048) //0x60041C00 +/** + * @brief R5 address of reserved exception vector. + * It points to a dummy handler. + */ +#define EVP_RESERVED_VECTOR (1610883072) //0x60042000 +/** + * @brief R5 address of IRQ exception vector + */ +#define EVP_IRQ_VECTOR (1610884096) //0x60042400 +/** + * @brief R5 address of FIQ exception vector + */ +#define EVP_FIQ_VECTOR (1610885120) //0x60042800 +/** @} */ + +/** + * @defgroup PVA_DEBUG_BUFFERS + * + * @brief These buffers are arranged in the following order: + * TRACE_BUFFER followed by CODE_COVERAGE_BUFFER followed by DEBUG_LOG_BUFFER. + * @{ + */ +/** + * @brief Maximum size of trace buffer in bytes. + */ +#define FW_TRACE_BUFFER_SIZE 262144 //0x40000 +/** + * @brief Maximum size of code coverage buffer in bytes. + */ +#define FW_CODE_COVERAGE_BUFFER_SIZE 524288 //0x80000 +/** + * @brief Maximum size of debug log buffer in bytes. + */ +#define FW_DEBUG_LOG_BUFFER_SIZE 262144 //0x40000 +/** @} */ + +/** + * @brief Total size of buffers used for FW debug in bytes. + * TBD: Update this address based on build configuration once KMD changes + * are merged. 
+ */ +#define FW_DEBUG_DATA_TOTAL_SIZE (FW_TRACE_BUFFER_SIZE + \ + FW_DEBUG_LOG_BUFFER_SIZE + \ + FW_CODE_COVERAGE_BUFFER_SIZE) + +/** + * @brief Starting R5 address where FW debug related data is placed. + * This address is expected to be programmed in PVA_CFG_AR2PRIV_START by KMD. + * This address is also expected to be used as offset where + * PVA_CFG_R5PRIV_LSEGREG2 and PVA_CFG_R5PRIV_USEGREG2 registers would point. + */ +#define FW_DEBUG_DATA_START_ADDR 1879048192 //0x70000000 + +/** + * @brief R5 address where FW debug related data is expected to end. + * This address is expected to be programmed in PVA_CFG_AR2PRIV_END by KMD. + */ +#define FW_DEBUG_DATA_END_ADDR (FW_DEBUG_DATA_START_ADDR + \ + FW_DEBUG_DATA_TOTAL_SIZE) + +/** + * @brief Starting R5 address where FW expects shared buffers between KMD and + * FW to be placed. This is to be used as offset when programming + * PVA_CFG_R5USER_LSEGREG and PVA_CFG_R5USER_USEGREG. + */ +#define FW_SHARED_MEMORY_START 2147483648 //0x80000000 + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-fw-version.h b/drivers/video/tegra/host/pva/fw_include/pva-fw-version.h new file mode 100644 index 00000000..dcbcc896 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-fw-version.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_FW_VERSION_H +#define PVA_FW_VERSION_H + +#define VERSION_TYPE \ + (PVA_DEBUG | (SAFETY << 1) | (PVA_TEST_SUPPORT << 2) | \ + (STANDALONE_TESTS << 3)) + +#define PVA_VERSION_MAJOR 0x08 +#define PVA_VERSION_MINOR 0x02 +#define PVA_VERSION_SUBMINOR 0x03 + +#ifndef PVA_VERSION_GCID_REVISION +#define PVA_VERSION_GCID_REVISION 0x00000000 +#endif + +#ifndef PVA_VERSION_BUILT_ON +#define PVA_VERSION_BUILT_ON 0x00000000 +#endif + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-interface.h b/drivers/video/tegra/host/pva/fw_include/pva-interface.h new file mode 100644 index 00000000..05726a18 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-interface.h @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_INTERFACE_H +#define PVA_INTERFACE_H + +#include +#include +#include +#include +#include + +/* + * Register definition for PVA_SHRD_SMP_STA0 + * + * This is used to communicate various bits of information between the + * OS and the PVA. + */ + +/* + * Bits set by the OS and examined by the R5 + */ +#define PVA_BOOT_INT PVA_BIT(31U) /* OS wants an interrupt */ +#define PVA_OS_PRINT PVA_BIT(30U) /* OS will process print */ +#define PVA_TEST_WAIT PVA_BIT(29U) /* R5 wait to start tests */ +#define PVA_TEST_RUN PVA_BIT(28U) /* Start tests */ +#define PVA_WAIT_DEBUG PVA_BIT(24U) /* Spin-wait early in boot */ +#define PVA_CG_DISABLE PVA_BIT(20U) /* Disable PVA clock gating */ +#define PVA_VMEM_RD_WAR_DISABLE PVA_BIT(19U) /* Disable VMEM RD fail WAR */ +#define PVA_VMEM_MBX_WAR_ENABLE PVA_BIT(18U) /* WAR for Bug 2090939 enabled*/ + +/* + * Bits set by the R5 and examined by the OS + */ +#define PVA_TESTS_STARTED PVA_BIT(10U) /* PVA Tests started */ +#define PVA_TESTS_PASSED PVA_BIT(9U) /* PVA Tests passed */ +#define PVA_TESTS_FAILED PVA_BIT(8U) /* PVA Tests failed */ +#define PVA_HALTED PVA_BIT(2U) /* PVA uCode halted */ +#define PVA_BOOT_DONE PVA_BIT(1U) /* PVA is "ready" */ +#define PVA_TEST_MODE PVA_BIT(0U) /* PVA is in "test mode" */ + +/* + * Symbolic definitions of the mailbox registers (rather than using 0-7) + */ +#define PVA_MBOX_COMMAND 0U +#define PVA_MBOX_ADDR 1U +#define PVA_MBOX_LENGTH 2U +#define PVA_MBOX_ARG 3U +#define PVA_MBOX_SIDE_CHANNEL_HOST_WR 4U +#define PVA_MBOX_AISR 5U +#define PVA_MBOX_SIDE_CHANNEL_HOST_RD 6U +#define PVA_MBOX_ISR 7U + +/* + * For using the mailboxes as a status interface, we overload them + */ +#define PVA_MBOX_STATUS4 1U +#define PVA_MBOX_STATUS5 2U +#define PVA_MBOX_STATUS6 3U +#define PVA_MBOX_STATUS7 4U + +/* + * Mailbox side channel bit definitions + */ +#define PVA_SIDE_CHANNEL_MBOX_BIT 0U +#define PVA_SIDE_CHANNEL_MBOX_BIT_MASK (~(1U << PVA_SIDE_CHANNEL_MBOX_BIT)) + +/* + * Code checking the version of the R5 uCode should check + * the values returned from the R5_VERSION subcommand of + * CMD_GET_STATUS to determine if the version currently + * running on the PVA's R5 is compatible with what the + * driver was compiled against. + */ +#define PVA_R5_VERSION \ + PVA_MAKE_VERSION(0, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \ + PVA_VERSION_SUBMINOR) + +/* + * PVA interrupt status register contained in PVA_MBOX_ISR. 
+ */ +#define PVA_INT_PENDING PVA_BIT(31U) +#define PVA_READY PVA_BIT(30U) +#define PVA_BUSY PVA_BIT(29U) +#define PVA_CMD_COMPLETE PVA_BIT(28U) +#define PVA_CMD_ERROR PVA_BIT(27U) +#define PVA_VALID_STATUS7 PVA_BIT(26U) +#define PVA_VALID_STATUS6 PVA_BIT(25U) +#define PVA_VALID_STATUS5 PVA_BIT(24U) +#define PVA_VALID_STATUS4 PVA_BIT(23U) +#define PVA_VALID_STATUS3 PVA_BIT(22U) + +#define PVA_VALID_CCQ_ISR PVA_BIT(20U) +#define PVA_VALID_CCQ_AISR PVA_BIT(24U) +#define PVA_CCQ_OVERFLOW PVA_BIT(28U) + +/* + * On T23X we pack the ISR in with the ERR code + */ +#define PVA_STATUS_ISR_MSB 31 +#define PVA_STATUS_ISR_LSB 16 +#define PVA_STATUS_ERR_MSB 15 +#define PVA_STATUS_ERR_LSB 0 + +/* + * PVA interrupt status register contained in PVA_MBOX_AISR + */ +#define PVA_AISR_INT_PENDING PVA_BIT(31U) +#define PVA_AISR_TASK_COMPLETE PVA_BIT(30U) +#define PVA_AISR_TASK_ERROR PVA_BIT(29U) +#define PVA_AISR_ABORT PVA_BIT(0U) + +#define PVA_STATUS_AISR_TASK_ID_MSB (8U) +#define PVA_STATUS_AISR_TASK_ID_LSB (1U) +#define PVA_STATUS_AISR_VPU_ID_MSB (9U) +#define PVA_STATUS_AISR_VPU_ID_LSB (9U) +#define PVA_STATUS_AISR_QUEUE_MSB (12U) +#define PVA_STATUS_AISR_QUEUE_LSB (10U) +#define PVA_STATUS_AISR_ERR_MSB (28U) +#define PVA_STATUS_AISR_ERR_LSB (13U) + +#define PVA_PACK_AISR_STATUS(e, q, v, t) (PVA_INSERT(e, PVA_STATUS_AISR_ERR_MSB,\ + PVA_STATUS_AISR_ERR_LSB) \ + | PVA_INSERT(q, PVA_STATUS_AISR_QUEUE_MSB, \ + PVA_STATUS_AISR_QUEUE_LSB) \ + | PVA_INSERT(v, PVA_STATUS_AISR_VPU_ID_MSB, \ + PVA_STATUS_AISR_VPU_ID_LSB) \ + | PVA_INSERT(t, PVA_STATUS_AISR_TASK_ID_MSB, \ + PVA_STATUS_AISR_TASK_ID_LSB)) +#define PVA_GET_QUEUE_ID_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_QUEUE_MSB, \ + PVA_STATUS_AISR_QUEUE_LSB, \ + uint8_t) +#define PVA_GET_ERROR_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_ERR_MSB, \ + PVA_STATUS_AISR_ERR_LSB, \ + uint16_t) +#define PVA_GET_VPU_ID_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_VPU_ID_MSB, \ + PVA_STATUS_AISR_VPU_ID_LSB, \ + uint8_t) +#define PVA_GET_TASK_ID_FROM_STATUS(_s_) PVA_EXTRACT((_s_), \ + PVA_STATUS_AISR_TASK_ID_MSB, \ + PVA_STATUS_AISR_TASK_ID_LSB, \ + uint8_t) + +#define PVA_GET_ERROR_CODE(_s_) PVA_EXTRACT((_s_), 15U, 0U, pva_errors_t) + +/* + * Commands that can be sent to the PVA through the PVA_SHRD_MBOX + * interface. 
+ */ +typedef uint8_t pva_cmds_t; +#define CMD_GET_STATUS 0U +#define CMD_SUBMIT 1U +#define CMD_ABORT_QUEUE 2U +#define CMD_NOOP 3U +#define CMD_SW_BIST 4U +#define CMD_GET_VPU_STATS 5U +#define CMD_SET_LOGGING 6U +#define CMD_NEXT 7U /* Must be last */ + +/* + * CMD_GET_STATUS subcommands + */ +typedef uint8_t pva_status_cmds_t; +#define R5_VERSION 0U +#define PVA_UPTIME 1U +#define COMPLETED_TASK 2U +#define GET_STATUS_NEXT 3U /* Deleted RUNNING TASKS as it is not used in FW */ + +/* + * CCQ FIFO SUBMIT interface definition + */ +#define PVA_ADDR_LOWER_32BITS_MSB (63U) +#define PVA_ADDR_LOWER_32BITS_LSB (32U) +#define PVA_QUEUE_ID_MSB (28U) +#define PVA_QUEUE_ID_LSB (24U) +#define PVA_BATCH_SIZE_MSB (23U) +#define PVA_BATCH_SIZE_LSB (16U) +#define PVA_ADDR_HIGHER_8BITS_MSB (15U) +#define PVA_ADDR_HIGHER_8BITS_LSB (8U) +#define PVA_CMD_ID_MSB (7U) +#define PVA_CMD_ID_LSB (0U) + +/* + * Macros to indicate LSB and MSB of SUBCOMMAND field in a command + */ +#define PVA_SUB_CMD_ID_MSB (15U) +#define PVA_SUB_CMD_ID_LSB (8U) + +/* + * Macro used to indicate the most significant + * bit to extract higher 8 bits of the 40 bit address + */ +#define PVA_EXTRACT_ADDR_HIGHER_8BITS_MSB 39U +/* + * Macro used to indicate the least significant + * bit to extract higher 8 bits of the 40 bit address + */ +#define PVA_EXTRACT_ADDR_HIGHER_8BITS_LSB 32U + +/** + * Macro used to specify most significant bit + * of the VPU stats enable field in CMD_SET_VPU_STATS_BUFFER command + */ +#define PVA_CMD_VPU_STATS_EN_MSB 23U +/** + * Macro used to specify least significant bit + * of the VPU stats enable field in CMD_SET_VPU_STATS_BUFFER command + */ +#define PVA_CMD_VPU_STATS_EN_LSB 16U + +/* + * SW Bist subcommands + */ +#define PVA_SDL_SUBMIT 0xF1U +#define PVA_SDL_SET_ERROR_INJECT_SDL 0xF2U +#define PVA_SDL_SET_ERROR_INJECT_PANIC 0xF3U + +/* + * Generic fields in a command sent to the PVA through the PVA_SHRD_MBOX + * interface. + */ +#define PVA_CMD_INT_ON_ERR PVA_BIT(30U) +#define PVA_CMD_INT_ON_COMPLETE PVA_BIT(29U) +#define PVA_GET_BATCH_SIZE(_c_, _t_) PVA_EXTRACT(_c_, PVA_BATCH_SIZE_MSB, PVA_BATCH_SIZE_LSB, _t_) +#define PVA_SET_BATCH_SIZE(_c_) PVA_INSERT(_c_, PVA_BATCH_SIZE_MSB, PVA_BATCH_SIZE_LSB) +#define PVA_GET_SUBCOMMAND(_c_, _t_) PVA_EXTRACT(_c_, PVA_SUB_CMD_ID_MSB, PVA_SUB_CMD_ID_LSB, _t_) +#define PVA_SET_SUBCOMMAND(_c_) PVA_INSERT(_c_, PVA_SUB_CMD_ID_MSB, PVA_SUB_CMD_ID_LSB) +#define PVA_GET_COMMAND(_c_) PVA_EXTRACT(_c_, PVA_CMD_ID_MSB, PVA_CMD_ID_LSB, pva_cmds_t) +#define PVA_SET_COMMAND(_c_) PVA_INSERT(_c_, PVA_CMD_ID_MSB, PVA_CMD_ID_LSB) + +/* + * Generic fields in a command sent through the command FIFO interface. + */ +#define PVA_FIFO_GET_COMMAND(_c_) \ + PVA_EXTRACT64_RANGE((_c_), PVA_CCQ_CMD, pva_cmds_t) +#define PVA_CMD_MBOX_TO_FIFO_FLAG_SHIFT 29U +#define PVA_FIFO_INT_ON_ERR PVA_BIT64(1U) +#define PVA_FIFO_INT_ON_COMPLETE PVA_BIT64(0U) + +/* + * Reserved bits in mbox3 used and consumed internally by R5 + */ +#define PVA_MBOX3_RESERVED_SOURCE_INTERFACE_MSB 31 +#define PVA_MBOX3_RESERVED_SOURCE_INTERFACE_LSB 24 + +/* + * On T23X we map 4x32bit pushes to the CCQ to our mailbox command structure + * CCQ is delivered in 64bit chunks. This defines the mapping into each of the + * 64bit chunks. 
+ */ +/* First 64bit write */ +#define PVA_CCQ_FIRST_PUSH_MBOX_0_MSB 31 +#define PVA_CCQ_FIRST_PUSH_MBOX_0_LSB 0 + +#define PVA_CCQ_FIRST_PUSH_MBOX_1_MSB 63 +#define PVA_CCQ_FIRST_PUSH_MBOX_1_LSB 32 +/* Second 64bit write */ +#define PVA_CCQ_SECOND_PUSH_MBOX_2_MSB 31 +#define PVA_CCQ_SECOND_PUSH_MBOX_2_LSB 0 + +#define PVA_CCQ_SECOND_PUSH_MBOX_3_MSB 63 +#define PVA_CCQ_SECOND_PUSH_MBOX_3_LSB 32 + +/* + * Structure for managing commands through PVA_SHRD_MBOX* + */ +struct pva_cmd_s { + uint32_t cmd_field[4]; +}; + +struct pva_vpu_stats_s { + /** + * @brief The accumulated VPU utilization time in the current window. + */ + uint64_t total_utilization_time[2]; + /** + * @brief The timestamp which signifies start of the current window. + */ + uint64_t window_start_time; + /** + * @brief The timestamp of end of the current window. + */ + uint64_t window_end_time; +} __packed; + +/* + * CMD_NOOP command + */ +#define PVA_CMD_FL_NOOP_ECHO PVA_BIT(28U) +#define PVA_CMD_FL_NOOP_ERROR PVA_BIT(27U) + +static inline uint32_t pva_cmd_noop(struct pva_cmd_s *const cmd, + const uint32_t echo_data, + const uint32_t status_reg, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_SUBCOMMAND(status_reg) | + PVA_SET_COMMAND(CMD_NOOP); + cmd->cmd_field[1] = echo_data; + + return 2U; +} + +/* + * CMD_GET_STATUS + * Not used directly. + */ +static inline uint32_t pva_cmd_get_status(const pva_status_cmds_t subcommand, + const uint32_t flags) +{ + return flags | PVA_SET_SUBCOMMAND(subcommand) | + PVA_SET_COMMAND(CMD_GET_STATUS); +} + +/* + * R5_VERSION get status command + */ +struct pva_status_R5_version_s { + uint32_t cur_version; + uint32_t oldest_version; + uint32_t change_id; + uint32_t build_date; +}; + +static inline uint32_t pva_cmd_R5_version(struct pva_cmd_s *const cmd, + const uint32_t flags) +{ + cmd->cmd_field[0] = pva_cmd_get_status(R5_VERSION, flags); + return 1U; +} + +/* + * PVA_UPTIME get status command + */ +struct pva_status_pva_uptime_s { + uint32_t uptime_lo; + uint32_t uptime_hi; +}; + +static inline uint32_t pva_cmd_pva_uptime(struct pva_cmd_s *const cmd, + const pva_pve_id_t pve, + const uint32_t flags) +{ + (void)pve; /*For Future use*/ + cmd->cmd_field[0] = pva_cmd_get_status(PVA_UPTIME, flags); + return 1U; +} + +/* + * COMPLETED_TASK get status command + */ +struct pva_status_completed_task_s { + uint32_t task_addr_lo; + uint32_t task_addr_hi; + uint32_t task_error; + uint32_t task_queue_vpu; +}; + +static inline uint32_t pva_cmd_completed_task(struct pva_cmd_s *const cmd, + const uint32_t flags) +{ + cmd->cmd_field[0] = pva_cmd_get_status(COMPLETED_TASK, flags); + return 1U; +} + +/* + * CMD_SET_LOGGING + */ + +#define PVA_CMD_FL_LOG_PVA_ENABLE PVA_BIT(28U) +#define PVA_CMD_FL_LOG_R5_ENABLE PVA_BIT(27U) +#define PVA_CMD_FL_LOG_VPU_ENABLE PVA_BIT(26U) +#define PVA_CMD_FL_LOG_NO_OVERFLOW PVA_BIT(25U) +#define PVA_CMD_FL_LOG_OVERFLOW_INT PVA_BIT(24U) +#define PVA_CMD_FL_PRT_PVA_ENABLE PVA_BIT(23U) +#define PVA_CMD_FL_PRT_R5_ENABLE PVA_BIT(22U) +#define PVA_CMD_FL_PRT_VPU_ENABLE PVA_BIT(21U) +#define PVA_CMD_FL_PRT_NO_OVERFLOW PVA_BIT(20U) +#define PVA_CMD_FL_PRT_OVERFLOW_INT PVA_BIT(19U) + +static inline uint32_t pva_cmd_set_logging_level(struct pva_cmd_s *const cmd, + const uint32_t pva_log_level, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_COMMAND(CMD_SET_LOGGING); + cmd->cmd_field[1] = PVA_INSERT(pva_log_level, 31U, 0U); + return 2U; +} + +/* + * CMD_SUBMIT (batch mode) + */ +static inline uint32_t pva_cmd_submit_batch(struct pva_cmd_s *const cmd, + 
const uint8_t queue_id, + const uint64_t addr, + const uint8_t batch_size, + const uint32_t flags) +{ + cmd->cmd_field[0] = + flags | PVA_SET_COMMAND(CMD_SUBMIT) | + PVA_INSERT(batch_size, PVA_BATCH_SIZE_MSB, PVA_BATCH_SIZE_LSB) | + PVA_INSERT(PVA_EXTRACT64(addr, PVA_EXTRACT_ADDR_HIGHER_8BITS_MSB, + PVA_EXTRACT_ADDR_HIGHER_8BITS_LSB, uint32_t), + PVA_ADDR_HIGHER_8BITS_MSB, PVA_ADDR_HIGHER_8BITS_LSB) | + PVA_INSERT(queue_id, PVA_QUEUE_ID_MSB, PVA_QUEUE_ID_LSB); + cmd->cmd_field[1] = PVA_LOW32(addr); + return 2U; +} + +/* + * CMD_SUBMIT (single task) + */ +static inline uint32_t pva_cmd_submit(struct pva_cmd_s *const cmd, + const uint8_t queue_id, + const uint64_t addr, const uint32_t flags) +{ + return pva_cmd_submit_batch(cmd, queue_id, addr, 0U, flags); +} + +/* + * CMD_SW_BIST + */ +static inline uint32_t pva_cmd_sw_bist(struct pva_cmd_s *const cmd, + const uint32_t bist_cmd, + const uint32_t inject_error, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_COMMAND(CMD_SW_BIST) | + PVA_SET_SUBCOMMAND(bist_cmd); + cmd->cmd_field[1] = (inject_error == 1) ? 0xAAAAAAAA : 0xBBBBBBBB; + return 2U; +} + +/* + * CMD_ABORT_QUEUE + */ +static inline uint32_t pva_cmd_abort_task(struct pva_cmd_s *const cmd, + const uint8_t queue_id, + const uint32_t flags) +{ + cmd->cmd_field[0] = flags | PVA_SET_COMMAND(CMD_ABORT_QUEUE) | + PVA_SET_SUBCOMMAND(queue_id); + return 1U; +} + +/* + * CMD_SET_VPU_STATS + */ +static inline uint32_t +pva_cmd_get_vpu_stats(struct pva_cmd_s * const cmd, + const uint64_t addr, + const uint32_t flags, + const uint8_t value) +{ + cmd->cmd_field[0] = flags + | PVA_SET_COMMAND(CMD_GET_VPU_STATS) + | PVA_INSERT(PVA_EXTRACT64(addr, PVA_EXTRACT_ADDR_HIGHER_8BITS_MSB, + PVA_EXTRACT_ADDR_HIGHER_8BITS_LSB, uint32_t), + PVA_ADDR_HIGHER_8BITS_MSB, PVA_ADDR_HIGHER_8BITS_LSB) + | PVA_INSERT(value, PVA_CMD_VPU_STATS_EN_MSB, PVA_CMD_VPU_STATS_EN_LSB); + cmd->cmd_field[1] = PVA_LOW32(addr); + + return 2U; +} +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-packed.h b/drivers/video/tegra/host/pva/fw_include/pva-packed.h new file mode 100644 index 00000000..acdd9520 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-packed.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_PACKED_H +#define PVA_PACKED_H +#ifdef __chess__ +#define PVA_PACKED /* TODO: find chess compiler pragma if there is one. */ +#else +#define PVA_PACKED __packed +#endif +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h b/drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h new file mode 100644 index 00000000..cb7c4d2b --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-sys-dma.h @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * @file pva-sys-dma.h + * + * @brief Types and constants related to PVA DMA setup and DMA + * descriptors. + */ + +#ifndef PVA_SYS_DMA_H +#define PVA_SYS_DMA_H + +#include +#include +#include + +/*** Version number of the current DMA info structure */ +#define PVA_DMA_INFO_VERSION_ID (1U) + +/** @brief DMA channels for a VPU app. + * + * The DMA channel structure contains the set-up of a PVA DMA channel + * used by the VPU app. + */ +struct PVA_PACKED pva_dma_ch_config_s { + /**< HW channel number. Zero if this config is unused. */ + uint32_t ch_number; + /**< DMA CH_CNTL0 register. */ + uint32_t cntl0; + /**< DMA CH_CNTL1 register. */ + uint32_t cntl1; + /**< Boundary pad register. */ + uint32_t boundary_pad; + /**< HWSEQ control register, Ignored on t19x. */ + uint32_t hwseqcntl; + /**< HWSEQ Frame Seq control register, Ignored on t19x and t23x. */ + uint32_t hwseqfscntl; + uint32_t pad_dma_channel0[2]; +}; + +/** Number of dma done masks in DMA info structure. */ +#define PVA_SYS_DMA_NUM_TRIGGERS (9U) +/** Number of DMA channel configurations in DMA info structure. */ +#define PVA_SYS_DMA_NUM_CHANNELS (15U) +/** Maximum number of DMA descriptors allowed. */ +#define PVA_SYS_DMA_MAX_DESCRIPTORS (60U) + +/** @brief DMA info for a VPU app. + * + * The DMA info contains the set-up of a PVA DMA engine for a VPU app. + */ +struct PVA_PACKED pva_dma_info_s { + /**< size of this structure */ + uint16_t dma_info_size; + /**< PVA_DMA_INFO_VERSION_ID */ + uint16_t dma_info_version; + /**< Number of used channels */ + uint8_t num_channels; + /**< Number of used descriptors*/ + uint8_t num_descriptors; +#ifdef SYSTEM_TESTS_ENABLED + uint16_t r5_channel_mask; /**< channel is used by R5*/ +#endif + /**< Number of bytes used in hwseq */ + uint16_t num_hwseq; + + /* + * * . + */ + +/** + * @file pva-sys-params.h + * + * @brief Types and constants related to VPU application parameters. + */ + +#ifndef PVA_SYS_PARAMS_H +#define PVA_SYS_PARAMS_H + +#include +#include + +/** VPU parameter header. + * + * The VPU App parameters contains kernel-user-provided data to be + * copied into the VMEM before executing the VPU app. The parameter + * headers are stored in the parameter_data_iova memory area of + * parameter_info_base field. + * + * The FW can also initialize complex datatypes, which are marked by + * special param_base outside the normal IOVA space. See the structure + * struct pva_vpu_instance_data_s for an example. + */ +struct PVA_PACKED pva_vpu_parameters_s { + pva_iova param_base; /**< I/O address of the parameter data */ + uint32_t addr; /**< Target address (VMEM offset) */ + uint32_t size; /**< Size of the parameter data in bytes */ +}; + +/** + * @brief The structure holds the wrapper information + * for the VMEM parameters that is provided by the user. + */ +struct PVA_PACKED pva_vpu_parameter_info_s { + /** + * @brief The IOVA address of the parameter data. 
+ * This should point to an array of type @ref pva_vpu_parameter_list_t . + * If no parameters are present this should be set to 0 + */ + pva_iova parameter_data_iova; + + /** + * @brief The starting IOVA address of the parameter data whose size + * is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This data needs to be + * memcpied by FW to VMEM and DMA should not be used. If no small + * parameters are present this should be set to 0. + */ + pva_iova small_vpu_param_data_iova; + + /** + * @brief The number of bytes of small VPU parameter data, i.e the + * data whose size is lower than @ref PVA_DMA_VMEM_COPY_THRESHOLD . If no small + * parameters are present, this should be set to 0 + */ + uint32_t small_vpu_parameter_data_size; + + /** + * @brief The index of the array of type @ref pva_vpu_parameter_list_t from which + * the VPU large parameters are present, i.e the vpu parameters whose size is greater + * than @ref PVA_DMA_VMEM_COPY_THRESHOLD . This value will always point to the index + * immediately after the small parameters. If no large parameter is present, then + * this field value will be same as the value of + * @ref pva_vpu_parameter_info_t.vpu_instance_parameter_list_start_index field + */ + uint32_t large_vpu_parameter_list_start_index; + + /** + * @brief The index of the array of type @ref pva_vpu_parameter_list_t from which + * the VPU instance parameters are present. This value will always point to the index + * immediately after the large parameters if large parameters are present, + * else it will be the same value as + * @ref pva_vpu_parameter_info_t.large_vpu_parameter_list_start_index field. + */ + uint32_t vpu_instance_parameter_list_start_index; +}; + +/** + * @brief The minimuim size of the VPU parameter for it to be considered + * as a large parameter + */ +#define PVA_DMA_VMEM_COPY_THRESHOLD ((uint32_t)(256U)) + +/** Prefix for special param_base markers */ +#define PVA_COMPLEX_IOVA (0xDA7AULL << 48ULL) +/** Versioned param_base marker */ +#define PVA_COMPLEX_IOVA_V(v) (PVA_COMPLEX_IOVA | ((uint64_t)(v) << 32ULL)) + +/** Marker for struct pva_vpu_instance_data_s */ +#define PVA_SYS_INSTANCE_DATA_V1_IOVA (PVA_COMPLEX_IOVA_V(1) | 0x00000001ULL) + +/** ELF symbol for struct pva_vpu_instance_data_s */ +#define PVA_SYS_INSTANCE_DATA_V1_SYMBOL "_sys_instance_data_v1" + +/** FW-provided instance data */ +struct PVA_PACKED pva_vpu_instance_data_s { + uint32_t vpu_id; + uint32_t vmem_base; + uint32_t dma_descriptor_base; + uint32_t l2ram_base; + uint32_t l2ram_size; +}; + +#endif /* PVA_SYS_PARAMS_H */ diff --git a/drivers/video/tegra/host/pva/fw_include/pva-task.h b/drivers/video/tegra/host/pva/fw_include/pva-task.h new file mode 100644 index 00000000..4981cdd8 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-task.h @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_TASK_H +#define PVA_TASK_H + +#include +#include +#include +#include + +#define TASK_VERSION_ID 0x01U +#define PVA_TASK_VERSION_ID 0x01U +#define PVA_ENGINE_ID 'P' + +#define PVA_MAX_PREACTION_LISTS 26U +#define PVA_MAX_POSTACTION_LISTS 28U + +#define PVA_TASK_POINTER_AUX_SIZE_MASK 0x00ffffffffffffffU +#define PVA_TASK_POINTER_AUX_SIZE_SHIFT 0 +#define PVA_TASK_POINTER_AUX_FLAGS_MASK 0xff00000000000000U +#define PVA_TASK_POINTER_AUX_FLAGS_SHIFT 56 +#define PVA_TASK_POINTER_AUX_FLAGS_CVNAS (1U << 0) + +#define NVPVA_TENSOR_MAX_DIMENSIONS (9u) + +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NHWC 0x00000001U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NCHW 0x00000002U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NCxHWx 0x00000003U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NDHWC 0x00000004U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_NCDHW 0x00000005U +#define NVPVA_TENSOR_ATTR_DIMENSION_ORDER_IMPLICIT 0x00000006U + +/* + * Generic task meta-data for the CV pipeline. + */ +typedef uint16_t pva_task_ofs; + +struct PVA_PACKED pva_gen_task_s { + pva_iova next; /* ptr to next task in the list */ + uint8_t versionid; + uint8_t engineid; + pva_task_ofs length; + uint16_t sequence; + uint8_t n_preaction_lists; + uint8_t n_postaction_lists; + pva_task_ofs preaction_lists_p; + pva_task_ofs postaction_lists_p; +}; + +/* + * Structure pointed to by {pre/post}action_lists_p. This points + * to the actual action list. + */ +struct PVA_PACKED pva_action_list_s { + pva_task_ofs offset; + uint16_t length; +}; + +/** @defgroup TASK_ACT PVA Task Action Identifiers. + * + * @{ + */ +#define TASK_ACT_PVA_STATISTICS 0x00U +#define TASK_ACT_PTR_BLK_GTREQL 0x01U +#define TASK_ACT_READ_STATUS 0x02U +#define TASK_ACT_WRITE_STATUS 0x03U +#define TASK_ACT_PTR_WRITE_SOT_V 0x04U +#define TASK_ACT_PTR_WRITE_SOT_R 0x05U +#define TASK_ACT_PTR_WRITE_EOT_V 0x06U +#define TASK_ACT_PTR_WRITE_EOT_R 0x07U +#define TASK_ACT_PTR_WRITE_EOT 0x08U +/** @} */ + +struct PVA_PACKED pva_gen_task_status_s { + uint64_t timestamp; + uint32_t info32; + uint16_t info16; + uint16_t status; +}; + +struct PVA_PACKED pva_task_statistics_s { + uint64_t queued_time; /* Time when the task was queued by KMD */ + uint64_t head_time; /* when task reached head of queue */ + uint64_t input_actions_complete; /* when input actions done */ + uint64_t vpu_assigned_time; /* when task assigned a VPU */ + uint64_t vpu_start_time; /* when VPU started running task */ + uint64_t vpu_complete_time; /* when execution completed */ + uint64_t complete_time; /* when task considered complete */ + uint8_t vpu_assigned; /* which VPU task was assigned */ + uint8_t queue_id; /* ID of the queue the task was submitted on*/ + uint8_t reserved[6]; +}; + +enum pva_task_parameter_type_e { + PVA_PARAM_FIRST = 0U, /* must match first type */ + PVA_PARAM_SCALAR_LIST = 0U, + PVA_PARAM_SURFACE_LIST = 1U, + PVA_PARAM_ROI_LIST = 2U, + PVA_PARAM_2DPOINTS_LIST = 3U, + PVA_PARAM_OPAQUE_DATA = 4U, + PVA_PARAM_LAST = 5U /* must be last! */ +}; + +struct PVA_PACKED pva_task_opaque_data_desc_s { + /* Number of bytes in the primary payload */ + uint16_t primary_payload_size; +}; + +struct PVA_PACKED pva_task_pointer_s { + uint64_t address; + uint64_t aux; +}; + +struct PVA_PACKED pva_task_parameter_array_s { + pva_iova address; + uint32_t size; + uint32_t type; /* type = pva_task_parameter_type_e */ +}; + +/* + * Parameter descriptor (all parameters have the same header) + * the specific data for the parameters immediately follows + * the descriptor. 
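The aux word of pva_task_pointer_s packs the buffer size into its low 56 bits and flags into the top byte, per the PVA_TASK_POINTER_AUX_* masks above. A small sketch of packing and unpacking it (plain C, mask values mirrored from this header):

#include <stdint.h>

#define AUX_SIZE_MASK   0x00ffffffffffffffULL  /* PVA_TASK_POINTER_AUX_SIZE_MASK */
#define AUX_SIZE_SHIFT  0
#define AUX_FLAGS_MASK  0xff00000000000000ULL  /* PVA_TASK_POINTER_AUX_FLAGS_MASK */
#define AUX_FLAGS_SHIFT 56

static uint64_t pack_aux(uint64_t size, uint8_t flags)
{
        return ((size << AUX_SIZE_SHIFT) & AUX_SIZE_MASK) |
               (((uint64_t)flags << AUX_FLAGS_SHIFT) & AUX_FLAGS_MASK);
}

static uint64_t aux_size(uint64_t aux)
{
        return (aux & AUX_SIZE_MASK) >> AUX_SIZE_SHIFT;
}

static uint8_t aux_flags(uint64_t aux)
{
        return (uint8_t)((aux & AUX_FLAGS_MASK) >> AUX_FLAGS_SHIFT);
}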
+ */ +struct PVA_PACKED pva_task_parameter_desc_s { + uint32_t num_parameters; + uint32_t reserved; +}; + +/* + * Individual Region of Interest (ROI) descriptor + */ +struct PVA_PACKED pva_task_roi_desc_s { + uint32_t left; + uint32_t top; + uint32_t right; + uint32_t bottom; +}; + +/* + * Surface descriptor + */ +struct PVA_PACKED pva_task_surface_s { + pva_iova address; + pva_iova roi_addr; + uint32_t roi_size; + uint32_t surface_size; + uint32_t width; + uint32_t height; + uint32_t line_stride; + uint32_t plane_stride; + uint32_t num_planes; + uint8_t layout; + uint8_t block_height_log2; + uint8_t memory; + uint8_t reserved; + uint64_t format; +}; + +/* + * 2-dimensional point descriptor + */ +struct PVA_PACKED pva_task_point2d_s { + uint32_t x; + uint32_t y; +}; + +/* + * Surface Layout. + */ +#define PVA_TASK_SURFACE_LAYOUT_PITCH_LINEAR 0U +#define PVA_TASK_SURFACE_LAYOUT_BLOCK_LINEAR 1U + +/* + * Where the surface is located. + */ +#define PVA_TASK_SURFACE_MEM_FL_CV_SURFACE PVA_BIT(0U) +#define PVA_TASK_SURFACE_MEM_FL_CV_ROI PVA_BIT(1U) + +/** + * @brief Task descriptor for the new architecture. + * + * The runlist of the new task descriptor contains pointer to + * task-specific parameters of the VPU app, pointer to info structure + * describing its binary code, and its dma setup. + */ +struct PVA_PACKED pva_td_s { + /** @brief IOVA pointer to the next task */ + pva_iova next; + /** @brief Version of task descriptor internal to PVA. + * Should hold a value of 2 for safety architecture + */ + uint8_t runlist_version; + /** @brief Number of pre-actions. + * Valid range is 0..PVA_MAX_PREACTION_LISTS - both inclusive + */ + uint8_t num_preactions; + /** @brief Number of post-actions. + * Valid range is 0..PVA_MAX_POSTACTION_LISTS - both inclusive + */ + uint8_t num_postactions; + /** Index of the stream ID assigned to this task */ + uint8_t sid_index; + /** @brief Task configuration flags */ + uint32_t flags; + /** @brief IOVA pointer to an instance of pva_vpu_parameter_info_t */ + pva_iova parameter_info_base; + /** @brief IOVA pointer to a pva_bin_info_t structure */ + pva_iova bin_info; + /** @brief IOVA pointer to a pva_bin_info_t structure */ + pva_iova ppe_bin_info; + /** @brief IOVA pointer to a pva_dma_info_t structure */ + pva_iova dma_info; + /** IOVA pointer to a pva_circular_info_t structure */ + pva_iova stdout_info; + /** @brief IOVA pointer to an array of pva_task_action_t structure */ + pva_iova preactions; + /** @brief IOVA pointer to an array of pva_task_action_t structure */ + pva_iova postactions; + /** @brief Timeout for the VPU algorithm in micro-seconds. 
+ * Valid range is 0..PVA_MAX_TIMEOUT - both inclusive + */ + uint64_t timeout; + /** @brief Variable to hold the queued time of the task */ + uint64_t queued_time; + /** @brief The ID of the batch that this task belongs to */ + uint64_t batch_id; + /** Size of L2SRAM required for the task */ + uint32_t l2sram_size; + /** Number of total tasks with timer resource utilization */ + uint16_t timer_ref_cnt; + /** Number of total tasks with L2SRAM resource utilization */ + uint16_t l2sram_ref_cnt; + /** @brief Number of parameters in parameter array */ + uint16_t num_parameters; + /** @brief Interface on which FW should return status */ + uint8_t status_interface; + /** @brief The ID of this task used to identify it during AISR */ + uint8_t task_id; + /** @note The below two fields are added for backward + * compatibility, will be removed once changes are merged + */ + /** Additional padding to maintain alignement */ + uint8_t pad0[4]; +}; + +/** Runlist version for new task descriptor format */ +#define PVA_RUNLIST_VERSION_ID (0x02U) + +/** @addtogroup PVA_TASK_FL + * @{ + */ +/** Schedule on VPU0 only */ +#define PVA_TASK_FL_VPU0 PVA_BIT(0U) + +/** Schedule on VPU1 only */ +#define PVA_TASK_FL_VPU1 PVA_BIT(1U) + +/** Flag to allow VPU debugger attach for the task */ +#define PVA_TASK_FL_VPU_DEBUG PVA_BIT(2U) + +/** Flag to request masking of illegal instruction error for the task */ +#define PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR PVA_BIT(3U) + +/** Flag to request masking of divide by zero error for the task */ +#define PVA_TASK_FL_ERR_MASK_DIVIDE_BY_0 PVA_BIT(4U) + +/** Flag to request masking of floating point NAN error for the task */ +#define PVA_TASK_FL_ERR_MASK_FP_NAN PVA_BIT(5U) + +/** Schedule next task in list immediately on this VPU. + * + * Not allowed in the last task of batch list. 
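pva_td_s::flags is a plain OR of the PVA_TASK_FL_* bits. An illustrative composition, assuming PVA_BIT() from pva-bit.h (not shown in this hunk) expands to a single set bit:

#include <stdint.h>

#define PVA_BIT(b) (1U << (b))   /* assumed equivalent of the pva-bit.h helper */

#define PVA_TASK_FL_VPU0                   PVA_BIT(0U)
#define PVA_TASK_FL_VPU_DEBUG              PVA_BIT(2U)
#define PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR PVA_BIT(3U)

/*
 * Example flags value: pin the task to VPU0, allow a VPU debugger to
 * attach, and mask illegal-instruction errors.  The result would be
 * written to pva_td_s::flags.
 */
static uint32_t example_task_flags(void)
{
        return PVA_TASK_FL_VPU0 |
               PVA_TASK_FL_VPU_DEBUG |
               PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR;
}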
+ */ +#define PVA_TASK_FL_HOT_VPU PVA_BIT(10U) + +/** @brief Flag to identify a barrier task */ +#define PVA_TASK_FL_SYNC_TASKS PVA_BIT(11U) + +/** @brief Flag to identify L2SRAM is being utilized for + * the task and to decrement l2sram_ref_count after task is done + */ +#define PVA_TASK_FL_DEC_L2SRAM PVA_BIT(12U) + +#define PVA_TASK_FL_DEC_TIMER PVA_BIT(13U) + +/** Flag to indicate special access needed by task */ +#define PVA_TASK_FL_SPECIAL_ACCESS PVA_BIT(15U) + +/** Flag to indicate queued time is needed by task */ +#define PVA_TASK_FL_QUEUED_TS PVA_BIT(16U) + +/** Flag to indicate head time is needed by task */ +#define PVA_TASK_FL_HEAD_TS PVA_BIT(17U) + +/** Flag to indicate ready time is needed by task */ +#define PVA_TASK_FL_READY_TS PVA_BIT(18U) + +/** Flag to indicate R5 start time/vpu assigned time is needed by task */ +#define PVA_TASK_FL_SOT_R_TS PVA_BIT(19U) + +/** Flag to indicate VPU start time is needed by task */ +#define PVA_TASK_FL_SOT_V_TS PVA_BIT(20U) + +/** Flag to indicate VPU done time is needed by task */ +#define PVA_TASK_FL_EOT_V_TS PVA_BIT(21U) + +/** Flag to indicate R5 complete time is needed by task */ +#define PVA_TASK_FL_EOT_R_TS PVA_BIT(22U) + +/** Flag to indicate Golden register check is needed by task */ +#define PVA_TASK_FL_GR_CHECK PVA_BIT(23U) + +/** Flag to indicate that stats are enabled */ +#define PVA_TASK_FL_STATS_ENABLE (PVA_TASK_FL_QUEUED_TS | PVA_TASK_FL_HEAD_TS |\ + PVA_TASK_FL_READY_TS | PVA_TASK_FL_SOT_R_TS |\ + PVA_TASK_FL_SOT_V_TS | PVA_TASK_FL_EOT_V_TS |\ + PVA_TASK_FL_EOT_R_TS) +/** @} */ + +/** Version of the binary info */ +#define PVA_BIN_INFO_VERSION_ID (0x01U) +#define PVA_MAX_VPU_METADATA (4U) + +#define PVA_CODE_SEC_BASE_ADDR_ALIGN (128ULL) +#define PVA_CODE_SEC_SIZE_ALIGN (32U) + +#define PVA_DATA_SEC_BASE_ADDR_ALIGN (64ULL) +#define PVA_DATA_SEC_SIZE_ALIGN (32U) + +struct pva_vpu_data_section_s { + uint32_t offset; /**< Offset from the base source address */ + uint32_t addr; /**< Target address (VMEM offset) */ + uint32_t size; /**< Size of the section in bytes */ +}; + +/** @brief Information of a VPU app binary. + * + * The PVA kernels are implemented as VPU apps, small VPU programs + * executed independently on a VPU. The information structure is used + * by PVA R5 to preload the code in the VPU icache as well as preload + * the data sections into the VPU VMEM. + * + * If PVA has multiple address spaces, the application code, data, and + * metadata may be placed in different address space domains accessed + * using different StreamIDs. The code is accessed by VPU, the data + * sections by PVA DMA, the metadata by R5. + * + * The metadata sections contain the ABI information of the VPU + * app. The metadata is stored as data sections in the ELF executable, + * however, the address of the metadata section is >= 768K (0xC0000). + */ +struct PVA_PACKED pva_bin_info_s { + uint16_t bin_info_size; /**< Size of this structure */ + uint16_t bin_info_version; /**< PVA_BIN_INFO_VERSION_ID */ + + /** Size of the code */ + uint32_t code_size; + /** Base address of the code. Should be aligned at 128. */ + pva_iova code_base; + + /** Base address of the data. Should be aligned at 64. 
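The code and data base addresses referenced by pva_bin_info_s must respect the PVA_*_SEC_*_ALIGN constants above. A loader-side sanity check might look like this (sketch only, constants mirrored from this header):

#include <stdint.h>
#include <stdbool.h>

#define PVA_CODE_SEC_BASE_ADDR_ALIGN 128ULL
#define PVA_CODE_SEC_SIZE_ALIGN      32U
#define PVA_DATA_SEC_BASE_ADDR_ALIGN 64ULL

/* Check the alignment constraints documented for pva_bin_info_s. */
static bool bin_info_aligned(uint64_t code_base, uint32_t code_size,
                             uint64_t data_sec_base)
{
        return (code_base % PVA_CODE_SEC_BASE_ADDR_ALIGN == 0) &&
               (code_size % PVA_CODE_SEC_SIZE_ALIGN == 0) &&
               (data_sec_base % PVA_DATA_SEC_BASE_ADDR_ALIGN == 0);
}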
*/ + /** @brief Holds address of data section info of type + * @ref pva_vpu_data_section_t + */ + pva_iova data_sec_base; + + /** @brief Number of data section info stored @ref data_sec_base */ + uint32_t data_sec_count; + + pva_iova data_base; +}; + +/* + * Status structure that will be return to circular buffer + */ +struct PVA_PACKED pva_task_error_s { + /* IOVA address of task */ + pva_iova addr; + + /* Status of task execution */ + uint16_t error; + + /* Indicates if status is valid */ + uint8_t valid; + + /* VPU id on which the task was scheduled */ + uint8_t vpu; + + /* Queue to which the task belongs */ + uint8_t queue; + + /* Task ID of the task */ + uint8_t task_id; +}; + + +struct PVA_PACKED pva_circular_buffer_info_s { + pva_iova head; + pva_iova tail; + pva_iova err; + pva_iova buffer; + uint32_t buffer_size; +}; + + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-types.h b/drivers/video/tegra/host/pva/fw_include/pva-types.h new file mode 100644 index 00000000..1caff658 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-types.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_TYPES_H +#define PVA_TYPES_H +#if !defined(__KERNEL__) +#define __user +#include +#include +#else +#include +#endif +#include + +typedef uint64_t pva_iova; + +/* + * Queue IDs + */ +enum pva_queue_id_e { + PVA_FW_QUEUE_0, + PVA_FW_QUEUE_1, + PVA_FW_QUEUE_2, + PVA_FW_QUEUE_3, + PVA_FW_QUEUE_4, + PVA_FW_QUEUE_5, + PVA_FW_QUEUE_6, + PVA_FW_QUEUE_7, + PVA_FW_QUEUE_8, /* PVA_SW_BIST_QUEUE_ID0 */ + PVA_FW_QUEUE_9, /* PVA_SW_BIST_QUEUE_ID1 */ + PVA_NUM_QUEUES +}; + +/* + * Hardware FIFO IDs + */ +typedef uint8_t pva_ccq_fifo_id_t; + +/* + * PVE IDs + */ +typedef uint8_t pva_pve_id_t; +#define PVA_PVE_ID_NONE 0xffU + +/* + * VMEM IDs + */ +typedef uint8_t pva_vmem_id_t; + +/* + * DMA Descriptor IDs + */ +typedef uint8_t pva_dma_desc_t; + +/* + * DMA Channel IDs + */ +typedef uint8_t pva_dma_channel_id_t; + +/* + * DMA Channel Mask + */ +typedef uint16_t pva_dma_channel_mask_t; + +/* + * Address range + */ +struct pva_addr_range_s { + uint32_t offset; + uint32_t addr; + uint32_t size; +}; + +/* + * Macro to access size of a member of a struct + */ +#define PVA_MEMBER_SIZEOF(_struct_, _member_) \ + (sizeof(((_struct_ *)0)->_member_)) + +/* + * SID + */ +typedef uint8_t pva_sid_t; + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h new file mode 100644 index 00000000..71c8d548 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header-types.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
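PVA_MEMBER_SIZEOF() evaluates the size of a struct member without needing an instance, because sizeof never dereferences its operand. A trivial usage example (the struct is mirrored from pva-types.h):

#include <stdint.h>
#include <stdio.h>

#define PVA_MEMBER_SIZEOF(_struct_, _member_) \
        (sizeof(((_struct_ *)0)->_member_))

struct pva_addr_range_s {       /* mirrored from pva-types.h */
        uint32_t offset;
        uint32_t addr;
        uint32_t size;
};

int main(void)
{
        /* Prints the size of the 'size' member (4 bytes here). */
        printf("size member: %zu bytes\n",
               PVA_MEMBER_SIZEOF(struct pva_addr_range_s, size));
        return 0;
}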
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_UCODE_HEADER_TYPES_H +#define PVA_UCODE_HEADER_TYPES_H + +/* + * This file is distinct from the other uCode header file because it + * defines constants/values that are used by the linker scripts and therefor + * cannot have C structures (only pre-processor directives). + */ + +/* + * Define the length of a section header to be defined independently than + * the C structure (it will be larger). Picking a value that is easy to + * compute. + */ +#define PVA_UCODE_SEG_HDR_LENGTH 128 + +#define PVA_UCODE_SEG_NONE 0 /* not a segment */ +#define PVA_UCODE_SEG_EVP 1 /* EVP information */ +#define PVA_UCODE_SEG_R5 2 /* R5 code/data */ +#define PVA_UCODE_SEG_CRASHDUMP 3 /* space for crash dump */ +#define PVA_UCODE_SEG_TRACE_LOG 4 /* space for PVA trace logs */ +#define PVA_UCODE_SEG_DRAM_CACHED 5 /* cachable DRAM area */ +#define PVA_UCODE_SEG_CODE_COVERAGE 6 /* space for PVA FW code coverage */ +#define PVA_UCODE_SEG_DEBUG_LOG 7 /* space for PVA debug logs */ +#define PVA_UCODE_SEG_NEXT 8 /* must be last */ + +/* PVA FW binary max segment size used for section alignment */ +#define PVA_BIN_MAX_HEADER_SIZE 0x1000 +#define PVA_BIN_MAX_EVP_SIZE 0x1000 + +#define PVA_HDR_MAGIC 0x31415650 /* PVA1 in little endian */ +#define PVA_HDR_VERSION 0x00010000 /* version 1.0 of the header */ +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h new file mode 100644 index 00000000..bdbd8516 --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-ucode-header.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_UCODE_HEADER_H +#define PVA_UCODE_HEADER_H + +#include +#include + +#define MAX_SEGMENT_NAME_LEN 64 + +/* + * PVA uCode Header. + * + * There is a basic header that describes the uCode. Other than the + * validation information (such as versions, checksums (MD5 hash?), etc) + * it describes the various segments of the uCode image. The important + * thing to note is that there are multiple segments for various parts of + * the uCode. + * + * Each segment has: + * - type: this indicates the type of segment it is. + * - id: this gives a uniqueness to the segment when there are multiple + * segments of the same type. It also allows different segments types + * to be related by using the same segment ID (such as relating VPU code, + * R5 application code and parameter data together). 
+ * - name: this is NUL terminated string that is the "name" of the segment + * - size: size of the segment in bytes + * - offset: this is the offset from the start of the binary as to + * where the data contained in the segment is to be placed. + * - address: this is the address of where the data in the segment is + * to be written to. + * - physical address: this is used in some segments to denote where in + * the 40-bit address space the segment is located. This allows for + * setting up some of the segment registers. + * + * A segment can define a region but contain no data. In those cases, the + * file offset would be 0. + * + * In the case of DRAM the load address and size can be used to setup the + * relevant segment registers and DRAM apertures. + * + */ + +/* + * There can be multiple segments of the same type. + */ +struct pva_ucode_seg_s { + uint32_t type; /* type of segment */ + uint32_t id; /* ID of segment */ + uint32_t size; /* size of the segment */ + uint32_t offset; /* offset from header to segment start */ + uint32_t addr; /* load address of segment */ + uint8_t name[MAX_SEGMENT_NAME_LEN]; + uint64_t phys_addr __aligned(8); +}; + +/* + * Ucode header gives information on what kind of images are contained in + * a binary. + * + * nsegments : Number of segments available in pva_ucode_r5_sysfw_info_t. + * + * R5 system image layout used for booting R5. + * +--------------------------------+ + * + Ucode header + + * +--------------------------------+ + * + struct + + * + pva_ucode_r5_sysfw_info_t + + * +--------------------------------+ + * + + + * + pva firwmare data/code + + * +--------------------------------+ + */ +struct __packed pva_ucode_hdr_s { + uint32_t magic; + uint32_t hdr_version; + uint32_t ucode_version; + uint32_t nsegments; +}; + +struct pva_ucode_r5_sysfw_info_s { + struct pva_ucode_seg_s evp __aligned(128); + struct pva_ucode_seg_s dram __aligned(128); + struct pva_ucode_seg_s crash_dump __aligned(128); + struct pva_ucode_seg_s trace_log __aligned(128); + struct pva_ucode_seg_s code_coverage __aligned(128); + struct pva_ucode_seg_s debug_log __aligned(128); + struct pva_ucode_seg_s cached_dram __aligned(128); +}; + +#endif diff --git a/drivers/video/tegra/host/pva/fw_include/pva-version.h b/drivers/video/tegra/host/pva/fw_include/pva-version.h new file mode 100644 index 00000000..9f4a84ea --- /dev/null +++ b/drivers/video/tegra/host/pva/fw_include/pva-version.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
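A consumer of the image would first check the header magic and version from pva-ucode-header-types.h before trusting nsegments. A minimal validation sketch (fields mirrored from pva_ucode_hdr_s; this is not the driver's actual loader code):

#include <stdint.h>
#include <stdbool.h>

#define PVA_HDR_MAGIC   0x31415650   /* "PVA1" in little endian */
#define PVA_HDR_VERSION 0x00010000   /* header version 1.0 */

struct ucode_hdr {               /* mirrors struct pva_ucode_hdr_s */
        uint32_t magic;
        uint32_t hdr_version;
        uint32_t ucode_version;
        uint32_t nsegments;
};

/* Accept the image only if the magic and header version both match. */
static bool ucode_hdr_valid(const struct ucode_hdr *hdr)
{
        return hdr->magic == PVA_HDR_MAGIC &&
               hdr->hdr_version == PVA_HDR_VERSION;
}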
+ */ + +#ifndef PVA_VERSION_H +#define PVA_VERSION_H + +#include +#include +#include + +#define PVA_MAKE_VERSION(_type_, _major_, _minor_, _subminor_) \ + (PVA_INSERT(_type_, 31, 24) | PVA_INSERT(_major_, 23, 16) | \ + PVA_INSERT(_minor_, 15, 8) | PVA_INSERT(_subminor_, 7, 0)) + +#define PVA_VERSION(_type_) \ + PVA_MAKE_VERSION(_type_, PVA_VERSION_MAJOR, PVA_VERSION_MINOR, \ + PVA_VERSION_SUBMINOR) + +#endif diff --git a/drivers/video/tegra/host/pva/hw_cfg_pva_v1.h b/drivers/video/tegra/host/pva/hw_cfg_pva_v1.h new file mode 100644 index 00000000..c81582bb --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_cfg_pva_v1.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
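PVA_MAKE_VERSION() packs a type byte and three version bytes into one 32-bit word. PVA_INSERT() lives in pva-bit.h (not included in this hunk); the sketch below uses an assumed local equivalent that places a value into bits [msb:lsb]:

#include <stdint.h>

/* Assumed stand-in for PVA_INSERT(): place 'v' into bits [msb:lsb]. */
#define INSERT(v, msb, lsb) \
        (((uint32_t)(v) << (lsb)) & \
         (((~0U) >> (31 - (msb))) & ~((1U << (lsb)) - 1U)))

#define MAKE_VERSION(type, major, minor, subminor)        \
        (INSERT(type, 31, 24) | INSERT(major, 23, 16) |   \
         INSERT(minor, 15, 8) | INSERT(subminor, 7, 0))

/* 'R', 5, 2, 0 packs to 0x52050200. */
static const uint32_t example_version = MAKE_VERSION('R', 5, 2, 0);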
+ */ +#ifndef _hw_cfg_pva_v1_h_ +#define _hw_cfg_pva_v1_h_ + +static inline u32 v1_cfg_user_sid_r(void) +{ + return 0x70000; +} +static inline u32 v1_cfg_ccq_r(void) +{ + return 0x71000; +} +static inline u32 v1_cfg_vps0user_lsegreg_r(void) +{ + return 0x71004; +} +static inline u32 v1_cfg_vps1user_lsegreg_r(void) +{ + return 0x71008; +} +static inline u32 v1_cfg_r5user_lsegreg_r(void) +{ + return 0x7100c; +} +static inline u32 v1_cfg_vps0user_usegreg_r(void) +{ + return 0x71010; +} +static inline u32 v1_cfg_vps1user_usegreg_r(void) +{ + return 0x71014; +} +static inline u32 v1_cfg_r5user_usegreg_r(void) +{ + return 0x71018; +} +static inline u32 v1_cfg_ccq_status_r(u32 status_id) +{ + return 0x72000U + 0x4U * status_id; +} +static inline u32 v1_cfg_priv_sid_r(void) +{ + return 0x80000; +} +static inline u32 v1_cfg_priv_ar1_lsegreg_r(void) +{ + return 0x80004; +} +static inline u32 v1_cfg_priv_ar1_usegreg_r(void) +{ + return 0x80008; +} +static inline u32 v1_cfg_priv_ar2_lsegreg_r(void) +{ + return 0x8000c; +} +static inline u32 v1_cfg_priv_ar2_usegreg_r(void) +{ + return 0x80010; +} +static inline u32 v1_cfg_priv_ar1_start_r(void) +{ + return 0x80014; +} +static inline u32 v1_cfg_priv_ar1_end_r(void) +{ + return 0x80018; +} +static inline u32 v1_cfg_priv_ar2_start_r(void) +{ + return 0x8001c; +} +static inline u32 v1_cfg_priv_ar2_end_r(void) +{ + return 0x80020; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_cfg_pva_v2.h b/drivers/video/tegra/host/pva/hw_cfg_pva_v2.h new file mode 100644 index 00000000..98224875 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_cfg_pva_v2.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . 
This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_cfg_pva_gen2_h_ +#define _hw_cfg_pva_gen2_h_ +#include "hw_cfg_pva_v1.h" +#define V2_SID_CONTROL_BASE 0x240000U + +static inline u32 v2_cfg_user_sid_vm_r(u32 idx) +{ + return V2_SID_CONTROL_BASE + 0x4U * idx; +} + +static inline u32 v2_cfg_priv_sid_r(void) +{ + return V2_SID_CONTROL_BASE + 0x20U; +} + +static inline u32 v2_cfg_vps_sid_r(void) +{ + return V2_SID_CONTROL_BASE + 0x24U; +} + +#define V2_ADDRESS_CONTROL_BASE 0x250000U + +static inline u32 v2_cfg_r5user_lsegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x8U; +} + +static inline u32 v2_cfg_priv_ar1_lsegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0xCU; +} + +static inline u32 v2_cfg_priv_ar2_lsegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x10U; +} + +static inline u32 v2_cfg_r5user_usegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x1CU; +} + +static inline u32 v2_cfg_priv_ar1_usegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x20U; +} + +static inline u32 v2_cfg_priv_ar2_usegreg_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x24U; +} + +static inline u32 v2_cfg_priv_ar1_start_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x28U; +} + +static inline u32 v2_cfg_priv_ar1_end_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x2CU; +} + +static inline u32 v2_cfg_priv_ar2_start_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x30U; +} + +static inline u32 v2_cfg_priv_ar2_end_r(void) +{ + return V2_ADDRESS_CONTROL_BASE + 0x34U; +} + +#define V2_CFG_CCQ_BASE 0x260000U +#define V2_CFG_CCQ_SIZE 0x010000U + +static inline u32 v2_cfg_ccq_r(u32 idx) +{ + return V2_CFG_CCQ_BASE + V2_CFG_CCQ_SIZE * idx; +} + +static inline u32 v2_cfg_ccq_status_r(u32 ccq_idx, u32 status_idx) +{ + return V2_CFG_CCQ_BASE + V2_CFG_CCQ_SIZE * ccq_idx + 0x4U + + 0x4U * status_idx; +} + +#endif diff --git a/drivers/video/tegra/host/pva/hw_dma_ch_pva.h b/drivers/video/tegra/host/pva/hw_dma_ch_pva.h new file mode 100644 index 00000000..91616865 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_dma_ch_pva.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . 
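For reference, the gen2 CCQ blocks are V2_CFG_CCQ_SIZE apart and each status register is a further 4 bytes in, which is all v2_cfg_ccq_status_r() computes. The same arithmetic written out as a standalone sketch:

#include <stdint.h>
typedef uint32_t u32;

#define V2_CFG_CCQ_BASE 0x260000U
#define V2_CFG_CCQ_SIZE 0x010000U

/* Offset of status register 'status_idx' of CCQ 'ccq_idx',
 * equivalent to v2_cfg_ccq_status_r() above.
 */
static u32 ccq_status_offset(u32 ccq_idx, u32 status_idx)
{
        return V2_CFG_CCQ_BASE + V2_CFG_CCQ_SIZE * ccq_idx +
               0x4U + 0x4U * status_idx;
}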
This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_dma_ch_pva_h_ +#define _hw_dma_ch_pva_h_ + +static inline u32 dma_ch_base_r(void) +{ + return 0xa0000; +} +static inline u32 dma_ch_size_r(void) +{ + return 0x2000; +} +static inline u32 dma_ch_cntl0_r(void) +{ + return 0x0; +} +static inline u32 dma_ch_cntl0_enable_m(void) +{ + return 0x1 << 31; +} +static inline u32 dma_ch_cntl0_did_f(u32 v) +{ + return (v & 0xff) << 0; +} +static inline u32 dma_ch_status0_r(void) +{ + return 0x8; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_dma_desc_pva.h b/drivers/video/tegra/host/pva/hw_dma_desc_pva.h new file mode 100644 index 00000000..9c670a8e --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_dma_desc_pva.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
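The per-channel DMA register blocks start at dma_ch_base_r() and, judging by the size accessor, are laid out dma_ch_size_r() bytes apart (an assumption; the header itself does not spell out the stride). A sketch of forming a channel's CNTL0 offset and composing a CNTL0 value:

#include <stdint.h>
typedef uint32_t u32;

static inline u32 dma_ch_base_r(void)  { return 0xa0000; }
static inline u32 dma_ch_size_r(void)  { return 0x2000;  }
static inline u32 dma_ch_cntl0_r(void) { return 0x0;     }
static inline u32 dma_ch_cntl0_enable_m(void) { return 0x1U << 31; }
static inline u32 dma_ch_cntl0_did_f(u32 v)   { return (v & 0xff) << 0; }

/* MMIO offset of CNTL0 for DMA channel 'ch', assuming consecutive
 * channel blocks of dma_ch_size_r() bytes.
 */
static u32 dma_ch_cntl0_offset(u32 ch)
{
        return dma_ch_base_r() + ch * dma_ch_size_r() + dma_ch_cntl0_r();
}

/* Example CNTL0 value: channel enabled, DID field set to 'did'. */
static u32 dma_ch_cntl0_value(u32 did)
{
        return dma_ch_cntl0_enable_m() | dma_ch_cntl0_did_f(did);
}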
+ */ +#ifndef _hw_dma_desc_pva_h_ +#define _hw_dma_desc_pva_h_ + +static inline u32 dma_desc_base_r(void) +{ + return 0xc1000; +} +static inline u32 dma_desc_size_r(void) +{ + return 0x40; +} +static inline u32 dma_desc_cntl_r(void) +{ + return 0x0; +} +static inline u32 dma_desc_cntl_dstm_f(u32 v) +{ + return (v & 0x7) << 0; +} +static inline u32 dma_desc_cntl_dstm_dstm_mc_v(void) +{ + return 0x00000001; +} +static inline u32 dma_desc_cntl_dstm_dstm_tcm_v(void) +{ + return 0x00000004; +} +static inline u32 dma_desc_cntl_ddtm_f(u32 v) +{ + return (v & 0x7) << 4; +} +static inline u32 dma_desc_cntl_ddtm_ddtm_mc_v(void) +{ + return 0x00000001; +} +static inline u32 dma_desc_cntl_ddtm_ddtm_tcm_v(void) +{ + return 0x00000004; +} +static inline u32 dma_desc_cntl_srch_f(u32 v) +{ + return (v & 0xff) << 16; +} +static inline u32 dma_desc_cntl_dsth_f(u32 v) +{ + return (v & 0xff) << 24; +} +static inline u32 dma_desc_srcl_r(void) +{ + return 0x4; +} +static inline u32 dma_desc_dstl_r(void) +{ + return 0x8; +} +static inline u32 dma_desc_tile_cntl_r(void) +{ + return 0xc; +} +static inline u32 dma_desc_tile_cntl_tx_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 dma_desc_tile_cntl_ty_f(u32 v) +{ + return (v & 0xffff) << 16; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_evp_pva.h b/drivers/video/tegra/host/pva/hw_evp_pva.h new file mode 100644 index 00000000..8732c5c3 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_evp_pva.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
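The descriptor CNTL word combines the two transfer-mode fields with the upper address bytes (SRCH/DSTH) that do not fit into the 32-bit SRCL/DSTL registers. Which of DSTM and DDTM names the source mode and which the destination mode is not spelled out in this header, so the pairing in the sketch below is illustrative only:

#include <stdint.h>
typedef uint32_t u32;
typedef uint64_t u64;

static inline u32 dma_desc_cntl_dstm_f(u32 v) { return (v & 0x7) << 0; }
static inline u32 dma_desc_cntl_ddtm_f(u32 v) { return (v & 0x7) << 4; }
static inline u32 dma_desc_cntl_srch_f(u32 v) { return (v & 0xff) << 16; }
static inline u32 dma_desc_cntl_dsth_f(u32 v) { return (v & 0xff) << 24; }

/* Compose a descriptor control word from two transfer-mode values
 * (e.g. the *_mc_v()/*_tcm_v() constants) plus the high address bytes.
 * The DSTM/DDTM pairing with source/destination is assumed.
 */
static u32 desc_cntl(u32 mode_a, u32 mode_b, u64 src, u64 dst)
{
        return dma_desc_cntl_dstm_f(mode_a) |
               dma_desc_cntl_ddtm_f(mode_b) |
               dma_desc_cntl_srch_f((u32)(src >> 32)) |
               dma_desc_cntl_dsth_f((u32)(dst >> 32));
}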
+ */ +#ifndef _hw_evp_pva_h_ +#define _hw_evp_pva_h_ + +static inline u32 evp_reset_addr_r(void) +{ + return 0x20; +} +static inline u32 evp_undef_addr_r(void) +{ + return 0x24; +} +static inline u32 evp_swi_addr_r(void) +{ + return 0x28; +} +static inline u32 evp_prefetch_abort_addr_r(void) +{ + return 0x2c; +} +static inline u32 evp_data_abort_addr_r(void) +{ + return 0x30; +} +static inline u32 evp_rsvd_addr_r(void) +{ + return 0x34; +} +static inline u32 evp_irq_addr_r(void) +{ + return 0x38; +} +static inline u32 evp_fiq_addr_r(void) +{ + return 0x3c; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_hsp_pva.h b/drivers/video/tegra/host/pva/hw_hsp_pva.h new file mode 100644 index 00000000..62e6cab0 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_hsp_pva.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_hsp_pva_h_ +#define _hw_hsp_pva_h_ + +static inline u32 hsp_common_r(void) +{ + return 0x160000; +} +static inline u32 hsp_int_ie0_r(void) +{ + return 0x160100; +} +static inline u32 hsp_int_ie1_r(void) +{ + return 0x160104; +} +static inline u32 hsp_int_ie2_r(void) +{ + return 0x160108; +} +static inline u32 hsp_int_ie3_r(void) +{ + return 0x16010c; +} +static inline u32 hsp_int_ie4_r(void) +{ + return 0x160110; +} +static inline u32 hsp_int_external_r(void) +{ + return 0x160300; +} +static inline u32 hsp_int_internal_r(void) +{ + return 0x160304; +} +static inline u32 hsp_sm0_r(void) +{ + return 0x170000; +} +static inline u32 hsp_sm1_r(void) +{ + return 0x178000; +} +static inline u32 hsp_sm2_r(void) +{ + return 0x180000; +} +static inline u32 hsp_sm3_r(void) +{ + return 0x188000; +} +static inline u32 hsp_sm4_r(void) +{ + return 0x190000; +} +static inline u32 hsp_sm5_r(void) +{ + return 0x198000; +} +static inline u32 hsp_sm6_r(void) +{ + return 0x1a0000; +} +static inline u32 hsp_sm7_r(void) +{ + return 0x1a8000; +} +static inline u32 hsp_ss0_state_r(void) +{ + return 0x1b0000; +} +static inline u32 hsp_ss0_set_r(void) +{ + return 0x1b0004; +} +static inline u32 hsp_ss0_clr_r(void) +{ + return 0x1b0008; +} +static inline u32 hsp_ss1_state_r(void) +{ + return 0x1c0000; +} +static inline u32 hsp_ss1_set_r(void) +{ + return 0x1c0004; +} +static inline u32 hsp_ss1_clr_r(void) +{ + return 0x1c0008; +} +static inline u32 hsp_ss2_state_r(void) +{ + return 0x1d0000; +} +static inline u32 hsp_ss2_set_r(void) +{ + return 0x1d0004; +} +static inline u32 hsp_ss2_clr_r(void) +{ + return 0x1d0008; +} +static inline u32 hsp_ss3_state_r(void) +{ + return 0x1e0000; +} +static inline u32 hsp_ss3_set_r(void) +{ + return 0x1e0004; +} +static inline u32 hsp_ss3_clr_r(void) +{ + return 0x1e0008; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_proc_pva.h b/drivers/video/tegra/host/pva/hw_proc_pva.h new file mode 100644 index 00000000..e4c6145d --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_proc_pva.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . 
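The shared mailboxes and shared semaphores sit at regular strides (0x8000 and 0x10000 respectively, inferred from the offsets above). Generalized accessors, assuming those strides hold for every instance:

#include <stdint.h>
typedef uint32_t u32;

#define HSP_SM0_OFFSET 0x170000U   /* hsp_sm0_r() */
#define HSP_SM_STRIDE  0x008000U   /* hsp_sm1_r() - hsp_sm0_r() */
#define HSP_SS0_OFFSET 0x1b0000U   /* hsp_ss0_state_r() */
#define HSP_SS_STRIDE  0x010000U   /* hsp_ss1_state_r() - hsp_ss0_state_r() */

/* Offset of shared mailbox 'n' (0..7), equivalent to hsp_smN_r(). */
static u32 hsp_sm_r(u32 n)
{
        return HSP_SM0_OFFSET + n * HSP_SM_STRIDE;
}

/* State/set/clear register offsets of shared semaphore 'n' (0..3). */
static u32 hsp_ss_state_r(u32 n) { return HSP_SS0_OFFSET + n * HSP_SS_STRIDE; }
static u32 hsp_ss_set_r(u32 n)   { return hsp_ss_state_r(n) + 0x4; }
static u32 hsp_ss_clr_r(u32 n)   { return hsp_ss_state_r(n) + 0x8; }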
+ * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_proc_pva_h_ +#define _hw_proc_pva_h_ + +static inline u32 proc_cpuhalt_r(void) +{ + return 0x30000; +} +static inline u32 proc_cpuhalt_ncpuhalt_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 proc_cpuhalt_ncpuhalt_halted_v(void) +{ + return 0x00000000; +} +static inline u32 proc_cpuhalt_ncpuhalt_done_v(void) +{ + return 0x00000001; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_sec_pva_v1.h b/drivers/video/tegra/host/pva/hw_sec_pva_v1.h new file mode 100644 index 00000000..36b19893 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_sec_pva_v1.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . 
+ */ +#ifndef _hw_sec_pva_v1_h_ +#define _hw_sec_pva_v1_h_ + +static inline u32 v1_sec_lic_intr_enable_r(void) +{ + return 0x2804CU; +} +static inline u32 sec_lic_intr_enable_dma0_f(u32 v) +{ + return (v & 0x1) << 9; +} +static inline u32 sec_lic_intr_enable_dma1_f(u32 v) +{ + return (v & 0x1) << 8; +} +static inline u32 sec_lic_intr_enable_actmon_f(u32 v) +{ + return (v & 0x1) << 7; +} +static inline u32 sec_lic_intr_enable_h1x_f(u32 v) +{ + return (v & 0x7) << 5; +} +static inline u32 sec_lic_intr_enable_hsp_f(u32 v) +{ + return (v & 0xf) << 1; +} +static inline u32 sec_lic_intr_enable_wdt_f(u32 v) +{ + return (v & 0x1) << 0; +} +static inline u32 v1_sec_lic_intr_status_r(void) +{ + return 0x28054U; +} +#endif diff --git a/drivers/video/tegra/host/pva/hw_sec_pva_v2.h b/drivers/video/tegra/host/pva/hw_sec_pva_v2.h new file mode 100644 index 00000000..826a67a6 --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_sec_pva_v2.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _o(void) : Returns the offset for element . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . + * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ +#ifndef _hw_sec_pva_v2_h_ +#define _hw_sec_pva_v2_h_ + +#define SEC_BASE 0x20000U + +static inline u32 v2_sec_lic_intr_enable_r(void) +{ + return SEC_BASE + 0x8064U; +} + +static inline u32 v2_sec_lic_intr_status_r(void) +{ + return SEC_BASE + 0x806CU; +} + +#endif diff --git a/drivers/video/tegra/host/pva/hw_vmem_pva.h b/drivers/video/tegra/host/pva/hw_vmem_pva.h new file mode 100644 index 00000000..561d833b --- /dev/null +++ b/drivers/video/tegra/host/pva/hw_vmem_pva.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
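The v1 LIC interrupt enable register is composed from the field helpers above. For example, enabling the watchdog, all four HSP interrupts and both DMA interrupts while leaving the actmon and h1x sources masked (an illustrative value, not necessarily what the driver programs):

#include <stdint.h>
typedef uint32_t u32;

static inline u32 sec_lic_intr_enable_wdt_f(u32 v)  { return (v & 0x1) << 0; }
static inline u32 sec_lic_intr_enable_hsp_f(u32 v)  { return (v & 0xf) << 1; }
static inline u32 sec_lic_intr_enable_dma1_f(u32 v) { return (v & 0x1) << 8; }
static inline u32 sec_lic_intr_enable_dma0_f(u32 v) { return (v & 0x1) << 9; }

/* Example enable word for v1_sec_lic_intr_enable_r(). */
static u32 example_lic_intr_enable(void)
{
        return sec_lic_intr_enable_wdt_f(1) |
               sec_lic_intr_enable_hsp_f(0xf) |
               sec_lic_intr_enable_dma0_f(1) |
               sec_lic_intr_enable_dma1_f(1);
}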
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _hw_vmem_pva_h_ +#define _hw_vmem_pva_h_ + +#define NUM_HEM_GEN 2U +#define VMEM_REGION_COUNT 3U +#define T19X_VMEM0_START 0x40U +#define T19X_VMEM0_END 0x10000U +#define T19X_VMEM1_START 0x40000U +#define T19X_VMEM1_END 0x50000U +#define T19X_VMEM2_START 0x80000U +#define T19X_VMEM2_END 0x90000U + +#define T23x_VMEM0_START 0x40U +#define T23x_VMEM0_END 0x20000U +#define T23x_VMEM1_START 0x40000U +#define T23x_VMEM1_END 0x60000U +#define T23x_VMEM2_START 0x80000U +#define T23x_VMEM2_END 0xA0000U + +#endif diff --git a/drivers/video/tegra/host/pva/nvpva_buffer.c b/drivers/video/tegra/host/pva/nvpva_buffer.c new file mode 100644 index 00000000..9d9beba5 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_buffer.c @@ -0,0 +1,607 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
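The VMEM windows above bound the VMEM offsets a task may reference; T23x simply widens each window. A sketch of a range check against the T19x table (illustrative helper, not part of this patch):

#include <stdint.h>
#include <stdbool.h>

#define VMEM_REGION_COUNT 3U

struct vmem_region {
        uint32_t start;
        uint32_t end;
};

/* T19x VMEM windows from hw_vmem_pva.h. */
static const struct vmem_region t19x_vmem[VMEM_REGION_COUNT] = {
        { 0x40U,    0x10000U },
        { 0x40000U, 0x50000U },
        { 0x80000U, 0x90000U },
};

/* True if [addr, addr + size) lies entirely inside one VMEM window. */
static bool vmem_range_valid(uint32_t addr, uint32_t size)
{
        uint32_t i;

        for (i = 0; i < VMEM_REGION_COUNT; i++) {
                if (addr >= t19x_vmem[i].start &&
                    addr + size <= t19x_vmem[i].end)
                        return true;
        }
        return false;
}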
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pva.h" +#include "nvpva_buffer.h" + +/** + * nvpva_vm_buffer - Virtual mapping information for a buffer + * + * @attach: Pointer to dma_buf_attachment struct + * @dmabuf: Pointer to dma_buf struct + * @sgt: Pointer to sg_table struct + * @addr: Physical address of the buffer + * @size: Size of the buffer + * @user_map_count: Buffer reference count from user space + * @submit_map_count: Buffer reference count from task submit + * @rb_node: pinned buffer node + * @list_head: List entry + * + */ +struct nvpva_vm_buffer { + struct dma_buf_attachment *attach; + struct dma_buf *dmabuf; + struct sg_table *sgt; + dma_addr_t addr; + size_t size; + enum nvpva_buffers_heap heap; + s32 user_map_count; + s32 submit_map_count; + u32 id; + dma_addr_t user_addr; + u64 user_offset; + u64 user_size; + struct rb_node rb_node; + struct rb_node rb_node_id; + struct list_head list_head; +}; + +static uint32_t get_unique_id(struct nvpva_buffers *nvpva_buffers) +{ + struct nvhost_device_data *pdata = + platform_get_drvdata(nvpva_buffers->pdev); + struct pva *pva = pdata->private_data; + uint32_t id = rmos_find_first_zero_bit(nvpva_buffers->ids, + NVPVA_MAX_NUM_UNIQUE_IDS); + if (id == NVPVA_MAX_NUM_UNIQUE_IDS) { + nvpva_dbg_fn(pva, "No buffer ID available"); + id = 0; + goto out; + } + + rmos_set_bit32((id%NVPVA_ID_SEGMENT_SIZE), + &nvpva_buffers->ids[id/NVPVA_ID_SEGMENT_SIZE]); + + ++(nvpva_buffers->num_assigned_ids); + id |= 0x554c0000; +out: + return id; +} + +static int32_t put_unique_id(struct nvpva_buffers *nvpva_buffers, uint32_t id) +{ + id &= (~0x554c0000); + if (!rmos_test_bit32((id % 32), &nvpva_buffers->ids[id / 32])) + return -1; + + rmos_clear_bit32((id % 32), &nvpva_buffers->ids[id/32]); + --(nvpva_buffers->num_assigned_ids); + + return 0; +} + +#define COMPARE_AND_ASSIGN(a1, a2, b1, b2, c1, c2, curr, n1, n2) \ + do { \ + is_equal = false; \ + if ((a1) > (a2)) \ + (curr) = (n1); \ + else if ((a1) < (a2)) \ + (curr) = (n2); \ + else if ((b1) > (b2)) \ + (curr) = (n1); \ + else if ((b1) < (b2)) \ + (curr) = (n2); \ + else if ((c1) > (c2)) \ + (curr) = (n1); \ + else if ((c1) < (c2)) \ + (curr) = (n2); \ + else \ + is_equal = true; \ + } while (0) + + + +static struct nvpva_vm_buffer * +nvpva_find_map_buffer(struct nvpva_buffers *nvpva_buffers, + u64 offset, + u64 size, + struct dma_buf *dmabuf) +{ + struct rb_root *root = &nvpva_buffers->rb_root; + struct rb_node *node = root->rb_node; + struct nvpva_vm_buffer *vm; + bool is_equal = false; + + /* check in a sorted tree */ + while (node) { + vm = rb_entry(node, struct nvpva_vm_buffer, + rb_node); + COMPARE_AND_ASSIGN(vm->dmabuf, + dmabuf, + vm->user_offset, + offset, + vm->user_size, + size, + node, + node->rb_left, + node->rb_right); + if (is_equal) + return vm; + } + + return NULL; +} + +static struct nvpva_vm_buffer *nvpva_find_map_buffer_id( + struct nvpva_buffers *nvpva_buffers, u32 id) +{ + struct rb_root *root = &nvpva_buffers->rb_root_id; + struct rb_node *node = root->rb_node; + struct nvpva_vm_buffer *vm; + + /* check in a sorted tree */ + while (node) { + vm = rb_entry(node, struct nvpva_vm_buffer, + rb_node_id); + + if (vm->id > id) + node = node->rb_left; + else if (vm->id != id) + node = node->rb_right; + else + return vm; + } + + return NULL; +} +static void nvpva_buffer_insert_map_buffer( + struct nvpva_buffers *nvpva_buffers, + struct nvpva_vm_buffer *new_vm) +{ + struct rb_node **new_node = &(nvpva_buffers->rb_root.rb_node); + struct rb_node 
*parent = NULL; + bool is_equal = false; + + /* Figure out where to put the new node */ + while (*new_node) { + struct nvpva_vm_buffer *vm = + rb_entry(*new_node, struct nvpva_vm_buffer, + rb_node); + parent = *new_node; + + COMPARE_AND_ASSIGN(vm->dmabuf, + new_vm->dmabuf, + vm->user_offset, + new_vm->user_offset, + vm->user_size, + new_vm->user_size, + new_node, + &((*new_node)->rb_left), + &((*new_node)->rb_right)); + if (is_equal) + new_node = &((*new_node)->rb_right); + } + + /* Add new node and rebalance tree */ + rb_link_node(&new_vm->rb_node, parent, new_node); + rb_insert_color(&new_vm->rb_node, &nvpva_buffers->rb_root); + + /* Add the node into a list */ + list_add_tail(&new_vm->list_head, &nvpva_buffers->list_head); +} + +static void nvpva_buffer_insert_map_buffer_id( + struct nvpva_buffers *nvpva_buffers, + struct nvpva_vm_buffer *new_vm) +{ + struct rb_node **new_node = &(nvpva_buffers->rb_root_id.rb_node); + struct rb_node *parent = NULL; + + /* Figure out where to put the new node */ + while (*new_node) { + struct nvpva_vm_buffer *vm = + rb_entry(*new_node, struct nvpva_vm_buffer, + rb_node_id); + parent = *new_node; + + if (vm->id > new_vm->id) + new_node = &((*new_node)->rb_left); + else + new_node = &((*new_node)->rb_right); + } + + /* Add new node and rebalance tree */ + rb_link_node(&new_vm->rb_node_id, parent, new_node); + rb_insert_color(&new_vm->rb_node_id, &nvpva_buffers->rb_root_id); +} + +static int +nvpva_buffer_map(struct platform_device *pdev, + struct platform_device *pdev_priv, + struct platform_device *pdev_user, + struct dma_buf *dmabuf, + u64 offset, + u64 size, + struct nvpva_vm_buffer *vm, + bool is_user) +{ + + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + const dma_addr_t cvnas_begin = nvcvnas_get_cvsram_base(); + const dma_addr_t cvnas_end = cvnas_begin + nvcvnas_get_cvsram_size(); + struct dma_buf_attachment *attach; + struct sg_table *sgt; + dma_addr_t dma_addr; + dma_addr_t phys_addr; + int err = 0; + + nvpva_dbg_fn(pva, ""); + + get_dma_buf(dmabuf); + if (is_user) + attach = dma_buf_attach(dmabuf, &pdev_user->dev); + else + attach = dma_buf_attach(dmabuf, &pdev_priv->dev); + + if (IS_ERR_OR_NULL(attach)) { + err = PTR_ERR(dmabuf); + dev_err(&pdev->dev, "dma_attach failed: %d\n", err); + goto buf_attach_err; + } + + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR_OR_NULL(sgt)) { + err = PTR_ERR(sgt); + dev_err(&pdev->dev, "dma mapping failed: %d\n", err); + goto buf_map_err; + } + + phys_addr = sg_phys(sgt->sgl); + dma_addr = sg_dma_address(sgt->sgl); + + /* Determine the heap */ + if (phys_addr >= cvnas_begin && phys_addr < cvnas_end) + vm->heap = NVPVA_BUFFERS_HEAP_CVNAS; + else + vm->heap = NVPVA_BUFFERS_HEAP_DRAM; + + /* + * If dma address is not available or heap is in CVNAS, use the + * physical address. 
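get_unique_id() above tags every allocated bitmap slot with the 0x554c0000 marker, and put_unique_id() strips it again before indexing the ids[] words by 32-bit segments. A small sketch of that mapping, assuming NVPVA_ID_SEGMENT_SIZE is the 32 implied by the modulus and division in put_unique_id():

#include <stdint.h>

#define NVPVA_ID_MARKER       0x554c0000U
#define NVPVA_ID_SEGMENT_SIZE 32U   /* put_unique_id() divides/mods by 32 */

/* Recover the bitmap word index and bit position from a buffer ID
 * handed out by get_unique_id().
 */
static void id_to_slot(uint32_t id, uint32_t *word, uint32_t *bit)
{
        uint32_t raw = id & ~NVPVA_ID_MARKER;

        *word = raw / NVPVA_ID_SEGMENT_SIZE;
        *bit  = raw % NVPVA_ID_SEGMENT_SIZE;
}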
+ */ + if (!dma_addr || vm->heap == NVPVA_BUFFERS_HEAP_CVNAS) + dma_addr = phys_addr; + + vm->sgt = sgt; + vm->attach = attach; + vm->dmabuf = dmabuf; + vm->addr = dma_addr; + vm->user_addr = dma_addr + offset; + + vm->size = dmabuf->size; + vm->user_offset = offset; + vm->user_size = size; + vm->user_map_count = 1; + + if (is_user) + nvpva_dbg_fn(pva, "mapped user @ base %llx, uaddr %llx, size %llx\n", + (u64) dma_addr, (u64) vm->user_addr, size); + else + nvpva_dbg_fn(pva, "mapped priv @ base %llx, uaddr %llx, size %llx\n", + (u64) dma_addr, (u64) vm->user_addr, size); + + return err; + +buf_map_err: + dma_buf_detach(dmabuf, attach); +buf_attach_err: + dma_buf_put(dmabuf); + return err; +} + +static void nvpva_free_buffers(struct kref *kref) +{ + struct nvpva_buffers *nvpva_buffers = + container_of(kref, struct nvpva_buffers, kref); + + kfree(nvpva_buffers); +} + +static void nvpva_buffer_unmap(struct nvpva_buffers *nvpva_buffers, + struct nvpva_vm_buffer *vm) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(nvpva_buffers->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + + if ((vm->user_map_count != 0) || (vm->submit_map_count != 0)) + return; + + dma_buf_unmap_attachment(vm->attach, vm->sgt, DMA_BIDIRECTIONAL); + dma_buf_detach(vm->dmabuf, vm->attach); + dma_buf_put(vm->dmabuf); + + rb_erase(&vm->rb_node, &nvpva_buffers->rb_root); + list_del(&vm->list_head); + rb_erase(&vm->rb_node_id, &nvpva_buffers->rb_root_id); + put_unique_id(nvpva_buffers, vm->id); + + kfree(vm); +} + +struct nvpva_buffers +*nvpva_buffer_init(struct platform_device *pdev, + struct platform_device *pdev_priv, + struct platform_device *pdev_user) +{ + struct nvpva_buffers *nvpva_buffers; + int err = 0; + + nvpva_buffers = kzalloc(sizeof(struct nvpva_buffers), GFP_KERNEL); + if (!nvpva_buffers) { + err = -ENOMEM; + goto nvpva_buffer_init_err; + } + + nvpva_buffers->pdev = pdev; + nvpva_buffers->pdev_priv = pdev_priv; + nvpva_buffers->pdev_user = pdev_user; + mutex_init(&nvpva_buffers->mutex); + nvpva_buffers->rb_root = RB_ROOT; + nvpva_buffers->rb_root_id = RB_ROOT; + INIT_LIST_HEAD(&nvpva_buffers->list_head); + kref_init(&nvpva_buffers->kref); + memset(nvpva_buffers->ids, 0, sizeof(nvpva_buffers->ids)); + nvpva_buffers->num_assigned_ids = 0; + + return nvpva_buffers; + +nvpva_buffer_init_err: + return ERR_PTR(err); +} + +int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, + u32 count, + struct dma_buf **dmabuf, + dma_addr_t *paddr, + u64 *psize, + enum nvpva_buffers_heap *heap) +{ + struct nvpva_vm_buffer *vm; + int i = 0; + + kref_get(&nvpva_buffers->kref); + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + vm = nvpva_find_map_buffer_id(nvpva_buffers, ids[i]); + if (vm == NULL) + goto submit_err; + + vm->submit_map_count++; + paddr[i] = vm->user_addr; + dmabuf[i] = vm->dmabuf; + psize[i] = vm->user_size; + + /* Return heap only if requested */ + if (heap != NULL) + heap[i] = vm->heap; + } + + mutex_unlock(&nvpva_buffers->mutex); + return 0; + +submit_err: + mutex_unlock(&nvpva_buffers->mutex); + + count = i; + + nvpva_buffer_submit_unpin_id(nvpva_buffers, ids, count); + + return -EINVAL; +} + +int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 segment, + u32 count, + u32 *id, + u32 *eerr) +{ + struct nvpva_vm_buffer *vm; + int i = 0; + int err = 0; + + *eerr = 0; + + if (segment >= NVPVA_SEGMENT_MAX) + return -EINVAL; + + mutex_lock(&nvpva_buffers->mutex); + 
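+	/*
+	 * For each handle: validate that offset + size stays within the dmabuf,
+	 * reuse an existing mapping when the same (dmabuf, offset, size) triple
+	 * is already pinned, otherwise allocate a new nvpva_vm_buffer, assign it
+	 * a unique ID and map it.  On failure, buffers pinned earlier in this
+	 * call are unpinned before returning.
+	 */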
+ for (i = 0; i < count; i++) { + u64 limit; + + if (U64_MAX - size[i] < offset[i]) { + err = -EFAULT; + goto unpin; + } else { + limit = (size[i] + offset[i]); + } + + if (dmabufs[i]->size < limit) { + err = -EFAULT; + goto unpin; + } + + vm = nvpva_find_map_buffer(nvpva_buffers, + offset[i], + size[i], + dmabufs[i]); + if (vm) { + vm->user_map_count++; + id[i] = vm->id; + continue; + } + + vm = kzalloc(sizeof(struct nvpva_vm_buffer), GFP_KERNEL); + if (!vm) { + err = -ENOMEM; + goto unpin; + } + + vm->id = get_unique_id(nvpva_buffers); + if (vm->id == 0) { + *eerr = NVPVA_ENOSLOT; + err = -EINVAL; + goto free_vm; + } + + err = nvpva_buffer_map(nvpva_buffers->pdev, + nvpva_buffers->pdev_priv, + nvpva_buffers->pdev_user, + dmabufs[i], + offset[i], + size[i], + vm, + (segment == NVPVA_SEGMENT_USER)); + if (err) { + put_unique_id(nvpva_buffers, vm->id); + goto free_vm; + } + + nvpva_buffer_insert_map_buffer(nvpva_buffers, vm); + nvpva_buffer_insert_map_buffer_id(nvpva_buffers, vm); + id[i] = vm->id; + } + + mutex_unlock(&nvpva_buffers->mutex); + + return err; + +free_vm: + kfree(vm); +unpin: + mutex_unlock(&nvpva_buffers->mutex); + + /* free pinned buffers */ + count = i; + nvpva_buffer_unpin(nvpva_buffers, dmabufs, offset, size, count); + + return err; +} + +void nvpva_buffer_submit_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, u32 count) +{ + struct nvpva_vm_buffer *vm; + int i = 0; + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + + vm = nvpva_find_map_buffer_id(nvpva_buffers, ids[i]); + if (vm == NULL) + continue; + + --vm->submit_map_count; + if ((vm->submit_map_count) < 0) + vm->submit_map_count = 0; + + nvpva_buffer_unmap(nvpva_buffers, vm); + } + + mutex_unlock(&nvpva_buffers->mutex); + + kref_put(&nvpva_buffers->kref, nvpva_free_buffers); +} + +void +nvpva_buffer_unpin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 count) +{ + int i = 0; + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + struct nvpva_vm_buffer *vm = NULL; + + vm = nvpva_find_map_buffer(nvpva_buffers, + offset[i], + size[i], + dmabufs[i]); + if (vm == NULL) + continue; + + --vm->user_map_count; + if (vm->user_map_count < 0) + vm->user_map_count = 0; + + nvpva_buffer_unmap(nvpva_buffers, vm); + } + + mutex_unlock(&nvpva_buffers->mutex); +} + +void nvpva_buffer_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, u32 count) +{ + int i = 0; + + mutex_lock(&nvpva_buffers->mutex); + + for (i = 0; i < count; i++) { + struct nvpva_vm_buffer *vm = NULL; + + vm = nvpva_find_map_buffer_id(nvpva_buffers, ids[i]); + if (vm == NULL) + continue; + + --vm->user_map_count; + if (vm->user_map_count < 0) + vm->user_map_count = 0; + + nvpva_buffer_unmap(nvpva_buffers, vm); + } + + mutex_unlock(&nvpva_buffers->mutex); +} + +void nvpva_buffer_release(struct nvpva_buffers *nvpva_buffers) +{ + struct nvpva_vm_buffer *vm, *n; + + /* Go through each entry and remove it safely */ + mutex_lock(&nvpva_buffers->mutex); + list_for_each_entry_safe(vm, n, &nvpva_buffers->list_head, + list_head) { + vm->user_map_count = 0; + nvpva_buffer_unmap(nvpva_buffers, vm); + } + mutex_unlock(&nvpva_buffers->mutex); + + kref_put(&nvpva_buffers->kref, nvpva_free_buffers); +} diff --git a/drivers/video/tegra/host/pva/nvpva_buffer.h b/drivers/video/tegra/host/pva/nvpva_buffer.h new file mode 100644 index 00000000..2a535533 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_buffer.h @@ -0,0 +1,224 @@ +/* + * NVPVA Buffer Management Header + * + 
* Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVPVA_NVPVA_BUFFER_H__ +#define __NVPVA_NVPVA_BUFFER_H__ + +#include +#include "pva_bit_helpers.h" + +enum nvpva_buffers_heap { + NVPVA_BUFFERS_HEAP_DRAM = 0, + NVPVA_BUFFERS_HEAP_CVNAS +}; + +/** + * @brief Information needed for buffers + * + * pdev Pointer to NVHOST device + * rb_root RB tree root for of all the buffers used by a file pointer + * list List for traversing through all the buffers + * mutex Mutex for the buffer tree and the buffer list + * kref Reference count for the bufferlist + * ids unique ID assigned to a pinned buffer + */ +#define NVPVA_ID_SEGMENT_SIZE 32 +#define NVPVA_MAX_NUM_UNIQUE_IDS (NVPVA_ID_SEGMENT_SIZE * 1024) +#define NVPVA_NUM_ID_SEGMENTS \ + (NVPVA_MAX_NUM_UNIQUE_IDS/NVPVA_ID_SEGMENT_SIZE) +struct nvpva_buffers { + struct platform_device *pdev; + struct platform_device *pdev_priv; + struct platform_device *pdev_user; + struct list_head list_head; + struct rb_root rb_root; + struct rb_root rb_root_id; + struct mutex mutex; + struct kref kref; + uint32_t ids[NVPVA_NUM_ID_SEGMENTS]; + uint32_t num_assigned_ids; +}; + +/** + * @brief Initialize the nvpva_buffer per open request + * + * This function allocates nvpva_buffers struct and init the bufferlist + * and mutex. + * + * @param nvpva_buffers Pointer to nvpva_buffers struct + * @return nvpva_buffers pointer on success + * or negative on error + * + */ +struct nvpva_buffers +*nvpva_buffer_init(struct platform_device *pdev, + struct platform_device *pdev_priv, + struct platform_device *pdev_user); + +/** + * @brief Pin the memhandle using dma_buf functions + * + * This function maps the buffer memhandle list passed from user side + * to device iova. + * + * @param nvpva_buffers Pointer to nvpva_buffers struct + * @param dmabufs Pointer to dmabuffer list + * @param offset pointer to offsets of regions to be pinned + * @param size pointer to sizes of regions to be pinned + * @param count Number of memhandles in the list + * @return 0 on success or negative on error + * + */ +int nvpva_buffer_pin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 segment, + u32 count, + u32 *id, + u32 *eerr); +/** + * @brief UnPins the mapped address space. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabufs Pointer to dmabuffer list + * @param count Pointer to offset list + * @param offset Pointer to size list + * @param count Number of memhandles in the list + * @return None + * + */ +void +nvpva_buffer_unpin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, + u64 *offset, + u64 *size, + u32 count); +/** + * @brief UnPins the mapped address space. 
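+ *
+ * Decreases the user reference count of each buffer identified by an ID in
+ * the list; the underlying mapping is released only once neither user space
+ * nor an in-flight task submit holds a reference to it.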
+ * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param ids Pointer to id list + * @param count Number of memhandles in the list + * @param id pointer to variable where assigned + * ID is returned + * @return None + * + */ +void nvpva_buffer_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, + u32 count); + +/** + * @brief Pin the mapped buffer for a task submit + * + * This function increased the reference count for a mapped buffer during + * task submission. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabufs Pointer to dmabuffer list + * @param count Number of memhandles in the list + * @param paddr Pointer to IOVA list + * @param psize Pointer to size of buffer to return + * @param heap Pointer to a list of heaps. This is + * filled by the routine. + * + * @return 0 on success or negative on error + * + */ +int nvpva_buffer_submit_pin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, u32 count, + dma_addr_t *paddr, size_t *psize, + enum nvpva_buffers_heap *heap); +/** + * @brief Pin the mapped buffer for a task submit + * + * This function increased the reference count for a mapped buffer during + * task submission. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param ids Pointer to id list + * @param count Number of memhandles in the list + * @param paddr Pointer to IOVA list + * @param psize Pointer to size of buffer to return + * @param heap Pointer to a list of heaps. This is + * filled by the routine. + * + * @return 0 on success or negative on error + * + */ +int nvpva_buffer_submit_pin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, + u32 count, + struct dma_buf **dmabuf, + dma_addr_t *paddr, + u64 *psize, + enum nvpva_buffers_heap *heap); + +/** + * @brief UnPins the mapped address space on task completion. + * + * This function decrease the reference count for a mapped buffer when the + * task get completed or aborted. + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabufs Pointer to dmabuffer list + * @param count Number of memhandles in the list + * @return None + * + */ +void nvpva_buffer_submit_unpin(struct nvpva_buffers *nvpva_buffers, + struct dma_buf **dmabufs, u32 count); + +/** + * @brief UnPins the mapped address space on task completion. + * + * This function decrease the reference count for a mapped buffer when the + * task get completed or aborted. 
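+ * The buffer is actually unmapped only when both the user-space and submit
+ * reference counts have dropped to zero.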
+ * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param ids Pointer to dmabuffer list + * @param count Number of memhandles in the list + * @return None + * + */ +void nvpva_buffer_submit_unpin_id(struct nvpva_buffers *nvpva_buffers, + u32 *ids, u32 count); + +/** + * @brief Drop a user reference to buffer structure + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @return None + * + */ +void nvpva_buffer_release(struct nvpva_buffers *nvpva_buffers); + +/** + * @brief Returns dma buf and dma addr for a given handle + * + * @param nvpva_buffers Pointer to nvpva_buffer struct + * @param dmabuf dma buf pointer to search for + * @param addr dma_addr_t pointer to return + * @return 0 on success or negative on error + * + */ +int nvpva_get_iova_addr(struct nvpva_buffers *nvpva_buffers, + struct dma_buf *dmabuf, dma_addr_t *addr); + +#endif /*__NVPVA_NVPVA_BUFFER_H__ */ diff --git a/drivers/video/tegra/host/pva/nvpva_client.c b/drivers/video/tegra/host/pva/nvpva_client.c new file mode 100644 index 00000000..04537871 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_client.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "pva.h" +#include "nvpva_buffer.h" +#include "nvpva_client.h" +#include "pva_iommu_context_dev.h" + +/* Maximum contexts KMD creates per engine */ +#define NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG (MAX_PVA_CLIENTS) + +/* Search if the pid already have a context + * The function does below things; + * 1. loop through each clients in the client array and validates pid. + * 2. 
Also tracks the first free client in the array + */ +static struct nvpva_client_context * +client_context_search_locked(struct platform_device *pdev, + struct pva *dev, + pid_t pid) +{ + struct nvpva_client_context *c_node = NULL; + uint32_t i; + bool shared_cntxt_dev; + + for (i = 0U; i < NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; i++) { + c_node = &dev->clients[i]; + if ((c_node->ref_count != 0U) && (c_node->pid == pid)) + return c_node; + } + + for (i = 0U; i < NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; i++) { + c_node = &dev->clients[i]; + if (c_node->ref_count == 0U) + break; + } + + if (i >= NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG) + return NULL; + + shared_cntxt_dev = i > (NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG - 3); + + c_node->pid = pid; + c_node->pva = dev; + c_node->curr_sema_value = 0; + mutex_init(&c_node->sema_val_lock); + if (dev->version == PVA_HW_GEN2) { + c_node->cntxt_dev = + nvpva_iommu_context_dev_allocate(NULL, + 0, + shared_cntxt_dev); + + if (c_node->cntxt_dev == NULL) + return NULL; + + c_node->sid_index = nvpva_get_id_idx(dev, c_node->cntxt_dev) - 1; + } else { + c_node->cntxt_dev = pdev; + c_node->sid_index = 0; + } + + c_node->elf_ctx.cntxt_dev = c_node->cntxt_dev; + c_node->buffers = nvpva_buffer_init(dev->pdev, dev->aux_pdev, c_node->cntxt_dev); + if (IS_ERR(c_node->buffers)) { + dev_err(&dev->pdev->dev, + "failed to init nvhost buffer for client:%lu", + PTR_ERR(c_node->buffers)); + if (dev->version == PVA_HW_GEN2) + nvpva_iommu_context_dev_release(c_node->cntxt_dev); + c_node = NULL; + } + + return c_node; +} + +/* Allocate a client context from the client array + * The function does below things; + * 1. Search for an existing client context, if not found then a free client + * 2. Allocate a buffer pool if needed + */ +struct nvpva_client_context +*nvpva_client_context_alloc(struct platform_device *pdev, + struct pva *dev, + pid_t pid) +{ + struct nvpva_client_context *client = NULL; + + mutex_lock(&dev->clients_lock); + client = client_context_search_locked(pdev, dev, pid); + if (client != NULL) + client->ref_count += 1; + + mutex_unlock(&dev->clients_lock); + + return client; +} + +void nvpva_client_context_get(struct nvpva_client_context *client) +{ + struct pva *dev = client->pva; + + mutex_lock(&dev->clients_lock); + client->ref_count += 1; + mutex_unlock(&dev->clients_lock); +} + +/* Free a client context from the client array */ +static void +nvpva_client_context_free_locked(struct nvpva_client_context *client) +{ + nvpva_buffer_release(client->buffers); + nvpva_iommu_context_dev_release(client->cntxt_dev); + mutex_destroy(&client->sema_val_lock); + client->buffers = NULL; + client->pva = NULL; + client->pid = 0; + pva_unload_all_apps(&client->elf_ctx); +} + +/* Release the client context + * The function does below things; + * 1. Reduce the active Q count + * 2. Initiate freeing if the count is 0 + */ +void nvpva_client_context_put(struct nvpva_client_context *client) +{ + struct pva *pva = client->pva; + + mutex_lock(&pva->clients_lock); + client->ref_count--; + if (client->ref_count == 0U) + nvpva_client_context_free_locked(client); + + mutex_unlock(&pva->clients_lock); +} + +/* De-initialize the client array for the device + * The function does below things; + * 1. Free all the remaining buffer pools if any + * 2. 
Release the memory + */ +void nvpva_client_context_deinit(struct pva *dev) +{ + struct nvpva_client_context *client; + uint32_t max_clients; + uint32_t i; + + max_clients = NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; + if (dev->clients != NULL) { + mutex_lock(&dev->clients_lock); + for (i = 0U; i < max_clients; i++) { + client = &dev->clients[i]; + pva_vpu_deinit(&client->elf_ctx); + } + mutex_unlock(&dev->clients_lock); + mutex_destroy(&dev->clients_lock); + kfree(dev->clients); + dev->clients = NULL; + } +} + +/* Initialize a set of clients for the device + * The function does below things; + * 1. Allocate memory for maximum number of clients + * 2. Assign stream ID for each client contexts + */ +int nvpva_client_context_init(struct pva *pva) +{ + struct nvpva_client_context *clients = NULL; + uint32_t max_clients; + uint32_t j = 0U; + int err = 0; + + max_clients = NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; + clients = kcalloc(max_clients, sizeof(struct nvpva_client_context), + GFP_KERNEL); + if (clients == NULL) { + err = -ENOMEM; + goto done; + } + mutex_init(&pva->clients_lock); + for (j = 0U; j < NVPVA_CLIENT_MAX_CONTEXTS_PER_ENG; j++) { + err = pva_vpu_init(pva, &clients[j].elf_ctx); + if (err < 0) { + dev_err(&pva->pdev->dev, + "No memory for allocating VPU parsing"); + goto vpu_init_fail; + } + } + + pva->clients = clients; + return err; + +vpu_init_fail: + while (j--) + pva_vpu_deinit(&clients[j].elf_ctx); + + kfree(clients); + mutex_destroy(&pva->clients_lock); +done: + return err; +} diff --git a/drivers/video/tegra/host/pva/nvpva_client.h b/drivers/video/tegra/host/pva/nvpva_client.h new file mode 100644 index 00000000..1b217ede --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_client.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef NVPVA_CLIENT_H +#define NVPVA_CLIENT_H + +#include +#include +#include "pva_vpu_exe.h" + +struct pva; + +struct nvpva_client_context { + /* Reference to the device*/ + struct pva *pva; + + /* context device */ + struct platform_device *cntxt_dev; + + /* PID of client process which uses this context */ + pid_t pid; + + /* This tracks active users */ + u32 ref_count; + + u32 sid_index; + + /* Data structure to track pinned buffers for this client */ + struct nvpva_buffers *buffers; + + u32 curr_sema_value; + struct mutex sema_val_lock; + + /* Data structure to track elf context for vpu parsing */ + struct nvpva_elf_context elf_ctx; +}; + +struct pva; +int nvpva_client_context_init(struct pva *pva); +void nvpva_client_context_deinit(struct pva *pva); +void nvpva_client_context_get(struct nvpva_client_context *client); +struct nvpva_client_context +*nvpva_client_context_alloc(struct platform_device *pdev, + struct pva *dev, + pid_t pid); +void nvpva_client_context_put(struct nvpva_client_context *client); + +#endif /* NVPVA_CLIENT_H */ diff --git a/drivers/video/tegra/host/pva/nvpva_elf_parser.c b/drivers/video/tegra/host/pva/nvpva_elf_parser.c new file mode 100644 index 00000000..bb69ba6b --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_elf_parser.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#if IS_ENABLED(CONFIG_TEGRA_GRHOST) +#include "stdalign.h" +#else +#define alignof _Alignof /*stdalign.h not found*/ +#endif + +#include "nvpva_elf_parser.h" +#include + +#define UINT_MAX (~0U) + +/* CERT complains about casts from const char*, so do intermediate cast to + * void* + */ +static inline const void *to_void(const char *const p) +{ + return (const void *)p; +} + +bool image_is_elf(const void *const image) +{ + if (image == NULL) + return false; + + /* assume little endian format */ + if (ELFMAGIC_LSB == *(const u32 *)image) + return true; + + return false; +} + +bool elf_is_32bit(const void *e) +{ + if (image_is_elf(e)) { + const struct elf_file_header *efh = + (const struct elf_file_header *)e; + if (efh->oclass == ELFCLASS32) + return true; + } + return false; +} + +static inline size_t get_table_end(u32 num, u16 entsize, size_t off) +{ + /* We need to ensure the off+(num*entsize) doesn't overflow. 
Originally + * num and entsize are ushort, which C converts to int for multiply so + * instead cast them up to u32 or u64 + */ + size_t end; + u32 tablesize = num * entsize; + + if (tablesize < num) + return UZERO; /* wraparound error */ + + end = off + tablesize; + if (end < off) + return UZERO; /* wraparound error */ + + return end; +} + +static const struct elf_file_header *elf_file_header(const void *e) +{ + if (!elf_is_32bit(e)) + return NULL; + + return (const struct elf_file_header *)e; +} + +static inline const struct elf_section_header *elf_section_table(const void *e) +{ + const struct elf_file_header *efh = elf_file_header(e); + const char *p = (const char *)e; + + if ((e == NULL) || (efh == NULL)) + return NULL; + + p = &p[efh->shoff]; + /* proper ELF should always have offsets be aligned, but add check just + * in case. + */ + if (((uintptr_t)(p) % alignof(struct elf_section_header)) != UZERO) + return NULL; /* pointer not aligned */ + + return (const struct elf_section_header *)to_void(p); +} + +static size_t elf_section_size(const void *e, + const struct elf_section_header *esh) +{ + if ((e == NULL) || (esh == NULL)) + return UZERO; + + return (size_t)esh->size; +} + +u32 elf_shnum(const void *e) +{ + const struct elf_file_header *efh = elf_file_header(e); + + if (efh == NULL) + return UZERO; + + if (efh->shnum == UZERO) { + /* get value from size of first (empty) section to avoid + * recursion, don't call elf_section_header(0) + */ + const struct elf_section_header *esh = elf_section_table(e); + size_t size = elf_section_size(e, esh); + + if (size > UINT_MAX) { /* make sure we don't lose precision */ + return UZERO; + } else { + return (u32)size; + } + } + return efh->shnum; +} + +const struct elf_section_header *elf_section_header(const void *e, + unsigned int index) +{ + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return NULL; + + if (index >= elf_shnum(e)) + return NULL; + + esh = &esh[index]; + return esh; +} + +size_t elf_size(const void *e) +{ + /* different elf writers emit elf in different orders, so look for end + * after program headers, section headers, or sections + */ + size_t max_size; + unsigned int i; + const struct elf_file_header *efh = elf_file_header(e); + + if (efh == NULL) + return UZERO; + + if (efh->phoff > efh->shoff) { + max_size = + get_table_end(efh->phnum, efh->phentsize, efh->phoff); + if (max_size == UZERO) + return UZERO; /* wraparound error */ + } else { + max_size = + get_table_end(elf_shnum(e), efh->shentsize, efh->shoff); + if (max_size == UZERO) + return UZERO; /* wraparound error */ + } + for (i = UZERO; i < elf_shnum(e); ++i) { + u32 esh_end; + const struct elf_section_header *esh = elf_section_header(e, i); + + if (esh == NULL) + return UZERO; + + esh_end = esh->offset + esh->size; + if (esh_end < esh->offset) + return UZERO; /* wraparound error */ + + if ((esh->type != SHT_NOBITS) && (esh_end > max_size)) + max_size = esh_end; + } + return max_size; +} + +static u32 elf_shstrndx(const void *e) +{ + const struct elf_file_header *efh = elf_file_header(e); + + if (efh == NULL) + return UZERO; + + if (efh->shstrndx == SHN_XINDEX) { + /* get value from link field of first (empty) section to avoid + * recursion, don't call elf_section_header(0) + */ + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return UZERO; + + return esh->link; + } + return efh->shstrndx; +} + +static const char *elf_string_at_offset(const void *e, + const struct elf_section_header *eshstr, + 
unsigned int offset) +{ + const char *strtab; + u32 stroffset; + + if ((e == NULL) || (eshstr == NULL)) + return NULL; + + if (eshstr->type != SHT_STRTAB) + return NULL; + + if (offset >= eshstr->size) + return NULL; + + strtab = (const char *)e; + stroffset = eshstr->offset + offset; + + if (stroffset < eshstr->offset) + return NULL; + + strtab = &strtab[stroffset]; + return strtab; +} + +const char *elf_section_name(const void *e, + const struct elf_section_header *esh) +{ + const char *name; + /* get section header string table */ + u32 shstrndx = elf_shstrndx(e); + const struct elf_section_header *eshstr = + elf_section_header(e, shstrndx); + if ((esh == NULL) || (eshstr == NULL)) + return NULL; + + name = elf_string_at_offset(e, eshstr, esh->name); + return name; +} + +const struct elf_section_header *elf_named_section_header(const void *e, + const char *name) +{ + const struct elf_section_header *esh; + unsigned int i; + + if (name == NULL) + return NULL; + + esh = elf_section_table(e); + if (esh == NULL) + return NULL; + + /* iterate through sections till find matching name */ + for (i = UZERO; i < elf_shnum(e); ++i) { + const char *secname = elf_section_name(e, esh); + + if (secname != NULL) { + size_t seclen = strlen(secname); + + /* use strncmp to avoid problem with input not being + * null-terminated, but then need to check for false + * partial match + */ + if ((strncmp(secname, name, seclen) == ZERO) && + ((unsigned char)name[seclen]) == UZERO) { + return esh; + } + } + ++esh; + } + return NULL; +} + +static const struct elf_section_header *elf_typed_section_header(const void *e, + u32 type) +{ + unsigned int i; + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return NULL; + + /* iterate through sections till find matching type */ + for (i = UZERO; i < elf_shnum(e); ++i) { + if (esh->type == type) + return esh; + + ++esh; + } + return NULL; +} + +const struct elf_section_header *elf_offset_section_header(const void *e, + u32 offset) +{ + unsigned int i; + const struct elf_section_header *esh = elf_section_table(e); + + if (esh == NULL) + return NULL; + + /* iterate through sections till find matching offset */ + for (i = UZERO; i < elf_shnum(e); ++i) { + if (esh->offset == offset) + return esh; + ++esh; + } + return NULL; +} + +const u8 *elf_section_contents(const void *e, + const struct elf_section_header *esh) +{ + const u8 *p; + + if ((e == NULL) || (esh == NULL)) + return NULL; + + p = (const u8 *)e; + return &p[esh->offset]; +} + +const struct elf_symbol *elf_symbol(const void *e, unsigned int index) +{ + const struct elf_section_header *esh; + const struct elf_symbol *esymtab; + const uint8_t *p = e; + uint8_t align = 0; + /* get symbol table */ + esh = elf_typed_section_header(e, SHT_SYMTAB); + if ((esh == NULL) || (esh->entsize == UZERO)) + return NULL; + + if (index >= (esh->size / esh->entsize)) + return NULL; + + align = esh->addralign; + p = &p[esh->offset]; + if ((align != 0) && (((uintptr_t)(p) % align != UZERO))) + return NULL; + + esymtab = (const struct elf_symbol *)(p); + return &esymtab[index]; +} + +const char *elf_symbol_name(const void *e, const struct elf_section_header *esh, + unsigned int index) +{ + const struct elf_section_header *eshstr; + const struct elf_symbol *esymtab; + const struct elf_symbol *esym; + const char *name; + const char *p; + uint8_t align = 0; + + if ((esh == NULL) || (esh->entsize == UZERO)) + return NULL; + + if (esh->type != SHT_SYMTAB) + return NULL; + + if (index >= (esh->size / 
esh->entsize)) + return NULL; + + /* get string table */ + eshstr = elf_section_header(e, esh->link); + if (eshstr == NULL) + return NULL; + + p = (const char *)e; + align = esh->addralign; + p = &p[esh->offset]; + if ((align != 0) && (((uintptr_t)(p) % align != UZERO))) + return NULL; + + esymtab = (const struct elf_symbol *)to_void(p); + esym = &esymtab[index]; + name = elf_string_at_offset(e, eshstr, esym->name); + return name; +} + +u32 elf_symbol_shndx(const void *e, const struct elf_symbol *esym, + unsigned int index) +{ + if ((e == NULL) || (esym == NULL)) + return UZERO; + + if (esym->shndx == SHN_XINDEX) { + const u8 *contents; + const u32 *shndx_array; + const struct elf_section_header *esh = + elf_typed_section_header(e, SHT_SYMTAB_SHNDX); + if (esh == NULL || esh->entsize == UZERO) + return UZERO; + + contents = elf_section_contents(e, esh); + if (contents == NULL) + return UZERO; + + if (((uintptr_t)(contents) % alignof(u32)) != UZERO) + return UZERO; + + shndx_array = (const u32 *)(contents); + if (index >= (esh->size / esh->entsize)) + return UZERO; + + return shndx_array[index]; + } + return esym->shndx; +} + +const struct elf_program_header *elf_program_header(const void *e, + unsigned int index) +{ + const struct elf_file_header *efh = elf_file_header(e); + const struct elf_program_header *eph; + const char *p = e; + + if (efh == NULL) + return NULL; + + if (index >= efh->phnum) + return NULL; + + p = &p[efh->phoff]; + if (((uintptr_t)(p) % alignof(struct elf_program_header)) != UZERO) + return NULL; + + eph = (const struct elf_program_header *)to_void(p); + eph = &eph[index]; + return eph; +} diff --git a/drivers/video/tegra/host/pva/nvpva_elf_parser.h b/drivers/video/tegra/host/pva/nvpva_elf_parser.h new file mode 100644 index 00000000..c9242a19 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_elf_parser.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef NVPVA_ELF_PARSER_H +#define NVPVA_ELF_PARSER_H +#include +#include "elf_include_fix.h" + +#define ZERO 0 +#define UZERO 0U +#define ULLZERO 0ULL + +/*---------------------------------- Header ----------------------------------*/ + +struct elf_file_header { + u32 magic; /* 0x7f,0x45,0x4c,0x46 */ + u8 oclass; /* Object file class */ + u8 data; /* Data encoding */ + u8 formatVersion; /* Object format version */ + u8 abi; /* OS application binary interface */ + u8 abiVersion; /* Version of abi */ + u8 padd[7]; /* Elf ident padding */ + u16 type; /* Object file type */ + u16 machine; /* Architecture */ + u32 version; /* Object file version */ + u32 entry; /* Entry point virtual address */ + u32 phoff; /* Program header table file offset */ + u32 shoff; /* Section header table file offset */ + u32 flags; /* Processor-specific flags */ + u16 ehsize; /* ELF header size in bytes */ + u16 phentsize; /* Program header table entry size */ + u16 phnum; /* Program header table entry count */ + u16 shentsize; /* Section header table entry size */ + u16 shnum; /* Section header table entry count */ + u16 shstrndx; /* Section header string table index */ +}; + +#define ELFMAGIC 0x7f454c46U /* This is in big endian */ +#define ELFMAGIC_LSB 0x464c457fU /* This is in little endian */ +#define ELFCLASS32 1U /* 32 bit object file */ + +#define EV_NONE 0 /* Invalid version */ +#define EV_CURRENT 1 /* Current version */ + +/*---------------------------------- Section ---------------------------------*/ + +struct elf_section_header { + u32 name; /* Section name, string table index */ + u32 type; /* Type of section */ + u32 flags; /* Miscellaneous section attributes */ + u32 addr; /* Section virtual addr at execution */ + u32 offset; /* Section file offset */ + u32 size; /* Size of section in bytes */ + u32 link; /* Index of another section */ + u32 info; /* Additional section information */ + u32 addralign; /* Section alignment */ + u32 entsize; /* Entry size if section holds table */ +}; + +/* + * Type + */ +#define SHT_NULL 0x00U /* NULL section */ +#define SHT_PROGBITS 0x01U /* Loadable program data */ +#define SHT_SYMTAB 0x02U /* Symbol table */ +#define SHT_STRTAB 0x03U /* String table */ +#define SHT_RELA 0x04U /* Relocation table with addents */ +#define SHT_HASH 0x05U /* Hash table */ +#define SHT_DYNAMIC 0x06U /* Information for dynamic linking */ +#define SHT_NOTE 0x07U /* Information that marks file */ +#define SHT_NOBITS 0x08U /* Section does not have data in file */ +#define SHT_REL 0x09U /* Relocation table without addents */ +#define SHT_SHLIB 0x0aU /* Reserved */ +#define SHT_DYNSYM 0x0bU /* Dynamic linker symbol table */ +#define SHT_INIT_ARRAY 0x0eU /* Array of pointers to init funcs */ +#define SHT_FINI_ARRAY 0x0fU /* Array of function to finish funcs */ +#define SHT_PREINIT_ARRAY 0x10U /* Array of pointers to pre-init functions */ +#define SHT_GROUP 0x11U /* Section group */ +#define SHT_SYMTAB_SHNDX 0x12U /* Table of 32bit symtab shndx */ +#define SHT_LOOS 0x60000000U /* Start OS-specific. 
*/ +#define SHT_HIOS 0x6fffffffU /* End OS-specific type */ +#define SHT_LOPROC 0x70000000U /* Start of processor-specific */ +#define SHT_HIPROC 0x7fffffffU /* End of processor-specific */ +#define SHT_LOUSER 0x80000000U /* Start of application-specific */ +#define SHT_HIUSER 0x8fffffffU /* End of application-specific */ + +/* + * Special section index + */ +#define SHN_UNDEF 0U /* Undefined section */ +#define SHN_LORESERVE 0xff00U /* lower bound of reserved indexes */ +#define SHN_ABS 0xfff1U /* Associated symbol is absolute */ +#define SHN_COMMON 0xfff2U /* Associated symbol is common */ +#define SHN_XINDEX 0xffffU /* Index is in symtab_shndx */ + +/* + * Special section names + */ +#define SHNAME_SHSTRTAB ".shstrtab" /* section string table */ +#define SHNAME_STRTAB ".strtab" /* string table */ +#define SHNAME_SYMTAB ".symtab" /* symbol table */ +#define SHNAME_SYMTAB_SHNDX ".symtab_shndx" /* symbol table shndx array */ +#define SHNAME_TEXT ".text." /* suffix with entry name */ + +/*---------------------------------- Program Segment -------------------------*/ + +struct elf_program_header { + u32 type; /* Identifies program segment type */ + u32 offset; /* Segment file offset */ + u32 vaddr; /* Segment virtual address */ + u32 paddr; /* Segment physical address */ + u32 filesz; /* Segment size in file */ + u32 memsz; /* Segment size in memory */ + u32 flags; /* Segment flags */ + u32 align; /* Segment alignment, file & memory */ +}; + +/*----------------------------------- Symbol ---------------------------------*/ + +struct elf_symbol { + u32 name; /* Symbol name, index in string tbl */ + u32 value; /* Value of the symbol */ + u32 size; /* Associated symbol size */ + u8 info; /* Type and binding attributes */ + u8 other; /* Extra flags */ + u16 shndx; /* Associated section index */ +}; + +#define ELF_ST_BIND(s) ((u32)((s)->info) >> 4) +#define ELF_ST_TYPE(s) ((u32)((s)->info) & 0xfU) +#define ELF_ST_INFO(b, t) (((b) << 4) + ((t)&0xfU)) + +/* + * Type + */ +#define STT_NOTYPE 0U /* No type known */ +#define STT_OBJECT 1U /* Data symbol */ +#define STT_FUNC 2U /* Code symbol */ +#define STT_SECTION 3U /* Section */ +#define STT_FILE 4U /* File */ +#define STT_COMMON 5U /* Common symbol */ +#define STT_LOOS 10U /* Start of OS-specific */ + +/* + * Scope + */ +#define STB_LOCAL 0U /* Symbol not visible outside object */ +#define STB_GLOBAL 1U /* Symbol visible outside object */ +#define STB_WEAK 2U /* Weak symbol */ + +/* + * The following routines that return file/program/section headers + * all return NULL when not found. + */ + +/* + * Typical elf readers create a table of information that is passed + * to the different routines. For simplicity, we're going to just + * keep the image of the whole file and pass that around. Later, if we see + * a need to speed this up, we could consider changing void * to be something + * more complicated. 
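+ *
+ * As a hedged illustration (not part of the original notes), a caller that
+ * holds a complete 32-bit ELF image in memory might look up a section roughly
+ * like this; `image` and `nbytes` are hypothetical names for the mapped image
+ * and its length:
+ *
+ *	if (image_is_elf(image) && elf_is_32bit(image) &&
+ *	    elf_size(image) <= nbytes) {
+ *		const struct elf_section_header *esh =
+ *			elf_named_section_header(image, SHNAME_SYMTAB);
+ *		const u8 *data = esh ? elf_section_contents(image, esh) : NULL;
+ *	}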
+ */ + +bool image_is_elf(const void *const image); + +bool elf_is_32bit(const void *e); + +u32 elf_shnum(const void *e); + +const struct elf_section_header *elf_section_header(const void *e, + unsigned int index); + +const char *elf_section_name(const void *e, + const struct elf_section_header *esh); + +const struct elf_section_header *elf_named_section_header(const void *e, + const char *name); + +const u8 *elf_section_contents(const void *e, + const struct elf_section_header *esh); + +const struct elf_symbol *elf_symbol(const void *e, unsigned int index); + +const char *elf_symbol_name(const void *e, const struct elf_section_header *esh, + unsigned int index); + +const struct elf_program_header *elf_program_header(const void *e, + unsigned int index); + +u32 elf_symbol_shndx(const void *e, const struct elf_symbol *esym, + unsigned int index); + +const struct elf_section_header *elf_offset_section_header(const void *e, + u32 offset); + +size_t elf_size(const void *e); +#endif diff --git a/drivers/video/tegra/host/pva/nvpva_queue.c b/drivers/video/tegra/host/pva/nvpva_queue.c new file mode 100644 index 00000000..f61cd406 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_queue.c @@ -0,0 +1,597 @@ +/* + * NVHOST queue management for T194 + * + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "nvpva_syncpt.h" +#include "nvpva_queue.h" +#include "pva_bit_helpers.h" +#include "pva.h" + +#define CMDBUF_SIZE 4096 + +/** + * @brief Describe a task pool struct + * + * Array of fixed task memory is allocated during queue_alloc call. + * The memory will be shared for various task based on availability + * + * dma_addr Physical address of task memory pool + * aux_dma_addr Physical address of task aux memory pool + * va Virtual address of the task memory pool + * aux_va Virtual address of the task memory pool + * kmem_addr Kernel memory for task struct + * lock Mutex lock for the array access. + * alloc_table Keep track of the index being assigned + * and freed for a task + * max_task_cnt Maximum task count that can be supported. 
+ * + */ +struct nvpva_queue_task_pool { + dma_addr_t dma_addr; + dma_addr_t aux_dma_addr; + void *va; + void *aux_va; + void *kmem_addr[MAX_PVA_SEG_COUNT_PER_QUEUE]; + struct mutex lock; + + unsigned long alloc_table[NUM_POOL_ALLOC_SUB_TABLES]; + unsigned int max_task_cnt; +}; + +static int nvpva_queue_task_pool_alloc(struct platform_device *pdev, + struct platform_device *pprim_dev, + struct platform_device *paux_dev, + struct nvpva_queue *queue, + unsigned int num_tasks) +{ + int err = 0; + unsigned int i; + unsigned int num_segments = num_tasks/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + struct nvpva_queue_task_pool *task_pool; + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + u64 mem_size; + + task_pool = queue->task_pool; + memset(task_pool->kmem_addr, 0, sizeof(task_pool->kmem_addr)); + + /* Allocate the kernel memory needed for the task */ + if (queue->task_kmem_size) { + for (i = 0; i < num_segments; i++) { + task_pool->kmem_addr[i] = + kcalloc(MAX_PVA_TASK_COUNT_PER_QUEUE_SEG, + queue->task_kmem_size, GFP_KERNEL); + if (!task_pool->kmem_addr[i]) { + nvpva_err(&pdev->dev, + "failed to allocate " \ + "task_pool->kmem_addr"); + err = -ENOMEM; + goto err_alloc_task_pool; + } + } + } + + mem_size = queue->task_dma_size * num_tasks; + if (queue->task_dma_size != mem_size / num_tasks) { + nvpva_err(&pdev->dev, "mem size too large"); + err = -EINVAL; + goto err_alloc_task_pool; + } + + /* Allocate memory for the task itself */ + task_pool->va = dma_alloc_attrs(&pprim_dev->dev, + mem_size, + &task_pool->dma_addr, GFP_KERNEL, + 0); + + if (task_pool->va == NULL) { + nvpva_err(&pdev->dev, "failed to allocate task_pool->va"); + err = -ENOMEM; + goto err_alloc_task_pool; + } + + mem_size = queue->aux_dma_size * num_tasks; + if (queue->aux_dma_size != mem_size / num_tasks) { + nvpva_err(&pdev->dev, "mem size too large"); + err = -EINVAL; + goto err_alloc_aux_task_pool; + } + + /* Allocate aux memory for the task itself */ + task_pool->aux_va = dma_alloc_attrs(&paux_dev->dev, + mem_size, + &task_pool->aux_dma_addr, GFP_KERNEL, + 0); + + if (task_pool->aux_va == NULL) { + nvpva_err(&pdev->dev, "failed to allocate task_pool->aux_va"); + err = -ENOMEM; + goto err_alloc_aux_task_pool; + } + + nvpva_dbg_info(pva, + "task_pool->dma_addr = %llx, task_pool->auxdma_addr = %llx", + (u64)task_pool->dma_addr, (u64)task_pool->aux_dma_addr); + + task_pool->max_task_cnt = num_tasks; + mutex_init(&task_pool->lock); + + return err; + +err_alloc_aux_task_pool: + dma_free_attrs(&pprim_dev->dev, + queue->task_dma_size * task_pool->max_task_cnt, + task_pool->va, task_pool->dma_addr, + 0); +err_alloc_task_pool: + for (i = 0; i < num_segments; i++) { + if (task_pool->kmem_addr[i] == NULL) + continue; + + kfree(task_pool->kmem_addr[i]); + task_pool->kmem_addr[i] = NULL; + } + + return err; +} + +static void nvpva_queue_task_free_pool(struct platform_device *pdev, + struct nvpva_queue *queue) +{ + unsigned int i; + unsigned int segments; + u64 mem_size; + struct nvpva_queue_task_pool *task_pool = + (struct nvpva_queue_task_pool *)queue->task_pool; + + segments = task_pool->max_task_cnt/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + + mem_size = queue->task_dma_size * task_pool->max_task_cnt; + if (queue->task_dma_size != mem_size / task_pool->max_task_cnt) { + nvpva_err(&pdev->dev, "mem size too large"); + return; + } + + dma_free_attrs(&queue->vm_pprim_dev->dev, + mem_size, + task_pool->va, task_pool->dma_addr, + 0); + + mem_size = queue->aux_dma_size * task_pool->max_task_cnt; + 
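+	/* Bail out if the aux pool size multiplication above wrapped around. */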
if (queue->aux_dma_size != mem_size / task_pool->max_task_cnt) { + nvpva_err(&pdev->dev, "mem size too large"); + return; + } + + dma_free_attrs(&queue->vm_paux_dev->dev, + mem_size, + task_pool->aux_va, task_pool->aux_dma_addr, + 0); + for (i = 0; i < segments; i++) + kfree(task_pool->kmem_addr[i]); + + memset(task_pool->alloc_table, 0, sizeof(task_pool->alloc_table)); + task_pool->max_task_cnt = 0U; +} + +static int nvpva_queue_dump(struct nvpva_queue_pool *pool, + struct nvpva_queue *queue, + struct seq_file *s) +{ + if (pool->ops && pool->ops->dump) + pool->ops->dump(queue, s); + + return 0; +} + +static int queue_dump(struct seq_file *s, void *data) +{ + struct nvpva_queue_pool *pool = s->private; + unsigned long queue_id; + u32 i; + + mutex_lock(&pool->queue_lock); + for (i = 0; i < NUM_POOL_ALLOC_SUB_TABLES; i++) + for_each_set_bit(queue_id, + &pool->alloc_table[i], + pool->max_queue_cnt) + nvpva_queue_dump(pool, + &pool->queues[64 * i + queue_id], s); + + mutex_unlock(&pool->queue_lock); + + return 0; +} + +static int queue_expose_open(struct inode *inode, struct file *file) +{ + return single_open(file, queue_dump, inode->i_private); +} + +static const struct file_operations queue_expose_operations = { + .open = queue_expose_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +struct nvpva_queue_pool *nvpva_queue_init(struct platform_device *pdev, + struct platform_device *paux_dev, + struct nvpva_queue_ops *ops, + unsigned int num_queues) +{ + struct nvhost_device_data *pdata; + struct nvpva_queue_pool *pool; + struct nvpva_queue *queues; + struct nvpva_queue *queue; + struct nvpva_queue_task_pool *task_pool; + unsigned int i; + int err; + + pool = kzalloc(sizeof(struct nvpva_queue_pool), GFP_KERNEL); + if (pool == NULL) { + err = -ENOMEM; + goto fail_alloc_pool; + } + + queues = kcalloc(num_queues, sizeof(struct nvpva_queue), GFP_KERNEL); + if (queues == NULL) { + err = -ENOMEM; + goto fail_alloc_queues; + } + + task_pool = kcalloc(num_queues, + sizeof(struct nvpva_queue_task_pool), GFP_KERNEL); + if (task_pool == NULL) { + nvpva_err(&pdev->dev, "failed to allocate task_pool"); + err = -ENOMEM; + goto fail_alloc_task_pool; + } + + pdata = platform_get_drvdata(pdev); + + /* initialize pool and queues */ + pool->pdev = pdev; + pool->pprim_dev = paux_dev; + pool->ops = ops; + pool->queues = queues; + memset(pool->alloc_table, 0, sizeof(pool->alloc_table)); + pool->max_queue_cnt = num_queues; + pool->queue_task_pool = task_pool; + mutex_init(&pool->queue_lock); + + debugfs_create_file("queues", 0444, + pdata->debugfs, pool, + &queue_expose_operations); + + + for (i = 0; i < num_queues; i++) { + queue = &queues[i]; + queue->id = i; + queue->pool = pool; + queue->task_pool = (void *)&task_pool[i]; + queue->batch_id = 0U; + nvpva_queue_get_task_size(queue); + } + + return pool; + +fail_alloc_task_pool: + kfree(pool->queues); +fail_alloc_queues: + kfree(pool); +fail_alloc_pool: + return ERR_PTR(err); +} + +void nvpva_queue_deinit(struct nvpva_queue_pool *pool) +{ + if (!pool) + return; + + kfree(pool->queue_task_pool); + kfree(pool->queues); + kfree(pool); + pool = NULL; +} + +void nvpva_queue_abort_all(struct nvpva_queue_pool *pool) +{ + u32 id; + u32 i; + + mutex_lock(&pool->queue_lock); + for (i = 0; i < NUM_POOL_ALLOC_SUB_TABLES; i++) + for_each_set_bit(id, + &pool->alloc_table[i], + pool->max_queue_cnt) + nvpva_queue_abort(&pool->queues[64 * i + id]); + + mutex_unlock(&pool->queue_lock); +} + +static void nvpva_queue_release(struct kref *ref) +{ + 
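+	/*
+	 * kref release callback: drop the queue syncpt, free the preallocated
+	 * task pools and return the queue slot to the pool's alloc_table.
+	 */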
struct nvpva_queue *queue = container_of(ref, struct nvpva_queue, + kref); + struct nvpva_queue_pool *pool = queue->pool; + + struct nvhost_device_data *pdata = platform_get_drvdata(pool->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + + /* release allocated resources */ + nvpva_syncpt_put_ref_ext(pool->pdev, queue->syncpt_id); + + /* free the task_pool */ + if (queue->task_dma_size) + nvpva_queue_task_free_pool(pool->pdev, queue); + + /* free the queue mutex */ + mutex_destroy(&queue->tail_lock); + + /* ..and mark the queue free */ + mutex_lock(&pool->queue_lock); + clear_bit(queue->id%64, &pool->alloc_table[queue->id/64]); + mutex_unlock(&pool->queue_lock); +} + +void nvpva_queue_put(struct nvpva_queue *queue) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(queue->pool->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + kref_put(&queue->kref, nvpva_queue_release); +} + +void nvpva_queue_get(struct nvpva_queue *queue) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(queue->pool->pdev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_fn(pva, ""); + kref_get(&queue->kref); +} + +struct nvpva_queue *nvpva_queue_alloc(struct nvpva_queue_pool *pool, + struct platform_device *paux_dev, + unsigned int num_tasks) +{ + struct platform_device *pdev = pool->pdev; + struct nvpva_queue *queues = pool->queues; + struct nvpva_queue *queue; + int index = 0; + int err = 0; + u32 syncpt_val; + + mutex_lock(&pool->queue_lock); + + index = rmos_find_first_zero_bit((u32 *) pool->alloc_table, + pool->max_queue_cnt); + + /* quit if we found a queue */ + if (index >= pool->max_queue_cnt) { + dev_err(&pdev->dev, "failed to get free Queue\n"); + err = -ENOMEM; + goto err_alloc_queue; + } + + /* reserve the queue */ + queue = &queues[index]; + set_bit(index%64, &pool->alloc_table[index/64]); + + /* allocate a syncpt for the queue */ + queue->syncpt_id = nvpva_get_syncpt_client_managed(pdev, "pva_syncpt"); + if (queue->syncpt_id == 0) { + dev_err(&pdev->dev, "failed to get syncpt\n"); + err = -ENOMEM; + goto err_alloc_syncpt; + } + + if (nvhost_syncpt_read_ext_check(pdev, + queue->syncpt_id, + &syncpt_val) != 0) { + err = -EIO; + goto err_read_syncpt; + } + + atomic_set(&queue->syncpt_maxval, syncpt_val); + + /* initialize queue ref count and sequence*/ + kref_init(&queue->kref); + queue->sequence = 0; + + /* initialize task list */ + INIT_LIST_HEAD(&queue->tasklist); + mutex_init(&queue->list_lock); + + /* initialize task list */ + queue->attr = NULL; + mutex_init(&queue->attr_lock); + + mutex_unlock(&pool->queue_lock); + + queue->vm_pdev = pdev; + queue->vm_pprim_dev = pool->pprim_dev; + + mutex_init(&queue->tail_lock); + queue->vm_paux_dev = paux_dev; + + if (queue->task_dma_size) { + err = nvpva_queue_task_pool_alloc(queue->vm_pdev, + queue->vm_pprim_dev, + queue->vm_paux_dev, + queue, + num_tasks); + if (err < 0) + goto err_alloc_task_pool; + } + + return queue; + +err_alloc_task_pool: + mutex_lock(&pool->queue_lock); +err_read_syncpt: + nvpva_syncpt_put_ref_ext(pool->pdev, queue->syncpt_id); +err_alloc_syncpt: + clear_bit(queue->id%64, &pool->alloc_table[queue->id/64]); +err_alloc_queue: + mutex_unlock(&pool->queue_lock); + return ERR_PTR(err); +} + +int nvpva_queue_abort(struct nvpva_queue *queue) +{ + struct nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->abort) + return pool->ops->abort(queue); + + return 0; +} + +int nvpva_queue_submit(struct nvpva_queue *queue, void *task_arg) +{ + struct 
nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->submit) + return pool->ops->submit(queue, task_arg); + + return 0; +} + +int nvpva_queue_set_attr(struct nvpva_queue *queue, void *arg) +{ + struct nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->set_attribute) + return pool->ops->set_attribute(queue, arg); + + return 0; +} + +struct nvpva_queue_task { + struct platform_device *host1x_pdev; + + struct nvpva_queue *queue; + + dma_addr_t dma_addr; + u32 *cpu_addr; +}; + +int nvpva_queue_get_task_size(struct nvpva_queue *queue) +{ + struct nvpva_queue_pool *pool = queue->pool; + + if (pool->ops && pool->ops->get_task_size) + pool->ops->get_task_size(&queue->task_dma_size, + &queue->task_kmem_size, + &queue->aux_dma_size); + + return 0; +} + +int nvpva_queue_alloc_task_memory( + struct nvpva_queue *queue, + struct nvpva_queue_task_mem_info *task_mem_info) +{ + int err = 0; + unsigned int index; + unsigned int hw_offset; + unsigned int sw_offset; + unsigned int seg_base; + unsigned int seg_index; + size_t aux_hw_offset; + struct platform_device *pdev = queue->pool->pdev; + struct nvpva_queue_task_pool *task_pool = + (struct nvpva_queue_task_pool *)queue->task_pool; + + mutex_lock(&task_pool->lock); + + index = rmos_find_first_zero_bit((u32 *) task_pool->alloc_table, + task_pool->max_task_cnt); + + /* quit if pre-allocated task array is not free */ + if (index >= task_pool->max_task_cnt) { + dev_err(&pdev->dev, + "failed to get Task Pool Memory\n"); + err = -EAGAIN; + goto err_alloc_task_mem; + } + + /* assign the task array */ + seg_index = index%MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + seg_base = (index/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG); + set_bit(index%64, &task_pool->alloc_table[index/64]); + hw_offset = index * queue->task_dma_size; + aux_hw_offset = index * queue->aux_dma_size; + sw_offset = seg_index * queue->task_kmem_size; + task_mem_info->kmem_addr = + (void *)((u8 *)task_pool->kmem_addr[seg_base] + sw_offset); + task_mem_info->va = (void *)((u8 *)task_pool->va + hw_offset); + task_mem_info->dma_addr = task_pool->dma_addr + hw_offset; + task_mem_info->aux_va = (void *)((u8 *)task_pool->aux_va + aux_hw_offset); + if ((U64_MAX - task_pool->aux_dma_addr) < task_pool->aux_dma_addr) { + err = -EFAULT; + goto err_alloc_task_mem; + } + + task_mem_info->aux_dma_addr = task_pool->aux_dma_addr + aux_hw_offset; + task_mem_info->pool_index = index; + +err_alloc_task_mem: + mutex_unlock(&task_pool->lock); + + return err; +} + +void nvpva_queue_free_task_memory(struct nvpva_queue *queue, int index) +{ + unsigned int hw_offset; + unsigned int sw_offset; + unsigned int seg_index; + unsigned int seg_base; + + u8 *task_kmem, *task_dma_va; + struct nvpva_queue_task_pool *task_pool = + (struct nvpva_queue_task_pool *)queue->task_pool; + + /* clear task kernel and dma virtual memory contents*/ + seg_index = index%MAX_PVA_TASK_COUNT_PER_QUEUE_SEG; + seg_base = (index/MAX_PVA_TASK_COUNT_PER_QUEUE_SEG); + hw_offset = index * queue->task_dma_size; + sw_offset = seg_index * queue->task_kmem_size; + task_kmem = (u8 *)task_pool->kmem_addr[seg_base] + sw_offset; + task_dma_va = (u8 *)task_pool->va + hw_offset; + + memset(task_kmem, 0, queue->task_kmem_size); + memset(task_dma_va, 0, queue->task_dma_size); + + mutex_lock(&task_pool->lock); + clear_bit(index%64, &task_pool->alloc_table[index/64]); + mutex_unlock(&task_pool->lock); +} diff --git a/drivers/video/tegra/host/pva/nvpva_queue.h b/drivers/video/tegra/host/pva/nvpva_queue.h new file mode 100644 index 00000000..c4924f5e 
--- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_queue.h @@ -0,0 +1,304 @@ +/* + * NVPVA Queue management header for T194 and T234 + * + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVPVA_NVPVA_QUEUE_H__ +#define __NVPVA_NVPVA_QUEUE_H__ + +#include +#include +#include + +#define NUM_POOL_ALLOC_SUB_TABLES 4 + +struct nvpva_queue_task_pool; +/** @brief Holds PVA HW task which can be submitted to PVA R5 FW */ +struct pva_hw_task; + +/** + * @brief Describe a allocated task mem struct + * + * kmem_addr Address for the task kernel memory + * dma_addr Physical address of task memory + * aux_dma_addr Physical address of aux task memory + * va Virtual address of the task memory + * aux_va Virtual address of the aux task memory + * pool_index Index to the allocated task memory + * + * This is keep track of the memory details of the task + * struct that is being shared between kernel and firmware. + */ +struct nvpva_queue_task_mem_info { + void *kmem_addr; + dma_addr_t dma_addr; + dma_addr_t aux_dma_addr; + void *va; + void *aux_va; + int pool_index; +}; +/** + * @brief Information needed in a Queue + * + * pool pointer queue pool + * kref struct kref for reference count + * syncpt_id Host1x syncpt id + * id Queue id + * list_lock mutex for tasks lists control + * tasklist Head of tasks list + * sequence monotonically incrementing task id per queue + * task_pool pointer to struct for task memory pool + * task_dma_size dma size used in hardware for a task + * task_kmem_size kernel memory size for a task + * aux_dma_size kernel memory size for a task aux buffer + * attr queue attribute associated with the host module + * + */ +struct nvpva_queue { + struct nvpva_queue_task_pool *task_pool; + struct nvpva_queue_pool *pool; + struct kref kref; + u32 id; + + /*wait list for task mem requester*/ + struct semaphore task_pool_sem; + + /* Host1x resources */ + struct nvhost_channel *channel; + struct platform_device *vm_pdev; + struct platform_device *vm_pprim_dev; + struct platform_device *vm_paux_dev; + u32 syncpt_id; + u32 local_sync_counter; + atomic_t syncpt_maxval; + + size_t task_dma_size; + size_t task_kmem_size; + size_t aux_dma_size; + + u32 sequence; + + struct mutex attr_lock; + void *attr; + + struct mutex list_lock; + struct list_head tasklist; + + /*! Mutex for exclusive access of tail task submit */ + struct mutex tail_lock; + struct pva_hw_task *old_tail; + struct pva_hw_task *hw_task_tail; + + u64 batch_id; +}; + +/** + * @brief hardware specific queue callbacks + * + * dump dump the task information + * abort abort all tasks from a queue + * submit submit the given list of tasks to hardware + * get_task_size get the dma size needed for the task in hw + * and the kernel memory size needed for task. 
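+ * set_attribute apply a module specific attribute to the queue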
+ * + */ +struct nvpva_queue_ops { + void (*dump)(struct nvpva_queue *queue, struct seq_file *s); + int (*abort)(struct nvpva_queue *queue); + int (*submit)(struct nvpva_queue *queue, void *task_arg); + void (*get_task_size)(size_t *dma_size, + size_t *kmem_size, + size_t *aux_dma_size); + int (*set_attribute)(struct nvpva_queue *queue, void *arg); +}; + +/** + * @brief Queue pool data structure to hold queue table + * + * pdev Pointer to the Queue client device + * ops Pointer to hardware specific queue ops + * queues Queues available for the client + * queue_lock Mutex for the bitmap of reserved queues + * alloc_table Bitmap of allocated queues + * max_queue_cnt Max number of queues available for client + * queue_task_pool Pointer to the task memory pool for queues. + * + */ +struct nvpva_queue_pool { + struct platform_device *pdev; + struct platform_device *pprim_dev; + struct nvpva_queue_ops *ops; + struct nvpva_queue *queues; + struct mutex queue_lock; + unsigned long alloc_table[NUM_POOL_ALLOC_SUB_TABLES]; + unsigned int max_queue_cnt; + void *queue_task_pool; +}; + +/** + * @brief Initialize queue structures + * + * This function allocates and initializes queue data structures. + * + * @param pdev Pointer to the Queue client device + * @param paux_dev Pointer to the Queue client aux device + * @param ops Pointer to device specific callbacks + * @param num_queues Max number of queues available for client + * @return pointer to queue pool + * + */ +struct nvpva_queue_pool *nvpva_queue_init(struct platform_device *pdev, + struct platform_device *paux_dev, + struct nvpva_queue_ops *ops, + unsigned int num_queues); + +/** + * @brief De-initialize queue structures + * + * This function frees all queue data structures. + * + * @param pool pointer to queue pool + * @return void + * + */ +void nvpva_queue_deinit(struct nvpva_queue_pool *pool); + +/** + * @brief Release reference of a queue + * + * This function releases a reference to a queue. + * + * @param queue Pointer to an allocated queue. + * @return void + * + */ +void nvpva_queue_put(struct nvpva_queue *queue); + +/** + * @brief Get reference on a queue. + * + * This function is used to get a reference to an already allocated queue. + * + * @param queue Pointer to an allocated queue. + * @return None + * + */ +void nvpva_queue_get(struct nvpva_queue *queue); + +/** + * @brief Allocate a queue for client. + * + * This function allocates a queue from the pool to the client for the user. + * + * @param pool Pointer to a queue pool table + * @param paux_dev pointer to auxiliary dev + * @param num_tasks Max number of tasks per queue + * + * @return Pointer to a queue struct on success + * or negative error on failure. + * + */ +struct nvpva_queue *nvpva_queue_alloc(struct nvpva_queue_pool *pool, + struct platform_device *paux_dev, + unsigned int num_tasks); + +/** + * @brief Abort all active queues + * + * @param pool Pointer to a queue pool table + */ +void nvpva_queue_abort_all(struct nvpva_queue_pool *pool); + +/** + * @brief Abort tasks within a client queue + * + * This function aborts all tasks from the given client queue. If there are no + * active tasks, the function call is a no-op. + * It is expected to be called when an active device fd gets closed. + * + * @param queue Pointer to an allocated queue + * @return None + * + */ +int nvpva_queue_abort(struct nvpva_queue *queue); + +/** + * @brief submits the given list of tasks to hardware + * + * This function submits the given list of tasks to hardware.
+ * The submit structure is updated with the fence values as appropriate. + * + * @param queue Pointer to an allocated queue + * @param submit Submit the given list of tasks to hardware + * @return 0 on success or negative error code on failure. + * + */ +int nvpva_queue_submit(struct nvpva_queue *queue, void *submit); + +/** + * @brief Get the Task Size needed + * + * This function get the needed memory size for the task. This memory is + * shared memory between kernel and firmware + * + * @param queue Pointer to an allocated queue + * @return Size of the task + * + */ +int nvpva_queue_get_task_size(struct nvpva_queue *queue); + +/** + * @brief Allocate a memory from task memory pool + * + * This function helps to assign a task memory from + * the preallocated task memory pool. This memory is shared memory between + * kernel and firmware + * + * @queue Pointer to an allocated queue + * @task_mem_info Pointer to nvpva_queue_task_mem_info struct + * + * @return 0 on success, otherwise a negative error code is returned + * + */ +int nvpva_queue_alloc_task_memory( + struct nvpva_queue *queue, + struct nvpva_queue_task_mem_info *task_mem_info); + +/** + * @brief Free the assigned task memory + * + * This function helps to unset the assigned task memory + * + * @param queue Pointer to an allocated queue + * @param index Index of the assigned task pool memory + * @return void + * + */ +void nvpva_queue_free_task_memory(struct nvpva_queue *queue, int index); + +/** + * @brief Sets the attribute to the queue + * + * This function set the attribute of the queue with the arguments passed + * + * @param queue Pointer to an allocated queue + * @param arg The structure which consists of the id and value + * @return 0 on success or negative error code on failure. + * + */ +int nvpva_queue_set_attr(struct nvpva_queue *queue, void *arg); + +#endif diff --git a/drivers/video/tegra/host/pva/nvpva_syncpt.c b/drivers/video/tegra/host/pva/nvpva_syncpt.c new file mode 100644 index 00000000..5bbd7b78 --- /dev/null +++ b/drivers/video/tegra/host/pva/nvpva_syncpt.c @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "pva.h" + +int nvpva_map_region(struct device *dev, + phys_addr_t start, + size_t size, + dma_addr_t *sp_start, + u32 attr) +{ + /* If IOMMU is enabled, map it into the device memory */ + if (iommu_get_domain_for_dev(dev)) { + *sp_start = dma_map_resource(dev, start, size, attr, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, *sp_start)) + return -ENOMEM; + } else { + *sp_start = start; + } + + return 0; +} + +int nvpva_unmap_region(struct device *dev, + dma_addr_t addr, + size_t size, + u32 attr) +{ + if (iommu_get_domain_for_dev(dev)) { + dma_unmap_resource(dev, addr, size, attr, + DMA_ATTR_SKIP_CPU_SYNC); + } + + return 0; +} + +void nvpva_syncpt_put_ref_ext(struct platform_device *pdev, + u32 id) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + + if (pva->version == PVA_HW_GEN1) { + nvhost_syncpt_put_ref_ext(pdev, id); + return; + } + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].id == id) { + pva->syncpts.syncpts_rw[i].assigned = 0; + break; + } + } +} + +u32 nvpva_get_syncpt_client_managed(struct platform_device *pdev, + const char *syncpt_name) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + u32 id = 0; + int i; + + if (pva->version == PVA_HW_GEN1) { + id = nvhost_get_syncpt_client_managed(pdev, "pva_syncpt"); + goto out; + } + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].assigned == 0) { + id = pva->syncpts.syncpts_rw[i].id; + pva->syncpts.syncpts_rw[i].assigned = 1; + break; + } + } +out: + return id; +} + +dma_addr_t +nvpva_syncpt_address(struct platform_device *pdev, u32 id, bool rw) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + struct platform_device *host_pdev = pva->syncpts.host_pdev; + dma_addr_t addr = 0; + u32 offset = 0; + int i; + + if (pva->version == PVA_HW_GEN1) { + addr = nvhost_syncpt_address(pdev, id); + goto out; + } + + if (!rw) { + offset = nvhost_syncpt_unit_interface_get_byte_offset_ext(host_pdev, id); + addr = pva->syncpts.syncpt_start_iova_r + (dma_addr_t)offset; + goto out; + } + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].id == id) { + addr = pva->syncpts.syncpts_rw[i].addr; + break; + } + } +out: + nvpva_dbg_info(pva, + "syncpt_addr: id: %d addr: %llx offset: %llx\n", + id, + addr, + (u64)offset); + + return addr; +} + +void nvpva_syncpt_unit_interface_deinit(struct platform_device *pdev, + struct platform_device *paux_dev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + + if (!pva->syncpts.syncpts_mapped_r) + goto out; + + if (pva->version == PVA_HW_GEN1) { + pva->syncpts.syncpts_mapped_rw = false; + pva->syncpts.syncpts_mapped_r = false; + goto out; + } + + nvpva_unmap_region(&paux_dev->dev, pva->syncpts.syncpt_start_iova_r, + pva->syncpts.syncpt_range_r, DMA_TO_DEVICE); + pva->syncpts.syncpts_mapped_r = false; + pva->syncpts.syncpt_start_iova_r = 0; + pva->syncpts.syncpt_range_r = 0; + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + if (pva->syncpts.syncpts_rw[i].id == 0) + continue; + + nvpva_unmap_region(&paux_dev->dev, pva->syncpts.syncpts_rw[i].addr, + pva->syncpts.syncpts_rw[i].size, + DMA_BIDIRECTIONAL); + pva->syncpts.syncpts_rw[i].addr = 0; + pva->syncpts.syncpts_rw[i].size = 0; + 
pva->syncpts.syncpts_rw[i].assigned = 0; + nvhost_syncpt_put_ref_ext(pdev, + pva->syncpts.syncpts_rw[i].id); + pva->syncpts.syncpts_rw[i].id = 0; + } + + pva->syncpts.syncpts_mapped_rw = false; +out: + return; +} + +int nvpva_syncpt_unit_interface_init(struct platform_device *pdev, + struct platform_device *paux_dev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + phys_addr_t base; + size_t size; + dma_addr_t syncpt_addr_rw; + u32 syncpt_offset; + int err = 0; + int i; + u32 id = 0; + + if ((pva->syncpts.syncpts_mapped_r) + || (pva->syncpts.syncpts_mapped_rw)) + goto out; + + if (pva->version == PVA_HW_GEN1) { + pva->syncpts.syncpt_start_iova_r = 0; + pva->syncpts.syncpt_range_r = 0; + pva->syncpts.page_size = 0; + pva->syncpts.syncpts_mapped_r = true; + pva->syncpts.syncpts_mapped_rw = true; + pva->syncpts.syncpt_start_iova_rw = 0; + pva->syncpts.syncpt_range_rw = 0; + goto out; + } + + pva->syncpts.host_pdev = to_platform_device(pdev->dev.parent); + err = nvhost_syncpt_unit_interface_get_aperture(pva->syncpts.host_pdev, + &base, + &size); + if (err) { + dev_err(&pdev->dev, "failed to get aperture"); + goto out; + } + + syncpt_offset = + nvhost_syncpt_unit_interface_get_byte_offset_ext(pva->syncpts.host_pdev, 1); + + err = nvpva_map_region(&paux_dev->dev, + base, + size, + &syncpt_addr_rw, + DMA_TO_DEVICE); + if (err) + goto out; + + pva->syncpts.syncpt_start_iova_r = syncpt_addr_rw; + pva->syncpts.syncpt_range_r = size; + pva->syncpts.page_size = syncpt_offset; + pva->syncpts.syncpts_mapped_r = true; + + nvpva_dbg_info(pva, + "syncpt_start_iova %llx, size %llx\n", + pva->syncpts.syncpt_start_iova_rw, + pva->syncpts.syncpt_range_r); + + for (i = 0; i < MAX_PVA_QUEUE_COUNT; i++) { + id = nvhost_get_syncpt_client_managed(pdev, "pva_syncpt"); + if (id == 0) { + dev_err(&pdev->dev, "failed to get syncpt\n"); + err = -ENOMEM; + goto err_alloc_syncpt; + } + + syncpt_offset = + nvhost_syncpt_unit_interface_get_byte_offset_ext(pva->syncpts.host_pdev, + id); + err = nvpva_map_region(&paux_dev->dev, + (base + syncpt_offset), + pva->syncpts.page_size, + &syncpt_addr_rw, + DMA_BIDIRECTIONAL); + if (err) { + dev_err(&pdev->dev, "failed to map syncpt %d\n", id); + goto err_map_sp; + } + + pva->syncpts.syncpts_rw[i].addr = syncpt_addr_rw; + pva->syncpts.syncpts_rw[i].id = id; + pva->syncpts.syncpts_rw[i].assigned = 0; + nvpva_dbg_info(pva, + "syncpt_addr: id: %d addr: %llx offset: %llx\n", + id, + syncpt_addr_rw, + 0LLU); + } + + pva->syncpts.syncpts_mapped_rw = true; + syncpt_addr_rw = pva->syncpts.syncpts_rw[MAX_PVA_QUEUE_COUNT - 1].addr; + pva->syncpts.syncpt_start_iova_rw = syncpt_addr_rw; + pva->syncpts.syncpt_range_rw = MAX_PVA_QUEUE_COUNT * + (pva->syncpts.syncpts_rw[0].addr - + pva->syncpts.syncpts_rw[1].addr); + + if (pva->version == PVA_HW_GEN1) + goto out; + + if (syncpt_addr_rw % (pva->syncpts.syncpt_range_rw) != 0) { + dev_err(&pdev->dev, "RW sync pts base not aligned to 512k"); + err = -ENOMEM; + goto err_map_sp; + } + + syncpt_addr_rw += (MAX_PVA_QUEUE_COUNT - 1) * pva->syncpts.page_size; + if (syncpt_addr_rw != pva->syncpts.syncpts_rw[0].addr) { + dev_err(&pdev->dev, "RW sync pts not contiguous"); + err = -ENOMEM; + goto err_map_sp; + } + + goto out; + +err_map_sp: +err_alloc_syncpt: + nvpva_syncpt_unit_interface_deinit(pdev, paux_dev); +out: + return err; +} diff --git a/drivers/video/tegra/host/pva/nvpva_syncpt.h b/drivers/video/tegra/host/pva/nvpva_syncpt.h new file mode 100644 index 00000000..d6d2bccb --- /dev/null +++ 
b/drivers/video/tegra/host/pva/nvpva_syncpt.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVPVA_SYNCPT_H__ +#define __NVPVA_SYNCPT_H__ + +void nvpva_syncpt_put_ref_ext(struct platform_device *pdev, + u32 id); +dma_addr_t nvpva_syncpt_address(struct platform_device *pdev, u32 id, + bool rw); +void nvpva_syncpt_unit_interface_deinit(struct platform_device *pdev, + struct platform_device *paux_dev); +int nvpva_syncpt_unit_interface_init(struct platform_device *pdev, + struct platform_device *paux_dev); +u32 nvpva_get_syncpt_client_managed(struct platform_device *pdev, + const char *syncpt_name); +int nvpva_map_region(struct device *dev, + phys_addr_t start, + size_t size, + dma_addr_t *sp_start, + u32 attr); +int nvpva_unmap_region(struct device *dev, + dma_addr_t addr, + size_t size, + u32 attr); +#endif diff --git a/drivers/video/tegra/host/pva/pva-vpu-perf.h b/drivers/video/tegra/host/pva/pva-vpu-perf.h new file mode 100644 index 00000000..b499cb56 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva-vpu-perf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2018 NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _PVA_VPU_PERF_H_ +#define _PVA_VPU_PERF_H_ + +#define PVA_TASK_VPU_NUM_PERF_COUNTERS 8 + +struct pva_task_vpu_perf_counter { + u32 count; + u32 sum; + u64 sum_squared; + u32 min; + u32 max; +}; + +#endif + diff --git a/drivers/video/tegra/host/pva/pva.c b/drivers/video/tegra/host/pva/pva.c new file mode 100644 index 00000000..f11eb598 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva.c @@ -0,0 +1,1484 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "pva_mailbox.h" +#include +#include "nvpva_client.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#endif +#include + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) +#include +#include +#include +#endif + +#include "pva_mailbox_t23x.h" +#include "pva_interface_regs_t23x.h" +#include "pva_version_config_t23x.h" +#include "pva_ccq_t23x.h" +#include "nvpva_queue.h" +#include "pva_queue.h" +#include "pva.h" +#include "pva_regs.h" +#include "pva_mailbox_t19x.h" +#include "pva_interface_regs_t19x.h" +#include "pva_version_config_t19x.h" +#include "pva_ccq_t19x.h" +#include "pva-ucode-header.h" +#include "pva_system_allow_list.h" +#include "pva_iommu_context_dev.h" +#include "nvpva_syncpt.h" +#include "pva-fw-address-map.h" +#include "pva_sec_ec.h" + +/* + * NO IOMMU set 0x60000000 as start address. + * With IOMMU set 0x80000000(>2GB) as startaddress + */ +#define DRAM_PVA_IOVA_START_ADDRESS 0x80000000 +#define DRAM_PVA_NO_IOMMU_START_ADDRESS 0x60000000 + +extern struct platform_driver nvpva_iommu_context_dev_driver; +static u32 vm_regs_sid_idx_t19x[] = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; +static u32 vm_regs_reg_idx_t19x[] = {0, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; +#ifdef CONFIG_PVA_CO_DISABLED +static u32 vm_regs_sid_idx_t234[] = {1, 2, 3, 4, 5, 6, 7, 7, + 8, 8, 8, 8, 8, 0, 0, 0}; +#else +static u32 vm_regs_sid_idx_t234[] = {1, 2, 3, 4, 5, 6, 7, 7, + 8, 0, 8, 8, 8, 0, 0, 0}; +#endif +static u32 vm_regs_reg_idx_t234[] = {0, 1, 2, 3, 4, 5, 6, 7, + 8, 8, 8, 9, 9, 0, 0, 0}; +static char *aux_dev_name = "16000000.pva0:pva0_niso1_ctx7"; +static u32 aux_dev_name_len = 29; + +struct nvhost_device_data t19_pva1_info = { + .version = PVA_HW_GEN1, + .num_channels = 1, + .clocks = { + {"axi", UINT_MAX,}, + {"vps0", UINT_MAX,}, + {"vps1", UINT_MAX,}, + }, + .ctrl_ops = &tegra_pva_ctrl_ops, + .devfs_name_family = "pva", + .class = NV_PVA1_CLASS_ID, + .autosuspend_delay = 500, + .finalize_poweron = pva_finalize_poweron, + .prepare_poweroff = pva_prepare_poweroff, + .firmware_name = "nvhost_pva010.fw", + .resource_policy = RESOURCE_PER_CHANNEL_INSTANCE, + .vm_regs = { + {0x70000, true, 0}, + {0x80000, false, 0}, + {0x80000, false, 8} + }, + .poweron_reset = true, + .serialize = true, + .push_work_done = true, + .get_reloc_phys_addr = nvhost_t194_get_reloc_phys_addr, + .can_powergate = true, +}; + +struct nvhost_device_data t19_pva0_info = { + .version = PVA_HW_GEN1, + .num_channels = 1, + .clocks = { + {"nafll_pva_vps", UINT_MAX,}, + {"nafll_pva_core", UINT_MAX,}, + {"axi", UINT_MAX,}, + {"vps0", UINT_MAX,}, + {"vps1", UINT_MAX,}, + }, + .ctrl_ops = &tegra_pva_ctrl_ops, + .devfs_name_family = "pva", + .class = NV_PVA0_CLASS_ID, + .autosuspend_delay = 500, + .finalize_poweron = pva_finalize_poweron, + .prepare_poweroff = pva_prepare_poweroff, + .firmware_name = "nvhost_pva010.fw", + .resource_policy = RESOURCE_PER_CHANNEL_INSTANCE, + .vm_regs = { + {0x70000, true, 0}, + {0x80000, false, 0}, + {0x80000, false, 8} + }, + .poweron_reset = true, + .serialize = true, + .get_reloc_phys_addr = nvhost_t194_get_reloc_phys_addr, + .can_powergate = true, +}; + +struct nvhost_device_data t23x_pva0_info = { + .version = PVA_HW_GEN2, + .num_channels = 1, + .clocks = { + {"axi", UINT_MAX,}, + 
{"vps0", UINT_MAX,}, + {"vps1", UINT_MAX,}, + }, + .ctrl_ops = &tegra_pva_ctrl_ops, + .devfs_name_family = "pva", + .class = NV_PVA0_CLASS_ID, + .autosuspend_delay = 500, + .finalize_poweron = pva_finalize_poweron, + .prepare_poweroff = pva_prepare_poweroff, + .firmware_name = "nvhost_pva020.fw", + .resource_policy = RESOURCE_PER_CHANNEL_INSTANCE, + .vm_regs = { + {0x240000, false, 0}, + {0x240004, false, 0}, + {0x240008, false, 0}, + {0x24000c, false, 0}, + {0x240010, false, 0}, + {0x240014, false, 0}, + {0x240018, false, 0}, + {0x24001c, false, 0}, + {0x240020, false, 0}, + {0x240020, false, 8}, + {0x240020, false, 16}, + {0x240024, false, 0}, + {0x240024, false, 8} + }, + .poweron_reset = true, + .serialize = true, + .get_reloc_phys_addr = nvhost_t23x_get_reloc_phys_addr, + .can_powergate = true, +}; + +/* Map PVA-A and PVA-B to respective configuration items in nvhost */ +static struct of_device_id tegra_pva_of_match[] = { + { + .name = "pva0", + .compatible = "nvidia,tegra194-pva", + .data = (struct nvhost_device_data *)&t19_pva0_info }, + { + .name = "pva1", + .compatible = "nvidia,tegra194-pva", + .data = (struct nvhost_device_data *)&t19_pva1_info }, + { + .name = "pva0", + .compatible = "nvidia,tegra234-pva", + .data = (struct nvhost_device_data *)&t23x_pva0_info }, + { + .name = "pva0", + .compatible = "nvidia,tegra234-pva-hv", + .data = (struct nvhost_device_data *)&t23x_pva0_info }, + { }, +}; + +MODULE_DEVICE_TABLE(of, tegra_pva_of_match); + +#define EVP_REG_NUM 8 +static u32 pva_get_evp_reg(u32 index) +{ + u32 evp_reg[EVP_REG_NUM] = { + evp_reset_addr_r(), + evp_undef_addr_r(), + evp_swi_addr_r(), + evp_prefetch_abort_addr_r(), + evp_data_abort_addr_r(), + evp_rsvd_addr_r(), + evp_irq_addr_r(), + evp_fiq_addr_r() + }; + + return evp_reg[index]; +} + +static u32 evp_reg_val[EVP_REG_NUM] = { + EVP_RESET_VECTOR, + EVP_UNDEFINED_INSTRUCTION_VECTOR, + EVP_SVC_VECTOR, + EVP_PREFETCH_ABORT_VECTOR, + EVP_DATA_ABORT_VECTOR, + EVP_RESERVED_VECTOR, + EVP_IRQ_VECTOR, + EVP_FIQ_VECTOR +}; + +/** + * Allocate and set a circular array for FW to provide status info about + * completed tasks from all the PVA R5 queues. + * To avoid possible overwrite of info, the size of circular array needs to be + * sufficient to hold the status info for maximum allowed number of tasks + * across all PVA R5 queues at any time. + * PVA R5 FW shall fill task status info at incremental positions in the array + * while PVA KMD shall read the task status info at incremental positions from + * the array. + * Both PVA R5 FW and PVA KMD shall independently maintain an internal index + * to dictate the current write position and read position respectively. + */ +static int pva_alloc_task_status_buffer(struct pva *pva) +{ + size_t min_size = 0U; + + /* Determine worst case size required for circular array based on + * maximum allowed per PVA engine and maximum allowed number of task + * submissions per PVA queue at any time. 
+ */ + min_size = MAX_PVA_TASK_COUNT * sizeof(struct pva_task_error_s); + + pva->priv_circular_array.size = ALIGN(min_size + 64, 64); + + pva->priv_circular_array.va = + dma_alloc_coherent(&pva->aux_pdev->dev, + pva->priv_circular_array.size, + &pva->priv_circular_array.pa, GFP_KERNEL); + + if (pva->priv_circular_array.va == NULL) { + pr_err("pva: failed to alloc mem for task status info"); + return -ENOMEM; + } + + INIT_WORK(&pva->task_update_work, pva_task_update); + + atomic_set(&pva->n_pending_tasks, 0); + pva->task_status_workqueue = + create_workqueue("pva_task_status_workqueue"); + return 0; +} + +static void pva_reset_task_status_buffer(struct pva *pva) +{ + flush_workqueue(pva->task_status_workqueue); + WARN_ON(atomic_read(&pva->n_pending_tasks) != 0); + atomic_set(&pva->n_pending_tasks, 0); + pva->circular_array_rd_pos = 0U; + pva->circular_array_wr_pos = 0U; +} + +static void pva_free_task_status_buffer(struct pva *pva) +{ + flush_workqueue(pva->task_status_workqueue); + destroy_workqueue(pva->task_status_workqueue); + dma_free_coherent(&pva->aux_pdev->dev, pva->priv_circular_array.size, + pva->priv_circular_array.va, + pva->priv_circular_array.pa); +} + +static int pva_init_fw(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + struct pva_fw *fw_info = &pva->fw_info; + struct pva_dma_alloc_info *priv1_buffer; + struct pva_dma_alloc_info *priv2_buffer; + u32 *ucode_ptr; + int err = 0; + u64 ucode_useg_addr; + u32 sema_value = 0; + u32 dram_base; + uint64_t useg_addr; + u32 i; + + nvpva_dbg_fn(pva, ""); + + priv1_buffer = &fw_info->priv1_buffer; + priv2_buffer = &fw_info->priv2_buffer; + ucode_ptr = priv1_buffer->va; + + /* Set the Ucode Header address for R5 */ + /* Program user seg subtracting the offset */ + ucode_useg_addr = 0; + host1x_writel(pdev, cfg_r5user_lsegreg_r(pva->version), + PVA_LOW32(ucode_useg_addr)); + host1x_writel(pdev, cfg_r5user_usegreg_r(pva->version), + PVA_EXTRACT64(ucode_useg_addr, 39, 32, u32)); + + /* Program the extra memory to be used by R5 */ + ucode_useg_addr = priv2_buffer->pa - fw_info->priv2_reg_offset; + host1x_writel(pdev, cfg_priv_ar2_start_r(pva->version), + fw_info->priv2_reg_offset); + host1x_writel(pdev, cfg_priv_ar2_end_r(pva->version), + fw_info->priv2_reg_offset + priv2_buffer->size); + host1x_writel(pdev, cfg_priv_ar2_lsegreg_r(pva->version), + PVA_LOW32(ucode_useg_addr)); + host1x_writel(pdev, cfg_priv_ar2_usegreg_r(pva->version), + PVA_EXTRACT64(ucode_useg_addr, 39, 32, u32)); + + /* Write EVP registers */ + for (i = 0; i < EVP_REG_NUM; i++) + host1x_writel(pdev, pva_get_evp_reg(i), evp_reg_val[i]); + + host1x_writel(pdev, + cfg_priv_ar1_start_r(pva->version), + FW_CODE_DATA_START_ADDR); + host1x_writel(pdev, + cfg_priv_ar1_end_r(pva->version), + FW_CODE_DATA_END_ADDR); + useg_addr = priv1_buffer->pa - FW_CODE_DATA_START_ADDR; + host1x_writel(pdev, + cfg_priv_ar1_lsegreg_r(pva->version), + PVA_LOW32(useg_addr)); + host1x_writel(pdev, + cfg_priv_ar1_usegreg_r(pva->version), + PVA_EXTRACT64((useg_addr), 39, 32, u32)); + + /* Indicate the OS is waiting for PVA ready Interrupt */ + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_WFI; + + if (pva->r5_dbg_wait) { + sema_value = PVA_WAIT_DEBUG; + pva->timeout_enabled = false; + } + + if (pva->slcg_disable) + sema_value |= PVA_CG_DISABLE; + + if (pva->vmem_war_disable) + sema_value |= PVA_VMEM_RD_WAR_DISABLE; + + sema_value |= (PVA_BOOT_INT | PVA_TEST_WAIT | PVA_VMEM_MBX_WAR_ENABLE); + 
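/* Publish the assembled boot and debug configuration flags to firmware through HSP shared semaphore 0. */ +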
host1x_writel(pdev, hsp_ss0_set_r(), sema_value); + + if (pva->version == PVA_HW_GEN1) { + host1x_writel(pdev, hsp_ss2_set_r(), 0xFFFFFFFF); + host1x_writel(pdev, hsp_ss3_set_r(), 0xFFFFFFFF); + } else { + if (pva->syncpts.syncpt_start_iova_r > 0xFBFFFFFF) { + dev_err(&pdev->dev, + "rd sema base greater than 32 bit "); + err = -EINVAL; + goto out; + } + + sema_value = (u32)pva->syncpts.syncpt_start_iova_r; + if (iommu_get_domain_for_dev(&pdev->dev)) + dram_base = DRAM_PVA_IOVA_START_ADDRESS; + else + dram_base = DRAM_PVA_NO_IOMMU_START_ADDRESS; + + if (sema_value < dram_base) { + dev_err(&pdev->dev, + "rd sema base less than dram base"); + err = -EINVAL; + goto out; + } + + sema_value -= dram_base; + + host1x_writel(pdev, hsp_ss2_clr_r(), 0xFFFFFFFF); + host1x_writel(pdev, hsp_ss2_set_r(), sema_value); + + if (pva->syncpts.syncpt_start_iova_rw > 0xFFF7FFFF) { + dev_err(&pdev->dev, + "rw sema base greater than 32 bit "); + err = -EINVAL; + goto out; + } + + sema_value = (u32)pva->syncpts.syncpt_start_iova_rw; + if (sema_value < dram_base) { + dev_err(&pdev->dev, + "rw sema base less than dram base"); + err = -EINVAL; + goto out; + } + + sema_value -= dram_base; + + host1x_writel(pdev, hsp_ss3_clr_r(), 0xFFFFFFFF); + host1x_writel(pdev, hsp_ss3_set_r(), sema_value); + } + + /* Take R5 out of reset */ + host1x_writel(pdev, proc_cpuhalt_r(), + proc_cpuhalt_ncpuhalt_f(proc_cpuhalt_ncpuhalt_done_v())); + + nvpva_dbg_fn(pva, "Waiting for PVA to be READY"); + + /* Wait PVA to report itself as ready */ + err = pva_mailbox_wait_event(pva, 60000); + if (err) { + dev_err(&pdev->dev, "mbox timedout boot sema=%x\n", + (host1x_readl(pdev, hsp_ss0_state_r()))); + goto wait_timeout; + } + + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_INVALID; + + nvpva_dbg_fn(pva, "PVA boot returned: %d", err); + + pva_reset_task_status_buffer(pva); + (void)memset(pva->priv_circular_array.va, 0, + pva->priv_circular_array.size); +wait_timeout: +out: + return err; +} + +static int pva_free_fw(struct platform_device *pdev, struct pva *pva) +{ + struct pva_fw *fw_info = &pva->fw_info; + + if (pva->boot_from_file) { + if (pva->priv1_dma.va) + dma_free_coherent(&pva->aux_pdev->dev, pva->priv1_dma.size, + pva->priv1_dma.va, pva->priv1_dma.pa); + } else { + if (pva->map_co_needed && (pva->priv1_dma.pa != 0)) { + nvpva_unmap_region(&pdev->dev, + pva->priv1_dma.pa, + pva->co->size, + DMA_BIDIRECTIONAL); + } + + pva->co->base_pa = 0; + pva->co->base_va = 0; + } + + pva->priv1_dma.pa = 0; + if (pva->priv2_dma.va) { + dma_free_coherent(&pva->aux_pdev->dev, pva->priv2_dma.size, + pva->priv2_dma.va, pva->priv2_dma.pa); + pva->priv2_dma.va = 0; + pva->priv2_dma.pa = 0; + } + + memset(fw_info, 0, sizeof(struct pva_fw)); + + return 0; +} + +int nvpva_request_firmware(struct platform_device *pdev, const char *fw_name, + const struct firmware **ucode_fw) +{ + int err = 0; + +#if IS_ENABLED(CONFIG_TEGRA_GRHOST) + *ucode_fw = nvhost_client_request_firmware(pdev, fw_name, true); + if (*ucode_fw == NULL) + err = -ENOENT; +#else + err = request_firmware(ucode_fw, fw_name, &pdev->dev); +#endif + return err; +} + +static int +pva_read_ucode_file(struct platform_device *pdev, + const char *fw_name, + struct pva *pva) +{ + int err = 0; + struct pva_fw *fw_info = &pva->fw_info; + int w; + u32 *ucode_ptr; + const struct firmware *ucode_fw = NULL; + + err = nvpva_request_firmware(pva->pdev, fw_name, &ucode_fw); + if (err != 0) { + dev_err(&pdev->dev, "Failed to load the %s firmware\n", + fw_name); + return err; + } + + fw_info->priv1_buffer.size 
= ucode_fw->size; + pva->priv1_dma.size = FW_CODE_DATA_END_ADDR - FW_CODE_DATA_START_ADDR; + pva->priv1_dma.size = ALIGN(pva->priv1_dma.size + SZ_4K, SZ_4K); + /* Allocate memory to R5 for app code, data or to log information */ + pva->priv1_dma.va = dma_alloc_coherent(&pdev->dev, pva->priv1_dma.size, + &pva->priv1_dma.pa, GFP_KERNEL); + if (!pva->priv1_dma.va) { + err = -ENOMEM; + goto clean_up; + } + + fw_info->priv1_buffer.va = pva->priv1_dma.va; + fw_info->priv1_buffer.pa = pva->priv1_dma.pa; + ucode_ptr = fw_info->priv1_buffer.va; + + /* copy the whole thing taking into account endianness */ + for (w = 0; w < ucode_fw->size / sizeof(u32); w++) + ucode_ptr[w] = le32_to_cpu(((__le32 *)ucode_fw->data)[w]); +clean_up: + release_firmware(ucode_fw); + + return err; +} + +static int pva_read_ucode_co(struct platform_device *pdev, + struct pva *pva) +{ + int err = 0; + struct pva_fw *fw_info = &pva->fw_info; + + if (pva->map_co_needed) { + err = nvpva_map_region(&pdev->dev, + pva->co->base, + pva->co->size, + &pva->priv1_dma.pa, + DMA_BIDIRECTIONAL); + if (err) { + err = -ENOMEM; + goto out; + } + } else { + pva->priv1_dma.pa = pva->co->base; + pva->priv1_dma.va = 0; + } + + fw_info->priv1_buffer.va = pva->priv1_dma.va; + fw_info->priv1_buffer.pa = pva->priv1_dma.pa; + fw_info->priv1_buffer.size = pva->co->size; + pva->priv1_dma.size = pva->co->size; + +out: + return err; +} + +static int pva_read_ucode(struct platform_device *pdev, const char *fw_name, + struct pva *pva) +{ + int err = 0; + struct pva_fw *fw_info = &pva->fw_info; + + if (pva->boot_from_file) + err = pva_read_ucode_file(pdev, fw_name, pva); + else + err = pva_read_ucode_co(pdev, pva); + + nvpva_dbg_fn(pva, "co iova = %llx\n", pva->priv1_dma.pa); + + fw_info->priv2_buffer.size = FW_DEBUG_DATA_TOTAL_SIZE; + + /* Make sure the address is aligned to 4K */ + pva->priv2_dma.size = ALIGN(fw_info->priv2_buffer.size, SZ_4K); + + /* Allocate memory to R5 for app code, data or to log information */ + pva->priv2_dma.va = dma_alloc_coherent(&pva->aux_pdev->dev, pva->priv2_dma.size, + &pva->priv2_dma.pa, GFP_KERNEL); + if (!pva->priv2_dma.va) { + err = -ENOMEM; + goto out; + } + + fw_info->priv2_buffer.pa = pva->priv2_dma.pa; + fw_info->priv2_buffer.va = pva->priv2_dma.va; + fw_info->priv2_reg_offset = FW_DEBUG_DATA_START_ADDR; + + /* setup trace buffer */ + fw_info->trace_buffer_size = FW_TRACE_BUFFER_SIZE; + pva->pva_trace.addr = fw_info->priv2_buffer.va; + pva->pva_trace.size = FW_TRACE_BUFFER_SIZE; + pva->pva_trace.offset = 0L; + + /* setup FW debug log buffer */ + pva->fw_debug_log.addr = fw_info->priv2_buffer.va + + FW_TRACE_BUFFER_SIZE + + FW_CODE_COVERAGE_BUFFER_SIZE; +out: + return err; +} + +static int pva_load_fw(struct platform_device *pdev, struct pva *pva) +{ + int err = 0; + struct nvhost_device_data *pdata = platform_get_drvdata(pva->pdev); + + nvpva_dbg_fn(pva, ""); + + err = pva_read_ucode(pdev, pdata->firmware_name, pva); + if (err < 0) + goto load_fw_err; + + return err; + +load_fw_err: + pva_free_fw(pdev, pva); + + return err; +} + +int pva_get_firmware_version(struct pva *pva, struct pva_version_info *info) +{ + uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + int err = 0; + u32 nregs; + + nregs = pva_cmd_R5_version(&cmd, flags); + + /* Submit request to PVA and wait for response */ + err = pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&pva->pdev->dev, + "mbox get firmware version cmd failed: %d\n", err); 
+ + return err; + } + + info->pva_r5_version = status.status[PVA_CMD_STATUS4_INDEX]; + info->pva_compat_version = status.status[PVA_CMD_STATUS5_INDEX]; + info->pva_revision = status.status[PVA_CMD_STATUS6_INDEX]; + info->pva_built_on = status.status[PVA_CMD_STATUS7_INDEX]; + + return err; +} + +int pva_boot_kpi(struct pva *pva, u64 *r5_boot_time) +{ + uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + int err = 0; + u32 nregs; + + nregs = pva_cmd_pva_uptime(&cmd, 255, flags); + + /* Submit request to PVA and wait for response */ + err = pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&pva->pdev->dev, "mbox get uptime cmd failed: %d\n", + err); + return err; + } + *r5_boot_time = status.status[PVA_CMD_STATUS7_INDEX]; + *r5_boot_time = ((*r5_boot_time) << 32); + *r5_boot_time = (*r5_boot_time) | status.status[PVA_CMD_STATUS6_INDEX]; + + return err; +} + +int pva_set_log_level(struct pva *pva, u32 log_level, bool mailbox_locked) +{ + uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + int err = 0; + u32 nregs; + + nregs = pva_cmd_set_logging_level(&cmd, log_level, flags); + + if (mailbox_locked) + pva_mailbox_send_cmd_sync_locked(pva, &cmd, nregs, &status); + else + pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + + if (err < 0) + nvpva_warn(&pva->pdev->dev, "mbox set log level failed: %d\n", + err); + + return err; +} + +u32 nvpva_get_id_idx(struct pva *dev, struct platform_device *pdev) +{ + s32 sid; + u32 i; + + if (pdev == NULL) + return 0; + + sid = nvpva_get_device_hwid(pdev, 0); + if (sid < 0) + return UINT_MAX; + + for (i = 0; i < dev->sid_count; i++) + if (dev->sids[i] == sid) + return i; + + return UINT_MAX; +} + +int nvpva_get_device_hwid(struct platform_device *pdev, + unsigned int id) +{ + struct device *dev = &pdev->dev; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) + struct iommu_fwspec *fwspec = dev->iommu_fwspec; +#else + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); +#endif + + if (!fwspec) + return -EINVAL; + + if (id >= fwspec->num_ids) + return -EINVAL; + + return fwspec->ids[id] & 0xffff; +} + +static int nvpva_write_hwid(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + u32 *id_idx; + u32 *reg_idx; + int *streamids = pva->sids; + u32 reg_array[16] = {0}; + + if (pva->version == PVA_HW_GEN1) { + id_idx = vm_regs_sid_idx_t19x; + reg_idx = vm_regs_reg_idx_t19x; + } else { + id_idx = vm_regs_sid_idx_t234; + reg_idx = vm_regs_reg_idx_t234; + } + + /* Go through the StreamIDs and assemble register values */ + for (i = 0; i < ARRAY_SIZE(pdata->vm_regs); i++) { + u64 addr = pdata->vm_regs[i].addr; + u32 shift = pdata->vm_regs[i].shift; + u32 val; + + /* Break if this was the last StreamID */ + if (!addr) + break; + + /* Update the StreamID value */ + val = ((streamids[id_idx[i]] & 0x000000FF) << shift); + reg_array[reg_idx[i]] |= val; + } + + /*write register values */ + for (i = 0; i < ARRAY_SIZE(pdata->vm_regs); i++) { + u64 addr = pdata->vm_regs[i].addr; + u32 val; + + /* Break if this was the last StreamID */ + if (!addr) + break; + + val = reg_array[reg_idx[i]]; + nvpva_dbg_fn(pva, "i= %d, reg_idx[i] = %d, val = %d\n", + i, reg_idx[i], val); + host1x_writel(pdev, addr, val); + } + + return 0; +} + +int pva_finalize_poweron(struct platform_device *pdev) +{ + struct 
nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int err = 0; + int i; + u64 timestamp; + u64 timestamp2; + + timestamp = nvpva_get_tsc_stamp(); + + nvpva_dbg_fn(pva, ""); + if (!pva->boot_from_file) { + nvpva_dbg_fn(pva, "boot from co"); + pva->co = pva_fw_co_get_info(pva); + + if (pva->co == NULL) { + nvpva_dbg_fn(pva, "failed to get carveout"); + err = -ENOMEM; + goto err_poweron; + } + + nvpva_dbg_fn(pva, "CO base = %llx, CO size = %llu\n", + (u64)pva->co->base, (u64)pva->co->size); + } + + /* Enable LIC_INTERRUPT line for HSP1, H1X and WDT */ + if (pva->version == PVA_HW_GEN1) { + host1x_writel(pva->pdev, sec_lic_intr_enable_r(pva->version), + sec_lic_intr_enable_hsp_f(SEC_LIC_INTR_HSP1) | + sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_19) | + sec_lic_intr_enable_wdt_f(SEC_LIC_INTR_WDT)); + } else { + host1x_writel(pva->pdev, sec_lic_intr_enable_r(pva->version), + sec_lic_intr_enable_hsp_f(SEC_LIC_INTR_HSP1) | + sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_23) | + sec_lic_intr_enable_wdt_f(SEC_LIC_INTR_WDT)); + + } + + nvpva_write_hwid(pdev); + if (!pva->boot_from_file) + err = pva_load_fw(pdev, pva); + else + err = pva_load_fw(pva->aux_pdev, pva); + + if (err < 0) { + nvpva_err(&pdev->dev, " pva fw failed to load\n"); + goto err_poweron; + } + + for (i = 0; i < pva->version_config->irq_count; i++) + enable_irq(pva->irq[i]); + + err = pva_init_fw(pdev); + if (err < 0) { + nvpva_err(&pdev->dev, " pva fw failed to init\n"); + goto err_poweron; + } + + timestamp2 = nvpva_get_tsc_stamp() - timestamp; + + pva_set_log_level(pva, pva->log_level, true); + pva->booted = true; + + timestamp = nvpva_get_tsc_stamp() - timestamp; + + nvpva_dbg_prof(pva, "Power on took %lld us, without log level%lld\n", + (32 * timestamp)/1000, (32 * timestamp2)/1000); + + return err; + +err_poweron: + for (i = 0; i < pva->version_config->irq_count; i++) + disable_irq(pva->irq[i]); + return err; +} + +void save_fw_debug_log(struct pva *pva) +{ + if (pva->fw_debug_log.saved_log != NULL && + pva->fw_debug_log.addr != NULL) { + mutex_lock(&pva->fw_debug_log.saved_log_lock); + memcpy(pva->fw_debug_log.saved_log, pva->fw_debug_log.addr, + pva->fw_debug_log.size); + mutex_unlock(&pva->fw_debug_log.saved_log_lock); + } +} + +int pva_prepare_poweroff(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + + /* + * Disable IRQs. Interrupt handler won't be under execution after the + * call returns. + */ + for (i = 0; i < pva->version_config->irq_count; i++) + disable_irq(pva->irq[i]); + + /* disable error reporting to HSM*/ + pva_disable_ec_err_reporting(pva); + + /* Put PVA to reset to ensure that the firmware doesn't get accessed */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + reset_control_acquire(pdata->reset_control); +#endif + reset_control_assert(pdata->reset_control); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + reset_control_release(pdata->reset_control); +#endif + save_fw_debug_log(pva); + pva->booted = false; + pva_free_fw(pdev, pva); + + return 0; +} + +#ifdef CONFIG_TEGRA_SOC_HWPM +int pva_hwpm_ip_pm(void *ip_dev, bool disable) +{ + int err = 0; + struct platform_device *dev = (struct platform_device *)ip_dev; + + struct nvhost_device_data *pdata = platform_get_drvdata(dev); + struct pva *pva = pdata->private_data; + + nvpva_dbg_info(pva, "ip power management %s", + disable ? 
"disable" : "enable"); + + if (disable) { + err = nvhost_module_busy(ip_dev); + if (err < 0) + dev_err(&dev->dev, "nvhost_module_busy failed"); + } else { + nvhost_module_idle(ip_dev); + } + + return err; +} + +int pva_hwpm_ip_reg_op(void *ip_dev, enum tegra_soc_hwpm_ip_reg_op reg_op, + u32 inst_element_index, u64 reg_offset, u32 *reg_data) +{ + struct platform_device *dev = (struct platform_device *)ip_dev; + struct nvhost_device_data *pdata = platform_get_drvdata(dev); + struct pva *pva = pdata->private_data; + + if (reg_offset > UINT_MAX) + return -EINVAL; + + nvpva_dbg_info(pva, "reg_op %d reg_offset %llu", reg_op, reg_offset); + + if (reg_op == TEGRA_SOC_HWPM_IP_REG_OP_READ) + *reg_data = host1x_readl(dev, + (hwpm_get_offset() + (unsigned int)reg_offset)); + else if (reg_op == TEGRA_SOC_HWPM_IP_REG_OP_WRITE) + host1x_writel(dev, + (hwpm_get_offset() + (unsigned int)reg_offset), + *reg_data); + + return 0; +} +#endif + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) +static ssize_t clk_cap_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct nvhost_device_data *pdata = + container_of(kobj, struct nvhost_device_data, clk_cap_kobj); + /* i is indeed 'index' here after type conversion */ + int ret, i = attr - pdata->clk_cap_attrs; + struct clk_bulk_data *clks = &pdata->clks[i]; + struct clk *clk = clks->clk; + unsigned long freq_cap; + long freq_cap_signed; + + ret = kstrtoul(buf, 0, &freq_cap); + if (ret) + return -EINVAL; + /* Remove previous freq cap to get correct rounted rate for new cap */ + ret = clk_set_max_rate(clk, UINT_MAX); + if (ret < 0) + return ret; + + freq_cap_signed = clk_round_rate(clk, freq_cap); + if (freq_cap_signed < 0) + return -EINVAL; + freq_cap = (unsigned long)freq_cap_signed; + /* Apply new freq cap */ + ret = clk_set_max_rate(clk, freq_cap); + if (ret < 0) + return ret; + + /* Update the clock rate */ + clk_set_rate(clks->clk, freq_cap); + if (ret < 0) + return ret; + + return count; +} + +static ssize_t clk_cap_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct nvhost_device_data *pdata = + container_of(kobj, struct nvhost_device_data, clk_cap_kobj); + /* i is indeed 'index' here after type conversion */ + int i = attr - pdata->clk_cap_attrs; + struct clk_bulk_data *clks = &pdata->clks[i]; + struct clk *clk = clks->clk; + long max_rate; + + max_rate = clk_round_rate(clk, UINT_MAX); + if (max_rate < 0) + return max_rate; + + return snprintf(buf, PAGE_SIZE, "%ld\n", max_rate); +} + +static struct kobj_type nvpva_kobj_ktype = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +#endif + +static int pva_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct nvhost_device_data *pdata; + const struct of_device_id *match; + struct pva *pva; + int err = 0; + size_t i; + +#ifdef CONFIG_TEGRA_SOC_HWPM + u32 offset; +#endif + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + struct kobj_attribute *attr = NULL; + int j = 0; + struct clk_bulk_data *clks; + struct clk *c; +#endif + + match = of_match_device(tegra_pva_of_match, dev); + if (!match) { + dev_err(dev, "no match for pva dev\n"); + err = -ENODATA; + goto err_get_pdata; + } + + pdata = (struct nvhost_device_data *)match->data; + + WARN_ON(!pdata); + if (!pdata) { + dev_info(dev, "no platform data\n"); + err = -ENODATA; + goto err_get_pdata; + } +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + of_platform_default_populate(dev->of_node, NULL, dev); +#endif + + if ((pdata->version != PVA_HW_GEN1) + && !is_cntxt_initialized()) { + dev_warn(&pdev->dev, 
+ "nvpva cntxt was not initialized, deferring probe."); + return -EPROBE_DEFER; + } + + if (pdata->version == PVA_HW_GEN1 && + tegra_get_sku_id() == 0x9E) { + dev_err(dev, "PVA IP is disabled in SKU\n"); + err = -ENODEV; + goto err_no_ip; + } + + if (pdata->version == PVA_HW_GEN1 && + tegra_get_sku_id() == 0x9F && pdata->class == NV_PVA1_CLASS_ID) { + dev_err(dev, "PVA1 IP is disabled in SKU\n"); + err = -ENODEV; + goto err_no_ip; + } + + pva = devm_kzalloc(dev, sizeof(*pva), GFP_KERNEL); + if (!pva) { + err = -ENOMEM; + goto err_alloc_pva; + } + + /* Initialize PVA private data */ + if (pdata->version == PVA_HW_GEN2) { + pva->version = PVA_HW_GEN2; + pdata->firmware_name = "nvpva_020.fw"; + pdata->firmware_not_in_subdir = true; + pva->submit_cmd_mode = PVA_SUBMIT_MODE_MMIO_CCQ; + pva->version_config = &pva_t23x_config; + } else { + pva->version = PVA_HW_GEN1; + pdata->firmware_name = "nvpva_010.fw"; + pdata->firmware_not_in_subdir = true; + pva->submit_cmd_mode = PVA_SUBMIT_MODE_MAILBOX; + pva->version_config = &pva_t19x_config; + } + + pva->pdev = pdev; + + /* Enable powergating and timeout only on silicon */ + if (!tegra_platform_is_silicon()) { + pdata->can_powergate = false; + pva->timeout_enabled = false; + } else { + pva->timeout_enabled = true; + } + + /* Initialize nvhost specific data */ + pdata->pdev = pdev; + mutex_init(&pdata->lock); + pdata->private_data = pva; + platform_set_drvdata(pdev, pdata); + mutex_init(&pva->mailbox_mutex); + mutex_init(&pva->ccq_mutex); + pva->submit_task_mode = PVA_SUBMIT_MODE_MMIO_CCQ; + pva->slcg_disable = 0; + pva->vmem_war_disable = 0; + pva->vpu_printf_enabled = true; + pva->vpu_debug_enabled = true; + pva->driver_log_mask = NVPVA_DEFAULT_DBG_MASK; + pva->profiling_level = 0; + pva->stats_enabled = false; + memset(&pva->vpu_util_info, 0, sizeof(pva->vpu_util_info)); + pva->syncpts.syncpts_mapped_r = false; + pva->syncpts.syncpts_mapped_rw = false; + nvpva_dbg_fn(pva, "match. 
compatible = %s", match->compatible); + if (is_tegra_hypervisor_mode()) + pva->map_co_needed = false; + else + pva->map_co_needed = true; + +#ifdef CONFIG_PVA_CO_DISABLED + pva->boot_from_file = true; +#else + if (pdata->version == PVA_HW_GEN1) + pva->boot_from_file = true; + else + pva->boot_from_file = false; +#endif + +#ifdef __linux__ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE + if (tegra_chip_get_revision() != TEGRA194_REVISION_A01) +#else + if (pdata->version != PVA_HW_GEN1) +#endif + pva->vmem_war_disable = 1; +#endif +#endif + + /* Map MMIO range to kernel space */ + err = nvhost_client_device_get_resources(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "nvhost_client_device_get_resources failed\n"); + goto err_get_resources; + } + + /* Get clocks */ + err = nvhost_module_init(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "nvhost_module_init failed\n"); + goto err_module_init; + } + + /* + * Add this to nvhost device list, initialize scaling, + * setup memory management for the device, create dev nodes + */ + err = nvhost_client_device_init(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "nvhost_client_device_init failed\n"); + goto err_client_device_init; + } + + if (pdata->version != PVA_HW_GEN1) { + pva->aux_pdev = + nvpva_iommu_context_dev_allocate(aux_dev_name, + aux_dev_name_len, + false); + if (pva->aux_pdev == NULL) { + dev_err(&pva->pdev->dev, + "failed to allocate aux device"); + goto err_context_alloc; + } + } else { + pva->aux_pdev = pva->pdev; + } + + pva->pool = nvpva_queue_init(pdev, pva->aux_pdev, &pva_queue_ops, + MAX_PVA_QUEUE_COUNT); + if (IS_ERR(pva->pool)) { + err = PTR_ERR(pva->pool); + goto err_queue_init; + } + + err = pva_alloc_task_status_buffer(pva); + if (err) { + dev_err(&pva->pdev->dev, "failed to init task status buffer"); + goto err_status_init; + } + + err = nvpva_client_context_init(pva); + if (err) { + dev_err(&pva->pdev->dev, "failed to init client context"); + goto err_client_ctx_init; + } + + err = pva_register_isr(pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "failed to register isr"); + goto err_isr_init; + } + + for (i = 0; i < pva->version_config->irq_count; i++) + init_waitqueue_head(&pva->cmd_waitqueue[i]); + + pva_abort_init(pva); + + err = nvhost_syncpt_unit_interface_init(pdev); + if (err) + goto err_mss_init; + + err = nvpva_syncpt_unit_interface_init(pdev, pva->aux_pdev); + if (err) + goto err_syncpt_xface_init; + + mutex_init(&pva->pva_auth.allow_list_lock); + mutex_init(&pva->pva_auth_sys.allow_list_lock); + pva->pva_auth.pva_auth_enable = true; + pva->pva_auth_sys.pva_auth_enable = true; + +#ifdef CONFIG_DEBUG_FS + pva_debugfs_init(pdev); +#endif + + pva->sid_count = 0; + err = nvpva_iommu_context_dev_get_sids(&pva->sids[1], + &pva->sid_count, + NVPVA_USER_VM_COUNT); + if (err) + goto err_iommu_ctxt_init; + + pva->sids[0] = nvpva_get_device_hwid(pdev, 0); + if (pva->sids[0] < 0) { + err = pva->sids[0]; + goto err_iommu_ctxt_init; + } + + ++(pva->sid_count); + +#ifdef CONFIG_TEGRA_SOC_HWPM + offset = hwpm_get_offset(); + + if ((UINT_MAX - offset) < pdev->resource[0].start) { + err = -ENODEV; + goto err_mss_init; + } + + nvpva_dbg_info(pva, "hwpm ip %s register", pdev->name); + pva->hwpm_ip_ops.ip_dev = (void *)pdev; + pva->hwpm_ip_ops.ip_base_address = (pdev->resource[0].start + offset); + pva->hwpm_ip_ops.resource_enum = TEGRA_SOC_HWPM_RESOURCE_PVA; + pva->hwpm_ip_ops.hwpm_ip_pm = &pva_hwpm_ip_pm; + pva->hwpm_ip_ops.hwpm_ip_reg_op = &pva_hwpm_ip_reg_op; + 
tegra_soc_hwpm_ip_register(&pva->hwpm_ip_ops); +#endif + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + if (pdata->num_clks > 0) { + err = kobject_init_and_add(&pdata->clk_cap_kobj, &nvpva_kobj_ktype, + &pdev->dev.kobj, "%s", "clk_cap"); + if (err) { + dev_err(dev, "Could not add dir 'clk_cap'\n"); + goto err_iommu_ctxt_init; + } + + pdata->clk_cap_attrs = devm_kcalloc(dev, pdata->num_clks, + sizeof(*attr), GFP_KERNEL); + if (!pdata->clk_cap_attrs) + goto err_cleanup_sysfs; + + for (j = 0; j < pdata->num_clks; ++j) { + clks = &pdata->clks[j]; + c = clks->clk; + if (!c) + continue; + + attr = &pdata->clk_cap_attrs[j]; + attr->attr.name = __clk_get_name(c); + /* octal permission is preferred nowadays */ + attr->attr.mode = 0644; + attr->show = clk_cap_show; + attr->store = clk_cap_store; + sysfs_attr_init(&attr->attr); + if (sysfs_create_file(&pdata->clk_cap_kobj, &attr->attr)) { + dev_err(dev, "Could not create sysfs attribute %s\n", + __clk_get_name(c)); + err = -EIO; + goto err_cleanup_sysfs; + } + } + } +#endif + + return 0; + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) +err_cleanup_sysfs: + /* kobj of nvpva_kobj_ktype cleans up sysfs entries automatically */ + kobject_put(&pdata->clk_cap_kobj); +#endif +err_iommu_ctxt_init: + nvpva_syncpt_unit_interface_deinit(pdev, pva->aux_pdev); +err_syncpt_xface_init: +err_mss_init: +err_isr_init: + nvpva_client_context_deinit(pva); +err_client_ctx_init: + pva_free_task_status_buffer(pva); +err_status_init: + nvpva_queue_deinit(pva->pool); +err_queue_init: + if (pdata->version != PVA_HW_GEN1) + nvpva_iommu_context_dev_release(pva->aux_pdev); +err_context_alloc: + nvhost_client_device_release(pdev); +err_client_device_init: + nvhost_module_deinit(pdev); +err_module_init: +err_get_resources: + devm_kfree(dev, pva); +err_alloc_pva: +err_no_ip: +err_get_pdata: + + return err; +} + +static int __exit pva_remove(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + int i; + +#if !IS_ENABLED(CONFIG_TEGRA_GRHOST) + struct kobj_attribute *attr = NULL; + + if (&pdata->clk_cap_kobj) { + for (i = 0; i < pdata->num_clks; i++) { + attr = &pdata->clk_cap_attrs[i]; + sysfs_remove_file(&pdata->clk_cap_kobj, &attr->attr); + } + + kobject_put(&pdata->clk_cap_kobj); + } +#endif + +#ifdef CONFIG_TEGRA_SOC_HWPM + tegra_soc_hwpm_ip_unregister(&pva->hwpm_ip_ops); +#endif + +#ifdef CONFIG_DEBUG_FS + pva_debugfs_deinit(pva); +#endif + if (pdata->version != PVA_HW_GEN1) + nvpva_iommu_context_dev_release(pva->aux_pdev); + + pva_auth_allow_list_destroy(&pva->pva_auth_sys); + pva_auth_allow_list_destroy(&pva->pva_auth); + pva_free_task_status_buffer(pva); + nvpva_syncpt_unit_interface_deinit(pdev, pva->aux_pdev); + nvpva_client_context_deinit(pva); + nvpva_queue_deinit(pva->pool); + nvhost_client_device_release(pdev); + for (i = 0; i < pva->version_config->irq_count; i++) + free_irq(pva->irq[i], pva); + + nvhost_module_deinit(pdev); + mutex_destroy(&pdata->lock); + mutex_destroy(&pva->mailbox_mutex); + mutex_destroy(&pva->ccq_mutex); + mutex_destroy(&pva->pva_auth.allow_list_lock); + mutex_destroy(&pva->pva_auth_sys.allow_list_lock); + + return 0; +} + +static struct platform_driver pva_driver = { + .probe = pva_probe, + .remove = __exit_p(pva_remove), + .driver = { + .owner = THIS_MODULE, + .name = "pva", +#ifdef CONFIG_OF + .of_match_table = tegra_pva_of_match, +#endif +#ifdef CONFIG_PM + .pm = &nvhost_module_pm_ops, +#endif + }, +}; +#if IS_ENABLED(CONFIG_TEGRA_GRHOST) +static int __init 
nvpva_init(void) +{ + int err; + + err = platform_driver_register(&nvpva_iommu_context_dev_driver); + if (err < 0) + return err; + + err = platform_driver_register(&pva_driver); + if (err < 0) + platform_driver_unregister(&nvpva_iommu_context_dev_driver); + + return err; +} +module_init(nvpva_init); +static void __exit nvpva_exit(void) +{ + platform_driver_unregister(&pva_driver); + platform_driver_unregister(&nvpva_iommu_context_dev_driver); +} +module_exit(nvpva_exit); +#else +static struct host1x_driver host1x_nvpva_driver = { + .driver = { + .name = "host1x-nvpva", + }, + .subdevs = tegra_pva_of_match, +}; +static int __init nvpva_init(void) +{ + int err; + + err = host1x_driver_register(&host1x_nvpva_driver); + if (err < 0) + goto out; + + err = platform_driver_register(&nvpva_iommu_context_dev_driver); + if (err < 0) + goto ctx_failed; + + err = platform_driver_register(&pva_driver); + if (err) + goto pva_failed; + + return err; + +pva_failed: + platform_driver_unregister(&nvpva_iommu_context_dev_driver); +ctx_failed: + host1x_driver_unregister(&host1x_nvpva_driver); +out: + return err; +} + +module_init(nvpva_init); +static void __exit nvpva_exit(void) +{ + platform_driver_unregister(&pva_driver); + platform_driver_unregister(&nvpva_iommu_context_dev_driver); + host1x_driver_unregister(&host1x_nvpva_driver); +} + +module_exit(nvpva_exit); +#endif + +#if KERNEL_VERSION(5, 16, 0) <= LINUX_VERSION_CODE +MODULE_IMPORT_NS(DMA_BUF); +#endif +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/tegra/host/pva/pva.h b/drivers/video/tegra/host/pva/pva.h new file mode 100644 index 00000000..7e55c8c7 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva.h @@ -0,0 +1,615 @@ +/* + * drivers/video/tegra/host/pva/pva.h + * + * Tegra PVA header + * + * Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVHOST_PVA_H__ +#define __NVHOST_PVA_H__ + +#include +#include +#include +#include + +#include "nvpva_queue.h" +#include "pva_regs.h" +#include "pva_nvhost.h" +#include "pva-ucode-header.h" +#include "pva_vpu_app_auth.h" +#include "pva_fw_carveout.h" + +#ifdef CONFIG_TEGRA_SOC_HWPM +#include +#endif + +/** + * PVA Host1x class IDs + */ +enum { + NV_PVA0_CLASS_ID = 0xF1, + NV_PVA1_CLASS_ID = 0xF2, +}; + +struct nvpva_client_context; + +enum pva_submit_mode { + PVA_SUBMIT_MODE_MAILBOX = 0, + PVA_SUBMIT_MODE_MMIO_CCQ = 1, +}; + +struct pva_version_info { + u32 pva_r5_version; + u32 pva_compat_version; + u32 pva_revision; + u32 pva_built_on; +}; + +/** + * Queue count of 8 is maintained per PVA. 
+ */ +#define MAX_PVA_QUEUE_COUNT 8 +#define MAX_PVA_CLIENTS 8 +#define MAX_PVA_TASK_COUNT_PER_QUEUE 256U +#define MAX_PVA_SEG_COUNT_PER_QUEUE 4U +#define MAX_PVA_TASK_COUNT_PER_QUEUE_SEG \ + (MAX_PVA_TASK_COUNT_PER_QUEUE/MAX_PVA_SEG_COUNT_PER_QUEUE) + +#define NVPVA_USER_VM_COUNT MAX_PVA_CLIENTS + +/** + * Maximum task count that a PVA engine can support + */ +#define MAX_PVA_TASK_COUNT \ + ((MAX_PVA_QUEUE_COUNT) * (MAX_PVA_TASK_COUNT_PER_QUEUE)) + +/** + * Minium PVA frequency (10MHz) + */ +#define MIN_PVA_FREQUENCY 10000000 + +/** + * Maximum number of IRQS to be serviced by the driver. Gen1 has a single IRQ, + * Gen2 has 9. + */ +#define MAX_PVA_IRQS 9 +#define MAX_PVA_INTERFACE 9 +#define PVA_MAILBOX_INDEX 0 +#define PVA_CCQ0_INDEX 1 +#define PVA_CCQ1_INDEX 2 +#define PVA_CCQ2_INDEX 3 +#define PVA_CCQ3_INDEX 4 +#define PVA_CCQ4_INDEX 5 +#define PVA_CCQ5_INDEX 6 +#define PVA_CCQ6_INDEX 7 +#define PVA_CCQ7_INDEX 8 + + +/** + * Number of VPUs for each PVA + */ +#define NUM_VPU_BLOCKS 2 + +/** + * nvpva_dbg_* macros provide wrappers around kernel print functions + * that use a debug mask configurable at runtime to provide control over + * the level of detail that gets printed. + */ +#ifdef CONFIG_DEBUG_FS + /* debug info, default is compiled-in but effectively disabled (0 mask) */ + #define NVPVA_DEBUG + /*e.g: echo 1 > /d/pva0/driver_dbg_mask */ + #define NVPVA_DEFAULT_DBG_MASK 0 +#else + /* manually enable and turn on the mask */ + #define NVPVA_DEFAULT_DBG_MASK (pva_dbg_info) +#endif + +enum nvpva_dbg_categories { + pva_dbg_info = BIT(0), /* slightly verbose info */ + pva_dbg_fn = BIT(2), /* fn name tracing */ + pva_dbg_reg = BIT(3), /* register accesses, very verbose */ + pva_dbg_prof = BIT(7), /* profiling info */ + pva_dbg_mem = BIT(31), /* memory accesses, very verbose */ +}; + +#if defined(NVPVA_DEBUG) +#define nvpva_dbg(pva, dbg_mask, format, arg...) \ + do { \ + if (unlikely((dbg_mask)&pva->driver_log_mask)) { \ + pr_info("nvpva %s: " format "\n", __func__, ##arg); \ + } \ + } while (0) + +#else /* NVPVA_DEBUG */ +#define nvpva_dbg(pva, dbg_mask, format, arg...) \ + do { \ + if (0) { \ + (void) pva; /* unused variable */ \ + pr_info("nvhost %s: " format "\n", __func__, ##arg); \ + } \ + } while (0) + +#endif + +/* convenience,shorter err/fn/dbg_info */ +#define nvpva_err(d, fmt, arg...) \ + dev_err(d, "%s: " fmt "\n", __func__, ##arg) + +#define nvpva_err_ratelimited(d, fmt, arg...) \ + dev_err_ratelimited(d, "%s: " fmt "\n", __func__, ##arg) + +#define nvpva_warn(d, fmt, arg...) \ + dev_warn(d, "%s: " fmt "\n", __func__, ##arg) + +#define nvpva_dbg_fn(pva, fmt, arg...) \ + nvpva_dbg(pva, pva_dbg_fn, fmt, ##arg) + +#define nvpva_dbg_info(pva, fmt, arg...) \ + nvpva_dbg(pva, pva_dbg_info, fmt, ##arg) + +#define nvpva_dbg_prof(pva, fmt, arg...) 
\ + nvpva_dbg(pva, pva_dbg_prof, fmt, ##arg) + +/** + * @brief struct to hold the segment details + * + * addr: virtual addr of the segment from PRIV2 address base + * size: segment size + * offset: offset of the addr from priv2 base + * + */ +struct pva_seg_info { + void *addr; + u32 size; + u32 offset; +}; + +/** + * @breif struct to hold the segment details for debug purpose + * + * pva Pointer to pva struct + * seg_info pva_seg_info struct + * + */ +struct pva_crashdump_debugfs_entry { + struct pva *pva; + struct pva_seg_info seg_info; +}; + +/** + * @brief struct to handle dma alloc memory info + * + * size size allocated + * phys_addr physical address + * va virtual address + * + */ +struct pva_dma_alloc_info { + size_t size; + dma_addr_t pa; + void *va; +}; + +/** + * @brief struct to handle the PVA firmware information + * + * hdr pointer to the pva_code_hdr struct + * priv1_buffer pva_dma_alloc_info for priv1_buffer + * priv2_buffer pva_dma_alloc_info for priv2_buffer + * priv2_reg_offset priv2 register offset from uCode + * trace_buffer_size buffer size for trace log + * + */ +struct pva_fw { + struct pva_ucode_hdr_s *hdr; + + struct pva_dma_alloc_info priv1_buffer; + struct pva_dma_alloc_info priv2_buffer; + u32 priv2_reg_offset; + + u32 trace_buffer_size; +}; + +/* + * @brief store trace log segment's address and size + * + * addr Pointer to the pva trace log segment + * size Size of pva trace log segment + * offset Offset in bytes for trace log segment + * + */ +struct pva_trace_log { + void *addr; + u32 size; + u32 offset; +}; + +struct pva_fw_debug_log { + void *addr; + u32 size; + struct mutex saved_log_lock; + u8 *saved_log; +}; +void save_fw_debug_log(struct pva *pva); + +/* + * @brief stores address and other attributes of the vpu function table + * + * addr The pointer to start of the VPU function table + * size Table size of the function table + * handle The IOVA address of the function table + * entries The total number of entries in the function table + * + */ +struct pva_func_table { + struct vpu_func *addr; + uint32_t size; + dma_addr_t handle; + uint32_t entries; +}; + +struct pva_status_interface_registers { + uint32_t registers[5]; +}; + +#define PVA_HW_GEN1 1 +#define PVA_HW_GEN2 2 + +/** + * @brief HW version specific configuration and functions + * read_mailbox Function to read from mailbox based on PVA revision + * write_mailbox Function to write to mailbox based on PVA revision + * ccq_send_task Function to submit task to ccq based on PVA revision + * submit_cmd_sync_locked + * Function to submit command to PVA based on PVA revision + * Should be called only if appropriate locks have been + * acquired + * + * submit_cmd_sync Function to submit command to PVA based on PVA revision + * irq_count Number of IRQs associated with this PVA revision + * + */ + +struct pva_version_config { + u32 (*read_mailbox)(struct platform_device *pdev, u32 mbox_id); + void (*write_mailbox)(struct platform_device *pdev, u32 mbox_id, + u32 value); + void (*read_status_interface)(struct pva *pva, uint32_t interface_id, + u32 isr_status, + struct pva_cmd_status_regs *status_out); + int (*ccq_send_task)(struct pva *pva, u32 queue_id, + dma_addr_t task_addr, u8 batchsize, u32 flags); + int (*submit_cmd_sync_locked)(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs, u32 queue_id, + struct pva_cmd_status_regs *status_regs); + + int (*submit_cmd_sync)(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs, u32 queue_id, + struct pva_cmd_status_regs *status_regs); + int irq_count; +}; + 
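+/*
+ * Illustrative sketch only, not part of this patch: a helper like the one
+ * below shows the intended use of the per-generation ops table, so that
+ * callers never deal with mailbox vs. MMIO CCQ differences directly. The
+ * helper name is hypothetical; it assumes a pva_version_config that was
+ * bound at probe time.
+ */
+static inline int
+pva_cfg_submit_sync_sketch(struct pva_version_config *cfg, struct pva *pva,
+			   struct pva_cmd_s *cmd, u32 nregs, u32 queue_id,
+			   struct pva_cmd_status_regs *status_regs)
+{
+	/* Dispatch through the generation-specific hook */
+	return cfg->submit_cmd_sync(pva, cmd, nregs, queue_id, status_regs);
+}
+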
+/** + * @brief Describe a VPU hardware debug block + * vbase Address mapped to virtual space + */ +struct pva_vpu_dbg_block { + void __iomem *vbase; +}; + +/** + * @brief VPU utilization information + * + * start_stamp time stamp when measurment started + * end_stamp time stamp when measurment is to end + * vpu_stats avaraged vpu utilization stats + * stats_fw_buffer_iova + * stats_fw_buffer_va + */ +struct pva_vpu_util_info { + u64 start_stamp; + u64 end_stamp; + u64 vpu_stats[2]; + dma_addr_t stats_fw_buffer_iova; + struct pva_vpu_stats_s *stats_fw_buffer_va; +}; + +struct scatterlist; +struct nvpva_syncpt_desc { + dma_addr_t addr; + size_t size; + u32 id; + u32 assigned; +}; + +struct nvpva_syncpts_desc { + struct platform_device *host_pdev; + struct nvpva_syncpt_desc syncpts_rw[MAX_PVA_QUEUE_COUNT]; + dma_addr_t syncpt_start_iova_r; + dma_addr_t syncpt_range_r; + dma_addr_t syncpt_start_iova_rw; + dma_addr_t syncpt_range_rw; + uint32_t page_size; + bool syncpts_mapped_r; + bool syncpts_mapped_rw; +}; + +/** + * @brief Driver private data, shared with all applications + * + * version pva version; 1 or 2 + * pdev Pointer to the PVA device + * pool Pointer to Queue table available for the PVA + * fw_info firmware information struct + * irq IRQ number obtained on registering the module + * cmd_waitqueue Command Waitqueue for response waiters + * for syncronous commands + * cmd_status_regs Response to commands is stored into this + * structure temporarily + * cmd_status Status of the command interface + * mailbox_mutex Mutex to avoid concurrent mailbox accesses + * debugfs_entry_r5 debugfs segment information for r5 + * debugfs_entry_vpu0 debugfs segment information for vpu0 + * debugfs_entry_vpu1 debugfs segment information for vpu1 + * priv1_dma struct pva_dma_alloc_info for priv1_dma + * priv2_dma struct pva_dma_alloc_info for priv2_dma + * pva_trace struct for pva_trace_log + * submit_mode Select the task submit mode + * dbg_vpu_app_id Set the vpu_app id to debug + * r5_dbg_wait Set the r5 debugger to wait + * timeout_enabled Set pva timeout enabled based on debug + * slcg_disable Second level Clock Gating control variable + * vpu_printf_enabled + * vpu_debug_enabled + * log_level controls the level of detail printed by FW + * debug statements + * profiling_level + * driver_log_mask controls the level of detail printed by kernel + * debug statements + */ + +struct pva { + int version; + struct pva_version_config *version_config; + struct platform_device *pdev; + struct platform_device *aux_pdev; + struct nvpva_queue_pool *pool; + struct pva_fw fw_info; + struct nvpva_carveout_info *co; + struct nvpva_carveout_info fw_carveout; + struct pva_vpu_auth_s pva_auth; + struct pva_vpu_auth_s pva_auth_sys; + struct nvpva_syncpts_desc syncpts; + + int irq[MAX_PVA_IRQS]; + s32 sids[16]; + u32 sid_count; + u32 ec_state[8]; + + wait_queue_head_t cmd_waitqueue[MAX_PVA_INTERFACE]; + struct pva_cmd_status_regs cmd_status_regs[MAX_PVA_INTERFACE]; + enum pva_cmd_status cmd_status[MAX_PVA_INTERFACE]; + struct mutex mailbox_mutex; + + struct mutex ccq_mutex; + + struct pva_crashdump_debugfs_entry debugfs_entry_r5; + struct pva_crashdump_debugfs_entry debugfs_entry_vpu0; + struct pva_crashdump_debugfs_entry debugfs_entry_vpu1; + + struct pva_dma_alloc_info priv1_dma; + struct pva_dma_alloc_info priv2_dma; + /* Circular array to share with PVA R5 FW for task status info */ + struct pva_dma_alloc_info priv_circular_array; + /* Current position to read task status buffer from the circular + * array + */ + u32 
circular_array_rd_pos; + /* Current position to write task status buffer from the circular + * array + */ + u32 circular_array_wr_pos; + struct work_struct task_update_work; + atomic_t n_pending_tasks; + struct workqueue_struct *task_status_workqueue; + struct pva_trace_log pva_trace; + struct pva_fw_debug_log fw_debug_log; + u32 submit_task_mode; + u32 submit_cmd_mode; + + u32 r5_dbg_wait; + bool timeout_enabled; + u32 slcg_disable; + u32 vmem_war_disable; + bool vpu_printf_enabled; + bool vpu_debug_enabled; + bool stats_enabled; + bool map_co_needed; + bool boot_from_file; + struct pva_vpu_util_info vpu_util_info; + u32 profiling_level; + + struct work_struct pva_abort_handler_work; + bool booted; + u32 log_level; + u32 driver_log_mask; + struct nvpva_client_context *clients; + struct mutex clients_lock; + + struct pva_vpu_dbg_block vpu_dbg_blocks[NUM_VPU_BLOCKS]; + +#ifdef CONFIG_TEGRA_SOC_HWPM + struct tegra_soc_hwpm_ip_ops hwpm_ip_ops; +#endif +}; + +/** + * @brief Copy traces to kernel trace buffer. + * + * When mailbox interrupt for copying ucode trace buffer to + * kernel-ucode shared trace buffer is arrived it copies the kernel-ucode + * shared trace buffer to kernel ftrace buffer + * + * @pva Pointer to pva structure + * + */ +void pva_trace_copy_to_ftrace(struct pva *pva); + +/** + * @brief Register PVA ISR + * + * This function called from driver to register the + * PVA ISR with IRQ. + * + * @param pdev Pointer to PVA device + * @return 0 on Success or negative error code + * + */ +int pva_register_isr(struct platform_device *dev); + +/** + * @brief deInitiallze pva debug utils + * + * @param pva Pointer to PVA device + * @return none + * + */ +void pva_debugfs_deinit(struct pva *pva); + +/** + * @brief Initiallze pva debug utils + * + * @param pdev Pointer to PVA device + * @return none + * + */ +void pva_debugfs_init(struct platform_device *pdev); + +/** + * @brief Initiallze PVA abort handler + * + * @param pva Pointer to PVA structure + * @return none + * + */ +void pva_abort_init(struct pva *pva); + +/** + * @brief Recover PVA back into working state + * + * @param pva Pointer to PVA structure + * @return none + * + */ +void pva_abort(struct pva *pva); + +/** + * @brief Run the ucode selftests + * + * This function is invoked if the ucode is in selftest mode. + * The function will do the static memory allocation for the + * ucode self test to run. + * + * @param pdev Pointer to PVA device + * @return 0 on Success or negative error code + * + */ +int pva_run_ucode_selftest(struct platform_device *pdev); + +/** + * @brief Allocate and populate the function table to the memory + * + * This function is called when the vpu table needs to be populated. + * The function also allocates the memory required for the vpu table. + * + * @param pva Pointer to PVA device + * @param pva_func_table Pointer to the function table which contains + * the address, table size and number of entries + * @return 0 on Success or negative error code + * + */ +int pva_alloc_and_populate_function_table(struct pva *pva, + struct pva_func_table *fn_table); + +/** + * @brief Deallocate the memory of the function table + * + * This function is called once the allocated memory for vpu table needs to + * be freed. 
+ * + * @param pva Pointer to PVA device + * @param pva_func_table Pointer to the function table which contains + * the address, table size and number of entries + * + */ +void pva_dealloc_vpu_function_table(struct pva *pva, + struct pva_func_table *fn_table); + +/** + * @brief Get PVA version information + * + * @param pva Pointer to a PVA device node + * @param info Pointer to an information structure to be filled + * + * @return 0 on success, otherwise a negative error code + */ +int pva_get_firmware_version(struct pva *pva, struct pva_version_info *info); + +/** + * @brief Set trace log level of PVA + * + * @param pva Pointer to a PVA device node + * @param log_level 32-bit mask for logs that we want to receive + * + * @return 0 on success, otherwise a negative error code + */ + +/** + * @brief Get PVA Boot KPI + * + * @param pva Pointer to a PVA device node + * @param r5_boot_time Pointer to a variable, where r5 boot time will be filled + * + * @return 0 on success, otherwise a negative error code + */ +int pva_boot_kpi(struct pva *pva, u64 *r5_boot_time); + +int pva_set_log_level(struct pva *pva, u32 log_level, bool mailbox_locked); + +int nvpva_request_firmware(struct platform_device *pdev, const char *fw_name, + const struct firmware **ucode_fw); + +int nvpva_get_device_hwid(struct platform_device *pdev, + unsigned int id); + +u32 nvpva_get_id_idx(struct pva *dev, struct platform_device *pdev); + +void pva_push_aisr_status(struct pva *pva, uint32_t aisr_status); + +static inline u64 nvpva_get_tsc_stamp(void) +{ + u64 timestamp; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + timestamp = arch_timer_read_counter(); +#else + timestamp = arch_counter_get_cntvct(); +#endif + return timestamp; +} +#endif diff --git a/drivers/video/tegra/host/pva/pva_abort.c b/drivers/video/tegra/host/pva/pva_abort.c new file mode 100644 index 00000000..a15e73b5 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_abort.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "pva.h" +#include "pva_sec_ec.h" + +static void pva_abort_handler(struct work_struct *work) +{ + struct pva *pva = container_of(work, struct pva, + pva_abort_handler_work); + struct platform_device *pdev = pva->pdev; + int i; + + /* Dump nvhost state to show the pending jobs */ + nvhost_debug_dump_device(pdev); + + + /*wake up sync cmd waiters*/ + for (i = 0; i < pva->version_config->irq_count; i++) { + if (pva->cmd_status[i] == PVA_CMD_STATUS_WFI) { + pva->cmd_status[i] = PVA_CMD_STATUS_ABORTED; + wake_up(&pva->cmd_waitqueue[i]); + schedule(); + } + } + + /* lock mailbox mutex to avoid synchronous communication. */ + do { + schedule(); + } while (mutex_trylock(&pva->mailbox_mutex) == false); + + /* There is no ongoing activity anymore. 
Update mailbox status */ + for (i = 0; i < pva->version_config->irq_count; i++) { + pva->cmd_status[i] = PVA_CMD_STATUS_INVALID; + } + + /* Lock CCQ mutex to avoid asynchornous communication */ + mutex_lock(&pva->ccq_mutex); + + /* + * If boot was still on-going, skip over recovery and let boot-up + * routine handle the failure + */ + if (!pva->booted) { + nvpva_warn(&pdev->dev, "Recovery skipped: PVA is not booted"); + goto skip_recovery; + } + + /* disable error reporting to hsm*/ + pva_disable_ec_err_reporting(pva); + + /* Reset the PVA and reload firmware */ + nvhost_module_reset(pdev, true); + + /* enable error reporting to hsm*/ + pva_enable_ec_err_reporting(pva); + + /* Remove pending tasks from the queue */ + nvpva_queue_abort_all(pva->pool); + + nvpva_warn(&pdev->dev, "Recovery finished"); + +skip_recovery: + mutex_unlock(&pva->ccq_mutex); + mutex_unlock(&pva->mailbox_mutex); +} + +void pva_abort(struct pva *pva) +{ + struct platform_device *pdev = pva->pdev; + size_t i; + /* For selftest mode to finish the test */ + if (host1x_readl(pdev, hsp_ss0_state_r()) + & PVA_TEST_MODE) { + for (i = 0; i < pva->version_config->irq_count; i++) { + pva->cmd_status[i] = PVA_CMD_STATUS_DONE; + wake_up(&pva->cmd_waitqueue[i]); + } + return; + } + + WARN(true, "Attempting to recover the engine"); + schedule_work(&pva->pva_abort_handler_work); +} + +void pva_abort_init(struct pva *pva) +{ + INIT_WORK(&pva->pva_abort_handler_work, pva_abort_handler); +} diff --git a/drivers/video/tegra/host/pva/pva_bit_helpers.h b/drivers/video/tegra/host/pva/pva_bit_helpers.h new file mode 100644 index 00000000..fc93b211 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_bit_helpers.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_BIT_HELPERS_H_ +#define PVA_BIT_HELPERS_H_ + +#include +#include + +#define RMOS_BYTES_PER_WORD (sizeof(unsigned int)) +#define RMOS_BITS_PER_WORD (RMOS_BYTES_PER_WORD * 8U) + +static inline uint32_t rmos_get_first_set_bit(uint32_t val) +{ + uint32_t index = 0U; + + for (index = 0U; index < 32U; index++) { + if (1U == (val & 1U)) + break; + + val = val >> 1U; + } + + return index; +} + +static inline uint32_t rmos_get_first_zero_bit(uint32_t val) +{ + if ((~(uint32_t)0U) == val) + return RMOS_BITS_PER_WORD; + + return rmos_get_first_set_bit(~val); +} + +static inline uint32_t rmos_find_first_zero_bit(uint32_t *addr, uint32_t size) +{ + const uint32_t *p = addr; + uint32_t result = 0U; + uint32_t tmp; + uint32_t first_zero_bit; + + while (size >= RMOS_BITS_PER_WORD) { + tmp = *(p++); + if (0U != ~tmp) { + first_zero_bit = rmos_get_first_zero_bit(tmp); + + /* + * Result will not wrap around in any case as the + * Maximum possible return value is the 'size' itself. 
+ */ + return result + first_zero_bit; + } + result += RMOS_BITS_PER_WORD; + size -= RMOS_BITS_PER_WORD; + } + + if (size == 0U) + return result; + + tmp = (*p) | (~0U << size); + tmp = rmos_get_first_zero_bit(tmp); + if (tmp == 32U) { + if ((U32_MAX - result) < size) + return size; + else + return result + size; + } + + return result + tmp; +} + +static inline void rmos_set_bit32(unsigned int nr, unsigned int *addr) +{ + *addr |= (1U << nr); +} + +static inline void rmos_clear_bit32(unsigned int nr, unsigned int *addr) +{ + *addr &= ~(1U << nr); +} + +static inline bool rmos_test_bit32(unsigned int nr, const unsigned int *addr) +{ + return (*addr & (1 << nr)) != 0U; +} + +#endif diff --git a/drivers/video/tegra/host/pva/pva_ccq_t19x.c b/drivers/video/tegra/host/pva/pva_ccq_t19x.c new file mode 100644 index 00000000..998fb268 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t19x.c @@ -0,0 +1,87 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "pva-interface.h" +#include +#include +#include + +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif + +#include "pva.h" +#include "pva_ccq_t19x.h" + +#include "pva_regs.h" +#include "pva-interface.h" + +#define MAX_CCQ_ELEMENTS 6 + +static int pva_ccq_wait(struct pva *pva, int timeout) +{ + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + /* + * Wait until there is free room in the CCQ. Otherwise the writes + * could stall the CPU. Ignore the timeout in simulation. + */ + + while (time_before(jiffies, end_jiffies) || + (pva->timeout_enabled == false)) { + u32 val = host1x_readl(pva->pdev, + cfg_ccq_status_r(pva->version, 0, + PVA_CCQ_STATUS2_INDEX)); + if (val <= MAX_CCQ_ELEMENTS) + return 0; + + usleep_range(5, 10); + } + + return -ETIMEDOUT; +} + +int pva_ccq_send_task_t19x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags) +{ + int err = 0; + struct pva_cmd_s cmd = {0}; + + (void)pva_cmd_submit_batch(&cmd, queue_id, task_addr, batchsize, flags); + + mutex_lock(&pva->ccq_mutex); + err = pva_ccq_wait(pva, 100); + if (err < 0) + goto err_wait_ccq; + + /* Make the writes to CCQ */ + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, 0), cmd.cmd_field[1]); + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, 0), cmd.cmd_field[0]); + + mutex_unlock(&pva->ccq_mutex); + + return err; + +err_wait_ccq: + mutex_unlock(&pva->ccq_mutex); + pva_abort(pva); + + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_ccq_t19x.h b/drivers/video/tegra/host/pva/pva_ccq_t19x.h new file mode 100644 index 00000000..61c5a78b --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t19x.h @@ -0,0 +1,29 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_CCQ_T19X_H +#define PVA_CCQ_T19X_H + +#include + +#include "pva.h" + +int pva_ccq_send_task_t19x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_ccq_t23x.c b/drivers/video/tegra/host/pva/pva_ccq_t23x.c new file mode 100644 index 00000000..9122f9c0 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t23x.c @@ -0,0 +1,234 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#else +#include +#endif + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva_ccq_t23x.h" + +#include "pva_regs.h" + +#define MAX_CCQ_ELEMENTS 6 + +static int pva_ccq_wait(struct pva *pva, int timeout, unsigned int queue_id) +{ + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + /* + * Wait until there is free room in the CCQ. Otherwise the writes + * could stall the CPU. Ignore the timeout in simulation. 
+ */ + + while (time_before(jiffies, end_jiffies) || + (pva->timeout_enabled == false)) { + u32 val = PVA_EXTRACT( + host1x_readl(pva->pdev, + cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS2_INDEX)), + 4, 0, u32); + if (val <= MAX_CCQ_ELEMENTS) + return 0; + + usleep_range(5, 10); + } + + return -ETIMEDOUT; +} + +static int pva_ccq_send_cmd(struct pva *pva, u32 queue_id, + struct pva_cmd_s *cmd) +{ + int err = 0; + err = pva_ccq_wait(pva, 100, queue_id); + if (err < 0) + goto err_wait_ccq; + + /* Make the writes to CCQ */ + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, queue_id), + cmd->cmd_field[1]); + host1x_writel(pva->pdev, cfg_ccq_r(pva->version, queue_id), + cmd->cmd_field[0]); + return err; + +err_wait_ccq: + pva_abort(pva); + return err; +} + +int pva_ccq_send_task_t23x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags) +{ + int err = 0; + struct pva_cmd_s cmd = { 0 }; + + (void)pva_cmd_submit_batch(&cmd, queue_id, task_addr, batchsize, flags); + + err = pva_ccq_send_cmd(pva, queue_id, &cmd); + return err; +} + +void pva_ccq_isr_handler(struct pva *pva, unsigned int queue_id) +{ + struct platform_device *pdev = pva->pdev; + u32 int_status; + unsigned int cmd_status_index = queue_id + PVA_CCQ0_INDEX; + int_status = + host1x_readl(pdev, cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS7_INDEX)); + if (pva->cmd_status[cmd_status_index] != PVA_CMD_STATUS_WFI) { + nvpva_warn(&pdev->dev, "No ISR for CCQ %u", queue_id); + return; + } + /* Save the current command and subcommand for later processing */ + + pva->version_config->read_status_interface( + pva, cmd_status_index, int_status, + &pva->cmd_status_regs[cmd_status_index]); + /* Clear the mailbox interrupt status */ + + /* Wake up the waiters */ + pva->cmd_status[cmd_status_index] = PVA_CMD_STATUS_DONE; + wake_up(&pva->cmd_waitqueue[cmd_status_index]); +} + +int pva_ccq_wait_event(struct pva *pva, unsigned int queue_id, int wait_time) +{ + int timeout = 1; + int err; + u32 interface = queue_id + 1; + /* Wait for the event being triggered in ISR */ + if (pva->timeout_enabled == true) { + timeout = wait_event_timeout( + pva->cmd_waitqueue[interface], + pva->cmd_status[interface] == PVA_CMD_STATUS_DONE || + pva->cmd_status[interface] == + PVA_CMD_STATUS_ABORTED, + msecs_to_jiffies(wait_time)); + } else { + wait_event(pva->cmd_waitqueue[interface], + pva->cmd_status[interface] == PVA_CMD_STATUS_DONE || + pva->cmd_status[interface] == + PVA_CMD_STATUS_ABORTED); + } + if (timeout <= 0) { + err = -ETIMEDOUT; + pva_abort(pva); + } else if (pva->cmd_status[interface] == PVA_CMD_STATUS_ABORTED) + err = -EIO; + else + err = 0; + return err; +} + +int pva_ccq_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + u32 interface = queue_id + 1U; + + if (status_regs == NULL) { + err = -EINVAL; + goto err_invalid_parameter; + } + + if (queue_id >= MAX_PVA_QUEUE_COUNT) { + err = -EINVAL; + goto err_invalid_parameter; + } + + /* Ensure that mailbox state is sane */ + if (WARN_ON(pva->cmd_status[interface] != PVA_CMD_STATUS_INVALID)) { + err = -EIO; + goto err_check_status; + } + + /* Mark that we are waiting for an interrupt */ + pva->cmd_status[interface] = PVA_CMD_STATUS_WFI; + memset(&pva->cmd_status_regs[interface], 0, + sizeof(struct pva_cmd_status_regs)); + + /* Submit command to PVA */ + err = pva_ccq_send_cmd(pva, queue_id, cmd); + if (err < 0) + goto err_send_command; + + err = pva_ccq_wait_event(pva, 
queue_id, 100); + if (err < 0) + goto err_wait_response; + /* Return interrupt status back to caller */ + memcpy(status_regs, &pva->cmd_status_regs[interface], + sizeof(struct pva_cmd_status_regs)); + + pva->cmd_status[interface] = PVA_CMD_STATUS_INVALID; + + return err; + +err_wait_response: +err_send_command: + pva->cmd_status[interface] = PVA_CMD_STATUS_INVALID; +err_check_status: +err_invalid_parameter: + return err; +} + +int pva_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + switch (pva->submit_cmd_mode) { + case PVA_SUBMIT_MODE_MAILBOX: + err = pva_mailbox_send_cmd_sync(pva, cmd, nregs, status_regs); + break; + case PVA_SUBMIT_MODE_MMIO_CCQ: + err = pva_ccq_send_cmd_sync(pva, cmd, nregs, queue_id, + status_regs); + break; + } + + return err; +} + +int pva_send_cmd_sync_locked(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + switch (pva->submit_cmd_mode) { + case PVA_SUBMIT_MODE_MAILBOX: + err = pva_mailbox_send_cmd_sync_locked(pva, cmd, nregs, + status_regs); + break; + case PVA_SUBMIT_MODE_MMIO_CCQ: + err = pva_ccq_send_cmd_sync(pva, cmd, nregs, queue_id, + status_regs); + break; + } + + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_ccq_t23x.h b/drivers/video/tegra/host/pva/pva_ccq_t23x.h new file mode 100644 index 00000000..87c94fa7 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ccq_t23x.h @@ -0,0 +1,40 @@ +/* + * PVA Command Queue Interface handling + * + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_CCQ_T23X_H +#define PVA_CCQ_T23X_H + +#include + +#include "pva.h" +#include "pva_status_regs.h" + +int pva_ccq_send_task_t23x(struct pva *pva, u32 queue_id, dma_addr_t task_addr, + u8 batchsize, u32 flags); +void pva_ccq_isr_handler(struct pva *pva, unsigned int queue_id); +int pva_ccq_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *ccq_status_regs); +int pva_send_cmd_sync(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *ccq_status_regs); +int pva_send_cmd_sync_locked(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *ccq_status_regs); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_debug.c b/drivers/video/tegra/host/pva/pva_debug.c new file mode 100644 index 00000000..618b027d --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_debug.c @@ -0,0 +1,490 @@ +/* + * PVA Debug Information file + * + * Copyright (c) 2017-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pva.h" +#include +#include "pva_vpu_ocd.h" +#include "pva-fw-address-map.h" + +static void pva_read_crashdump(struct seq_file *s, struct pva_seg_info *seg_info) +{ + int i = 0; + u32 *seg_addr = (u32 *) seg_info->addr; + + if (!seg_addr) + return; + + for (i = 0; i < (seg_info->size >> 4);) { + seq_printf(s, "0x%x 0x%x 0x%x 0x%x\n", + seg_addr[i], seg_addr[i+1], + seg_addr[i+2], seg_addr[i+3]); + i = i + 4; + } +} + +static int pva_crashdump(struct seq_file *s, void *data) +{ + int err = 0; + struct pva_crashdump_debugfs_entry *entry = + (struct pva_crashdump_debugfs_entry *)s->private; + struct pva *pva = entry->pva; + + err = nvhost_module_busy(pva->pdev); + if (err) { + nvpva_dbg_info(pva, "err in powering up pva\n"); + goto err_poweron; + } + + pva_read_crashdump(s, &entry->seg_info); + + nvhost_module_idle(pva->pdev); + +err_poweron: + return err; +} + +static int crashdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, pva_crashdump, inode->i_private); +} + +static const struct file_operations pva_crashdump_fops = { + .open = crashdump_open, + .read = seq_read, + .release = single_release, +}; + +struct pva_fw_debug_log_iter { + struct pva *pva; + u8 *buffer; + loff_t pos; + size_t size; +}; + +static void *log_seq_start(struct seq_file *s, loff_t *pos) +{ + struct pva_fw_debug_log_iter *iter; + + iter = s->private; + if (*pos >= iter->size) + return NULL; + + iter->pos = *pos; + return iter; +} + +static void log_seq_stop(struct seq_file *s, void *v) +{ +} + +static void *log_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct pva_fw_debug_log_iter *iter = v; + + iter->pos += 1; + *pos = iter->pos; + + if (iter->pos >= iter->size) + return NULL; + + return iter; +} + +static int log_seq_show(struct seq_file *s, void *v) +{ + struct pva_fw_debug_log_iter *iter = v; + + seq_putc(s, iter->buffer[iter->pos]); + return 0; +} + +static struct seq_operations const log_seq_ops = { .start = log_seq_start, + .stop = log_seq_stop, + .next = log_seq_next, + .show = log_seq_show }; + +static int fw_debug_log_open(struct inode *inode, struct file *file) +{ + struct pva_fw_debug_log_iter *iter = + __seq_open_private(file, &log_seq_ops, sizeof(*iter)); + int err = 0; + struct pva *pva = inode->i_private; + + if (IS_ERR_OR_NULL(iter)) { + err = -ENOMEM; + goto err_out; + } + + iter->pva = pva; + + if (pva->booted) { + err = nvhost_module_busy(pva->pdev); + if (err) { + nvpva_err(&pva->pdev->dev, "err in powering up pva"); + err = -EIO; + goto free_iter; + } + + save_fw_debug_log(pva); + + nvhost_module_idle(pva->pdev); + } + + iter->buffer = pva->fw_debug_log.saved_log; + iter->size = + strnlen(pva->fw_debug_log.saved_log, pva->fw_debug_log.size); + iter->pos = 0; + + return 0; +free_iter: + kfree(iter); +err_out: + return err; +} + +static const struct file_operations pva_fw_debug_log_fops = { + .open = fw_debug_log_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +static inline void print_version(struct seq_file *s, + const char *version_str, + 
const u32 version) +{ + const char type = PVA_EXTRACT(version, 31, 24, u8); + const u32 major = PVA_EXTRACT(version, 23, 16, u32); + const u32 minor = PVA_EXTRACT(version, 15, 8, u32); + const u32 subminor = PVA_EXTRACT(version, 7, 0, u32); + + seq_printf(s, "%s: %c.%02u.%02u.%02u\n", version_str, + type, major, minor, subminor); +} + +static int print_firmware_versions(struct seq_file *s, void *data) +{ + struct pva *pva = s->private; + struct pva_version_info info; + int ret = 0; + + ret = nvhost_module_busy(pva->pdev); + if (ret < 0) + goto err_poweron; + + ret = pva_get_firmware_version(pva, &info); + if (ret < 0) + goto err_get_firmware_version; + + nvhost_module_idle(pva->pdev); + + print_version(s, "pva_r5_version", info.pva_r5_version); + print_version(s, "pva_compat_version", info.pva_compat_version); + seq_printf(s, "pva_revision: %x\n", info.pva_revision); + seq_printf(s, "pva_built_on: %u\n", info.pva_built_on); + + return 0; + +err_get_firmware_version: + nvhost_module_idle(pva->pdev); +err_poweron: + return ret; +} + +static int print_version_open(struct inode *inode, struct file *file) +{ + return single_open(file, print_firmware_versions, inode->i_private); +} + +static const struct file_operations print_version_fops = { + .open = print_version_open, + .read = seq_read, + .release = single_release, +}; + +static int get_log_level(void *data, u64 *val) +{ + struct pva *pva = (struct pva *) data; + + *val = pva->log_level; + return 0; +} + +static int set_log_level(void *data, u64 val) +{ + struct pva *pva = (struct pva *) data; + + pva->log_level = val; + if (pva->booted) + return pva_set_log_level(pva, val, false); + else + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(log_level_fops, get_log_level, set_log_level, "%llu"); + +static void update_vpu_stats(struct pva *pva, bool stats_enabled) +{ + u32 flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status = {}; + struct pva_cmd_s cmd = {}; + int err = 0; + u32 nregs; + u64 duration = 0; + struct pva_vpu_stats_s *stats_buf = + pva->vpu_util_info.stats_fw_buffer_va; + u64 *vpu_stats = pva->vpu_util_info.vpu_stats; + + if (vpu_stats == 0) + goto err_out; + + err = nvhost_module_busy(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "error in powering up pva %d", + err); + vpu_stats[0] = 0; + vpu_stats[1] = 0; + return; + } + + nregs = pva_cmd_get_vpu_stats(&cmd, + pva->vpu_util_info.stats_fw_buffer_iova, + flags, stats_enabled); + err = pva_mailbox_send_cmd_sync(pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&pva->pdev->dev, "get vpu stats cmd failed: %d\n", + err); + goto err_out; + } + + if (stats_enabled == false) + goto err_out; + + duration = stats_buf->window_end_time - stats_buf->window_start_time; + if (duration == 0) + goto err_out; + + vpu_stats[0] = + (10000ULL * stats_buf->total_utilization_time[0]) / duration; + vpu_stats[1] = + (10000ULL * stats_buf->total_utilization_time[1]) / duration; + pva->vpu_util_info.start_stamp = stats_buf->window_start_time; + pva->vpu_util_info.end_stamp = stats_buf->window_end_time; + goto out; +err_out: + vpu_stats[0] = 0; + vpu_stats[1] = 0; +out: + nvhost_module_idle(pva->pdev); +} + +static int print_vpu_stats(struct seq_file *s, void *data) +{ + struct pva *pva = s->private; + + update_vpu_stats(pva, pva->stats_enabled); + seq_printf(s, "%llu\n%llu\n%llu\n%llu\n", + pva->vpu_util_info.start_stamp, + pva->vpu_util_info.end_stamp, + pva->vpu_util_info.vpu_stats[0], + pva->vpu_util_info.vpu_stats[1]); + + return 0; +} + +static int 
pva_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, print_vpu_stats, inode->i_private); +} + +static const struct file_operations pva_stats_fops = { + .open = pva_stats_open, + .read = seq_read, + .release = single_release, +}; + +static int get_authentication(void *data, u64 *val) +{ + struct pva *pva = (struct pva *) data; + + *val = pva->pva_auth.pva_auth_enable ? 1 : 0; + + return 0; +} + +static int set_authentication(void *data, u64 val) +{ + struct pva *pva = (struct pva *) data; + + pva->pva_auth.pva_auth_enable = (val == 1) ? true : false; + + if (pva->pva_auth.pva_auth_enable) + pva->pva_auth.pva_auth_allow_list_parsed = false; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(pva_auth_fops, get_authentication, set_authentication, "%llu"); + +static long vpu_ocd_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct pva_vpu_dbg_block *dbg_block = f->f_inode->i_private; + int err = 0; + + switch (cmd) { + case PVA_OCD_IOCTL_VPU_IO: { + struct pva_ocd_ioctl_vpu_io_param io_param; + + if (copy_from_user(&io_param, (void __user *)arg, + sizeof(io_param))) { + pr_err("failed copy ioctl buffer from user; size: %u", + _IOC_SIZE(cmd)); + err = -EFAULT; + goto out; + } + err = pva_vpu_ocd_io(dbg_block, io_param.instr, + &io_param.data[0], io_param.n_write, + &io_param.data[0], io_param.n_read); + if (err) + goto out; + + err = copy_to_user((void __user *)arg, &io_param, + sizeof(io_param)); + if (err) + goto out; + + break; + } + default: + err = -ENOIOCTLCMD; + break; + } + +out: + return err; +} + +static const struct file_operations pva_vpu_ocd_fops = { + .unlocked_ioctl = vpu_ocd_ioctl +}; + +void pva_debugfs_deinit(struct pva *pva) +{ + if (pva->vpu_util_info.stats_fw_buffer_va != NULL) { + dma_free_coherent(&pva->aux_pdev->dev, + sizeof(struct pva_vpu_stats_s), + pva->vpu_util_info.stats_fw_buffer_va, + pva->vpu_util_info.stats_fw_buffer_iova); + pva->vpu_util_info.stats_fw_buffer_va = 0; + pva->vpu_util_info.stats_fw_buffer_iova = 0; + } + + if (pva->fw_debug_log.saved_log != NULL) { + mutex_destroy(&pva->fw_debug_log.saved_log_lock); + kfree(pva->fw_debug_log.saved_log); + } +} + +void pva_debugfs_init(struct platform_device *pdev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + struct dentry *de = pdata->debugfs; + static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0", + "ocd_vpu1" }; + int i, err; + + if (!de) + return; + + pva->debugfs_entry_r5.pva = pva; + pva->debugfs_entry_vpu0.pva = pva; + pva->debugfs_entry_vpu1.pva = pva; + + debugfs_create_file("r5_crashdump", S_IRUGO, de, + &pva->debugfs_entry_r5, &pva_crashdump_fops); + debugfs_create_file("vpu0_crashdump", S_IRUGO, de, + &pva->debugfs_entry_vpu0, &pva_crashdump_fops); + debugfs_create_file("vpu1_crashdump", S_IRUGO, de, + &pva->debugfs_entry_vpu1, &pva_crashdump_fops); + debugfs_create_u32("submit_task_mode", S_IRUGO | S_IWUSR, de, + &pva->submit_task_mode); + debugfs_create_bool("vpu_debug", 0644, de, + &pva->vpu_debug_enabled); + debugfs_create_u32("r5_dbg_wait", 0644, de, + &pva->r5_dbg_wait); + debugfs_create_bool("r5_timeout_enable", 0644, de, + &pva->timeout_enabled); + debugfs_create_file("firmware_version", S_IRUGO, de, pva, + &print_version_fops); + debugfs_create_u32("cg_disable", 0644, de, &pva->slcg_disable); + debugfs_create_bool("vpu_printf_enabled", 0644, de, + &pva->vpu_printf_enabled); + debugfs_create_file("fw_log_level", 0644, de, pva, &log_level_fops); + 
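+	/*
+	 * Note: fw_log_level selects firmware-side trace verbosity and is
+	 * forwarded to the R5 through pva_set_log_level() once the firmware
+	 * has booted, while driver_log_mask below only gates the kernel-side
+	 * nvpva_dbg() categories declared in pva.h.
+	 */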
debugfs_create_u32("driver_log_mask", 0644, de, &pva->driver_log_mask); + debugfs_create_file("vpu_app_authentication", 0644, de, pva, + &pva_auth_fops); + debugfs_create_u32("profiling_level", 0644, de, &pva->profiling_level); + debugfs_create_bool("stats_enabled", 0644, de, &pva->stats_enabled); + debugfs_create_file("vpu_stats", 0644, de, pva, &pva_stats_fops); + + mutex_init(&pva->fw_debug_log.saved_log_lock); + pva->fw_debug_log.size = FW_DEBUG_LOG_BUFFER_SIZE; + pva->fw_debug_log.saved_log = + kzalloc(FW_DEBUG_LOG_BUFFER_SIZE, GFP_KERNEL); + if (IS_ERR_OR_NULL(pva->fw_debug_log.saved_log)) { + dev_err(&pva->pdev->dev, + "failed to allocate memory for saving debug log"); + pva->fw_debug_log.saved_log = NULL; + mutex_destroy(&pva->fw_debug_log.saved_log_lock); + } else { + debugfs_create_file("fw_debug_log", 0444, de, pva, + &pva_fw_debug_log_fops); + } + + pva->vpu_util_info.stats_fw_buffer_va = dma_alloc_coherent( + &pva->aux_pdev->dev, sizeof(struct pva_vpu_stats_s), + &pva->vpu_util_info.stats_fw_buffer_iova, GFP_KERNEL); + if (IS_ERR_OR_NULL(pva->vpu_util_info.stats_fw_buffer_va)) { + err = PTR_ERR(pva->vpu_util_info.stats_fw_buffer_va); + dev_err(&pva->pdev->dev, + "err = %d. failed to allocate stats buffer\n", err); + pva->vpu_util_info.stats_fw_buffer_va = 0; + pva->vpu_util_info.stats_fw_buffer_iova = 0; + } + + err = pva_vpu_ocd_init(pva); + if (err == 0) { + for (i = 0; i < NUM_VPU_BLOCKS; i++) + debugfs_create_file(vpu_ocd_names[i], 0644, de, + &pva->vpu_dbg_blocks[i], + &pva_vpu_ocd_fops); + } else { + dev_err(&pva->pdev->dev, "VPU OCD initialization failed\n"); + } +} diff --git a/drivers/video/tegra/host/pva/pva_dma.c b/drivers/video/tegra/host/pva/pva_dma.c new file mode 100644 index 00000000..d648e0f2 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_dma.c @@ -0,0 +1,1264 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "pva_dma.h" +#include "pva_queue.h" +#include "pva-sys-dma.h" +#include "pva.h" +#include "pva_vpu_exe.h" +#include "nvpva_client.h" +#include "pva-bit.h" +#include "fw_config.h" +#include "pva_hwseq.h" + +static int32_t check_address_range(struct nvpva_dma_descriptor const *desc, + uint64_t max_size, + uint64_t max_size2, + bool src_dst, + bool dst2) +{ + int32_t err = 0; + int64_t start = 0; + int64_t end = 0; + int64_t offset = 0; + int64_t offset2 = 0; + uint32_t i; + int64_t bppSize = ((int64_t)desc->bytePerPixel == 0) ? 1 : + ((int64_t)desc->bytePerPixel == 1) ? 
2 : 4; + int64_t s[5] = {}; // max 5 dimension loop for DMA + int64_t last_tx = (int64_t)desc->tx - 1; + int64_t last_ty = (int64_t)desc->ty - 1; + + /** dummy transfer mode with no data transfer */ + if (desc->tx == 0U) + return err; + + /** ty = 0 is not allowed */ + if (desc->ty == 0U) + return -EINVAL; + + /** Source transfer mode take care padding */ + if (src_dst == false) { + last_tx -= (int64_t)desc->px; + last_ty -= (int64_t)desc->py; + } + + /* 1st dimension */ + s[0] = last_tx; + start = min((s[0]*bppSize), 0LL); + end = max(((s[0]*bppSize) + (bppSize - 1)), 0LL); + if (src_dst) { + /* 2nd destination dim */ + s[1] = (int64_t)desc->dstLinePitch * last_ty; + if (desc->dstCbEnable == 1U) { + /* ((DLP_ADV * (Ty-1)) + Tx) * BPP <= DB_SIZE */ + if (((s[1] + last_tx + 1) * bppSize) <= + (int64_t)desc->dstCbSize) + return 0; + + pr_err("invalid dst cb advance"); + return -EINVAL; + } + + offset = (int64_t)desc->dst_offset; + offset2 = (int64_t)desc->dst2Offset; + /* 3rd destination dim */ + s[2] = ((int64_t)desc->dstAdv1 * (int64_t)desc->dstRpt1); + /* 4th destination dim */ + s[3] = ((int64_t)desc->dstAdv2 * (int64_t)desc->dstRpt2); + /* 5th destination dim */ + s[4] = ((int64_t)desc->dstAdv3 * (int64_t)desc->dstRpt3); + } else { + /* 2nd source dim */ + s[1] = (int64_t)desc->srcLinePitch * last_ty; + if (desc->srcCbEnable == 1U) { + /* ((SLP_ADV * (Ty-1)) + Tx) * BPP <= SB_SIZE */ + if (((s[1] + last_tx + 1) * bppSize) <= + (int64_t)desc->srcCbSize) + return 0; + pr_err("invalid src cb"); + return -EINVAL; + } + + offset = (int64_t)desc->src_offset; + /* 3rd source dim */ + s[2] = ((int64_t)desc->srcAdv1 * (int64_t)desc->srcRpt1); + /* 4th source dim */ + s[3] = ((int64_t)desc->srcAdv2 * (int64_t)desc->srcRpt2); + /* 5th source dim */ + s[4] = ((int64_t)desc->srcAdv3 * (int64_t)desc->srcRpt3); + } + + for (i = 1U; i < 5U; i++) { + start += min(s[i]*bppSize, 0LL); + end += max(s[i]*bppSize, 0LL); + } + + /* check for out of range access */ + if (((int64_t) max_size) < 0) { + pr_err("max_size too large"); + err = -EINVAL; + goto out; + } + + if (!(((offset + start) >= 0) + && ((offset + end) < (int64_t)max_size))) { + pr_err("ERROR: Out of range detected"); + err = -EINVAL; + } + + if (dst2) { + if ((max_size2 > UINT_MAX) || !(((offset2 + start) >= 0) + && ((offset2 + end) < (int64_t)max_size2))) { + pr_err("ERROR: Out of range detected"); + err = -EINVAL; + } + } +out: + return err; +} + +static int32_t +patch_dma_desc_address(struct pva_submit_task *task, + struct nvpva_dma_descriptor *umd_dma_desc, + struct pva_dtd_s *dma_desc, u8 desc_id, bool is_misr) +{ + int32_t err = 0; + uint64_t addr_base = 0; + + switch (umd_dma_desc->srcTransferMode) { + case DMA_DESC_SRC_XFER_L2RAM: + /* + * PVA_HW_GEN1 has CVNAS RAM PVA_HW_GEN2 has L2SRAM CVNAS RAM + * memory is pinned and needs conversion from pin ID -> IOVA + * L2SRAM has memory offset which does not need conversion. 
The + * same conversion is applied for dst + */ + if (task->pva->version == PVA_HW_GEN1) { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->srcPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, + "invalid memory handle in" + " descriptor for SRC CVSRAM"); + goto out; + } + + addr_base = mem->dma_addr; + err = check_address_range(umd_dma_desc, + mem->size, + 0, + false, + false); + } else { + addr_base = 0; + if ((task->desc_hwseq_frm & (1ULL << desc_id)) == 0ULL) + err = check_address_range(umd_dma_desc, + task->l2_alloc_size, + 0, + false, + false); + } + + if (err) + goto out; + + break; + case DMA_DESC_SRC_XFER_VMEM:{ + /* calculate symbol address */ + u32 addr = 0; + u32 size = 0; + + if (umd_dma_desc->src_offset > U32_MAX) { + err = -EINVAL; + goto out; + } + + err = pva_get_sym_offset(&task->client->elf_ctx, task->exe_id, + umd_dma_desc->srcPtr, &addr, &size); + if (err) { + err = -EINVAL; + task_err( + task, + "invalid symbol id in descriptor for src VMEM"); + goto out; + } + + err = check_address_range(umd_dma_desc, + size, + 0, + false, + false); + if (err) { + err = -EINVAL; + task_err( + task, "ERROR: Invalid offset or address"); + goto out; + } + + addr_base = addr; + break; + } + case DMA_DESC_SRC_XFER_VPU_CONFIG: { + u32 addr = 0; + u32 size = 0; + + /* dest must be null*/ + if ((umd_dma_desc->dstPtr != NVPVA_INVALID_SYMBOL_ID) + || (umd_dma_desc->dst2Ptr != NVPVA_INVALID_SYMBOL_ID) + || (umd_dma_desc->src_offset > U32_MAX)) { + task_err(task, "ERROR: Invalid VPUC"); + err = -EINVAL; + goto out; + } + + /* calculate symbol address */ + /* TODO: check VPUC handling in ELF segment */ + err = pva_get_sym_offset(&task->client->elf_ctx, task->exe_id, + umd_dma_desc->srcPtr, &addr, &size); + if (err) { + task_err(task, "ERROR: Invalid offset or address"); + err = -EINVAL; + goto out; + } + + if (err) { + task_err(task, "ERROR: Invalid offset or address"); + goto out; + } + + addr_base = addr; + break; + } + case DMA_DESC_SRC_XFER_MC: { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->srcPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err( + task, + "invalid memory handle: descriptor: src MC"); + goto out; + } + if ((task->desc_hwseq_frm & (1ULL << desc_id)) == 0ULL) + err = check_address_range(umd_dma_desc, + mem->size, + 0, + false, + false); + + if (err) { + err = -EINVAL; + task_err(task, "ERROR: address"); + goto out; + } + + addr_base = mem->dma_addr; + task->src_surf_base_addr = addr_base; + + /** If BL format selected, set addr bit 39 to indicate */ + /* XBAR_RAW swizzling is required */ + addr_base |= (u64)umd_dma_desc->srcFormat << 39U; + + break; + } + case DMA_DESC_SRC_XFER_R5TCM: + if (!task->is_system_app) { + err = -EFAULT; + goto out; + } else { + task->special_access = 1; + addr_base = 0; + break; + } + case DMA_DESC_SRC_XFER_MMIO: + case DMA_DESC_SRC_XFER_INVAL: + case DMA_DESC_SRC_XFER_RSVD: + task_err(task, "invalid src mode %d", + umd_dma_desc->srcTransferMode); + err = -EINVAL; + goto out; + default: + err = -EFAULT; + goto out; + } + + addr_base += umd_dma_desc->src_offset; + dma_desc->src_adr0 = (uint32_t)(addr_base & 0xFFFFFFFFLL); + dma_desc->src_adr1 = (uint8_t)((addr_base >> 32U) & 0xFF); + if (umd_dma_desc->srcTransferMode == + (uint8_t)DMA_DESC_SRC_XFER_VPU_CONFIG) + goto out; + + addr_base = 0; + if (is_misr) { + if (umd_dma_desc->dstTransferMode == DMA_DESC_DST_XFER_L2RAM + || umd_dma_desc->dstTransferMode == DMA_DESC_DST_XFER_MC) { + addr_base = umd_dma_desc->dstPtr; + goto done; 
+ } else { + err = -EINVAL; + task_err( + task, + "invalid dst transfer mode for MISR descriptor"); + goto out; + } + } + + switch (umd_dma_desc->dstTransferMode) { + case DMA_DESC_DST_XFER_L2RAM: + if (task->pva->version == PVA_HW_GEN1) { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->dstPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, + "invalid memory handle in" + " descriptor for dst CVSRAM"); + goto out; + } + + addr_base = mem->dma_addr; + err = check_address_range(umd_dma_desc, + mem->size, + 0, + true, + false); + } else { + addr_base = 0; + err = check_address_range(umd_dma_desc, + task->l2_alloc_size, + 0, + true, + false); + } + + if (err) { + task_err(task, "ERROR: Invalid offset or address"); + err = -EINVAL; + goto out; + } + + break; + case DMA_DESC_DST_XFER_VMEM: { + /* calculate symbol address */ + u32 addr = 0; + u32 size = 0; + u32 addr2 = 0; + u32 size2 = 0; + bool check_size2 = false; + + if ((umd_dma_desc->dst_offset > U32_MAX) + || (umd_dma_desc->dst2Offset > U32_MAX)) { + err = -EINVAL; + goto out; + } + + err = pva_get_sym_offset(&task->client->elf_ctx, task->exe_id, + umd_dma_desc->dstPtr, &addr, &size); + if (err) { + err = -EINVAL; + task_err( + task, + "invalid symbol id in descriptor for dst VMEM"); + goto out; + } + + if (umd_dma_desc->dst2Ptr != NVPVA_INVALID_SYMBOL_ID) { + err = pva_get_sym_offset(&task->client->elf_ctx, + task->exe_id, + umd_dma_desc->dst2Ptr, + &addr2, + &size2); + + if (err) { + err = -EINVAL; + task_err( + task, + "invalid symbol id in descriptor " + "for dst2 VMEM"); + goto out; + } + + if ((addr2 + umd_dma_desc->dst2Offset) & 0x3F) { + task_err(task, + "ERR: dst2Ptr/Offset not aligned"); + err = -EINVAL; + goto out; + } + + check_size2 = true; + } + + err = check_address_range(umd_dma_desc, + size, + size2, + true, + check_size2); + if (err) { + err = -EINVAL; + task_err( + task, "ERROR: Invalid offset or address"); + goto out; + } + + addr_base = addr; + break; + } + case DMA_DESC_DST_XFER_MC: { + struct pva_pinned_memory *mem = + pva_task_pin_mem(task, umd_dma_desc->dstPtr); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err( + task, + "invalid memory handle: descriptor: dst MC"); + goto out; + } + + err = check_address_range(umd_dma_desc, + mem->size, + 0, + true, + false); + if (err) { + err = -EINVAL; + task_err(task, "ERROR: address"); + goto out; + } + + addr_base = mem->dma_addr; + task->dst_surf_base_addr = addr_base; + + /* If BL format selected, set addr bit 39 to indicate */ + /* XBAR_RAW swizzling is required */ + addr_base |= (u64)umd_dma_desc->dstFormat << 39U; + break; + } + case DMA_DESC_DST_XFER_R5TCM: + if (!task->is_system_app) { + err = -EFAULT; + goto out; + } else { + task->special_access = 1; + addr_base = 0; + break; + } + case DMA_DESC_DST_XFER_MMIO: + case DMA_DESC_DST_XFER_INVAL: + case DMA_DESC_DST_XFER_RSVD1: + case DMA_DESC_DST_XFER_RSVD2: + task_err(task, "invalid dst mode %d", + umd_dma_desc->dstTransferMode); + err = -EINVAL; + goto out; + default: + err = -EFAULT; + goto out; + } +done: + addr_base += umd_dma_desc->dst_offset; + dma_desc->dst_adr0 = (uint32_t)(addr_base & 0xFFFFFFFFLL); + dma_desc->dst_adr1 = (uint8_t)((addr_base >> 32U) & 0xFF); +out: + return err; +} + +static bool +is_valid_vpu_trigger_mode(const struct nvpva_dma_descriptor *desc, + u32 trigger_mode) +{ + bool valid = true; + + if (desc->trigEventMode == 0U) + return valid; + + switch ((enum nvpva_task_dma_trig_vpu_hw_events) + desc->trigVpuEvents) { + case TRIG_VPU_NO_TRIGGER: + if 
(trigger_mode != NVPVA_HWSEQTM_DMATRIG) + valid = false; + + break; + case TRIG_VPU_CONFIG_START: + /** If trig = VPU configuration trigger, + * the DSTM should be VPU configuration + * mode (0x7) + */ + if (desc->srcTransferMode != + (uint8_t) DMA_DESC_SRC_XFER_VPU_CONFIG) { + valid = false; + } + break; + case TRIG_VPU_DMA_READ0_START: + case TRIG_VPU_DMA_READ1_START: + case TRIG_VPU_DMA_READ2_START: + case TRIG_VPU_DMA_READ3_START: + case TRIG_VPU_DMA_READ4_START: + case TRIG_VPU_DMA_READ5_START: + case TRIG_VPU_DMA_READ6_START: + /* should be either vpu config or write to VMEM */ + valid = ((desc->srcTransferMode == + (uint8_t)DMA_DESC_SRC_XFER_VPU_CONFIG) + || (desc->dstTransferMode == + (uint8_t)DMA_DESC_DST_XFER_VMEM)); + break; + case TRIG_VPU_DMA_STORE0_START: + case TRIG_VPU_DMA_STORE1_START: + case TRIG_VPU_DMA_STORE2_START: + case TRIG_VPU_DMA_STORE3_START: + case TRIG_VPU_DMA_STORE4_START: + case TRIG_VPU_DMA_STORE5_START: + case TRIG_VPU_DMA_STORE6_START: + //should be either vpu config or read from VMEM + valid = ((desc->srcTransferMode == + (uint8_t)DMA_DESC_SRC_XFER_VPU_CONFIG) + || (desc->srcTransferMode == + (uint8_t) DMA_DESC_SRC_XFER_VMEM)); + break; + default: + valid = false; + break; + } + + return valid; +} + +static int32_t +validate_descriptor(const struct nvpva_dma_descriptor *desc, + u32 trigger_mode) +{ + uint32_t ret = 0; + int32_t retval = 0; + + /** padding related validation */ + if (desc->dstTransferMode == (uint8_t) DMA_DESC_DST_XFER_VMEM) { + ret |= ((desc->px != 0U) && + (desc->px >= desc->tx)) ? 1UL : 0UL; + + ret |= ((desc->py != 0U) && + (desc->py >= desc->ty)) ? 1UL : 0UL; + } + + /** Validate VPU trigger event config */ + ret |= (is_valid_vpu_trigger_mode(desc, trigger_mode)) ? 0UL : 1UL; + + /** Check src/dstADV values with respect to ECET bits */ + ret |= ( + (desc->trigEventMode == (uint8_t) TRIG_EVENT_MODE_DIM4) + && ((desc->srcRpt1 == 0U) || (desc->srcRpt2 == 0U) + || (desc->dstRpt1 == 0U) || + (desc->dstRpt2 == 0U))) ? 1UL : 0UL; + + ret |= (((desc->trigEventMode) == ((uint8_t)TRIG_EVENT_MODE_DIM3)) && + ((desc->srcRpt1 == 0U) || (desc->dstRpt1 == 0U))) ? 1UL : 0UL; + + /** BL format should be associated with MC only */ + if (desc->srcFormat == 1U) { + ret |= (!(desc->srcTransferMode == + (uint8_t) DMA_DESC_SRC_XFER_MC)) ? 1UL : 0UL; + } + + if (desc->dstFormat == 1U) { + ret |= (!(desc->dstTransferMode == + (uint8_t) DMA_DESC_DST_XFER_MC)) ? 
1UL : 0UL; + } + + if (ret != 0U) + retval = -EINVAL; + + return retval; +} +/* User to FW DMA descriptor structure mapping helper */ +/* TODO: Need to handle DMA descriptor like dst2ptr and dst2Offset */ +static int32_t nvpva_task_dma_desc_mapping(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + struct nvpva_dma_descriptor *umd_dma_desc = NULL; + struct pva_dtd_s *dma_desc = NULL; + int32_t err = 0; + unsigned int desc_num; + uint32_t addr = 0U; + uint32_t size = 0; + bool is_misr; + + task->special_access = 0; + + for (desc_num = 0U; desc_num < task->num_dma_descriptors; desc_num++) { + umd_dma_desc = &task->dma_descriptors[desc_num]; + dma_desc = &hw_task->dma_desc[desc_num]; + is_misr = !((task->dma_misr_config.descriptor_mask + & PVA_BIT64(desc_num)) == 0U); + is_misr = is_misr && (task->dma_misr_config.enable != 0U); + + err = validate_descriptor(umd_dma_desc, + task->hwseq_config.hwseqTrigMode); + if (err) { + task_err( + task, + "DMA descriptor validation falied"); + goto out; + } + + err = patch_dma_desc_address(task, umd_dma_desc, dma_desc, + desc_num, is_misr); + if (err) + goto out; + + /* DMA_DESC_TRANS CNTL0 */ + dma_desc->transfer_control0 = + umd_dma_desc->srcTransferMode | + (umd_dma_desc->srcFormat << 3U) | + (umd_dma_desc->dstTransferMode << 4U) | + (umd_dma_desc->dstFormat << 7U); + /* DMA_DESC_TRANS CNTL1 */ + dma_desc->transfer_control1 = + umd_dma_desc->bytePerPixel | + (umd_dma_desc->pxDirection << 2U) | + (umd_dma_desc->pyDirection << 3U) | + (umd_dma_desc->boundaryPixelExtension << 4U) | + (umd_dma_desc->transTrueCompletion << 7U); + /* DMA_DESC_TRANS CNTL2 */ + if (umd_dma_desc->prefetchEnable && + (umd_dma_desc->tx == 0 || umd_dma_desc->ty == 0 || + umd_dma_desc->srcTransferMode != DMA_DESC_SRC_XFER_MC || + umd_dma_desc->dstTransferMode != DMA_DESC_DST_XFER_VMEM)) { + /* also ECET must be non zero */ + task_err(task, " Invalid criteria to enable Prefetch"); + return -EINVAL; + } + dma_desc->transfer_control2 = + umd_dma_desc->prefetchEnable | + (umd_dma_desc->dstCbEnable << 1U) | + (umd_dma_desc->srcCbEnable << 2U); + + /*! + * Block-linear surface offset. Only the surface in dram + * can be block-linear. 
+ * BLBaseAddress = translate(srcPtr / dstPtr) + surfBLOffset; + * transfer_control2.bit[3:7] = BLBaseAddress[1].bit[1:5] + * GOB offset in BL mode and corresponds to surface address + * bits [13:9] + */ + if ((umd_dma_desc->srcFormat == 1U) + && (umd_dma_desc->srcTransferMode == + DMA_DESC_SRC_XFER_MC)) { + task->src_surf_base_addr += umd_dma_desc->surfBLOffset; + dma_desc->transfer_control2 |= + (u8)((task->src_surf_base_addr & 0x3E00) >> 6U); + } else if ((umd_dma_desc->dstFormat == 1U) && + (umd_dma_desc->dstTransferMode == + DMA_DESC_DST_XFER_MC)) { + task->dst_surf_base_addr += umd_dma_desc->surfBLOffset; + dma_desc->transfer_control2 |= + (u8)((task->dst_surf_base_addr & 0x3E00) >> 6U); + } + + if (umd_dma_desc->linkDescId > task->num_dma_descriptors) { + task_err(task, "invalid link ID"); + return -EINVAL; + } + + dma_desc->link_did = umd_dma_desc->linkDescId; + + /* DMA_DESC_TX */ + dma_desc->tx = umd_dma_desc->tx; + /* DMA_DESC_TY */ + dma_desc->ty = umd_dma_desc->ty; + /* DMA_DESC_DLP_ADV */ + dma_desc->dlp_adv = umd_dma_desc->dstLinePitch; + /* DMA_DESC_SLP_ADV */ + dma_desc->slp_adv = umd_dma_desc->srcLinePitch; + /* DMA_DESC_DB_START */ + dma_desc->db_start = umd_dma_desc->dstCbStart; + /* DMA_DESC_DB_SIZE */ + dma_desc->db_size = umd_dma_desc->dstCbSize; + /* DMA_DESC_SB_START */ + dma_desc->sb_start = umd_dma_desc->srcCbStart; + /* DMA_DESC_SB_SIZE */ + dma_desc->sb_size = umd_dma_desc->srcCbSize; + /* DMA_DESC_TRIG_CH */ + /* TODO: Need to handle this parameter */ + dma_desc->trig_ch_events = 0U; + /* DMA_DESC_HW_SW_TRIG */ + dma_desc->hw_sw_trig_events = + umd_dma_desc->trigEventMode | + (umd_dma_desc->trigVpuEvents << 2U) | + (umd_dma_desc->descReloadEnable << (8U + 4U)); + /* DMA_DESC_PX */ + dma_desc->px = (uint8_t)umd_dma_desc->px; + /* DMA_DESC_PY */ + dma_desc->py = (uint8_t)umd_dma_desc->py; + /* DMA_DESC_FRDA */ + if (umd_dma_desc->dst2Ptr != NVPVA_INVALID_SYMBOL_ID) { + err = pva_get_sym_offset(&task->client->elf_ctx, + task->exe_id, + umd_dma_desc->dst2Ptr, + &addr, + &size); + if (err) { + task_err(task, + "invalid symbol id in descriptor"); + goto out; + } + + addr = addr + umd_dma_desc->dst2Offset; + dma_desc->frda |= ((addr >> 6U) & 0x3FFF); + } + + /* DMA_DESC_NDTM_CNTL0 */ + dma_desc->cb_ext = (((umd_dma_desc->srcCbStart >> 16) & 0x1) << 0) + | (((umd_dma_desc->dstCbStart >> 16) & 0x1) << 2) + | (((umd_dma_desc->srcCbSize >> 16) & 0x1) << 4) + | (((umd_dma_desc->dstCbSize >> 16) & 0x1) << 6); + /* DMA_DESC_NS1_ADV & DMA_DESC_ST1_ADV */ + dma_desc->srcpt1_cntl = + (((umd_dma_desc->srcRpt1 & 0xFF) << 24U) | + (umd_dma_desc->srcAdv1 & 0xFFFFFF)); + /* DMA_DESC_ND1_ADV & DMA_DESC_DT1_ADV */ + dma_desc->dstpt1_cntl = + (((umd_dma_desc->dstRpt1 & 0xFF) << 24U) | + (umd_dma_desc->dstAdv1 & 0xFFFFFF)); + /* DMA_DESC_NS2_ADV & DMA_DESC_ST2_ADV */ + dma_desc->srcpt2_cntl = + (((umd_dma_desc->srcRpt2 & 0xFF) << 24U) | + (umd_dma_desc->srcAdv2 & 0xFFFFFF)); + /* DMA_DESC_ND2_ADV & DMA_DESC_DT2_ADV */ + dma_desc->dstpt2_cntl = + (((umd_dma_desc->dstRpt2 & 0xFF) << 24U) | + (umd_dma_desc->dstAdv2 & 0xFFFFFF)); + /* DMA_DESC_NS3_ADV & DMA_DESC_ST3_ADV */ + dma_desc->srcpt3_cntl = + (((umd_dma_desc->srcRpt3 & 0xFF) << 24U) | + (umd_dma_desc->srcAdv3 & 0xFFFFFF)); + /* DMA_DESC_ND3_ADV & DMA_DESC_DT3_ADV */ + dma_desc->dstpt3_cntl = + (((umd_dma_desc->dstRpt3 & 0xFF) << 24U) | + (umd_dma_desc->dstAdv3 & 0xFFFFFF)); + } +out: + return err; +} + +static int +verify_dma_desc_hwseq(struct pva_submit_task *task, + struct nvpva_dma_channel *user_ch, + struct pva_hw_sweq_blob_s 
*blob, + u32 did) +{ + int err = 0; + u64 *desc_hwseq_frm = &task->desc_hwseq_frm; + struct nvpva_dma_descriptor *desc; + + if ((did == 0U) + || (did >= NVPVA_TASK_MAX_DMA_DESCRIPTORS)) { + pr_err("invalid descriptor ID"); + err = -EINVAL; + goto out; + } + + did = array_index_nospec((did - 1), + NVPVA_TASK_MAX_DMA_DESCRIPTORS); + + if ((*desc_hwseq_frm & (1ULL << did)) != 0ULL) + goto out; + + *desc_hwseq_frm |= (1ULL << did); + + desc = &task->dma_descriptors[did]; + + if ((desc->px != 0U) + || (desc->py != 0U) + || (desc->descReloadEnable != 0U)) { + pr_err("invalid descriptor padding"); + err = -EINVAL; + goto out; + } + + switch (desc->srcTransferMode) { + case DMA_DESC_SRC_XFER_VMEM: + if (((desc->dstTransferMode != DMA_DESC_DST_XFER_MC) + && (desc->dstTransferMode != DMA_DESC_DST_XFER_L2RAM)) + || (desc->dstCbEnable == 1U)) { + pr_err("invalid dst transfer mode"); + err = -EINVAL; + } + break; + case DMA_DESC_SRC_XFER_L2RAM: + case DMA_DESC_SRC_XFER_MC: + if ((desc->dstTransferMode != DMA_DESC_DST_XFER_VMEM) + || (desc->srcCbEnable == 1U)) { + pr_err("invalid src transfer mode"); + err = -EINVAL; + } + break; + case DMA_DESC_SRC_XFER_MMIO: + case DMA_DESC_SRC_XFER_INVAL: + case DMA_DESC_SRC_XFER_R5TCM: + case DMA_DESC_SRC_XFER_RSVD: + default: + pr_err("invalid dma desc transfer mode"); + err = -EINVAL; + break; + } + + if (err) + goto out; + + if (user_ch->hwseqTxSelect != 1U) + goto out; + + if (((desc->srcFormat == 1U) + || (desc->dstFormat == 1U)) + && (blob->f_header.to == 0)) { + pr_err("invalid tile offset"); + err = -EINVAL; + goto out; + } + + if (user_ch->hwseqTraversalOrder == 0) { + if (((uint32_t)((uint32_t)desc->tx + + (uint32_t)blob->f_header.pad_l) > 0xFFFFU) + || ((uint32_t)((uint32_t)desc->tx + + (uint32_t)blob->f_header.pad_r) > 0xFFFFU)) { + pr_err("invalid tx + pad x"); + err = -EINVAL; + } + } else if (user_ch->hwseqTraversalOrder == 1) { + if (((uint32_t)((uint32_t)desc->ty + + (uint32_t)blob->f_header.pad_t) > 0xFFFFU) + || ((uint32_t)((uint32_t)desc->ty + + (uint32_t)blob->f_header.pad_b) > 0xFFFFU)) { + pr_err("invalid ty + pad y"); + err = -EINVAL; + } + } else { + pr_err("invalid traversal order"); + err = -EINVAL; + } +out: + return err; +} + +static int +verify_hwseq_blob(struct pva_submit_task *task, + struct nvpva_dma_channel *user_ch, + struct nvpva_dma_descriptor *decriptors, + uint8_t *hwseqbuf_cpuva, + int8_t ch_num) + +{ + struct pva_hw_sweq_blob_s *blob; + struct pva_hwseq_desc_header_s *blob_desc; + struct pva_hwseq_cr_header_s *cr_header; + struct pva_hwseq_cr_header_s *end_addr; + u32 end = user_ch->hwseqEnd * 4; + u32 start = user_ch->hwseqStart * 4; + int err = 0; + u32 i; + u32 j; + u32 k; + u32 cr_count = 0; + u32 entry_size; + uintptr_t tmp_addr; + + blob = (struct pva_hw_sweq_blob_s *)&hwseqbuf_cpuva[start]; + end_addr = (struct pva_hwseq_cr_header_s *)&hwseqbuf_cpuva[end + 4]; + cr_header = &blob->cr_header; + blob_desc = &blob->desc_header; + + if ((end <= start) + || (((end - start + 4U) < sizeof(*blob)))) { + pr_err("invalid size of HW sequencer blob"); + err = -EINVAL; + goto out; + } + + if (end > task->hwseq_config.hwseqBuf.size) { + pr_err("blob end greater than buffer size"); + err = -EINVAL; + goto out; + } + + if (is_desc_mode(blob->f_header.fid)) { + if (task->hwseq_config.hwseqTrigMode == NVPVA_HWSEQTM_DMATRIG) { + pr_err("dma master not allowed"); + err = -EINVAL; + } + + goto out; + } + + if (!is_frame_mode(blob->f_header.fid)) { + pr_err("invalid addressing mode"); + err = -EINVAL; + goto out; + } + + cr_count =
(blob->f_header.no_cr + 1U); + start += sizeof(blob->f_header); + end += 4; + for (i = 0; i < cr_count; i++) { + u32 num_descriptors = cr_header->dec + 1; + u32 num_desc_entries = (cr_header->dec + 2) / 2; + + entry_size = num_desc_entries; + entry_size *= sizeof(struct pva_hwseq_desc_header_s); + entry_size += sizeof(struct pva_hwseq_cr_header_s); + if ((start + entry_size) > end) { + pr_err("row/column entries larger than blob"); + err = -EINVAL; + goto out; + } + + for (j = 0, k = 0; j < num_desc_entries; j++) { + err = verify_dma_desc_hwseq(task, + user_ch, + blob, + blob_desc->did1); + if (err) { + pr_err("seq descriptor 1 verification failed"); + goto out; + } + + ++k; + if (k >= num_descriptors) { + ++blob_desc; + break; + } + + err = verify_dma_desc_hwseq(task, + user_ch, + blob, + blob_desc->did2); + if (err) { + pr_err("seq descriptor 2 verification failed"); + goto out; + } + + ++blob_desc; + } + + start += entry_size; + cr_header = (struct pva_hwseq_cr_header_s *)blob_desc; + tmp_addr = (uintptr_t)blob_desc + sizeof(*cr_header); + blob_desc = (struct pva_hwseq_desc_header_s *)tmp_addr; + if (cr_header > end_addr) { + pr_err("blob size smaller than entries"); + err = -EINVAL; + goto out; + } + } +out: + return err; +} +/* User to FW mapping for DMA channel */ +static int +nvpva_task_dma_channel_mapping(struct pva_submit_task *task, + struct pva_dma_ch_config_s *ch, + u8 *hwseqbuf_cpuva, + int8_t ch_num, + int32_t hwgen, + bool hwseq_in_use) + +{ + struct nvpva_dma_channel *user_ch = &task->dma_channels[ch_num - 1]; + struct nvpva_dma_descriptor *decriptors = task->dma_descriptors; + u32 adb_limit; + int err = 0; + + if (((user_ch->descIndex > PVA_NUM_DYNAMIC_DESCS) || + ((user_ch->vdbSize + user_ch->vdbOffset) > + PVA_NUM_DYNAMIC_VDB_BUFFS))) { + pr_err("ERR: Invalid Channel control data"); + err = -EINVAL; + goto out; + } + + if (hwgen == PVA_HW_GEN1) + adb_limit = PVA_NUM_DYNAMIC_ADB_BUFFS_T19X; + else + adb_limit = PVA_NUM_DYNAMIC_ADB_BUFFS_T23X; + + if ((user_ch->adbSize + user_ch->adbOffset) > adb_limit) { + pr_err("ERR: Invalid ADB Buff size or offset"); + err = -EINVAL; + goto out; + } + + /* DMA_CHANNEL_CNTL0_CHSDID: DMA_CHANNEL_CNTL0[0] = descIndex + 1;*/ + ch->cntl0 = (((user_ch->descIndex + 1U) & 0xFFU) << 0U); + + /* DMA_CHANNEL_CNTL0_CHVMEMOREQ */ + ch->cntl0 |= ((user_ch->vdbSize & 0xFFU) << 8U); + + /* DMA_CHANNEL_CNTL0_CHBH */ + ch->cntl0 |= ((user_ch->adbSize & 0x1FFU) << 16U); + + /* DMA_CHANNEL_CNTL0_CHAXIOREQ */ + ch->cntl0 |= ((user_ch->blockHeight & 7U) << 25U); + + /* DMA_CHANNEL_CNTL0_CHPREF */ + ch->cntl0 |= ((user_ch->prefetchEnable & 1U) << 30U); + + /* Enable DMA channel */ + ch->cntl0 |= (0x1U << 31U); + + /* DMA_CHANNEL_CNTL1_CHPWT */ + ch->cntl1 = ((user_ch->reqPerGrant & 0x7U) << 2U); + + /* DMA_CHANNEL_CNTL1_CHVDBSTART */ + ch->cntl1 |= ((user_ch->vdbOffset & 0x7FU) << 16U); + + /* DMA_CHANNEL_CNTL1_CHADBSTART */ + if (hwgen == PVA_HW_GEN1) + ch->cntl1 |= ((user_ch->adbOffset & 0xFFU) << 24U); + else + ch->cntl1 |= ((user_ch->adbOffset & 0x1FFU) << 23U); + + ch->boundary_pad = user_ch->padValue; + if (hwgen == PVA_HW_GEN1) + goto out; + + /* Applicable only for T23x */ + + /* DMA_CHANNEL_CNTL1_CHREP */ + if ((user_ch->chRepFactor) && (user_ch->chRepFactor != 6)) { + pr_err("ERR: Invalid replication factor"); + err = -EINVAL; + goto out; + } + + ch->cntl1 |= ((user_ch->chRepFactor & 0x7U) << 8U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQSTART */ + ch->hwseqcntl = ((user_ch->hwseqStart & 0xFFU) << 0U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEND */ + 
ch->hwseqcntl |= ((user_ch->hwseqEnd & 0xFFU) << 12U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTD */ + ch->hwseqcntl |= ((user_ch->hwseqTriggerDone & 0x3U) << 24U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTS */ + ch->hwseqcntl |= ((user_ch->hwseqTxSelect & 0x1U) << 27U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQTO */ + ch->hwseqcntl |= ((user_ch->hwseqTraversalOrder & 0x1U) << 30U); + + /* DMA_CHANNEL_HWSEQCNTL_CHHWSEQEN */ + ch->hwseqcntl |= ((user_ch->hwseqEnable & 0x1U) << 31U); + + if ((user_ch->hwseqEnable & 0x1U) && hwseq_in_use) + err = verify_hwseq_blob(task, + user_ch, + decriptors, + hwseqbuf_cpuva, + ch_num); + +out: + return err; +} + +int pva_task_write_dma_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + int err = 0; + u8 ch_num = 0L; + int hwgen = task->pva->version; + bool is_hwseq_mode = false; + struct pva_pinned_memory *mem; + u8 *hwseqbuf_cpuva = NULL; + u32 i; + u32 j; + u32 mask; + struct pva_dma_info_s *hw_task_dma_info; + + hw_task_dma_info = &hw_task->dma_info_and_params_list.dma_info; + + if (task->num_dma_descriptors == 0L || task->num_dma_channels == 0L) { + nvpva_dbg_info(task->pva, "pva: no DMA resources: NOOP mode"); + goto out; + } + + if (task->hwseq_config.hwseqBuf.pin_id != 0U) { + if (hwgen != PVA_HW_GEN2) { + /* HW sequencer is supported only in HW_GEN2 */ + err = -EINVAL; + goto out; + } + + /* Ensure that HWSeq blob size is valid and within the + * acceptable range, i.e. up to 1KB, as per HW Sequencer RAM + * size from T23x DMA IAS doc. + */ + if ((task->hwseq_config.hwseqBuf.size == 0U) || + (task->hwseq_config.hwseqBuf.size > 1024U)) { + err = -EINVAL; + goto out; + } + + is_hwseq_mode = true; + + /* Configure HWSeq trigger mode selection in DMA Configuration + * Register + */ + hw_task_dma_info->dma_common_config |= + (task->hwseq_config.hwseqTrigMode & 0x1U) << 12U; + + mem = pva_task_pin_mem(task, + task->hwseq_config.hwseqBuf.pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, "failed to pin hwseq buffer"); + goto out; + } + + hwseqbuf_cpuva = pva_dmabuf_vmap(mem->dmabuf) + + task->hwseq_config.hwseqBuf.offset; + hw_task_dma_info->dma_hwseq_base = mem->dma_addr + + task->hwseq_config.hwseqBuf.offset; + hw_task_dma_info->num_hwseq = + task->hwseq_config.hwseqBuf.size; + } + + /* write dma channel info */ + hw_task_dma_info->num_channels = task->num_dma_channels; + hw_task_dma_info->num_descriptors = task->num_dma_descriptors; + hw_task_dma_info->descriptor_id = 1U; /* PVA_DMA_DESC0 */ + task->desc_hwseq_frm = 0ULL; + + for (i = 0; i < task->num_dma_channels; i++) { + ch_num = i + 1; /* Channel 0 can't use */ + err = nvpva_task_dma_channel_mapping( + task, + &hw_task_dma_info->dma_channels[i], + hwseqbuf_cpuva, + ch_num, + hwgen, + is_hwseq_mode); + if (err) { + task_err(task, "failed to map DMA channel info"); + goto out; + } + + /* Ensure that HWSEQCNTRL is zero for all dma channels in SW + * mode + */ + if (!is_hwseq_mode && + (hw_task_dma_info->dma_channels[i].hwseqcntl != 0U)) { + task_err(task, "invalid HWSeq config in SW mode"); + err = -EINVAL; + goto out; + } + + hw_task_dma_info->dma_channels[i].ch_number = ch_num; + mask = task->dma_channels[i].outputEnableMask; + for (j = 0; j < 7; j++) { + u32 *trig = &(hw_task_dma_info->dma_triggers[j]); + + (*trig) |= (((mask >> 2*j) & 1U) << ch_num); + (*trig) |= (((mask >> (2*j + 1)) & 1U) << (ch_num + 16U)); + } + + hw_task_dma_info->dma_triggers[7] |= + (((mask >> 14) & 1U) << ch_num); + if (hwgen == PVA_HW_GEN2) { + u32 *trig = &(hw_task_dma_info->dma_triggers[8]); + + 
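+ /* Sketch of the intended mapping, inferred from the statements + * below and the pva_dma.h signal enum: outputEnableMask bits 15 + * and 16 carry PVA_HWSEQ_VPUREAD_START and PVA_HWSEQ_VPUWRITE_START, + * and on T23x they land in this ninth trigger word with read-start + * in bit ch_num and write-start in bit (ch_num + 16). + */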
(*trig) |= (((mask >> 15) & 1U) << ch_num); + (*trig) |= (((mask >> 16) & 1U) << (ch_num + 16U)); + } + } + + err = nvpva_task_dma_desc_mapping(task, hw_task); + if (err) { + task_err(task, "failed to map DMA desc info"); + goto out; + } + + hw_task->task.dma_info = + task->dma_addr + offsetof(struct pva_hw_task, dma_info_and_params_list) + + offsetof(struct pva_dma_info_and_params_list_s, dma_info); + hw_task_dma_info->dma_descriptor_base = + task->dma_addr + offsetof(struct pva_hw_task, dma_desc); + + hw_task_dma_info->dma_info_version = PVA_DMA_INFO_VERSION_ID; + hw_task_dma_info->dma_info_size = sizeof(struct pva_dma_info_s); +out: + if (hwseqbuf_cpuva != NULL) + pva_dmabuf_vunmap(mem->dmabuf, hwseqbuf_cpuva); + + return err; +} + +int pva_task_write_dma_misr_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + struct pva_dma_info_s *hw_task_dma_info; + uint32_t common_config; + // MISR channel mask bits in DMA COMMON CONFIG + uint32_t common_config_ch_mask = PVA_MASK(31, 16); + // AXI output enable bit in DMA COMMON CONFIG + uint32_t common_config_ao_enable_mask = PVA_BIT(15U); + // SW Event select bit in DMA COMMON CONFIG + uint32_t common_config_sw_event0 = PVA_BIT(5U); + // MISR TO interrupt enable bit in DMA COMMON CONFIG + uint32_t common_config_misr_to_enable_mask = PVA_BIT(0U); + + hw_task_dma_info = &hw_task->dma_info_and_params_list.dma_info; + common_config = hw_task_dma_info->dma_common_config; + + hw_task_dma_info->dma_misr_base = 0U; + if (task->dma_misr_config.enable != 0U) { + hw_task->dma_misr_config.ref_addr = + task->dma_misr_config.ref_addr; + hw_task->dma_misr_config.seed_crc0 = + task->dma_misr_config.seed_crc0; + hw_task->dma_misr_config.ref_data_1 = + task->dma_misr_config.ref_data_1; + hw_task->dma_misr_config.seed_crc1 = + task->dma_misr_config.seed_crc1; + hw_task->dma_misr_config.ref_data_2 = + task->dma_misr_config.ref_data_2; + hw_task->dma_misr_config.misr_timeout = + task->dma_misr_config.misr_timeout; + + hw_task_dma_info->dma_misr_base = task->dma_addr + + offsetof(struct pva_hw_task, dma_misr_config); + + /* Prepare data to be written to DMA COMMON CONFIG register */ + + // Select channels that will participate in MISR computation + common_config = ((common_config & ~common_config_ch_mask) + | (~task->dma_misr_config.channel_mask << 16U)); + // Set SW_EVENT0 bit to 0 + common_config = (common_config & ~common_config_sw_event0); + // Disable AXI output + common_config = common_config & ~common_config_ao_enable_mask; + // common_config = common_config | common_config_ao_enable_mask; + // Enable MISR TO interrupts + common_config = common_config | common_config_misr_to_enable_mask; + + hw_task_dma_info->dma_common_config = common_config; + } + + return 0; +} diff --git a/drivers/video/tegra/host/pva/pva_dma.h b/drivers/video/tegra/host/pva/pva_dma.h new file mode 100644 index 00000000..1fe589b4 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_dma.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_DMA_H +#define PVA_DMA_H + +#include "pva_queue.h" + +enum nvpva_task_dma_trig_vpu_hw_events { + TRIG_VPU_NO_TRIGGER, + TRIG_VPU_DMA_READ0_START, + TRIG_VPU_DMA_STORE0_START, + TRIG_VPU_CONFIG_START, + TRIG_VPU_DMA_READ1_START, + TRIG_VPU_DMA_STORE1_START, + TRIG_VPU_DMA_READ2_START, + TRIG_VPU_DMA_STORE2_START, + TRIG_VPU_DMA_READ3_START, + TRIG_VPU_DMA_STORE3_START, + TRIG_VPU_DMA_READ4_START, + TRIG_VPU_DMA_STORE4_START, + TRIG_VPU_DMA_READ5_START, + TRIG_VPU_DMA_STORE5_START, + TRIG_VPU_DMA_READ6_START, + TRIG_VPU_DMA_STORE6_START +}; + +enum nvpva_dma_trig_event_mode { + TRIG_EVENT_MODE_DISABLED, + TRIG_EVENT_MODE_DIM4, + TRIG_EVENT_MODE_DIM3, + TRIG_EVENT_MODE_TILE +}; + +enum nvpva_task_dma_src_xfer_mode { + DMA_DESC_SRC_XFER_INVAL = 0U, + DMA_DESC_SRC_XFER_MC = 1U, + DMA_DESC_SRC_XFER_VMEM = 2U, + DMA_DESC_SRC_XFER_L2RAM = 3U, + DMA_DESC_SRC_XFER_R5TCM = 4U, + DMA_DESC_SRC_XFER_MMIO = 5U, + DMA_DESC_SRC_XFER_RSVD = 6U, + DMA_DESC_SRC_XFER_VPU_CONFIG = 7U, +}; + +enum nvpva_task_dma_dst_xfer_mode { + DMA_DESC_DST_XFER_INVAL = 0U, + DMA_DESC_DST_XFER_MC = 1U, + DMA_DESC_DST_XFER_VMEM = 2U, + DMA_DESC_DST_XFER_L2RAM = 3U, + DMA_DESC_DST_XFER_R5TCM = 4U, + DMA_DESC_DST_XFER_MMIO = 5U, + DMA_DESC_DST_XFER_RSVD1 = 6U, + DMA_DESC_DST_XFER_RSVD2 = 7U, +}; + +/* signals generated by channel */ +enum pva_dma_chan_sig { + PVA_DMA_READ0 = 0x0001, + PVA_DMA_STORE0 = 0x0002, + PVA_DMA_READ1 = 0x0004, + PVA_DMA_STORE1 = 0x0008, + PVA_DMA_READ2 = 0x0010, + PVA_DMA_STORE2 = 0x0020, + PVA_DMA_READ3 = 0x0040, + PVA_DMA_STORE3 = 0x0080, + PVA_DMA_READ4 = 0x0100, + PVA_DMA_STORE4 = 0x0200, + PVA_DMA_READ5 = 0x0400, + PVA_DMA_STORE5 = 0x0800, + PVA_DMA_READ6 = 0x1000, + PVA_DMA_STORE6 = 0x2000, + PVA_VPUCONFIG = 0x4000, + PVA_HWSEQ_VPUREAD_START = 0x8000, + PVA_HWSEQ_VPUWRITE_START = 0x10000 +}; + +int pva_task_write_dma_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task); + +int pva_task_write_dma_misr_info(struct pva_submit_task *task, + struct pva_hw_task *hw_task); +#endif diff --git a/drivers/video/tegra/host/pva/pva_fw_carveout.c b/drivers/video/tegra/host/pva/pva_fw_carveout.c new file mode 100644 index 00000000..3949bd2e --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_fw_carveout.c @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PVA carveout handling + * + * Copyright (c) 2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pva.h" +#include "pva_fw_carveout.h" + +struct nvpva_carveout_info *pva_fw_co_get_info(struct pva *pva) +{ + struct device_node *np; + const char *status = NULL; + u32 reg[4] = {0}; + + np = of_find_compatible_node(NULL, NULL, "nvidia,pva-carveout"); + if (np == NULL) { + dev_err(&pva->pdev->dev, "find node failed\n"); + goto err_out; + } + + if (of_property_read_string(np, "status", &status)) { + dev_err(&pva->pdev->dev, "read status failed\n"); + goto err_out; + } + + if (strcmp(status, "okay")) { + dev_err(&pva->pdev->dev, "status %s compare failed\n", status); + goto err_out; + } + + if (of_property_read_u32_array(np, "reg", reg, 4)) { + dev_err(&pva->pdev->dev, "reaf_32_array failed\n"); + goto err_out; + } + + pva->fw_carveout.base = ((u64)reg[0] << 32 | (u64)reg[1]); + pva->fw_carveout.size = ((u64)reg[2] << 32 | (u64)reg[3]); + pva->fw_carveout.base_va = 0; + pva->fw_carveout.base_pa = 0; + pva->fw_carveout.initialized = true; + + nvpva_dbg_fn(pva, "get co success\n"); + + return &pva->fw_carveout; +err_out: + dev_err(&pva->pdev->dev, "get co fail\n"); + pva->fw_carveout.initialized = false; + + return NULL; +} + +bool pva_fw_co_initialized(struct pva *pva) +{ + return pva->fw_carveout.initialized; +} diff --git a/drivers/video/tegra/host/pva/pva_fw_carveout.h b/drivers/video/tegra/host/pva/pva_fw_carveout.h new file mode 100644 index 00000000..860a7ccb --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_fw_carveout.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PVA carveout handling + * + * Copyright (c) 2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_FW_CO_H +#define PVA_FW_CO_H + +struct nvpva_carveout_info { + dma_addr_t base; + dma_addr_t base_pa; + void *base_va; + size_t size; + bool initialized; +}; + +struct nvpva_carveout_info *pva_fw_co_get_info(struct pva *pva); +bool pva_fw_co_initialized(struct pva *pva); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_hwseq.h b/drivers/video/tegra/host/pva/pva_hwseq.h new file mode 100644 index 00000000..f8fc60df --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_hwseq.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef PVA_HWSEQ_H +#define PVA_HWSEQ_H + +#include +#include +#include + +#define PVA_HWSEQ_FRAME_ADDR 0xC0DE +#define PVA_HWSEQ_DESC_ADDR 0xDEAD + +struct pva_hwseq_frame_header_s { + u16 fid; + u8 fr; + u8 no_cr; + u16 to; + u16 fo; + u8 pad_r; + u8 pad_t; + u8 pad_l; + u8 pad_b; +} __packed; + +struct pva_hwseq_cr_header_s { + u8 dec; + u8 crr; + u16 cro; +} __packed; + +struct pva_hwseq_desc_header_s { + u8 did1; + u8 dr1; + u8 did2; + u8 dr2; +} __packed; + +struct pva_hw_sweq_blob_s { + struct pva_hwseq_frame_header_s f_header; + struct pva_hwseq_cr_header_s cr_header; + struct pva_hwseq_desc_header_s desc_header; +} __packed; + +static inline bool is_frame_mode(u16 id) +{ + return (id == PVA_HWSEQ_FRAME_ADDR); +} + +static inline bool is_desc_mode(u16 id) +{ + return (id == PVA_HWSEQ_DESC_ADDR); +} +#endif + diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t19x.c b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.c new file mode 100644 index 00000000..6ba68c4f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include +#include + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva_interface_regs_t19x.h" + +static struct pva_status_interface_registers t19x_status_regs[NUM_INTERFACES_T19X] = { + { + { + PVA_CCQ_STATUS3_REG, + PVA_CCQ_STATUS4_REG, + PVA_CCQ_STATUS5_REG, + PVA_CCQ_STATUS6_REG, + PVA_CCQ_STATUS7_REG + } + }, +}; + +void read_status_interface_t19x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output) +{ + int i; + uint32_t *status_registers; + + status_registers = t19x_status_regs[interface_id].registers; + + for (i = 0; i < PVA_CMD_STATUS_REGS; i++) { + if (isr_status & (PVA_VALID_STATUS3 << i)) { + status_output->status[i] = host1x_readl(pva->pdev, + status_registers[i]); + if ((i == 0) && (isr_status & PVA_CMD_ERROR)) { + status_output->error = + PVA_GET_ERROR_CODE( + status_output->status[i]); + } + } + } +} diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t19x.h b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.h new file mode 100644 index 00000000..2d887697 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t19x.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_INTERFACE_REGS_T19X_H__ +#define __PVA_INTERFACE_REGS_T19X_H__ + +#include "pva.h" +#include "pva_mailbox.h" + +#define NUM_INTERFACES_T19X 1 + +#define PVA_CCQ_STATUS3_REG 0x7200c +#define PVA_CCQ_STATUS4_REG 0x72010 +#define PVA_CCQ_STATUS5_REG 0x72014 +#define PVA_CCQ_STATUS6_REG 0x72018 +#define PVA_CCQ_STATUS7_REG 0x7201c + +void read_status_interface_t19x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t23x.c b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.c new file mode 100644 index 00000000..af072343 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#else +#include +#endif +#include +#include + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva_interface_regs_t23x.h" + +static struct pva_status_interface_registers t23x_status_regs[NUM_INTERFACES_T23X] = { + { + { + PVA_EMPTY_STATUS_REG, + PVA_MBOX_STATUS4_REG, + PVA_MBOX_STATUS5_REG, + PVA_MBOX_STATUS6_REG, + PVA_MBOX_STATUS7_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ0_STATUS3_REG, + PVA_CCQ0_STATUS4_REG, + PVA_CCQ0_STATUS5_REG, + PVA_CCQ0_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ1_STATUS3_REG, + PVA_CCQ1_STATUS4_REG, + PVA_CCQ1_STATUS5_REG, + PVA_CCQ1_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ2_STATUS3_REG, + PVA_CCQ2_STATUS4_REG, + PVA_CCQ2_STATUS5_REG, + PVA_CCQ2_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ3_STATUS3_REG, + PVA_CCQ3_STATUS4_REG, + PVA_CCQ3_STATUS5_REG, + PVA_CCQ3_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ4_STATUS3_REG, + PVA_CCQ4_STATUS4_REG, + PVA_CCQ4_STATUS5_REG, + PVA_CCQ4_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ5_STATUS3_REG, + PVA_CCQ5_STATUS4_REG, + PVA_CCQ5_STATUS5_REG, + PVA_CCQ5_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ6_STATUS3_REG, + PVA_CCQ6_STATUS4_REG, + PVA_CCQ6_STATUS5_REG, + PVA_CCQ6_STATUS6_REG + } + }, + { + { + PVA_EMPTY_STATUS_REG, + PVA_CCQ7_STATUS3_REG, + PVA_CCQ7_STATUS4_REG, + PVA_CCQ7_STATUS5_REG, + PVA_CCQ7_STATUS6_REG + } + } +}; + + +void read_status_interface_t23x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output) +{ + int i; + u32 valid_status = PVA_VALID_STATUS3; + uint32_t *status_registers; + status_registers = t23x_status_regs[interface_id].registers; + if (isr_status & PVA_CMD_ERROR) { + status_output->error = PVA_GET_ERROR_CODE(isr_status); + } + if (isr_status & 
PVA_VALID_STATUS3) { + status_output->status[0] = PVA_GET_ERROR_CODE(isr_status); + } + for (i = 1; i < PVA_CMD_STATUS_REGS; i++) { + valid_status = valid_status << 1; + if (isr_status & valid_status) { + status_output->status[i] = host1x_readl(pva->pdev, + status_registers[i]); + } + } + +} diff --git a/drivers/video/tegra/host/pva/pva_interface_regs_t23x.h b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.h new file mode 100644 index 00000000..5ac6562f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_interface_regs_t23x.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_INTERFACE_REGS_T23X_H__ +#define __PVA_INTERFACE_REGS_T23X_H__ + +#include "pva.h" + +#define NUM_INTERFACES_T23X 9 + +#define PVA_EMPTY_STATUS_REG 0 + +#define PVA_MBOX_STATUS4_REG 0x178000 +#define PVA_MBOX_STATUS5_REG 0x180000 +#define PVA_MBOX_STATUS6_REG 0x188000 +#define PVA_MBOX_STATUS7_REG 0x190000 + +#define PVA_CCQ0_STATUS3_REG 0x260010 +#define PVA_CCQ0_STATUS4_REG 0x260014 +#define PVA_CCQ0_STATUS5_REG 0x260018 +#define PVA_CCQ0_STATUS6_REG 0x26001c + +#define PVA_CCQ1_STATUS3_REG 0x270010 +#define PVA_CCQ1_STATUS4_REG 0x270014 +#define PVA_CCQ1_STATUS5_REG 0x270018 +#define PVA_CCQ1_STATUS6_REG 0x27001c + +#define PVA_CCQ2_STATUS3_REG 0x280010 +#define PVA_CCQ2_STATUS4_REG 0x280014 +#define PVA_CCQ2_STATUS5_REG 0x280018 +#define PVA_CCQ2_STATUS6_REG 0x28001c + +#define PVA_CCQ3_STATUS3_REG 0x290010 +#define PVA_CCQ3_STATUS4_REG 0x290014 +#define PVA_CCQ3_STATUS5_REG 0x290018 +#define PVA_CCQ3_STATUS6_REG 0x29001c + +#define PVA_CCQ4_STATUS3_REG 0x2a0010 +#define PVA_CCQ4_STATUS4_REG 0x2a0014 +#define PVA_CCQ4_STATUS5_REG 0x2a0018 +#define PVA_CCQ4_STATUS6_REG 0x2a001c + +#define PVA_CCQ5_STATUS3_REG 0x2b0010 +#define PVA_CCQ5_STATUS4_REG 0x2b0014 +#define PVA_CCQ5_STATUS5_REG 0x2b0018 +#define PVA_CCQ5_STATUS6_REG 0x2b001c + +#define PVA_CCQ6_STATUS3_REG 0x2c0010 +#define PVA_CCQ6_STATUS4_REG 0x2c0014 +#define PVA_CCQ6_STATUS5_REG 0x2c0018 +#define PVA_CCQ6_STATUS6_REG 0x2c001c + +#define PVA_CCQ7_STATUS3_REG 0x2d0010 +#define PVA_CCQ7_STATUS4_REG 0x2d0014 +#define PVA_CCQ7_STATUS5_REG 0x2d0018 +#define PVA_CCQ7_STATUS6_REG 0x2d001c + +void read_status_interface_t23x(struct pva *pva, + uint32_t interface_id, u32 isr_status, + struct pva_cmd_status_regs *status_output); +#endif diff --git a/drivers/video/tegra/host/pva/pva_ioctl.c b/drivers/video/tegra/host/pva/pva_ioctl.c new file mode 100644 index 00000000..52e59e57 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_ioctl.c @@ -0,0 +1,1115 @@ +/* + * Copyright (c) 2016-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pva.h" +#include "pva_queue.h" +#include "nvpva_buffer.h" +#include "pva_vpu_exe.h" +#include "pva_vpu_app_auth.h" +#include "pva_system_allow_list.h" +#include "nvpva_client.h" +/** + * @brief pva_private - Per-fd specific data + * + * pdev Pointer the pva device + * queue Pointer the struct nvpva_queue + * buffer Pointer to the struct nvpva_buffer + */ +struct pva_private { + struct pva *pva; + struct nvpva_queue *queue; + struct pva_cb *vpu_print_buffer; + struct nvpva_client_context *client; +}; + +static int copy_part_from_user(void *kbuffer, size_t kbuffer_size, + struct nvpva_ioctl_part part) +{ + int err = 0; + int copy_ret; + + if (part.size == 0) + goto out; + + if (kbuffer_size < part.size) { + pr_err("pva: failed to copy from user due to size too large: %llu > %lu", + part.size, kbuffer_size); + err = -EINVAL; + goto out; + } + copy_ret = + copy_from_user(kbuffer, (void __user *)part.addr, part.size); + if (copy_ret) { + err = -EFAULT; + goto out; + } +out: + return err; +} + +static struct pva_cb *pva_alloc_cb(struct device *dev, uint32_t size) +{ + int err; + struct pva_cb *cb; + + if ((size == 0) || (((size - 1) & size) != 0)) { + dev_err(dev, "invalid circular buffer size: %u; it must be 2^N.", size); + err = -EINVAL; + goto out; + } + + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (IS_ERR_OR_NULL(cb)) { + err = PTR_ERR(cb); + goto out; + } + + cb->size = size; + cb->buffer_va = + dma_alloc_coherent(dev, cb->size, &cb->buffer_addr, GFP_KERNEL); + + if (IS_ERR_OR_NULL(cb->buffer_va)) { + err = PTR_ERR(cb->buffer_va); + goto free_mem; + } + + cb->head_va = dma_alloc_coherent(dev, sizeof(uint32_t), &cb->head_addr, + GFP_KERNEL); + if (IS_ERR_OR_NULL(cb->head_va)) { + err = PTR_ERR(cb->head_va); + goto free_buffer; + } + + cb->tail_va = dma_alloc_coherent(dev, sizeof(uint32_t), &cb->tail_addr, + GFP_KERNEL); + if (IS_ERR_OR_NULL(cb->tail_va)) { + err = PTR_ERR(cb->tail_va); + goto free_head; + } + + cb->err_va = dma_alloc_coherent(dev, sizeof(uint32_t), &cb->err_addr, + GFP_KERNEL); + if (IS_ERR_OR_NULL(cb->err_va)) { + err = PTR_ERR(cb->err_va); + goto free_tail; + } + + *cb->head_va = 0; + cb->tail = 0; + *cb->tail_va = cb->tail; + *cb->err_va = 0; + return cb; + +free_tail: + dma_free_coherent(dev, sizeof(uint32_t), cb->tail_va, cb->tail_addr); +free_head: + dma_free_coherent(dev, sizeof(uint32_t), cb->head_va, cb->head_addr); +free_buffer: + dma_free_coherent(dev, cb->size, cb->buffer_va, cb->buffer_addr); +free_mem: + kfree(cb); +out: + return ERR_PTR(err); +} + +static void pva_free_cb(struct device *dev, struct pva_cb *cb) +{ + dma_free_coherent(dev, sizeof(uint32_t), cb->tail_va, cb->tail_addr); + dma_free_coherent(dev, sizeof(uint32_t), cb->head_va, cb->head_addr); + dma_free_coherent(dev, sizeof(uint32_t), cb->err_va, cb->err_addr); + dma_free_coherent(dev, cb->size, cb->buffer_va, cb->buffer_addr); + kfree(cb); +} + + +/** + * @brief Copy a single task from userspace to kernel space + * + * This function copies 
fields from ioctl_task and performs a deep copy + * of the task to kernel memory. At the same time, input values shall + * be validated. This allows using all the fields without manually performing + * copies of the structure and performing checks later. + * + * @param ioctl_task Pointer to a userspace task that is copied + * to kernel memory + * @param task Pointer to a task that should be created + * @return 0 on Success or negative error code + * + */ +static int pva_copy_task(struct nvpva_ioctl_task *ioctl_task, + struct pva_submit_task *task) +{ + int err = 0; + u32 i; + struct pva_elf_image *image = NULL; + + nvpva_dbg_fn(task->pva, ""); + /* + * These fields are clear-text in the task descriptor. Just + * copy them. + */ + task->exe_id = ioctl_task->exe_id; + task->l2_alloc_size = ioctl_task->l2_alloc_size; + task->symbol_payload_size = ioctl_task->symbol_payload.size; + task->flags = ioctl_task->flags; + if (task->exe_id < NVPVA_NOOP_EXE_ID) + image = get_elf_image(&task->client->elf_ctx, task->exe_id); + + task->is_system_app = (image != NULL) && image->is_system_app; + +#define IOCTL_ARRAY_SIZE(field_name) \ + (ioctl_task->field_name.size / sizeof(task->field_name[0])) + + task->num_prefences = IOCTL_ARRAY_SIZE(prefences); + task->num_user_fence_actions = IOCTL_ARRAY_SIZE(user_fence_actions); + task->num_input_task_status = IOCTL_ARRAY_SIZE(input_task_status); + task->num_output_task_status = IOCTL_ARRAY_SIZE(output_task_status); + task->num_dma_descriptors = IOCTL_ARRAY_SIZE(dma_descriptors); + task->num_dma_channels = IOCTL_ARRAY_SIZE(dma_channels); + task->num_symbols = IOCTL_ARRAY_SIZE(symbols); + +#undef IOCTL_ARRAY_SIZE + + err = copy_part_from_user(&task->prefences, sizeof(task->prefences), + ioctl_task->prefences); + if (err) + goto out; + + err = copy_part_from_user(&task->user_fence_actions, + sizeof(task->user_fence_actions), + ioctl_task->user_fence_actions); + if (err) + goto out; + + err = copy_part_from_user(&task->input_task_status, + sizeof(task->input_task_status), + ioctl_task->input_task_status); + if (err) + goto out; + + err = copy_part_from_user(&task->output_task_status, + sizeof(task->output_task_status), + ioctl_task->output_task_status); + if (err) + goto out; + + err = copy_part_from_user(&task->dma_descriptors, + sizeof(task->dma_descriptors), + ioctl_task->dma_descriptors); + if (err) + goto out; + + err = copy_part_from_user(&task->dma_channels, + sizeof(task->dma_channels), + ioctl_task->dma_channels); + if (err) + goto out; + + if (task->is_system_app) + err = copy_part_from_user(&task->dma_misr_config, + sizeof(task->dma_misr_config), + ioctl_task->dma_misr_config); + else + task->dma_misr_config.enable = 0; + + if (err) + goto out; + + err = copy_part_from_user(&task->hwseq_config, + sizeof(task->hwseq_config), + ioctl_task->hwseq_config); + if (err) + goto out; + + err = copy_part_from_user(&task->symbols, sizeof(task->symbols), + ioctl_task->symbols); + if (err) + goto out; + + err = copy_part_from_user(&task->symbol_payload, + sizeof(task->symbol_payload), + ioctl_task->symbol_payload); + if (err) + goto out; + + /* Parse each postfence provided by user in 1D array and store into + * internal 2D array representation wrt type of fence and number of + * fences of each type for further processing + */ + for (i = 0; i < task->num_user_fence_actions; i++) { + struct nvpva_fence_action *fence = &task->user_fence_actions[i]; + enum nvpva_fence_action_type fence_type = fence->type; + u8 num_fence; + + if ((fence_type == 0U) || + (fence_type >= 
NVPVA_MAX_FENCE_TYPES)) { + task_err(task, "invalid fence type at index: %u", i); + err = -EINVAL; + goto out; + } + + /* Ensure that the number of postfences for each type are within + * limit + */ + num_fence = task->num_pva_fence_actions[fence_type]; + if (num_fence >= NVPVA_TASK_MAX_FENCEACTIONS) { + task_err(task, "too many fences for type: %u", + fence_type); + err = -EINVAL; + goto out; + } + + task->pva_fence_actions[fence_type][num_fence] = *fence; + task->num_pva_fence_actions[fence_type] += 1; + } + + /* Check for valid HWSeq trigger mode */ + if ((task->hwseq_config.hwseqTrigMode != NVPVA_HWSEQTM_VPUTRIG) && + (task->hwseq_config.hwseqTrigMode != NVPVA_HWSEQTM_DMATRIG)) { + task_err(task, "invalid hwseq trigger mode: %d", + task->hwseq_config.hwseqTrigMode); + err = -EINVAL; + goto out; + } + +#undef COPY_FIELD + +out: + return err; +} + +/** + * @brief Submit a task to PVA + * + * This function takes the given list of tasks, converts + * them into kernel internal representation and submits + * them to the task queue. On success, it populates + * the post-fence structures in userspace and returns 0. + * + * @param priv PVA Private data + * @param arg ioctl data + * @return 0 on Success or negative error code + * + */ +static int pva_submit(struct pva_private *priv, void *arg) +{ + struct nvpva_ioctl_submit_in_arg *ioctl_tasks_header = + (struct nvpva_ioctl_submit_in_arg *)arg; + struct nvpva_ioctl_task *ioctl_tasks = NULL; + struct pva_submit_tasks *tasks_header; + int err = 0; + unsigned long rest; + int i, j; + uint32_t num_tasks; + + num_tasks = ioctl_tasks_header->tasks.size / sizeof(*ioctl_tasks); + /* Sanity checks for the task heaader */ + if (num_tasks > NVPVA_SUBMIT_MAX_TASKS) { + err = -EINVAL; + dev_err(&priv->pva->pdev->dev, + "exceeds maximum number of tasks: %u > %u", num_tasks, + NVPVA_SUBMIT_MAX_TASKS); + goto out; + } + + num_tasks = array_index_nospec(num_tasks, NVPVA_SUBMIT_MAX_TASKS + 1); + if (ioctl_tasks_header->version > 0) { + err = -ENOSYS; + goto out; + } + + + /* Allocate memory for the UMD representation of the tasks */ + ioctl_tasks = kzalloc(ioctl_tasks_header->tasks.size, GFP_KERNEL); + if (ioctl_tasks == NULL) { + pr_err("pva: submit: allocation for tasks failed"); + err = -ENOMEM; + goto out; + } + + tasks_header = kzalloc(sizeof(struct pva_submit_tasks), GFP_KERNEL); + if (tasks_header == NULL) { + pr_err("pva: submit: allocation for tasks_header failed"); + kfree(ioctl_tasks); + err = -ENOMEM; + goto out; + } + + /* Copy the tasks from userspace */ + rest = copy_from_user(ioctl_tasks, + (void __user *)ioctl_tasks_header->tasks.addr, + ioctl_tasks_header->tasks.size); + + if (rest > 0) { + err = -EFAULT; + pr_err("pva: failed to copy tasks"); + goto free_ioctl_tasks; + } + + tasks_header->num_tasks = 0; + + /* Go through the tasks and make a KMD representation of them */ + for (i = 0; i < num_tasks; i++) { + struct pva_submit_task *task; + struct nvpva_queue_task_mem_info task_mem_info; + long timeout_jiffies = usecs_to_jiffies( + ioctl_tasks_header->submission_timeout_us); + + /* Allocate memory for the task and dma */ + err = down_timeout(&priv->queue->task_pool_sem, + timeout_jiffies); + if (err) { + pr_err("pva: timeout when allocating task buffer"); + /* UMD expects this error code */ + err = -EAGAIN; + goto free_tasks; + } + err = nvpva_queue_alloc_task_memory(priv->queue, + &task_mem_info); + task = task_mem_info.kmem_addr; + + WARN_ON((err < 0) || !task); + + /* initialize memory to 0 */ + (void)memset(task_mem_info.kmem_addr, 0, + 
priv->queue->task_kmem_size); + (void)memset(task_mem_info.va, 0, priv->queue->task_dma_size); + + /* Obtain an initial reference */ + kref_init(&task->ref); + INIT_LIST_HEAD(&task->node); + + tasks_header->tasks[i] = task; + tasks_header->num_tasks += 1; + + task->dma_addr = task_mem_info.dma_addr; + task->aux_dma_addr = task_mem_info.aux_dma_addr; + task->va = task_mem_info.va; + task->aux_va = task_mem_info.aux_va; + task->pool_index = task_mem_info.pool_index; + + task->pva = priv->pva; + task->queue = priv->queue; + task->client = priv->client; + + /* setup ownership */ + err = nvhost_module_busy(task->pva->pdev); + if (err) + goto free_tasks; + + nvpva_client_context_get(task->client); + + err = pva_copy_task(ioctl_tasks + i, task); + if (err) + goto free_tasks; + + if (priv->pva->vpu_printf_enabled) + task->stdout = priv->vpu_print_buffer; + } + + /* Populate header structure */ + tasks_header->execution_timeout_us = + ioctl_tasks_header->execution_timeout_us; + + /* TODO: submission timeout */ + /* ..and submit them */ + err = nvpva_queue_submit(priv->queue, tasks_header); + + if (err < 0) + goto free_tasks; + + /* Copy fences back to userspace */ + for (i = 0; i < tasks_header->num_tasks; i++) { + struct pva_submit_task *task = tasks_header->tasks[i]; + u32 n_copied[NVPVA_MAX_FENCE_TYPES] = {}; + struct nvpva_fence_action __user *action_fences = + (struct nvpva_fence_action __user *)ioctl_tasks[i] + .user_fence_actions.addr; + + /* Copy return postfences in the same order as that provided in + * input + */ + for (j = 0; j < task->num_user_fence_actions; j++) { + struct nvpva_fence_action *fence = + &task->user_fence_actions[j]; + enum nvpva_fence_action_type fence_type = fence->type; + + *fence = task->pva_fence_actions[fence_type] + [n_copied[fence_type]]; + n_copied[fence_type] += 1; + } + + rest = copy_to_user(action_fences, task->user_fence_actions, + ioctl_tasks[i].user_fence_actions.size); + + if (rest) { + nvpva_warn(&priv->pva->pdev->dev, + "Failed to copy pva fences to userspace"); + err = -EFAULT; + goto free_tasks; + } + } + +free_tasks: + + for (i = 0; i < tasks_header->num_tasks; i++) { + struct pva_submit_task *task = tasks_header->tasks[i]; + /* Drop the reference */ + kref_put(&task->ref, pva_task_free); + } + +free_ioctl_tasks: + + kfree(ioctl_tasks); + kfree(tasks_header); + +out: + return err; +} + +static int pva_pin(struct pva_private *priv, void *arg) +{ + int err = 0; + struct dma_buf *dmabuf[1]; + struct nvpva_pin_in_arg *in_arg = (struct nvpva_pin_in_arg *)arg; + struct nvpva_pin_out_arg *out_arg = (struct nvpva_pin_out_arg *)arg; + + dmabuf[0] = dma_buf_get(in_arg->pin.handle); + if (IS_ERR_OR_NULL(dmabuf[0])) { + dev_err(&priv->pva->pdev->dev, "invalid handle to pin: %u", + in_arg->pin.handle); + err = -EFAULT; + goto out; + } + + err = nvpva_buffer_pin(priv->client->buffers, + &dmabuf[0], + &in_arg->pin.offset, + &in_arg->pin.size, + in_arg->pin.segment, + 1, + &out_arg->pin_id, + &out_arg->error_code); + dma_buf_put(dmabuf[0]); +out: + return err; +} + +static int pva_unpin(struct pva_private *priv, void *arg) +{ + int err = 0; + struct nvpva_unpin_in_arg *in_arg = (struct nvpva_unpin_in_arg *)arg; + + nvpva_buffer_unpin_id(priv->client->buffers, &in_arg->pin_id, 1); + + return err; +} + +static int +pva_authenticate_vpu_app(struct pva *pva, + struct pva_vpu_auth_s *auth, + uint8_t *data, + u32 size, + bool is_sys) +{ + int err = 0; + + if (!auth->pva_auth_enable) + goto out; + + mutex_lock(&auth->allow_list_lock); + if 
(!auth->pva_auth_allow_list_parsed) { + if (is_sys) + err = pva_auth_allow_list_parse_buf(pva->pdev, + auth, pva_auth_allow_list_sys, + pva_auth_allow_list_sys_len); + else + err = pva_auth_allow_list_parse(pva->pdev, auth); + + if (err) { + nvpva_warn(&pva->pdev->dev, + "allow list parse failed"); + mutex_unlock(&auth->allow_list_lock); + goto out; + } + } + + mutex_unlock(&auth->allow_list_lock); + err = pva_vpu_check_sha256_key(pva, + auth->vpu_hash_keys, + data, + size); + if (err != 0) + nvpva_dbg_fn(pva, "app authentication failed"); +out: + return err; +} + +static int pva_register_vpu_exec(struct pva_private *priv, void *arg) +{ + struct nvpva_vpu_exe_register_in_arg *reg_in = + (struct nvpva_vpu_exe_register_in_arg *)arg; + struct nvpva_vpu_exe_register_out_arg *reg_out = + (struct nvpva_vpu_exe_register_out_arg *)arg; + struct pva_elf_image *image; + void *exec_data = NULL; + uint16_t exe_id; + bool is_system = false; + uint64_t data_size; + int err = 0; + + data_size = reg_in->exe_data.size; + exec_data = kmalloc(data_size, GFP_KERNEL); + if (exec_data == NULL) { + nvpva_err(&priv->pva->pdev->dev, + "failed to allocate memory for elf"); + err = -ENOMEM; + goto out; + } + + err = copy_part_from_user(exec_data, data_size, + reg_in->exe_data); + if (err) { + nvpva_err(&priv->pva->pdev->dev, + "failed to copy vpu exe data"); + goto free_mem; + } + + err = pva_authenticate_vpu_app(priv->pva, + &priv->pva->pva_auth, + (uint8_t *)exec_data, + data_size, + false); + if (err != 0) { + err = pva_authenticate_vpu_app(priv->pva, + &priv->pva->pva_auth_sys, + (uint8_t *)exec_data, + data_size, + true); + if (err != 0) + goto free_mem; + + is_system = true; + } + + err = pva_load_vpu_app(&priv->client->elf_ctx, exec_data, + data_size, &exe_id, + is_system, + priv->pva->version); + + if (err) { + nvpva_err(&priv->pva->pdev->dev, + "failed to register vpu app"); + goto free_mem; + } + + reg_out->exe_id = exe_id; + image = get_elf_image(&priv->client->elf_ctx, exe_id); + reg_out->num_of_symbols = image->num_symbols - + image->num_sys_symbols; + reg_out->symbol_size_total = image->symbol_size_total; + +free_mem: + + if (exec_data != NULL) + kfree(exec_data); +out: + return err; +} + +static int pva_unregister_vpu_exec(struct pva_private *priv, void *arg) +{ + struct nvpva_vpu_exe_unregister_in_arg *unreg_in = + (struct nvpva_vpu_exe_unregister_in_arg *)arg; + return pva_release_vpu_app(&priv->client->elf_ctx, + unreg_in->exe_id, false); +} + +static int pva_get_symbol_id(struct pva_private *priv, void *arg) +{ + struct nvpva_get_symbol_in_arg *symbol_in = + (struct nvpva_get_symbol_in_arg *)arg; + struct nvpva_get_symbol_out_arg *symbol_out = + (struct nvpva_get_symbol_out_arg *)arg; + char *symbol_buffer; + int err = 0; + uint64_t name_size = symbol_in->name.size; + struct pva_elf_symbol symbol = {0}; + + if (name_size > ELF_MAX_SYMBOL_LENGTH) { + nvpva_warn(&priv->pva->pdev->dev, "symbol size too large:%llu", + symbol_in->name.size); + name_size = ELF_MAX_SYMBOL_LENGTH; + } + + symbol_buffer = kmalloc(name_size, GFP_KERNEL); + if (symbol_buffer == NULL) { + err = -ENOMEM; + goto out; + } + + err = copy_from_user(symbol_buffer, + (void __user *)symbol_in->name.addr, + name_size); + if (err) { + nvpva_err(&priv->pva->pdev->dev, + "failed to copy all name from user"); + goto free_mem; + } + + if (symbol_buffer[name_size - 1] != '\0') { + nvpva_warn(&priv->pva->pdev->dev, + "symbol name not terminated with NULL"); + symbol_buffer[name_size - 1] = '\0'; + } + + err = 
pva_get_sym_info(&priv->client->elf_ctx, symbol_in->exe_id, + symbol_buffer, &symbol); + if (err) { + goto free_mem; + } + + symbol_out->symbol.id = symbol.symbolID; + symbol_out->symbol.size = symbol.size; + symbol_out->symbol.isPointer = + (symbol.type == (uint32_t)VMEM_TYPE_POINTER) ? 1U : 0U; +free_mem: + kfree(symbol_buffer); +out: + return err; +} + +static int pva_get_symtab(struct pva_private *priv, void *arg) +{ + struct nvpva_get_sym_tab_in_arg *sym_tab_in = + (struct nvpva_get_sym_tab_in_arg *)arg; + + int err = 0; + struct nvpva_sym_info *sym_tab_buffer; + u64 tab_size; + + err = pva_get_sym_tab_size(&priv->client->elf_ctx, + sym_tab_in->exe_id, + &tab_size); + if (err) + goto out; + + if (sym_tab_in->tab.size < tab_size) { + nvpva_err(&priv->pva->pdev->dev, + "symbol table size smaller than needed:%llu", + sym_tab_in->tab.size); + err = -EINVAL; + goto out; + } + + sym_tab_buffer = kmalloc(tab_size, GFP_KERNEL); + if (sym_tab_buffer == NULL) { + err = -ENOMEM; + goto out; + } + + err = pva_get_sym_tab(&priv->client->elf_ctx, + sym_tab_in->exe_id, + sym_tab_buffer); + if (err) + goto free_mem; + + err = copy_to_user((void __user *)sym_tab_in->tab.addr, + sym_tab_buffer, + tab_size); + +free_mem: + kfree(sym_tab_buffer); +out: + return err; +} + +/* Maximum VPU print buffer size is 16M */ +#define MAX_VPU_PRINT_BUFFER_SIZE (16 * (1 << 20)) +static int pva_set_vpu_print_buffer_size(struct pva_private *priv, void *arg) +{ + union nvpva_set_vpu_print_buffer_size_args *in_arg = + (union nvpva_set_vpu_print_buffer_size_args *)arg; + uint32_t buffer_size = in_arg->in.size; + struct device *dev = &priv->pva->aux_pdev->dev; + int err = 0; + + if (buffer_size > MAX_VPU_PRINT_BUFFER_SIZE) { + dev_err(&priv->pva->pdev->dev, + "requested VPU print buffer too large: %u > %u\n", + buffer_size, MAX_VPU_PRINT_BUFFER_SIZE); + err = -EINVAL; + goto out; + } + + mutex_lock(&priv->queue->list_lock); + if (!list_empty(&priv->queue->tasklist)) { + dev_err(&priv->pva->pdev->dev, + "can't set VPU print buffer size when there's unfinished tasks\n"); + err = -EAGAIN; + goto unlock; + } + + if (priv->vpu_print_buffer != NULL) { + pva_free_cb(dev, priv->vpu_print_buffer); + priv->vpu_print_buffer = NULL; + } + + if (buffer_size == 0) + goto unlock; + + priv->vpu_print_buffer = pva_alloc_cb(dev, buffer_size); + + if (IS_ERR(priv->vpu_print_buffer)) { + err = PTR_ERR(priv->vpu_print_buffer); + priv->vpu_print_buffer = NULL; + } + +unlock: + mutex_unlock(&priv->queue->list_lock); +out: + return err; +} + +static ssize_t pva_read_cb(struct pva_cb *cb, u8 __user *buffer, + size_t buffer_size) +{ + const u32 tail = cb->tail; + const u32 head = *cb->head_va; + const u32 size = cb->size; + ssize_t ret = 0; + u32 transfer1_size; + u32 transfer2_size; + + /* + * Check if overflow happened, and if so, report it. 
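+ * After that check, the available bytes are copied out in at most two + * chunks because the circular buffer may wrap around. With hypothetical + * values size = 16, tail = 14 and head = 4, CIRC_CNT_TO_END() yields 2 + * bytes from the tail up to the end of the buffer, and the remaining 4 + * bytes are copied from the start of the buffer.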
+ */ + if (*cb->err_va != 0) { + pr_warn("pva: VPU print buffer overflowed!\n"); + ret = -ENOSPC; + goto out; + } + + transfer1_size = CIRC_CNT_TO_END(head, tail, size); + if (transfer1_size <= buffer_size) { + buffer_size -= transfer1_size; + } else { + transfer1_size = buffer_size; + buffer_size = 0; + } + + transfer2_size = + CIRC_CNT(head, tail, size) - CIRC_CNT_TO_END(head, tail, size); + if (transfer2_size <= buffer_size) { + buffer_size -= transfer2_size; + } else { + transfer2_size = buffer_size; + buffer_size = 0; + } + + if (transfer1_size > 0) { + unsigned long failed_count; + + failed_count = copy_to_user(buffer, cb->buffer_va + tail, + transfer1_size); + if (failed_count > 0) { + pr_err("pva: VPU print buffer: write to user buffer 1 failed\n"); + ret = -EFAULT; + goto out; + } + } + + if (transfer2_size > 0) { + unsigned long failed_count; + + failed_count = copy_to_user(&buffer[transfer1_size], + cb->buffer_va, transfer2_size); + if (failed_count > 0) { + pr_err("pva: VPU print buffer: write to user buffer 2 failed\n"); + ret = -EFAULT; + goto out; + } + } + + cb->tail = + (cb->tail + transfer1_size + transfer2_size) & (cb->size - 1); + + /* + * Update tail so that firmware knows the content is consumed; Memory + * barrier is needed here because the update should only be visible to + * firmware after the content is read. + */ + mb(); + *cb->tail_va = cb->tail; + ret = transfer1_size + transfer2_size; + +out: + return ret; +} + +static long pva_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pva_private *priv = file->private_data; + u8 buf[NVPVA_IOCTL_MAX_SIZE] __aligned(sizeof(u64)); + int err = 0; + int err2 = 0; + + nvpva_dbg_fn(priv->pva, ""); + + if ((_IOC_TYPE(cmd) != NVPVA_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVPVA_IOCTL_NUMBER_MAX) || + (_IOC_SIZE(cmd) > sizeof(buf))) + return -ENOIOCTLCMD; + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) { + dev_err(&priv->pva->pdev->dev, + "failed copy ioctl buffer from user; size: %u", + _IOC_SIZE(cmd)); + return -EFAULT; + } + } + + switch (cmd) { + case NVPVA_IOCTL_GET_SYMBOL_ID: + err = pva_get_symbol_id(priv, buf); + break; + case NVPVA_IOCTL_GET_SYM_TAB: + err = pva_get_symtab(priv, buf); + break; + case NVPVA_IOCTL_REGISTER_VPU_EXEC: + err = pva_register_vpu_exec(priv, buf); + break; + case NVPVA_IOCTL_UNREGISTER_VPU_EXEC: + err = pva_unregister_vpu_exec(priv, buf); + break; + case NVPVA_IOCTL_PIN: + err = pva_pin(priv, buf); + break; + case NVPVA_IOCTL_UNPIN: + err = pva_unpin(priv, buf); + break; + case NVPVA_IOCTL_SUBMIT: + err = pva_submit(priv, buf); + break; + case NVPVA_IOCTL_SET_VPU_PRINT_BUFFER_SIZE: + err = pva_set_vpu_print_buffer_size(priv, buf); + break; + default: + err2 = -ENOIOCTLCMD; + break; + } + + if ((err2 == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err2 = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + err = (err == 0) ? 
err2 : err; + + return err; +} + +static int pva_open(struct inode *inode, struct file *file) +{ + struct nvhost_device_data *pdata = container_of( + inode->i_cdev, struct nvhost_device_data, ctrl_cdev); + struct platform_device *pdev = pdata->pdev; + struct pva *pva = pdata->private_data; + struct pva_private *priv; + int err = 0; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) { + err = -ENOMEM; + goto err_alloc_priv; + } + + file->private_data = priv; + priv->pva = pva; + priv->client = nvpva_client_context_alloc(pdev, pva, current->pid); + if (priv->client == NULL) { + err = -ENOMEM; + dev_err(&pdev->dev, "failed to allocate client context"); + goto err_alloc_context; + } + + priv->queue = nvpva_queue_alloc(pva->pool, + priv->client->cntxt_dev, + MAX_PVA_TASK_COUNT_PER_QUEUE); + + if (IS_ERR(priv->queue)) { + err = PTR_ERR(priv->queue); + goto err_alloc_queue; + } + + sema_init(&priv->queue->task_pool_sem, MAX_PVA_TASK_COUNT_PER_QUEUE); + err = nvhost_module_busy(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "error in powering up pva %d", + err); + goto err_device_busy; + } + + return nonseekable_open(inode, file); + +err_device_busy: + nvpva_queue_put(priv->queue); +err_alloc_queue: + nvpva_client_context_put(priv->client); +err_alloc_context: + nvhost_module_remove_client(pdev, priv); + kfree(priv); +err_alloc_priv: + return err; +} + +static void pva_queue_flush(struct pva *pva, struct nvpva_queue *queue) +{ + u32 flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + struct pva_cmd_status_regs status = {}; + struct pva_cmd_s cmd = {}; + int err = 0; + u32 nregs; + + nregs = pva_cmd_abort_task(&cmd, queue->id, flags); + err = nvhost_module_busy(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "error in powering up pva %d", + err); + goto err_out; + } + + err = pva->version_config->submit_cmd_sync(pva, &cmd, nregs, queue->id, + &status); + nvhost_module_idle(pva->pdev); + if (err < 0) { + dev_err(&pva->pdev->dev, "failed to issue FW abort command: %d", + err); + goto err_out; + } + /* Ensure that response is valid */ + if (status.error != PVA_ERR_NO_ERROR) { + dev_err(&pva->pdev->dev, "PVA FW Abort rejected: %d", + status.error); + } + +err_out: + return; +} + +static int pva_release(struct inode *inode, struct file *file) +{ + struct pva_private *priv = file->private_data; + bool queue_empty; + int i; + + flush_workqueue(priv->pva->task_status_workqueue); + mutex_lock(&priv->queue->list_lock); + queue_empty = list_empty(&priv->queue->tasklist); + mutex_unlock(&priv->queue->list_lock); + if (!queue_empty) { + /* Cancel remaining tasks */ + nvpva_dbg_info(priv->pva, "cancel remaining tasks"); + pva_queue_flush(priv->pva, priv->queue); + } + + /* make sure all tasks have been finished */ + for (i = 0; i < MAX_PVA_TASK_COUNT_PER_QUEUE; i++) { + if (down_killable(&priv->queue->task_pool_sem) != 0) { + nvpva_err( + &priv->pva->pdev->dev, + "interrupted while waiting %d tasks\n", + MAX_PVA_TASK_COUNT_PER_QUEUE - i); + pva_abort(priv->pva); + break; + } + } + + nvhost_module_idle(priv->pva->pdev); + + /* Release reference to client */ + nvpva_client_context_put(priv->client); + + /* + * Release handle to the queue (on-going tasks have their + * own references to the queue + */ + nvpva_queue_put(priv->queue); + + /* Free VPU print buffer if allocated */ + if (priv->vpu_print_buffer != NULL) { + pva_free_cb(&priv->pva->pdev->dev, priv->vpu_print_buffer); + priv->vpu_print_buffer = NULL; + } + + /* Finally, release the private data */ + kfree(priv); + + 
return 0; +} + +static ssize_t pva_read_vpu_print_buffer(struct file *file, + char __user *user_buffer, + size_t buffer_size, loff_t *off) +{ + struct pva_private *priv = file->private_data; + ssize_t ret; + + mutex_lock(&priv->queue->list_lock); + + if (priv->vpu_print_buffer != NULL) { + ret = pva_read_cb(priv->vpu_print_buffer, user_buffer, + buffer_size); + } else { + pr_warn("pva: VPU print buffer size needs to be specified\n"); + ret = -EIO; + } + + mutex_unlock(&priv->queue->list_lock); + + return ret; +} + +const struct file_operations tegra_pva_ctrl_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = pva_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pva_ioctl, +#endif + .open = pva_open, + .release = pva_release, + .read = pva_read_vpu_print_buffer, +}; diff --git a/drivers/video/tegra/host/pva/pva_iommu_context_dev.c b/drivers/video/tegra/host/pva/pva_iommu_context_dev.c new file mode 100644 index 00000000..4dcf306f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_iommu_context_dev.c @@ -0,0 +1,237 @@ +/* + * PVA Application Specific Virtual Memory + * + * Copyright (c) 2022-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pva_iommu_context_dev.h" +#include "pva.h" + +static u32 cntxt_dev_count; +static char *dev_names[] = { + "pva0_niso1_ctx0", + "pva0_niso1_ctx1", + "pva0_niso1_ctx2", + "pva0_niso1_ctx3", + "pva0_niso1_ctx4", + "pva0_niso1_ctx5", + "pva0_niso1_ctx6", + "pva0_niso1_ctx7", +}; + +static const struct of_device_id pva_iommu_context_dev_of_match[] = { + {.compatible = "nvidia,pva-tegra186-iommu-context"}, + {}, +}; + +struct pva_iommu_ctx { + struct platform_device *pdev; + struct list_head list; + struct device_dma_parameters dma_parms; + u32 ref_count; + bool allocated; + bool shared; +}; + +static LIST_HEAD(pva_iommu_ctx_list); +static DEFINE_MUTEX(pva_iommu_ctx_list_mutex); + +bool is_cntxt_initialized(void) +{ + return (cntxt_dev_count == 8); +} + +int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt) +{ + struct pva_iommu_ctx *ctx; + int err = 0; + int i; + + *count = 0; + mutex_lock(&pva_iommu_ctx_list_mutex); + for (i = 0; i < max_cnt; i++) { + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) { + if (strnstr(ctx->pdev->name, dev_names[i], 29) != NULL) { + hwids[*count] = nvpva_get_device_hwid(ctx->pdev, 0); + if (hwids[*count] < 0) { + err = hwids[*count]; + break; + } + + ++(*count); + if (*count >= max_cnt) + break; + } + } + } + + mutex_unlock(&pva_iommu_ctx_list_mutex); + + return err; +} + +struct platform_device +*nvpva_iommu_context_dev_allocate(char *identifier, size_t len, bool shared) +{ + struct pva_iommu_ctx *ctx; + struct pva_iommu_ctx *ctx_new = NULL; + + mutex_lock(&pva_iommu_ctx_list_mutex); + + if (identifier == NULL) { + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) + if (!ctx->allocated && 
!ctx_new) + ctx_new = ctx; + if (!ctx_new && shared) + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) + if ((!ctx->allocated || ctx->shared) && !ctx_new) + ctx_new = ctx; + } else { + list_for_each_entry(ctx, &pva_iommu_ctx_list, list) + if (!ctx_new + && (strncmp(ctx->pdev->name, identifier, len) == 0)) + ctx_new = ctx; + + if (ctx_new && !shared && ctx_new->allocated) + ctx_new = NULL; + + if (ctx_new && shared && (ctx_new->allocated && !ctx_new->shared)) + ctx_new = NULL; + } + + if (ctx_new) { +#ifdef CONFIG_NVMAP + /* + * Ensure that all stashed mappings are removed from this context device + * before this context device gets reassigned to some other process + */ + dma_buf_release_stash(&ctx_new->pdev->dev); +#endif + ctx_new->allocated = true; + ctx_new->shared = shared; + ctx_new->ref_count += 1; + mutex_unlock(&pva_iommu_ctx_list_mutex); + return ctx_new->pdev; + } + + mutex_unlock(&pva_iommu_ctx_list_mutex); + + return NULL; +} + +void nvpva_iommu_context_dev_release(struct platform_device *pdev) +{ + struct pva_iommu_ctx *ctx; + + if (pdev == NULL) + return; + + ctx = platform_get_drvdata(pdev); + mutex_lock(&pva_iommu_ctx_list_mutex); + ctx->ref_count -= 1; + if (ctx->ref_count == 0) { + ctx->allocated = false; + ctx->shared = false; + } + + mutex_unlock(&pva_iommu_ctx_list_mutex); +} + +static int pva_iommu_context_dev_probe(struct platform_device *pdev) +{ + struct pva_iommu_ctx *ctx; + + if (!iommu_get_domain_for_dev(&pdev->dev)) { + dev_err(&pdev->dev, + "iommu is not enabled for context device. aborting."); + return -ENOSYS; + } + + ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + dev_err(&pdev->dev, + "%s: could not allocate iommu ctx\n", __func__); + return -ENOMEM; + } + + if (strnstr(pdev->name, dev_names[7], 29) != NULL) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + else + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); + + INIT_LIST_HEAD(&ctx->list); + ctx->pdev = pdev; + + mutex_lock(&pva_iommu_ctx_list_mutex); + list_add_tail(&ctx->list, &pva_iommu_ctx_list); + cntxt_dev_count++; + mutex_unlock(&pva_iommu_ctx_list_mutex); + + platform_set_drvdata(pdev, ctx); + + pdev->dev.dma_parms = &ctx->dma_parms; + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + +#ifdef CONFIG_NVMAP + /* flag required to handle stashings in context devices */ + pdev->dev.context_dev = true; +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION(5, 0, 0) + dev_info(&pdev->dev, "initialized (streamid=%d, iommu=%s)", + nvpva_get_device_hwid(pdev, 0), + dev_name(pdev->dev.iommu->iommu_dev->dev)); +#else + dev_info(&pdev->dev, "initialized (streamid=%d)", + nvpva_get_device_hwid(pdev, 0)); +#endif + return 0; +} + +static int __exit pva_iommu_context_dev_remove(struct platform_device *pdev) +{ + struct pva_iommu_ctx *ctx = platform_get_drvdata(pdev); + + mutex_lock(&pva_iommu_ctx_list_mutex); + list_del(&ctx->list); + mutex_unlock(&pva_iommu_ctx_list_mutex); + + return 0; +} + +struct platform_driver nvpva_iommu_context_dev_driver = { + .probe = pva_iommu_context_dev_probe, + .remove = __exit_p(pva_iommu_context_dev_remove), + .driver = { + .owner = THIS_MODULE, + .name = "pva_iommu_context_dev", +#ifdef CONFIG_OF + .of_match_table = pva_iommu_context_dev_of_match, +#endif + }, +}; + diff --git a/drivers/video/tegra/host/pva/pva_iommu_context_dev.h b/drivers/video/tegra/host/pva/pva_iommu_context_dev.h new file mode 100644 index 00000000..fe4d95f2 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_iommu_context_dev.h @@ -0,0 +1,28 @@ +/* + * Host1x 
Application Specific Virtual Memory + * + * Copyright (c) 2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef IOMMU_CONTEXT_DEV_H +#define IOMMU_CONTEXT_DEV_H + +struct platform_device +*nvpva_iommu_context_dev_allocate(char *identifier, size_t len, bool shared); +void nvpva_iommu_context_dev_release(struct platform_device *pdev); +int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt); +bool is_cntxt_initialized(void); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_isr.c b/drivers/video/tegra/host/pva/pva_isr.c new file mode 100644 index 00000000..40a89da0 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_isr.c @@ -0,0 +1,155 @@ +/* + * PVA ISR code + * + * Copyright (c) 2016-2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#define PVA_MASK_LOW_16BITS 0xff + +#include "pva-interface.h" +#include +#include +#include +#include +#include +#include "pva_regs.h" +#include "pva.h" +#include "pva_isr_t23x.h" + +void pva_push_aisr_status(struct pva *pva, uint32_t aisr_status) +{ + struct pva_task_error_s *err_array = pva->priv_circular_array.va; + struct pva_task_error_s *src_va = &err_array[pva->circular_array_wr_pos]; + + src_va->queue = PVA_GET_QUEUE_ID_FROM_STATUS(aisr_status); + src_va->vpu = PVA_GET_VPU_ID_FROM_STATUS(aisr_status); + src_va->error = PVA_GET_ERROR_FROM_STATUS(aisr_status); + src_va->task_id = PVA_GET_TASK_ID_FROM_STATUS(aisr_status); + src_va->valid = 1U; + + if (pva->circular_array_wr_pos == (MAX_PVA_TASK_COUNT-1)) + pva->circular_array_wr_pos = 0; + else + pva->circular_array_wr_pos += 1; +} + +static irqreturn_t pva_system_isr(int irq, void *dev_id) +{ + struct pva *pva = dev_id; + struct platform_device *pdev = pva->pdev; + u32 checkpoint = host1x_readl(pdev, + cfg_ccq_status_r(pva->version, 0, 8)); + u32 status7 = pva->version_config->read_mailbox(pdev, PVA_MBOX_ISR); + u32 status5 = pva->version_config->read_mailbox(pdev, PVA_MBOX_AISR); + u32 lic_int_status = host1x_readl(pdev, + sec_lic_intr_status_r(pva->version)); + u32 h1xflgs; + bool recover = false; + + if (status5 & PVA_AISR_INT_PENDING) { + nvpva_dbg_info(pva, "PVA AISR (%x)", status5); + + if (status5 & (PVA_AISR_TASK_COMPLETE | PVA_AISR_TASK_ERROR)) { + atomic_add(1, &pva->n_pending_tasks); + queue_work(pva->task_status_workqueue, + &pva->task_update_work); + if ((status5 & PVA_AISR_ABORT) == 0U) + pva_push_aisr_status(pva, status5); + } + + /* For now, just log the errors */ + if (status5 & PVA_AISR_TASK_ERROR) + nvpva_warn(&pdev->dev, "PVA AISR: PVA_AISR_TASK_ERROR"); + if (status5 & PVA_AISR_ABORT) { + nvpva_warn(&pdev->dev, "PVA AISR: PVA_AISR_ABORT"); + nvpva_warn(&pdev->dev, "Checkpoint value: 0x%08x", + checkpoint); + recover = true; + } + + pva->version_config->write_mailbox(pdev, PVA_MBOX_AISR, 0x0); + } + + if (status7 & PVA_INT_PENDING) { + nvpva_dbg_info(pva, "PVA ISR (%x)", status7); + + pva_mailbox_isr(pva); + } + + + /* Check for watchdog timer interrupt */ + if (lic_int_status & sec_lic_intr_enable_wdt_f(SEC_LIC_INTR_WDT)) { + nvpva_warn(&pdev->dev, "WatchDog Timer"); + recover = true; + } + + /* Check for host1x errors*/ + if (pva->version == PVA_HW_GEN1) { + h1xflgs = sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_19); + } else { + h1xflgs = sec_lic_intr_enable_h1x_f(SEC_LIC_INTR_H1X_ALL_23); + } + if (lic_int_status & h1xflgs) { + nvpva_warn(&pdev->dev, "Pva Host1x errors (0x%x)", + lic_int_status); + + /* Clear the interrupt */ + host1x_writel(pva->pdev, sec_lic_intr_status_r(pva->version), + (lic_int_status & h1xflgs)); + recover = true; + } + + /* Copy trace points to ftrace buffer */ + pva_trace_copy_to_ftrace(pva); + + if (recover) + pva_abort(pva); + + return IRQ_HANDLED; +} + +int pva_register_isr(struct platform_device *dev) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(dev); + struct pva *pva = pdata->private_data; + int err; + int i; + irq_handler_t irq_handler; + + for (i = 0; i < pva->version_config->irq_count; i++) { + pva->irq[i] = platform_get_irq(dev, i); + if (pva->irq[i] <= 0) { + dev_err(&dev->dev, "no irq %d\n", i); + err = -ENOENT; + break; + } + + /* IRQ0 is for mailbox/h1x/watchdog */ + if (i == 0) + irq_handler = pva_system_isr; + else + irq_handler = pva_ccq_isr; + + err = request_threaded_irq(pva->irq[i], NULL, irq_handler, + IRQF_ONESHOT, "pva-isr", pva); + if 
(err) { + pr_err("%s: request_irq(%d) failed(%d)\n", __func__, + pva->irq[i], err); + break; + } + disable_irq(pva->irq[i]); + } + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_isr_t23x.c b/drivers/video/tegra/host/pva/pva_isr_t23x.c new file mode 100644 index 00000000..688c46db --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_isr_t23x.c @@ -0,0 +1,109 @@ +/* + * PVA ISR code for T23X + * + * Copyright (c) 2019-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "pva_isr_t23x.h" + +#include +#include + +#include "pva_regs.h" +#include "pva.h" +#include "pva_ccq_t23x.h" + +#define PVA_MASK_LOW_16BITS 0xff + +irqreturn_t pva_ccq_isr(int irq, void *dev_id) +{ + uint32_t int_status = 0, isr_status = 0, aisr_status = 0; + unsigned int queue_id = MAX_PVA_QUEUE_COUNT + 1; + int i; + struct pva *pva = dev_id; + struct platform_device *pdev = pva->pdev; + bool recover = false; + + for (i = 1; i < MAX_PVA_IRQS; i++) { + if (pva->irq[i] == irq) { + queue_id = i - 1; + break; + } + } + if (queue_id == MAX_PVA_QUEUE_COUNT + 1) { + printk("Invalid IRQ received. Returning from ISR"); + return IRQ_HANDLED; + } + nvpva_dbg_info(pva, "Received ISR from CCQ block, IRQ: %d", irq); + int_status = host1x_readl(pdev, cfg_ccq_status_r(pva->version, + queue_id, PVA_CCQ_STATUS2_INDEX)) + & ~PVA_MASK_LOW_16BITS; + + if (int_status != 0x0) { + nvpva_dbg_info(pva, "Clear CCQ interrupt for %d, \ + current status: 0x%x", + queue_id, int_status); + host1x_writel(pdev, + cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS2_INDEX), + int_status); + } + + if (int_status & PVA_VALID_CCQ_ISR) { + isr_status = host1x_readl(pdev, cfg_ccq_status_r(pva->version, + queue_id, PVA_CCQ_STATUS7_INDEX)); + } + if (int_status & PVA_VALID_CCQ_AISR) { + aisr_status = host1x_readl(pdev, cfg_ccq_status_r(pva->version, + queue_id, PVA_CCQ_STATUS8_INDEX)); + } + if (aisr_status & PVA_AISR_INT_PENDING) { + nvpva_dbg_info(pva, "PVA CCQ AISR (%x)", aisr_status); + if (aisr_status & + (PVA_AISR_TASK_COMPLETE | PVA_AISR_TASK_ERROR)) { + atomic_add(1, &pva->n_pending_tasks); + queue_work(pva->task_status_workqueue, + &pva->task_update_work); + if ((aisr_status & PVA_AISR_ABORT) == 0U) + pva_push_aisr_status(pva, aisr_status); + } + + /* For now, just log the errors */ + + if (aisr_status & PVA_AISR_TASK_ERROR) + nvpva_warn(&pdev->dev, + "PVA AISR: \ + PVA_AISR_TASK_ERROR for queue id = %d", + queue_id); + if (aisr_status & PVA_AISR_ABORT) { + nvpva_warn(&pdev->dev, "PVA AISR: \ + PVA_AISR_ABORT for queue id = %d", + queue_id); + nvpva_warn(&pdev->dev, "Checkpoint value: 0x%08x", + aisr_status); + recover = true; + } + /* Acknowledge AISR by writing status 1 */ + host1x_writel(pdev, cfg_ccq_status_r(pva->version, queue_id, + PVA_CCQ_STATUS1_INDEX), 0x01U); + } + if (isr_status & PVA_INT_PENDING) { + pva_ccq_isr_handler(pva, queue_id); + } + if (recover) + pva_abort(pva); + + return IRQ_HANDLED; +} diff --git 
a/drivers/video/tegra/host/pva/pva_isr_t23x.h b/drivers/video/tegra/host/pva/pva_isr_t23x.h new file mode 100644 index 00000000..6c9a491a --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_isr_t23x.h @@ -0,0 +1,25 @@ +/* + * PVA ISR interface for T23X + * + * Copyright (c) 2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __NVHOST_PVA_ISR_T23X_H__ +#define __NVHOST_PVA_ISR_T23X_H__ + +#include + +irqreturn_t pva_ccq_isr(int irq, void *dev_id); + +#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/pva_mailbox.c b/drivers/video/tegra/host/pva/pva_mailbox.c new file mode 100644 index 00000000..41b89c65 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox.c @@ -0,0 +1,207 @@ +/* + * PVA mailbox code + * + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include + +#include "pva.h" +#include "pva_mailbox.h" +#include "pva-interface.h" + +static u32 pva_get_mb_reg_id(u32 i) +{ + u32 mb_reg_id[VALID_MB_INPUT_REGS] = { + 0, + 1, + 2, + 3 + }; + + return mb_reg_id[i]; +} + +static int pva_mailbox_send_cmd(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs) +{ + struct platform_device *pdev = pva->pdev; + u32 reg, status; + int i; + + if (nregs > VALID_MB_INPUT_REGS) { + pr_err("%s nregs %d more than expected\n", __func__, nregs); + return -EINVAL; + } + + /* Make sure the state is what we expect it to be. */ + status = pva->version_config->read_mailbox(pdev, PVA_MBOX_ISR); + + WARN_ON((status & PVA_INT_PENDING)); + WARN_ON((status & PVA_READY) == 0); + WARN_ON((status & PVA_BUSY)); + + /*set MSB of mailbox 0 to trigger FW interrupt*/ + cmd->cmd_field[0] |= PVA_BIT(31); + /* Write all of the other command mailbox + * registers before writing mailbox 0. 
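+ * Writing mailbox 0 last matters: bit 31 was set above, so the write to + * mailbox 0 is what raises the interrupt towards firmware, and the other + * command words must already be in place when it lands.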
+ */ + for (i = (nregs - 1); i >= 0; i--) { + reg = pva_get_mb_reg_id(i); + pva->version_config->write_mailbox(pdev, reg, cmd->cmd_field[i]); + } + + return 0; +} + +int pva_mailbox_wait_event(struct pva *pva, int wait_time) +{ + int timeout = 1; + int err; + /* Wait for the event being triggered in ISR */ + if (pva->timeout_enabled == true) + timeout = wait_event_timeout( + pva->cmd_waitqueue[PVA_MAILBOX_INDEX], + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_DONE || + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_ABORTED, + msecs_to_jiffies(wait_time)); + else + wait_event(pva->cmd_waitqueue[PVA_MAILBOX_INDEX], + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_DONE || + pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_ABORTED); + + if (timeout <= 0) { + err = -ETIMEDOUT; + pva_abort(pva); + } else if (pva->cmd_status[PVA_MAILBOX_INDEX] == + PVA_CMD_STATUS_ABORTED) + err = -EIO; + else + err = 0; + + return err; +} + +void pva_mailbox_isr(struct pva *pva) +{ + struct platform_device *pdev = pva->pdev; + u32 int_status = pva->version_config->read_mailbox(pdev, PVA_MBOX_ISR); + if (pva->cmd_status[PVA_MAILBOX_INDEX] != PVA_CMD_STATUS_WFI) { + nvpva_warn(&pdev->dev, "Unexpected PVA ISR (%x)", int_status); + return; + } + + /* Save the current command and subcommand for later processing */ + pva->cmd_status_regs[PVA_MAILBOX_INDEX].cmd = + pva->version_config->read_mailbox(pdev, + PVA_MBOX_COMMAND); + pva->version_config->read_status_interface(pva, + PVA_MAILBOX_INDEX, int_status, + &pva->cmd_status_regs[PVA_MAILBOX_INDEX]); + /* Clear the mailbox interrupt status */ + int_status = int_status & PVA_READY; + pva->version_config->write_mailbox(pdev, PVA_MBOX_ISR, int_status); + + /* Wake up the waiters */ + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_DONE; + wake_up(&pva->cmd_waitqueue[PVA_MAILBOX_INDEX]); +} + +int pva_mailbox_send_cmd_sync_locked(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + if (status_regs == NULL) { + err = -EINVAL; + goto err_invalid_parameter; + } + + /* Ensure that mailbox state is sane */ + if (WARN_ON(pva->cmd_status[PVA_MAILBOX_INDEX] != + PVA_CMD_STATUS_INVALID)) { + err = -EIO; + goto err_check_status; + } + + /* Mark that we are waiting for an interrupt */ + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_WFI; + memset(&pva->cmd_status_regs, 0, sizeof(pva->cmd_status_regs)); + + /* Submit command to PVA */ + err = pva_mailbox_send_cmd(pva, cmd, nregs); + if (err < 0) + goto err_send_command; + + err = pva_mailbox_wait_event(pva, 100); + if (err < 0) + goto err_wait_response; + + /* Return interrupt status back to caller */ + memcpy(status_regs, &pva->cmd_status_regs, + sizeof(struct pva_cmd_status_regs)); + + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_INVALID; + return err; + +err_wait_response: +err_send_command: + pva->cmd_status[PVA_MAILBOX_INDEX] = PVA_CMD_STATUS_INVALID; +err_check_status: +err_invalid_parameter: + return err; +} + +int pva_mailbox_send_cmd_sync(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs) +{ + int err = 0; + + if (status_regs == NULL) { + err = -EINVAL; + goto err_invalid_parameter; + } + + mutex_lock(&pva->mailbox_mutex); + err = pva_mailbox_send_cmd_sync_locked(pva, + cmd, + nregs, + status_regs); + mutex_unlock(&pva->mailbox_mutex); + + return err; + +err_invalid_parameter: + return err; +} + + +EXPORT_SYMBOL(pva_mailbox_send_cmd_sync); diff --git 
a/drivers/video/tegra/host/pva/pva_mailbox.h b/drivers/video/tegra/host/pva/pva_mailbox.h new file mode 100644 index 00000000..577e0b1f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox.h @@ -0,0 +1,134 @@ +/* + * PVA mailbox header + * + * Copyright (c) 2016-2021, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_MAILBOX_H__ +#define __PVA_MAILBOX_H__ + +#include + +#include "pva-interface.h" +#include "pva_status_regs.h" + +/* Total CCQ status registers */ +#define PVA_CCQ_STATUS_REGS 9 + +/* Symbolic definitions of the CCQ status registers */ +#define PVA_CCQ_STATUS0_INDEX 0 +#define PVA_CCQ_STATUS1_INDEX 1 +#define PVA_CCQ_STATUS2_INDEX 2 +#define PVA_CCQ_STATUS3_INDEX 3 +#define PVA_CCQ_STATUS4_INDEX 4 +#define PVA_CCQ_STATUS5_INDEX 5 +#define PVA_CCQ_STATUS6_INDEX 6 +#define PVA_CCQ_STATUS7_INDEX 7 +#define PVA_CCQ_STATUS8_INDEX 8 + + +/* Number of valid MBOX registers used for sending commands */ +#define VALID_MB_INPUT_REGS 4 +/* Number of valid MBOX registers */ +#define VALID_MB_INPUT_REGS_EX 8 +struct pva; + +/** + * enum pva_mailbox_status - PVA mailbox status indication + * + * These enumerations reflect the state of PVA interrupt handler + */ +enum pva_mailbox_status { + PVA_MBOX_STATUS_INVALID = 0, + PVA_MBOX_STATUS_WFI = 1, + PVA_MBOX_STATUS_DONE = 2, + PVA_MBOX_STATUS_ABORTED = 3, +}; + +/** + * struct pva_mailbox_status_regs - Handle the MBOX status based on ISR + * + * @cmd: Holds the current MBOX command + * @error: Holds the any error shown through ISR + * @status: Holds the status of all CCQ registers + * + */ +struct pva_mailbox_status_regs { + uint32_t status[PVA_CCQ_STATUS_REGS]; + uint32_t error; + uint32_t cmd; +}; + +/** + * + * pva_mailbox_send_cmd_sync() - Send a command and wait for response + * + * @pva: Pointer to PVA structure + * @pva_cmd: Pointer to the pva command struct + * @nregs: Number of valid mailbox registers for the command + * @status_regs: Pointer to pva_cmd_status_regs struct + * + * This function called by OS to pass the mailbox commands to + * the PVA uCode. The function returns the output status from PVA + * firmware once the task is completed. + * + * The caller is responsible to ensure that PVA has been powered + * up through nvhost_module_busy() API prior calling this function. + */ + +int pva_mailbox_send_cmd_sync(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs); +/** + * + * pva_mailbox_send_cmd_sync_locked() - Send a command and wait for response + * + * @pva: Pointer to PVA structure + * @pva_cmd: Pointer to the pva command struct + * @nregs: Number of valid mailbox registers for the command + * @status_regs: Pointer to pva_cmd_status_regs struct + * + * This function called by OS to pass the mailbox commands to + * the PVA uCode. The function returns the output status from PVA + * firmware once the task is completed. 
This function must not be + * used during runtime without holding the mailbox mutex (i.e. + * the function can be called during PVA boot-up). + */ +int pva_mailbox_send_cmd_sync_locked(struct pva *pva, + struct pva_cmd_s *cmd, u32 nregs, + struct pva_cmd_status_regs *status_regs); + +/** + * pva_mailbox_isr() - Handle interrupt for PVA ISR + * + * @pva: Pointer to PVA structure + * + * This function is used to read the CCQ status registers based on + * the status set in mailbox7 by the PVA uCode. + */ +void pva_mailbox_isr(struct pva *pva); + +/** + * pva_mailbox_wait_event() - mailbox wait event + * + * @pva:» Pointer to PVA structure + * @wait_time» WaitTime Interval for the event + * + * This function do the wait until the mailbox isr get invoked based on + * the mailbox register set by the ucode. + */ +int pva_mailbox_wait_event(struct pva *pva, int wait_time); + +#endif /*__PVA_MAINBOX_H__*/ diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t19x.c b/drivers/video/tegra/host/pva/pva_mailbox_t19x.c new file mode 100644 index 00000000..415ec489 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t19x.c @@ -0,0 +1,80 @@ +/* + * PVA mailbox code + * + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif +#include +#include + +#include "pva_mailbox.h" +#include "pva_mailbox_t19x.h" +#include "pva_regs.h" + +static u32 pva_get_mb_reg_ex(u32 i) +{ + u32 mb_reg[VALID_MB_INPUT_REGS_EX] = { + hsp_sm0_r(), + hsp_sm1_r(), + hsp_sm2_r(), + hsp_sm3_r(), + hsp_sm4_r(), + hsp_sm5_r(), + hsp_sm6_r(), + hsp_sm7_r() + }; + + return mb_reg[i]; +} + +u32 pva_read_mailbox_t19x(struct platform_device *pdev, u32 mbox_id) +{ + u32 side_bits = 0; + u32 mbox_value = 0; + u32 side_channel_addr = + pva_get_mb_reg_ex(PVA_MBOX_SIDE_CHANNEL_HOST_RD); + + side_bits = host1x_readl(pdev, side_channel_addr); + mbox_value = host1x_readl(pdev, pva_get_mb_reg_ex(mbox_id)); + side_bits = ((side_bits >> mbox_id) & 0x1) << PVA_SIDE_CHANNEL_MBOX_BIT; + mbox_value = (mbox_value & PVA_SIDE_CHANNEL_MBOX_BIT_MASK) | side_bits; + + return mbox_value; +} + +void pva_write_mailbox_t19x(struct platform_device *pdev, + u32 mbox_id, u32 value) +{ + u32 side_bits = 0; + u32 side_channel_addr = + pva_get_mb_reg_ex(PVA_MBOX_SIDE_CHANNEL_HOST_WR); + + side_bits = host1x_readl(pdev, side_channel_addr); + side_bits &= ~(1 << mbox_id); + side_bits |= ((value >> PVA_SIDE_CHANNEL_MBOX_BIT) & 0x1) << mbox_id; + value = (value & PVA_SIDE_CHANNEL_MBOX_BIT_MASK); + host1x_writel(pdev, side_channel_addr, side_bits); + host1x_writel(pdev, pva_get_mb_reg_ex(mbox_id), value); +} diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t19x.h b/drivers/video/tegra/host/pva/pva_mailbox_t19x.h new file mode 100644 index 00000000..62ce1d89 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t19x.h @@ -0,0 +1,54 @@ +/* + * PVA mailbox header + * + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_MAILBOX_T19X_H__ +#define __PVA_MAILBOX_T19X_H__ + +#include + +#include "pva-interface.h" +/** + * pva_read_mailbox() - read a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * + * This function will read the indicated mailbox register and return its + * contents. it uses side channel B as host would. + * + * Return Value: + * contents of the indicated mailbox register + */ +u32 pva_read_mailbox_t19x(struct platform_device *pdev, u32 mbox_id); + +/** + * pva_write_mailbox() - write to a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * @value: value to be written into the mailbox register + * + * This function will write a value into the indicated mailbox register. 
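+ * On T19x one bit of the value (PVA_SIDE_CHANNEL_MBOX_BIT) is carried via the + * shared side-channel register rather than the mailbox register itself, so + * callers should use this helper instead of a raw host1x_writel().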
+ * + * Return Value: + * none + */ +void pva_write_mailbox_t19x(struct platform_device *pdev, + u32 mbox_id, u32 value); + +#endif /*__PVA_MAINBOX_T19X_H__*/ diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t23x.c b/drivers/video/tegra/host/pva/pva_mailbox_t23x.c new file mode 100644 index 00000000..8e3b94d9 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t23x.c @@ -0,0 +1,56 @@ +/* + * PVA mailbox code for T23x + * + * Copyright (c) 2016-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "pva_mailbox.h" +#include "pva_mailbox_t23x.h" +#include "pva_regs.h" + + +static u32 pva_get_mb_reg_ex(u32 i) +{ + u32 mb_reg[VALID_MB_INPUT_REGS_EX] = { + hsp_sm0_r(), + hsp_sm1_r(), + hsp_sm2_r(), + hsp_sm3_r(), + hsp_sm4_r(), + hsp_sm5_r(), + hsp_sm6_r(), + hsp_sm7_r() + }; + + return mb_reg[i]; +} + +u32 pva_read_mailbox_t23x(struct platform_device *pdev, u32 mbox_id) +{ + return host1x_readl(pdev, pva_get_mb_reg_ex(mbox_id)); +} + +void pva_write_mailbox_t23x(struct platform_device *pdev, u32 mbox_id, u32 value) +{ + host1x_writel(pdev, pva_get_mb_reg_ex(mbox_id), value); +} + diff --git a/drivers/video/tegra/host/pva/pva_mailbox_t23x.h b/drivers/video/tegra/host/pva/pva_mailbox_t23x.h new file mode 100644 index 00000000..536e23fc --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_mailbox_t23x.h @@ -0,0 +1,54 @@ +/* + * PVA mailbox header + * + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_MAILBOX_T23X_H__ +#define __PVA_MAILBOX_T23X_H__ + +#include + +#include "pva-interface.h" + +/** + * pva_read_mailbox() - read a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * + * This function will read the indicated mailbox register and return its + * contents. it uses side channel B as host would. + * + * Return Value: + * contents of the indicated mailbox register + */ +u32 pva_read_mailbox_t23x(struct platform_device *pdev, u32 mbox_id); + +/** + * pva_write_mailbox() - write to a mailbox register + * + * @pva: Pointer to PVA structure + * @mbox: mailbox register to be written + * @value: value to be written into the mailbox register + * + * This function will write a value into the indicated mailbox register. 
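+ * On T23x the value is written directly to the mailbox register; there is + * no side-channel handling as on T19x.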
+ * + * Return Value: + * none + */ +void pva_write_mailbox_t23x(struct platform_device *pdev, u32 mbox_id, u32 value); + +#endif /*__PVA_MAINBOX_T23X_H__*/ diff --git a/drivers/video/tegra/host/pva/pva_nvhost.h b/drivers/video/tegra/host/pva/pva_nvhost.h new file mode 100644 index 00000000..9abed27c --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_nvhost.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2021-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_NVHOST_H +#define PVA_NVHOST_H + +#include +#include + +extern const struct file_operations tegra_pva_ctrl_ops; + +/** + * @brief Finalize the PVA Power-on-Sequence. + * + * This function called from host subsystem driver after the PVA + * partition has been brought up, clocks enabled and reset deasserted. + * In production mode, the function needs to wait until the ready bit + * within the PVA aperture has been set. After that enable the PVA IRQ. + * Register the queue priorities on the PVA. + * + * @param pdev Pointer to PVA device + * @return: 0 on Success or negative error code + * + */ +int pva_finalize_poweron(struct platform_device *pdev); + +/** + * @brief Prepare PVA poweroff. + * + * This function called from host subsystem driver before turning off + * the PVA. The function should turn off the PVA IRQ. + * + * @param pdev Pointer to PVA device + * @return 0 on Success or negative error code + * + */ +int pva_prepare_poweroff(struct platform_device *pdev); + +#ifdef CONFIG_TEGRA_SOC_HWPM + +enum tegra_soc_hwpm_ip_reg_op; + +/** + * @brief pva_hwpm_ip_pm + * + * This function called from Tegra HWPM driver to + * poweron/off pva device. + * + * @param ip_dev Pointer to PVA device + * @param disable disable/enable power management. PVA is + * powered on when false. + * @param reg_offset offset of register relative to PVA HWP base + * @return 0 on Success or negative error code + * + */ +int pva_hwpm_ip_pm(void *ip_dev, bool disable); + +/** + * @brief pva_hwpm_ip_reg_op + * + * This function called from Tegra HWPM driver to + * access PVA HWPM registers. + * + * @param ip_dev Pointer to PVA device + * @param reg_op access operation and can be one of + * TEGRA_SOC_HWPM_IP_REG_OP_READ + * TEGRA_SOC_HWPM_IP_REG_OP_WRITE + * @param inst_element_index element index within PVA instance + * @param reg_offset offset of register relative to PVA HWP base + * @param reg_data pointer to where data is to be placed or read. + * @return 0 on Success or negative error code + * + */ +int pva_hwpm_ip_reg_op(void *ip_dev, + enum tegra_soc_hwpm_ip_reg_op reg_op, + u32 inst_element_index, u64 reg_offset, + u32 *reg_data); +#endif +#endif diff --git a/drivers/video/tegra/host/pva/pva_queue.c b/drivers/video/tegra/host/pva/pva_queue.c new file mode 100644 index 00000000..49ec67c0 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_queue.c @@ -0,0 +1,1567 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include "pva_dma.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_EVENTLIB +#include +#include +#include +#endif + +#include +#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE +#include +#else +#include +#endif + +#include +#include +#include + +#include "pva.h" +#include "nvpva_buffer.h" +#include "nvpva_queue.h" +#include "pva_mailbox.h" +#include "pva_queue.h" +#include "pva_regs.h" + +#include "pva-vpu-perf.h" +#include "pva-interface.h" +#include "pva_vpu_exe.h" +#include "nvpva_client.h" +#include "nvpva_syncpt.h" + +void *pva_dmabuf_vmap(struct dma_buf *dmabuf) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + struct iosys_map map = {0}; +#else + struct dma_buf_map map = {0}; +#endif + /* Linux v5.11 and later kernels */ + if (dma_buf_vmap(dmabuf, &map)) + return NULL; + + return map.vaddr; +#else + /* Linux v5.10 and earlier kernels */ + return dma_buf_vmap(dmabuf); +#endif +} + +void pva_dmabuf_vunmap(struct dma_buf *dmabuf, void *addr) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + struct iosys_map map = IOSYS_MAP_INIT_VADDR(addr); +#else + struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(addr); +#endif + /* Linux v5.11 and later kernels */ + dma_buf_vunmap(dmabuf, &map); +#else + /* Linux v5.10 and earlier kernels */ + dma_buf_vunmap(dmabuf, addr); +#endif +} + +static void pva_task_dump(struct pva_submit_task *task) +{ + int i; + + nvpva_dbg_info(task->pva, "task=%p, exe_id=%u", task, task->exe_id); + + for (i = 0; i < task->num_input_task_status; i++) + nvpva_dbg_info(task->pva, "input task status %d: pin_id=%u, offset=%u", i, + task->input_task_status[i].pin_id, + task->input_task_status[i].offset); + + for (i = 0; i < task->num_output_task_status; i++) + nvpva_dbg_info(task->pva, "output task status %d: pin_id=%u, offset=%u", + i, task->output_task_status[i].pin_id, + task->output_task_status[i].offset); + + for (i = 0; i < task->num_user_fence_actions; i++) + nvpva_dbg_info(task->pva, "fence action %d: type=%u", i, + task->user_fence_actions[i].type); +} + +static void pva_task_get_memsize(size_t *dma_size, + size_t *kmem_size, + size_t *aux_dma_size) +{ + /* Align task addr to 64bytes boundary for DMA use*/ + *dma_size = ALIGN(sizeof(struct pva_hw_task) + 64, 64); + *kmem_size = sizeof(struct pva_submit_task); + *aux_dma_size = NVPVA_TASK_MAX_PAYLOAD_SIZE; +} + +static inline void nvpva_fetch_task_status_info(struct pva *pva, + struct pva_task_error_s *info) +{ + struct pva_task_error_s *err_array = pva->priv_circular_array.va; + struct pva_task_error_s *src_va = + &err_array[pva->circular_array_rd_pos]; + const u32 len = MAX_PVA_TASK_COUNT; + + pva->circular_array_rd_pos += 1; + WARN_ON(pva->circular_array_rd_pos > len); + if (pva->circular_array_rd_pos >= len) + 
pva->circular_array_rd_pos = 0; + + /* Cache coherency is guaranteed by DMA API */ + (void)memcpy(info, src_va, sizeof(struct pva_task_error_s)); + /* clear it for debugging */ + (void)memset(src_va, 0, sizeof(struct pva_task_error_s)); +} + +static void pva_task_unpin_mem(struct pva_submit_task *task) +{ + u32 i; + + for (i = 0; i < task->num_pinned; i++) { + struct pva_pinned_memory *mem = &task->pinned_memory[i]; + + nvpva_buffer_submit_unpin_id(task->client->buffers, + &mem->id, 1); + } + + task->num_pinned = 0; +} + +struct pva_pinned_memory *pva_task_pin_mem(struct pva_submit_task *task, + u32 id) +{ + int err; + struct pva_pinned_memory *mem; + + if (task->num_pinned >= ARRAY_SIZE(task->pinned_memory)) { + task_err(task, "too many objects to pin"); + err = -ENOMEM; + goto err_out; + } + + if (id == 0) { + task_err(task, "pin id is 0"); + err = -EFAULT; + goto err_out; + } + + mem = &task->pinned_memory[task->num_pinned]; + mem->id = id; + err = nvpva_buffer_submit_pin_id(task->client->buffers, &mem->id, 1, + &mem->dmabuf, &mem->dma_addr, + &mem->size, &mem->heap); + if (err) { + task_err(task, "submit pin failed; Is the handled pinned?"); + goto err_out; + } + + task->num_pinned += 1; + + return mem; +err_out: + return ERR_PTR(err); +} + +/* pin fence and return its dma addr */ +static int +pva_task_pin_fence(struct pva_submit_task *task, + struct nvpva_submit_fence *fence, + dma_addr_t *addr) +{ + int err = 0; + + switch (fence->type) { + case NVPVA_FENCE_OBJ_SEM: { + struct pva_pinned_memory *mem; + + mem = pva_task_pin_mem(task, fence->obj.sem.mem.pin_id); + if (IS_ERR(mem)) { + task_err(task, "sempahore submit pin failed"); + err = PTR_ERR(mem); + } else + *addr = mem->dma_addr + fence->obj.sem.mem.offset; + break; + } + case NVPVA_FENCE_OBJ_SYNCPT: { + dma_addr_t syncpt_addr = nvpva_syncpt_address( + task->queue->vm_pdev, fence->obj.syncpt.id, + false); + nvpva_dbg_info(task->pva, + "id = %d, syncpt addr = %llx", + fence->obj.syncpt.id, + syncpt_addr); + + if (syncpt_addr) { + *addr = syncpt_addr; + } else { + task_err(task, + "%s: can't get syncpoint address", + __func__); + err = -EINVAL; + } + break; + } + default: + err = -EINVAL; + task_err(task, "%s: unsupported fence type: %d", + __func__, fence->type); + break; + } + return err; +} + +static int +get_fence_value(struct nvpva_submit_fence *fence, u32 *val) +{ + int err = 0; + + switch (fence->type) { + case NVPVA_FENCE_OBJ_SYNCPT: + *val = fence->obj.syncpt.value; + break; + case NVPVA_FENCE_OBJ_SEM: + *val = fence->obj.sem.value; + break; + default: + err = -EINVAL; + pr_err("%s: unsupported fence type: %d", + __func__, fence->type); + break; + } + return err; +} + +static inline void +pva_task_write_fence_action_op(struct pva_task_action_s *op, + uint8_t action, + uint64_t fence_addr, + uint32_t val, + uint64_t time_stamp_addr) +{ + op->action = action; + op->args.ptr.p = fence_addr; + op->args.ptr.v = val; + op->args.ptr.t = time_stamp_addr; +} + +static inline void +pva_task_write_status_action_op(struct pva_task_action_s *op, + uint8_t action, + uint64_t addr, + uint16_t val) +{ + op->action = action; + op->args.status.p = addr; + op->args.status.status = val; +} + +static inline void +pva_task_write_stats_action_op(struct pva_task_action_s *op, + uint8_t action, + uint64_t addr) +{ + op->action = action; + op->args.statistics.p = addr; +} + +static int +pva_task_process_fence_actions(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + int err = 0; + u32 i; + u32 fence_type; + u32 ts_flag = 0; + u8 
*action_counter; + u8 action_code; + struct pva_task_action_s *fw_actions; + struct pva_task_action_s *current_fw_actions; + + for (fence_type = NVPVA_FENCE_SOT_R5; + fence_type < NVPVA_MAX_FENCE_TYPES; fence_type++) { + switch (fence_type) { + case NVPVA_FENCE_SOT_R5: + fw_actions = &hw_task->preactions[0]; + action_code = TASK_ACT_PTR_WRITE_SOT_R; + action_counter = &hw_task->task.num_preactions; + ts_flag = PVA_TASK_FL_SOT_R_TS; + break; + case NVPVA_FENCE_SOT_VPU: + fw_actions = &hw_task->preactions[0]; + action_code = TASK_ACT_PTR_WRITE_SOT_V; + action_counter = &hw_task->task.num_preactions; + ts_flag = PVA_TASK_FL_SOT_V_TS; + break; + case NVPVA_FENCE_EOT_R5: + fw_actions = &hw_task->postactions[0]; + action_code = TASK_ACT_PTR_WRITE_EOT_R; + action_counter = &hw_task->task.num_postactions; + ts_flag = PVA_TASK_FL_EOT_R_TS; + break; + case NVPVA_FENCE_EOT_VPU: + fw_actions = &hw_task->postactions[0]; + action_code = TASK_ACT_PTR_WRITE_EOT_V; + action_counter = &hw_task->task.num_postactions; + ts_flag = PVA_TASK_FL_EOT_V_TS; + break; + case NVPVA_FENCE_POST: + fw_actions = &hw_task->postactions[0]; + action_code = TASK_ACT_PTR_WRITE_EOT; + action_counter = &hw_task->task.num_postactions; + ts_flag = 0; + break; + default: + task_err(task, "unknown fence action type"); + err = -EINVAL; + goto out; + } + + for (i = 0; i < task->num_pva_fence_actions[fence_type]; i++) { + struct nvpva_fence_action *fence_action = + &task->pva_fence_actions[fence_type][i]; + dma_addr_t fence_addr = 0; + u32 fence_value; + dma_addr_t timestamp_addr; + switch (fence_action->fence.type) { + case NVPVA_FENCE_OBJ_SYNCPT: + { + u32 id = task->queue->syncpt_id; + fence_action->fence.obj.syncpt.id = id; + fence_addr = nvpva_syncpt_address( + task->queue->vm_pdev, id, true); + nvpva_dbg_info(task->pva, + "id = %d, fence_addr = %llx ", + task->queue->syncpt_id, + fence_addr); + + if (fence_addr == 0) { + err = -EFAULT; + goto out; + } + task->fence_num += 1; + task->syncpt_thresh += 1; + fence_value = 1; + fence_action->fence.obj.syncpt.value = + task->syncpt_thresh; + break; + } + case NVPVA_FENCE_OBJ_SEM: + { + err = pva_task_pin_fence(task, + &fence_action->fence, + &fence_addr); + if (err) + goto out; + task->sem_num += 1; + task->sem_thresh += 1; + fence_value = task->sem_thresh; + fence_action->fence.obj.sem.value = fence_value; + break; + } + default: + task_err(task, "unknown fence action object"); + err = -EINVAL; + goto out; + } + + if (fence_action->timestamp_buf.pin_id) { + struct pva_pinned_memory *mem; + mem = pva_task_pin_mem( + task, + fence_action->timestamp_buf.pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err( + task, + "failed to pin timestamp buffer"); + goto out; + } + timestamp_addr = + mem->dma_addr + + fence_action->timestamp_buf.offset; + hw_task->task.flags |= ts_flag; + } else { + timestamp_addr = 0; + } + + current_fw_actions = &fw_actions[*action_counter]; + pva_task_write_fence_action_op(current_fw_actions, + action_code, fence_addr, + fence_value, + timestamp_addr); + *action_counter = *action_counter + 1; + } + } +out: + return err; +} + +static int pva_task_process_prefences(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + u32 i; + int err; + struct pva_task_action_s *fw_preactions = NULL; + for (i = 0; i < task->num_prefences; i++) { + struct nvpva_submit_fence *fence = &task->prefences[i]; + dma_addr_t fence_addr = 0; + u32 fence_val; + + err = pva_task_pin_fence(task, fence, &fence_addr); + if (err) + goto out; + + if (fence_addr == 0) { + err = 
-EINVAL; + goto out; + } + + err = get_fence_value(fence, &fence_val); + if (err) + goto out; + + fw_preactions = + &hw_task->preactions[hw_task->task.num_preactions]; + pva_task_write_fence_action_op(fw_preactions, + TASK_ACT_PTR_BLK_GTREQL, + fence_addr, fence_val, 0); + ++hw_task->task.num_preactions; + } +out: + return err; +} + +static int pva_task_process_input_status(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + u8 i; + int err = 0; + struct pva_task_action_s *fw_preactions = NULL; + + for (i = 0; i < task->num_input_task_status; i++) { + struct nvpva_mem *status; + struct pva_pinned_memory *mem; + dma_addr_t status_addr; + + status = &task->input_task_status[i]; + mem = pva_task_pin_mem(task, status->pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + goto out; + } + + status_addr = mem->dma_addr + status->offset; + + fw_preactions = + &hw_task->preactions[hw_task->task.num_preactions]; + pva_task_write_status_action_op(fw_preactions, + (uint8_t)TASK_ACT_READ_STATUS, + status_addr, 0U); + ++hw_task->task.num_preactions; + } +out: + return err; +} + +static int pva_task_process_output_status(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + u32 i; + int err = 0; + dma_addr_t stats_addr; + struct pva_task_action_s *fw_postactions = NULL; + + for (i = 0; i < task->num_output_task_status; i++) { + dma_addr_t status_addr; + struct nvpva_mem *status = &task->output_task_status[i]; + struct pva_pinned_memory *mem; + + mem = pva_task_pin_mem(task, status->pin_id); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + goto out; + } + + status_addr = mem->dma_addr + status->offset; + fw_postactions = &hw_task->postactions[hw_task->task.num_postactions]; + pva_task_write_status_action_op(fw_postactions, + (uint8_t)TASK_ACT_WRITE_STATUS, + status_addr, + 1U /* PVA task error code */); + ++hw_task->task.num_postactions; + } + + stats_addr = task->dma_addr + offsetof(struct pva_hw_task, statistics); + fw_postactions = &hw_task->postactions[hw_task->task.num_postactions]; + if ((task->pva->stats_enabled) + || (task->pva->profiling_level > 0)) { + pva_task_write_stats_action_op(fw_postactions, + (uint8_t)TASK_ACT_PVA_STATISTICS, + stats_addr); + hw_task->task.flags |= PVA_TASK_FL_STATS_ENABLE; + ++hw_task->task.num_postactions; + } +out: + return err; +} +static int +pva_task_write_vpu_parameter(struct pva_submit_task *task, + struct pva_hw_task *hw_task) +{ + int err = 0; + struct pva_elf_image *elf = NULL; + struct nvpva_pointer_symbol *sym_ptr = NULL; + struct nvpva_pointer_symbol_ex *sym_ptr_ex = NULL; + u32 symbolId = 0U; + dma_addr_t symbol_payload = 0U; + u32 size = 0U; + u32 i; + u32 index = 0; + + u32 head_index = 0U; + u8 *headPtr = NULL; + u32 head_size = 0U; + u32 head_count = 0U; + + u32 tail_index = 0U; + u8 *tailPtr = NULL; + u32 tail_count = 0U; + struct pva_vpu_parameters_s *hw_task_param_list; + + if ((task->exe_id == NVPVA_NOOP_EXE_ID) || (task->num_symbols == 0U)) + goto out; + + tail_index = ((u32)task->num_symbols - 1U); + elf = get_elf_image(&task->client->elf_ctx, task->exe_id); + if (task->num_symbols > elf->num_symbols) { + task_err(task, "invalid number of symbols"); + err = -EINVAL; + goto out; + } + + if (task->symbol_payload_size == 0U) { + task_err(task, "Empty Symbol payload"); + err = -EINVAL; + goto out; + } + + symbol_payload = task->aux_dma_addr; + + headPtr = (u8 *)(task->aux_va); + tailPtr = (u8 *)(task->aux_va + task->symbol_payload_size); + hw_task_param_list = hw_task->dma_info_and_params_list.param_list; + + for (i = 0U; i < 
task->num_symbols; i++) { + symbolId = task->symbols[i].symbol.id; + size = elf->sym[symbolId].size; + if (task->symbols[i].symbol.size != size) { + task_err(task, "size does not match symbol:%s", + elf->sym[symbolId].symbol_name); + err = -EINVAL; + goto out; + } + + if (task->symbols[i].config == NVPVA_SYMBOL_POINTER) { + struct pva_pinned_memory *mem; + + memcpy(headPtr, (task->symbol_payload + task->symbols[i].offset), + sizeof(struct nvpva_pointer_symbol)); + sym_ptr = (struct nvpva_pointer_symbol *)(headPtr); + mem = pva_task_pin_mem(task, PVA_LOW32(sym_ptr->base)); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, "failed to pin symbol pointer"); + err = -EINVAL; + goto out; + } + + sym_ptr->base = mem->dma_addr; + sym_ptr->size = mem->size; + size = sizeof(struct nvpva_pointer_symbol); + } else if (task->symbols[i].config == NVPVA_SYMBOL_POINTER_EX) { + struct pva_pinned_memory *mem; + + memcpy(headPtr, (task->symbol_payload + task->symbols[i].offset), + sizeof(struct nvpva_pointer_symbol_ex)); + sym_ptr_ex = (struct nvpva_pointer_symbol_ex *)(headPtr); + mem = pva_task_pin_mem(task, PVA_LOW32(sym_ptr_ex->base)); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + task_err(task, "failed to pin symbol pointer"); + err = -EINVAL; + goto out; + } + + sym_ptr_ex->base = mem->dma_addr; + sym_ptr_ex->size = mem->size; + size = sizeof(struct nvpva_pointer_symbol_ex); + } else if (size < PVA_DMA_VMEM_COPY_THRESHOLD) { + (void)memcpy(headPtr, + (task->symbol_payload + task->symbols[i].offset), + size); + } else if ((uintptr_t)(tailPtr) < ((uintptr_t)(headPtr) + size)) { + task_err(task, "Symbol payload overflow"); + err = -EINVAL; + goto out; + } else { + tailPtr = (tailPtr - size); + (void)memcpy(tailPtr, + (task->symbol_payload + task->symbols[i].offset), + size); + hw_task_param_list[tail_index].param_base = + (pva_iova)(symbol_payload + + ((uintptr_t)(tailPtr) - + (uintptr_t)(task->aux_va))); + index = tail_index; + tail_index--; + tail_count++; + hw_task_param_list[index].addr = + elf->sym[symbolId].addr; + hw_task_param_list[index].size = size; + continue; + } + + hw_task_param_list[head_index].param_base = + (pva_iova)(symbol_payload + + ((uintptr_t)(headPtr) - + (uintptr_t)(task->aux_va))); + index = head_index; + if ((uintptr_t)(headPtr) > ((uintptr_t)(tailPtr) - size)) { + task_err(task, "Symbol payload overflow"); + err = -EINVAL; + goto out; + } else { + headPtr = (headPtr + size); + head_index++; + head_size += size; + head_count++; + hw_task_param_list[index].addr = + elf->sym[symbolId].addr; + hw_task_param_list[index].size = size; + } + } + + /* Write info for VPU instance data parameter, if available in elf */ + for (i = 0U; i < elf->num_symbols; i++) { + if (elf->sym[i].is_sys) { + hw_task_param_list[task->num_symbols].addr = + elf->sym[i].addr; + hw_task_param_list[task->num_symbols].size = + elf->sym[i].size; + hw_task_param_list[task->num_symbols].param_base = + PVA_SYS_INSTANCE_DATA_V1_IOVA; + ++task->num_symbols; + } + } + + hw_task->param_info.small_vpu_param_data_iova = + (head_size != 0U) ? 
symbol_payload : 0UL; + + hw_task->param_info.small_vpu_parameter_data_size = head_size; + + hw_task->param_info.large_vpu_parameter_list_start_index = head_count; + hw_task->param_info.vpu_instance_parameter_list_start_index = + (head_count + tail_count); + + hw_task->param_info.parameter_data_iova = task->dma_addr + + offsetof(struct pva_hw_task, dma_info_and_params_list) + + offsetof(struct pva_dma_info_and_params_list_s, param_list); + + hw_task->task.num_parameters = task->num_symbols; + + hw_task->task.parameter_info_base = task->dma_addr + + offsetof(struct pva_hw_task, param_info); + + err = pva_task_acquire_ref_vpu_app(&task->client->elf_ctx, + task->exe_id); + if (err) { + task_err(task, + "unable to acquire ref count for app with id = %u", + task->exe_id); + } + + task->pinned_app = true; +out: + return err; +} + +static int set_flags(struct pva_submit_task *task, struct pva_hw_task *hw_task) +{ + int err = 0; + uint32_t flags = task->flags; + + if (flags & NVPVA_PRE_BARRIER_TASK_TRUE) + hw_task->task.flags |= PVA_TASK_FL_SYNC_TASKS; + if (flags & NVPVA_GR_CHECK_EXE_FLAG) + hw_task->task.flags |= PVA_TASK_FL_GR_CHECK; + if (flags & NVPVA_AFFINITY_VPU0) + hw_task->task.flags |= PVA_TASK_FL_VPU0; + if (flags & NVPVA_AFFINITY_VPU1) + hw_task->task.flags |= PVA_TASK_FL_VPU1; + if ((flags & NVPVA_AFFINITY_VPU_ANY) == 0) { + err = -EINVAL; + task_err(task, "incorrect vpu affinity"); + goto out; + } + if (task->pva->vpu_debug_enabled) + hw_task->task.flags |= PVA_TASK_FL_VPU_DEBUG; + + if (task->special_access) + hw_task->task.flags |= PVA_TASK_FL_SPECIAL_ACCESS; + if (flags & NVPVA_ERR_MASK_ILLEGAL_INSTR) + hw_task->task.flags |= PVA_TASK_FL_ERR_MASK_ILLEGAL_INSTR; + if (flags & NVPVA_ERR_MASK_DIVIDE_BY_0) + hw_task->task.flags |= PVA_TASK_FL_ERR_MASK_DIVIDE_BY_0; + if (flags & NVPVA_ERR_MASK_FP_NAN) + hw_task->task.flags |= PVA_TASK_FL_ERR_MASK_FP_NAN; +out: + return err; +} + +static int pva_task_write(struct pva_submit_task *task) +{ + struct pva_hw_task *hw_task; + u32 pre_ptr, post_ptr; + int err = 0; + + if (!pva_vpu_elf_is_registered(&task->client->elf_ctx, task->exe_id) && + (task->exe_id != NVPVA_NOOP_EXE_ID)) { + task_err(task, "invalid exe id: %d", task->exe_id); + return -EINVAL; + } + /* Task start from the memory base */ + hw_task = task->va; + pre_ptr = 0; + post_ptr = 0; + /* process pre & post actions */ + err = pva_task_process_prefences(task, hw_task); + if (err) + goto out; + + err = pva_task_process_input_status(task, hw_task); + if (err) + goto out; + + err = pva_task_process_output_status(task, hw_task); + if (err) + goto out; + + err = pva_task_process_fence_actions(task, hw_task); + if (err) + goto out; + + err = pva_task_write_dma_info(task, hw_task); + if (err) + goto out; + + err = pva_task_write_dma_misr_info(task, hw_task); + if (err) + goto out; + + err = pva_task_write_vpu_parameter(task, hw_task); + if (err) + goto out; + + hw_task->task.next = 0U; + hw_task->task.preactions = task->dma_addr + offsetof(struct pva_hw_task, + preactions); + hw_task->task.postactions = task->dma_addr + offsetof(struct pva_hw_task, + postactions); + hw_task->task.runlist_version = PVA_RUNLIST_VERSION_ID; + hw_task->task.sid_index = task->client->sid_index; + err = set_flags(task, hw_task); + if (err) + goto out; + + hw_task->task.bin_info = + phys_get_bin_info(&task->client->elf_ctx, task->exe_id); + + if (task->stdout) { + hw_task->stdout_cb_info.buffer = task->stdout->buffer_addr; + hw_task->stdout_cb_info.head = task->stdout->head_addr; + hw_task->stdout_cb_info.tail = 
task->stdout->tail_addr; + hw_task->stdout_cb_info.err = task->stdout->err_addr; + hw_task->stdout_cb_info.buffer_size = task->stdout->size; + hw_task->task.stdout_info = + task->dma_addr + + offsetof(struct pva_hw_task, stdout_cb_info); + } else + hw_task->task.stdout_info = 0; + +out: + + return err; +} +#ifdef CONFIG_EVENTLIB + +static void +pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, + struct nvpva_submit_fence *src_fence) +{ + static u32 obj_type[] = {NVDEV_FENCE_TYPE_SYNCPT, + NVDEV_FENCE_TYPE_SEMAPHORE, + NVDEV_FENCE_TYPE_SEMAPHORE_TS, + NVDEV_FENCE_TYPE_SYNC_FD}; + + memset(dst_fence, 0, sizeof(struct nvdev_fence)); + dst_fence->type = obj_type[src_fence->type]; + switch (src_fence->type) { + case NVPVA_FENCE_OBJ_SYNCPT: + dst_fence->syncpoint_index = src_fence->obj.syncpt.id; + dst_fence->syncpoint_value = src_fence->obj.syncpt.value; + break; + case NVPVA_FENCE_OBJ_SEM: + case NVPVA_FENCE_OBJ_SEMAPHORE_TS: + dst_fence->semaphore_handle = src_fence->obj.sem.mem.pin_id; + dst_fence->semaphore_offset = src_fence->obj.sem.mem.offset; + dst_fence->semaphore_value = src_fence->obj.sem.value; + break; + case NVPVA_FENCE_OBJ_SYNC_FD: + break; + default: + break; + } +} + +static void +pva_eventlib_record_r5_states(struct platform_device *pdev, + u32 syncpt_id, + u32 syncpt_thresh, + struct pva_task_statistics_s *stats, + struct pva_submit_task *task) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct nvhost_pva_task_state state; + struct nvdev_fence post_fence; + struct nvpva_submit_fence *fence; + u8 i; + + if ((task->pva->profiling_level == 0) || (!pdata->eventlib_id)) + return; + + /* Record task postfences */ + for (i = 0 ; i < task->num_pva_fence_actions[NVPVA_FENCE_POST]; i++) { + fence = &(task->pva_fence_actions[NVPVA_FENCE_POST][i].fence); + pva_eventlib_fill_fence(&post_fence, fence); + nvhost_eventlib_log_fences(pdev, + syncpt_id, + syncpt_thresh, + &post_fence, + 1, + NVDEV_FENCE_KIND_POST, + stats->complete_time); + } + + state.class_id = pdata->class; + state.syncpt_id = syncpt_id; + state.syncpt_thresh = syncpt_thresh; + state.vpu_id = stats->vpu_assigned; + state.queue_id = stats->queue_id; + state.iova = task->dma_addr; + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_BEGIN + : NVHOST_PVA_VPU1_BEGIN, + stats->vpu_start_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + stats->vpu_assigned == 0 ? 
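+ /*
+  * Timeline reconstructed from the firmware statistics block: VPUx_BEGIN
+  * and PREPARE_END are logged at vpu_start_time, VPUx_END and POST_BEGIN
+  * at vpu_complete_time, and, at profiling level >= 2, QUEUE_BEGIN at
+  * queued_time, QUEUE_END and PREPARE_BEGIN at vpu_assigned_time, and
+  * POST_END at complete_time.
+  */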
NVHOST_PVA_VPU0_END + : NVHOST_PVA_VPU1_END, + stats->vpu_complete_time); + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_PREPARE_END, + stats->vpu_start_time); + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_POST_BEGIN, + stats->vpu_complete_time); + + if (task->pva->profiling_level >= 2) { + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_QUEUE_BEGIN, + stats->queued_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_QUEUE_END, + stats->vpu_assigned_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_PREPARE_BEGIN, + stats->vpu_assigned_time); + + keventlib_write(pdata->eventlib_id, + &state, + sizeof(state), + NVHOST_PVA_POST_END, + stats->complete_time); + } +} +#else +static void +pva_eventlib_fill_fence(struct nvdev_fence *dst_fence, + struct nvpva_submit_fence *src_fence) +{ +} +static void +pva_eventlib_record_r5_states(struct platform_device *pdev, + u32 syncpt_id, + u32 syncpt_thresh, + struct pva_task_statistics_s *stats, + struct pva_submit_task *task) +{ +} +#endif + +void pva_task_free(struct kref *ref) +{ + struct pva_submit_task *task = + container_of(ref, struct pva_submit_task, ref); + struct nvpva_queue *my_queue = task->queue; + + mutex_lock(&my_queue->tail_lock); + if (my_queue->hw_task_tail == task->va) + my_queue->hw_task_tail = NULL; + + if (my_queue->old_tail == task->va) + my_queue->old_tail = NULL; + + mutex_unlock(&my_queue->tail_lock); + + pva_task_unpin_mem(task); + if (task->pinned_app) + pva_task_release_ref_vpu_app(&task->client->elf_ctx, + task->exe_id); + + nvhost_module_idle(task->pva->pdev); + nvpva_client_context_put(task->client); + /* Release memory that was allocated for the task */ + nvpva_queue_free_task_memory(task->queue, task->pool_index); + up(&my_queue->task_pool_sem); +} + +static void update_one_task(struct pva *pva) +{ + struct platform_device *pdev = pva->pdev; + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct nvpva_queue *queue; + struct pva_task_error_s task_info; + struct pva_submit_task *task; + struct pva_hw_task *hw_task; + struct pva_task_statistics_s *stats; + bool found; + u64 vpu_time = 0u; + u64 r5_overhead = 0u; + const u32 tsc_ticks_to_us = 31; + u32 vpu_assigned = 0; + + nvpva_fetch_task_status_info(pva, &task_info); + WARN_ON(!task_info.valid); + WARN_ON(task_info.queue >= MAX_PVA_QUEUE_COUNT); + queue = &pva->pool->queues[task_info.queue]; + + /* find the finished task; since two tasks can be scheduled at the same + * time, the finished one is not necessarily the first one + */ + found = false; + mutex_lock(&queue->list_lock); + + /* since we are only taking one entry out, we don't need to use the safe + * version + */ + list_for_each_entry(task, &queue->tasklist, node) { + if (task->pool_index == task_info.task_id) { + list_del(&task->node); + found = true; + break; + } + } + + mutex_unlock(&queue->list_lock); + if (!found) { + pr_err("pva: unexpected task: queue:%u, valid:%u, error:%u, vpu:%u", + task_info.queue, task_info.valid, task_info.error, + task_info.vpu); + return; + } + + WARN_ON(task_info.error == PVA_ERR_BAD_TASK || + task_info.error == PVA_ERR_BAD_TASK_ACTION_LIST); + hw_task = (struct pva_hw_task *)task->va; + stats = &hw_task->statistics; + if (!task->pva->stats_enabled) + goto prof; + + vpu_assigned = (stats->vpu_assigned & 0x1); + vpu_time = (stats->vpu_complete_time - stats->vpu_start_time); + r5_overhead = 
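+ /*
+  * R5 overhead is the time from queueing to completion minus the time
+  * spent executing on the VPU, converted from TSC ticks to microseconds
+  * with the tsc_ticks_to_us factor above.
+  */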
((stats->complete_time - stats->queued_time) - vpu_time); + r5_overhead = r5_overhead / tsc_ticks_to_us; + + trace_nvhost_pva_task_timestamp(dev_name(&pdev->dev), + pdata->class, + queue->syncpt_id, + task->local_sync_counter, + stats->vpu_assigned_time, + stats->complete_time); + nvpva_dbg_info(pva, "Completed task %p (0x%llx), " + "start_time=%llu, " + "end_time=%llu", + task, + (u64)task->dma_addr, + stats->vpu_assigned_time, + stats->complete_time); + trace_nvhost_pva_task_stats(pdev->name, + stats->queued_time, + stats->head_time, + stats->input_actions_complete, + stats->vpu_assigned_time, + stats->vpu_start_time, + stats->vpu_complete_time, + stats->complete_time, + stats->vpu_assigned, + r5_overhead); +prof: + if (task->pva->profiling_level == 0) + goto out; + + nvhost_eventlib_log_task(pdev, + queue->syncpt_id, + task->local_sync_counter, + stats->vpu_assigned_time, + stats->complete_time); + pva_eventlib_record_r5_states(pdev, + queue->syncpt_id, + task->local_sync_counter, + stats, + task); +out: + /* Not linked anymore so drop the reference */ + kref_put(&task->ref, pva_task_free); +} + +void pva_task_update(struct work_struct *work) +{ + struct pva *pva = container_of(work, struct pva, task_update_work); + int n_tasks = atomic_read(&pva->n_pending_tasks); + int i; + + atomic_sub(n_tasks, &pva->n_pending_tasks); + for (i = 0; i < n_tasks; i++) + update_one_task(pva); +} +static void +pva_queue_dump(struct nvpva_queue *queue, struct seq_file *s) +{ + struct pva_submit_task *task; + int i = 0; + + seq_printf(s, "Queue %u, Tasks\n", queue->id); + mutex_lock(&queue->list_lock); + list_for_each_entry(task, &queue->tasklist, node) { + seq_printf(s, " #%u: exe_id = %u\n", i++, task->exe_id); + } + + mutex_unlock(&queue->list_lock); +} +static int pva_task_submit_mmio_ccq(struct pva_submit_task *task, u8 batchsize) +{ + u32 flags = PVA_CMD_INT_ON_ERR; + int err = 0; + + /* Construct submit command */ + err = task->pva->version_config->ccq_send_task( + task->pva, task->queue->id, task->dma_addr, batchsize, flags); + return err; +} +static int pva_task_submit_mailbox(struct pva_submit_task *task, u8 batchsize) +{ + struct nvpva_queue *queue = task->queue; + struct pva_cmd_status_regs status; + struct pva_cmd_s cmd; + u32 flags, nregs; + int err = 0; + + /* Construct submit command */ + flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; + nregs = pva_cmd_submit_batch(&cmd, queue->id, task->dma_addr, batchsize, + flags); + /* Submit request to PVA and wait for response */ + err = pva_mailbox_send_cmd_sync(task->pva, &cmd, nregs, &status); + if (err < 0) { + nvpva_warn(&task->pva->pdev->dev, "Failed to submit task: %d", + err); + goto out; + } + + if (status.error != PVA_ERR_NO_ERROR) { + nvpva_warn(&task->pva->pdev->dev, "PVA task rejected: %u", + status.error); + err = -EINVAL; + goto out; + } + +out: + + return err; +} + +static void nvpva_syncpt_dec_max(struct nvpva_queue *queue, u32 val) +{ + atomic_sub(val, &queue->syncpt_maxval); +} + +static void nvpva_syncpt_incr_max(struct nvpva_queue *queue, u32 val) +{ + atomic_add(val, &queue->syncpt_maxval); +} + +static u32 nvpva_syncpt_read_max(struct nvpva_queue *queue) +{ + return (u32)atomic_read(&queue->syncpt_maxval); +} + +static int pva_task_submit(const struct pva_submit_tasks *task_header) +{ + struct pva_submit_task *first_task = task_header->tasks[0]; + struct nvpva_queue *queue = first_task->queue; + u64 timestamp; + int err = 0; + u32 i; + u8 batchsize = task_header->num_tasks - 1U; + nvpva_dbg_info(first_task->pva, 
"submitting %u tasks; batchsize: %u", + task_header->num_tasks, batchsize); + + /* + * TSC timestamp is same as CNTVCT. Task statistics are being + * reported in TSC ticks. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + timestamp = arch_timer_read_counter(); +#else + timestamp = arch_counter_get_cntvct(); +#endif + for (i = 0; i < task_header->num_tasks; i++) { + struct pva_submit_task *task = task_header->tasks[i]; + struct pva_hw_task *hw_task = task->va; + + /* take the reference until task is finished */ + kref_get(&task->ref); + + nvpva_syncpt_incr_max(queue, task->fence_num); + task->client->curr_sema_value += task->sem_num; + + mutex_lock(&queue->list_lock); + list_add_tail(&task->node, &queue->tasklist); + mutex_unlock(&queue->list_lock); + + hw_task->task.queued_time = timestamp; + } + + /* + * TSC timestamp is same as CNTVCT. Task statistics are being + * reported in TSC ticks. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + timestamp = arch_timer_read_counter(); +#else + timestamp = arch_counter_get_cntvct(); +#endif + + /* Choose the submit policy based on the mode */ + switch (first_task->pva->submit_task_mode) { + case PVA_SUBMIT_MODE_MAILBOX: + err = pva_task_submit_mailbox(first_task, batchsize); + break; + + case PVA_SUBMIT_MODE_MMIO_CCQ: + err = pva_task_submit_mmio_ccq(first_task, batchsize); + break; + } + + if (err) { + /* assume no task has been submitted to firmware from now on */ + pr_err("pva: failed to submit %u tasks", + task_header->num_tasks); + goto remove_tasks; + } + + if (first_task->pva->profiling_level == 0) + goto out; + + for (i = 0; i < task_header->num_tasks; i++) { + u32 j; + struct nvdev_fence pre_fence; + struct pva_submit_task *task = task_header->tasks[i]; + + for (j = 0; j < task->num_prefences; j++) { + pva_eventlib_fill_fence(&pre_fence, + &task->prefences[j]); + nvhost_eventlib_log_fences(task->pva->pdev, + queue->syncpt_id, + task->local_sync_counter, + &pre_fence, + 1, + NVDEV_FENCE_KIND_PRE, + timestamp); + } + + nvhost_eventlib_log_submit(task->pva->pdev, + queue->syncpt_id, + task->local_sync_counter, + timestamp); + } +out: + return 0; + +remove_tasks: + for (i = 0; i < task_header->num_tasks; i++) { + struct pva_submit_task *task = task_header->tasks[i]; + + mutex_lock(&queue->list_lock); + list_del(&task->node); + mutex_unlock(&queue->list_lock); + + nvpva_syncpt_dec_max(queue, task->fence_num); + task->client->curr_sema_value -= task->sem_num; + + kref_put(&task->ref, pva_task_free); + } + + return err; +} + +static void +set_task_parameters(const struct pva_submit_tasks *task_header) +{ + struct pva_submit_task *task = task_header->tasks[0]; + struct pva_hw_task *hw_task = task->va; + struct nvpva_queue *queue = task->queue; + + u8 status_interface = 0U; + u32 flag = 0; + u64 batch_id; + u16 idx; + + /* Storing to local variable to update in task + * Increment the batch ID and let it overflow + * after it reached U8_MAX + */ + batch_id = (queue->batch_id++); + + if (task_header->execution_timeout_us > 0U) { + hw_task = task_header->tasks[0]->va; + hw_task->task.timer_ref_cnt = task_header->num_tasks; + hw_task->task.timeout = task_header->execution_timeout_us; + flag = PVA_TASK_FL_DEC_TIMER; + } + + /* In T19x, there is only 1 CCQ, so the response should come there + * irrespective of the queue ID. 
In T23x, there are 8 CCQ FIFO's + * thus the response should come in the correct CCQ + */ + if ((task->pva->submit_task_mode == PVA_SUBMIT_MODE_MMIO_CCQ) + && (task_header->tasks[0]->pva->version == PVA_HW_GEN2)) + status_interface = (task->queue->id + 1U); + + for (idx = 0U; idx < task_header->num_tasks; idx++) { + task = task_header->tasks[idx]; + hw_task = task->va; + WARN_ON(task->pool_index > 0xFF); + hw_task->task.task_id = task->pool_index; + hw_task->task.status_interface = status_interface; + hw_task->task.batch_id = batch_id; + + hw_task->task.flags |= flag; + } + +} + +static void +nvpva_task_config_l2sram_window(const struct pva_submit_tasks *task_header, + u32 start_index, u32 end_index, + u32 size) +{ + struct pva_hw_task *hw_task = NULL; + u32 task_num; + + hw_task = task_header->tasks[start_index]->va; + hw_task->task.l2sram_ref_cnt = (end_index - start_index) + 1U; + for (task_num = start_index; task_num <= end_index; task_num++) { + hw_task = task_header->tasks[task_num]->va; + hw_task->task.l2sram_size = size; + hw_task->task.flags |= PVA_TASK_FL_DEC_L2SRAM; + } +} + +static void +update_batch_tasks(const struct pva_submit_tasks *task_header) +{ + struct pva_submit_task *task = NULL; + u32 task_num; + u32 l2s_start_index, l2s_end_index; + u32 l2sram_max_size = 0U; + u32 invalid_index = task_header->num_tasks + 1U; + + l2s_start_index = invalid_index; + l2s_end_index = invalid_index; + + for (task_num = 0; task_num < task_header->num_tasks; task_num++) { + task = task_header->tasks[task_num]; + if (task->l2_alloc_size > 0) { + if (l2s_start_index == invalid_index) + l2s_start_index = task_num; + + l2s_end_index = task_num; + + if (l2sram_max_size < task->l2_alloc_size) + l2sram_max_size = task->l2_alloc_size; + + } else if (l2s_end_index != invalid_index) { + /* An L2SRAM window is found within the batch which + * needs to be sanitized + */ + nvpva_task_config_l2sram_window(task_header, + l2s_start_index, + l2s_end_index, + l2sram_max_size); + l2s_start_index = invalid_index; + l2s_end_index = invalid_index; + l2sram_max_size = 0; + } + } + + /* Last L2SRAM window in batch may need to be sanitized */ + if (l2s_end_index != invalid_index) { + nvpva_task_config_l2sram_window(task_header, + l2s_start_index, + l2s_end_index, + l2sram_max_size); + } +} + +static int pva_queue_submit(struct nvpva_queue *queue, void *args) +{ + const struct pva_submit_tasks *task_header = args; + int err = 0; + int i; + uint32_t thresh, sem_thresh; + struct pva_hw_task *prev_hw_task = NULL; + struct nvpva_client_context *client = task_header->tasks[0]->client; + + mutex_lock(&client->sema_val_lock); + thresh = nvpva_syncpt_read_max(queue); + sem_thresh = client->curr_sema_value; + for (i = 0; i < task_header->num_tasks; i++) { + struct pva_submit_task *task = task_header->tasks[i]; + task->fence_num = 0; + task->syncpt_thresh = thresh; + + task->sem_num = 0; + task->sem_thresh = sem_thresh; + + /* First, dump the task that we are submitting */ + pva_task_dump(task); + + /* Write the task data */ + err = pva_task_write(task); + if (err) + goto unlock; + + thresh = task->syncpt_thresh; + sem_thresh = task->sem_thresh; + queue->local_sync_counter += (1 + task->fence_num); + task->local_sync_counter = queue->local_sync_counter; + if (prev_hw_task) + prev_hw_task->task.next = task->dma_addr; + + prev_hw_task = task->va; + } + + set_task_parameters(task_header); + + /* Update L2SRAM flags for generations T23x and after */ + if (task_header->tasks[0]->pva->version != PVA_HW_GEN1) + 
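+ /*
+  * update_batch_tasks() scans the batch for runs of consecutive tasks
+  * that request L2SRAM and tags each run with a shared reference count
+  * on its first task and the largest L2SRAM size requested in the run.
+  */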
update_batch_tasks(task_header); + + mutex_lock(&queue->tail_lock); + + /* Once batch is ready, link it to the FW queue*/ + if (queue->hw_task_tail) + queue->hw_task_tail->task.next = task_header->tasks[0]->dma_addr; + + /* Hold a reference to old tail in case submission fails*/ + queue->old_tail = queue->hw_task_tail; + + queue->hw_task_tail = prev_hw_task; + mutex_unlock(&queue->tail_lock); + + err = pva_task_submit(task_header); + if (err) { + dev_err(&queue->vm_pdev->dev, "failed to submit task"); + mutex_lock(&queue->tail_lock); + queue->hw_task_tail = queue->old_tail; + mutex_unlock(&queue->tail_lock); + } +unlock: + mutex_unlock(&client->sema_val_lock); + return err; +} + +static struct pva_pinned_memory *find_pinned_mem(struct pva_submit_task *task, + int id) +{ + u32 i; + + for (i = 0; i < task->num_pinned; i++) + if (task->pinned_memory[i].id == id) + return &task->pinned_memory[i]; + return NULL; +} + +static void pva_queue_cleanup_semaphore(struct pva_submit_task *task, + struct nvpva_submit_fence *fence) +{ + u8 *dmabuf_cpuva; + u32 *fence_cpuva; + struct pva_pinned_memory *mem; + + if (fence->type != NVPVA_FENCE_OBJ_SEM) + goto out; + + WARN_ON((fence->obj.sem.mem.offset % 4) != 0); + + mem = find_pinned_mem(task, fence->obj.sem.mem.pin_id); + if (mem == NULL) { + task_err(task, "can't find pinned semaphore for cleanup"); + goto out; + } + + dmabuf_cpuva = pva_dmabuf_vmap(mem->dmabuf); + + if (!dmabuf_cpuva) + goto out; + + fence_cpuva = (void *)&dmabuf_cpuva[fence->obj.sem.mem.offset]; + *fence_cpuva = fence->obj.sem.value; + + pva_dmabuf_vunmap(mem->dmabuf, dmabuf_cpuva); +out: + return; +} + +static void pva_queue_cleanup_status(struct pva_submit_task *task, + struct nvpva_mem *status_h) +{ + struct pva_pinned_memory *mem; + u8 *dmabuf_cpuva; + struct pva_gen_task_status_s *status_ptr; + + mem = find_pinned_mem(task, status_h->pin_id); + if (mem == NULL) { + task_err(task, "can't find pinned status for cleanup"); + goto out; + } + + dmabuf_cpuva = pva_dmabuf_vmap(mem->dmabuf); + if (!dmabuf_cpuva) + goto out; + + status_ptr = (void *)&dmabuf_cpuva[status_h->offset]; + status_ptr->status = PVA_ERR_BAD_TASK_STATE; + status_ptr->info32 = PVA_ERR_VPU_BAD_STATE; + + pva_dmabuf_vunmap(mem->dmabuf, dmabuf_cpuva); +out: + return; +} + +static void pva_queue_cleanup(struct nvpva_queue *queue, + struct pva_submit_task *task) +{ + unsigned int i, fence_type; + + /* Write task status first */ + for (i = 0; i < task->num_output_task_status; i++) + pva_queue_cleanup_status(task, &task->output_task_status[i]); + + /* Finish up non-syncpoint fences */ + for (fence_type = NVPVA_FENCE_SOT_R5; + fence_type < NVPVA_MAX_FENCE_TYPES; fence_type++) { + for (i = 0; i < task->num_pva_fence_actions[fence_type]; i++) + pva_queue_cleanup_semaphore( + task, + &task->pva_fence_actions[fence_type][i].fence); + } +} + +static int pva_queue_abort(struct nvpva_queue *queue) +{ + struct pva_submit_task *task, *n; + + mutex_lock(&queue->list_lock); + + list_for_each_entry_safe(task, n, &queue->tasklist, node) { + pva_queue_cleanup(queue, task); + list_del(&task->node); + kref_put(&task->ref, pva_task_free); + } + + /* Finish syncpoint increments to release waiters */ + nvhost_syncpt_set_min_update(queue->vm_pdev, queue->syncpt_id, + atomic_read(&queue->syncpt_maxval)); + mutex_unlock(&queue->list_lock); + + return 0; +} + +struct nvpva_queue_ops pva_queue_ops = { + .abort = pva_queue_abort, + .submit = pva_queue_submit, + .get_task_size = pva_task_get_memsize, + .dump = pva_queue_dump, + .set_attribute = 
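+ /* attribute setting is not implemented for PVA queues */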
NULL, +}; diff --git a/drivers/video/tegra/host/pva/pva_queue.h b/drivers/video/tegra/host/pva/pva_queue.h new file mode 100644 index 00000000..8130072a --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_queue.h @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_QUEUE_H +#define PVA_QUEUE_H + +#include +#include "nvpva_queue.h" +#include "nvpva_buffer.h" +#include "pva-sys-params.h" +#include "pva-interface.h" +#include "pva-task.h" + +#define task_err(task, fmt, ...) \ + dev_err(&task->pva->pdev->dev, fmt, ##__VA_ARGS__) + +struct dma_buf; + +extern struct nvpva_queue_ops pva_queue_ops; + +struct pva_pinned_memory { + u64 size; + dma_addr_t dma_addr; + struct dma_buf *dmabuf; + int id; + enum nvpva_buffers_heap heap; +}; + +struct pva_cb { + dma_addr_t head_addr; + uint32_t *head_va; + dma_addr_t tail_addr; + uint32_t *tail_va; + dma_addr_t err_addr; + uint32_t *err_va; + dma_addr_t buffer_addr; + uint8_t *buffer_va; + uint32_t tail; + uint32_t size; +}; + +/** + * @brief Describe a task for PVA + * + * This is an internal representation of the task structure. All + * pointers refer to kernel memory. + * + * pva Pointer to struct pva + * buffers Pointer to struct nvpva_buffers + * queue Pointer to struct nvpva_queue + * node Used to build queue task list + * kref Used to manage allocation and freeing + * dma_addr task dma_addr + * aux_dma_addr task auxdma_addr + * va task virtual address + * aux_va task aux virtual address + * pool_index task pool index + * postfence_va postfence virtual address + * num_prefences Number of pre-fences in this task + * num_postfences Number of post-fences in this task + * num_input_surfaces Number of input surfaces + * num_output_surfaces Number of output surfaces + * num_input_task_status Number of input task status structures + * num_output_task_status Number of output task status structures + * operation task operation + * timeout Latest Unix time when the task must complete or + * 0 if disabled. 
+ * prefences Pre-fence structures + * postfences Post-fence structures + * input_surfaces Input surfaces structures + * input_scalars Information for input scalars + * output_surfaces Output surfaces + * output_scalars Information for output scalars + * input_task_status Input status structure + * output_task_status Output status structure + * + */ +struct pva_submit_task { + struct pva *pva; + struct nvpva_queue *queue; + struct nvpva_client_context *client; + + struct list_head node; + struct kref ref; + + dma_addr_t dma_addr; + dma_addr_t aux_dma_addr; + void *va; + void *aux_va; + int pool_index; + + bool pinned_app; + u32 exe_id; + + u32 l2_alloc_size; /* Not applicable for Xavier */ + struct pva_cb *stdout; + u32 symbol_payload_size; + + u32 flags; + u8 num_prefences; + u8 num_user_fence_actions; + u8 num_input_task_status; + u8 num_output_task_status; + u8 num_dma_descriptors; + u8 num_dma_channels; + u8 num_symbols; + u8 special_access; + + u64 timeout; + u64 desc_hwseq_frm; + u32 syncpt_thresh; + u32 fence_num; + u32 local_sync_counter; + + u32 sem_thresh; + u32 sem_num; + + /* Data provided by userspace "as is" */ + struct nvpva_submit_fence prefences[NVPVA_TASK_MAX_PREFENCES]; + struct nvpva_fence_action + user_fence_actions[NVPVA_MAX_FENCE_TYPES * + NVPVA_TASK_MAX_FENCEACTIONS]; + struct nvpva_mem input_task_status[NVPVA_TASK_MAX_INPUT_STATUS]; + struct nvpva_mem output_task_status[NVPVA_TASK_MAX_OUTPUT_STATUS]; + struct nvpva_dma_descriptor + dma_descriptors[NVPVA_TASK_MAX_DMA_DESCRIPTORS]; + struct nvpva_dma_channel dma_channels + [NVPVA_TASK_MAX_DMA_CHANNELS_T23X]; /* max of T19x & T23x */ + struct nvpva_dma_misr dma_misr_config; + struct nvpva_hwseq_config hwseq_config; + struct nvpva_symbol_param symbols[NVPVA_TASK_MAX_SYMBOLS]; + u8 symbol_payload[NVPVA_TASK_MAX_PAYLOAD_SIZE]; + + struct pva_pinned_memory pinned_memory[256]; + u32 num_pinned; + u8 num_pva_fence_actions[NVPVA_MAX_FENCE_TYPES]; + struct nvpva_fence_action + pva_fence_actions[NVPVA_MAX_FENCE_TYPES] + [NVPVA_TASK_MAX_FENCEACTIONS]; + /** Store Suface base address */ + u64 src_surf_base_addr; + u64 dst_surf_base_addr; + bool is_system_app; +}; + +struct pva_submit_tasks { + struct pva_submit_task *tasks[NVPVA_SUBMIT_MAX_TASKS]; + u32 task_thresh[NVPVA_SUBMIT_MAX_TASKS]; + u16 num_tasks; + u64 execution_timeout_us; +}; + +#define ACTION_LIST_FENCE_SIZE 21U +#define ACTION_LIST_STATUS_OPERATION_SIZE 11U +#define ACTION_LIST_TERMINATION_SIZE 1U +#define ACTION_LIST_STATS_SIZE 9U +#define PVA_TSC_TICKS_TO_US_FACTOR (0.032f) + +/* + * The worst-case input action buffer size: + * - Prefences trigger a word memory operation (size 13 bytes) + * - Input status reads trigger a half-word memory operation (size 11 bytes) + * - The action list is terminated by a null action (1 byte) + */ +#define INPUT_ACTION_BUFFER_SIZE \ + ALIGN(((NVPVA_TASK_MAX_PREFENCES * ACTION_LIST_FENCE_SIZE) + \ + ((NVPVA_TASK_MAX_FENCEACTIONS * 2U) * ACTION_LIST_FENCE_SIZE) + \ + NVPVA_TASK_MAX_INPUT_STATUS * \ + ACTION_LIST_STATUS_OPERATION_SIZE + \ + ACTION_LIST_TERMINATION_SIZE), \ + 256) + +/** + * Ensure that sufficient preactions per task are supported by FW/KMD interface. 
+ * Maximum possible number of preactions can be determined by adding below + * limits: + * - Maximum number of prefences allowed per task + * - Maximum number of SOT_R and SOT_V fences allowed per task + * - Maximum number of input status buffers allowed per task + */ +#if ((PVA_MAX_PREACTION_LISTS) < \ + ( \ + (NVPVA_TASK_MAX_PREFENCES) + \ + (NVPVA_TASK_MAX_FENCEACTIONS * 2U) + \ + (NVPVA_TASK_MAX_INPUT_STATUS) \ + ) \ + ) +#error "Insufficient preactions supported by FW/KMD interface" +#endif + +/** + * Ensure that sufficient postactions per task are supported by FW/KMD interface. + * Maximum possible number of postactions can be determined by adding below + * limits: + * - Maximum number of EOT_V, EOT_R and EOT fences allowed per task + * - Maximum number of output status buffers allowed per task + * - Maximum one postaction for statistics + */ +#if ((PVA_MAX_POSTACTION_LISTS) < \ + ( \ + (NVPVA_TASK_MAX_FENCEACTIONS * 3U) + \ + (NVPVA_TASK_MAX_OUTPUT_STATUS) + \ + (1U) \ + ) \ + ) +#error "Insufficient postactions supported by FW/KMD interface" +#endif + +struct PVA_PACKED pva_task_action_ptr_s { + /* IOVA Pointer to update Sync Point Value */ + pva_iova p; + /* Value to be written to Sync Point */ + uint32_t v; + /* Pointer to write timestamp */ + pva_iova t; +}; + +struct PVA_PACKED pva_task_action_status_s { + /* IOVA to pva_gen_task_status_t struct */ + pva_iova p; + uint16_t status; + /* Padding to ensure that structure is 4byte aligned for FW perf optimization */ + uint8_t pad[2]; +}; + +struct PVA_PACKED pva_task_action_statistics_s { + /* IOVA to pva_task_statistics_t struct */ + pva_iova p; +}; +struct PVA_PACKED pva_task_action_s { + uint8_t action; + /* Padding to ensure that structure is 4byte aligned for FW perf optimization */ + uint8_t pad[3]; + union { + struct pva_task_action_ptr_s ptr; + struct pva_task_action_status_s status; + struct pva_task_action_statistics_s statistics; + } args; +}; + +/* This structure is created to ensure dma_info and params_list is always + * stored in contiguous memory within the HW task structure. This is done as a perf + * optimization so that a single dma copy can be triggered by R5 FW for copying both + * the dma_info and param_list. + */ +struct pva_dma_info_and_params_list_s { + struct pva_dma_info_s dma_info; + struct pva_vpu_parameters_s param_list[NVPVA_TASK_MAX_SYMBOLS]; +}; + +struct pva_hw_task { + struct pva_td_s task; + struct pva_task_action_s preactions[PVA_MAX_PREACTION_LISTS]; + struct pva_task_action_s postactions[PVA_MAX_POSTACTION_LISTS]; + struct pva_dma_info_and_params_list_s dma_info_and_params_list; + struct pva_dma_misr_config_s dma_misr_config; + struct pva_dtd_s dma_desc[NVPVA_TASK_MAX_DMA_DESCRIPTORS]; + struct pva_vpu_parameter_info_s param_info; + struct pva_task_statistics_s statistics; + struct pva_circular_buffer_info_s stdout_cb_info; +}; + +void pva_task_remove(struct pva_submit_task *task); +void pva_task_free(struct kref *ref); + +void pva_task_update(struct work_struct *work); + +struct pva_pinned_memory *pva_task_pin_mem(struct pva_submit_task *task, + u32 id); + +void pva_dmabuf_vunmap(struct dma_buf *dmabuf, void *addr); +void *pva_dmabuf_vmap(struct dma_buf *dmabuf); + +#endif diff --git a/drivers/video/tegra/host/pva/pva_regs.h b/drivers/video/tegra/host/pva/pva_regs.h new file mode 100644 index 00000000..4a3e1659 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_regs.h @@ -0,0 +1,205 @@ +/* + * + * Copyright (c) 2016-2023 NVIDIA Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef _PVA_REGS_H_ +#define _PVA_REGS_H_ + +#include "pva-bit.h" +#include "hw_cfg_pva_v1.h" +#include "hw_cfg_pva_v2.h" +#include "hw_dma_ch_pva.h" +#include "hw_dma_desc_pva.h" +#include "hw_proc_pva.h" +#include "hw_hsp_pva.h" +#include "hw_sec_pva_v1.h" +#include "hw_sec_pva_v2.h" +#include "hw_evp_pva.h" +#include "pva-interface.h" +#include "pva_mailbox.h" +#include "pva-ucode-header.h" + +/* Definition for LIC_INTR_ENABLE bits */ +#define SEC_LIC_INTR_HSP1 0x1 +#define SEC_LIC_INTR_HSP2 0x2 +#define SEC_LIC_INTR_HSP3 0x4 +#define SEC_LIC_INTR_HSP4 0x8 +#define SEC_LIC_INTR_HSP_ALL 0xF +#define SEC_LIC_INTR_H1X_ALL_23 0x3 +#define SEC_LIC_INTR_H1X_ALL_19 0x7 + +/* Watchdog support */ +#define SEC_LIC_INTR_WDT 0x1 + +#define SEC_BASE_COMMON 0x20000U + +/* unified register interface for both v1 and v2 */ +static inline u32 sec_lic_intr_status_r(int version) +{ + if (version == 1) + return v1_sec_lic_intr_status_r(); + else + return v2_sec_lic_intr_status_r(); +} + +static inline u32 cfg_ccq_status_r(int version, u32 ccq_idx, u32 status_idx) +{ + if (version == 1) + return v1_cfg_ccq_status_r(status_idx); + else + return v2_cfg_ccq_status_r(ccq_idx, status_idx); +} + +static inline u32 cfg_ccq_r(int version, u32 ccq_idx) +{ + if (version == 1) + return v1_cfg_ccq_r(); + else + return v2_cfg_ccq_r(ccq_idx); +} + +static inline u32 cfg_r5user_lsegreg_r(int version) +{ + if (version == 1) + return v1_cfg_r5user_lsegreg_r(); + else + return v2_cfg_r5user_lsegreg_r(); +} + +static inline u32 cfg_priv_ar1_lsegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_lsegreg_r(); + else + return v2_cfg_priv_ar1_lsegreg_r(); +} + +static inline u32 cfg_priv_ar2_lsegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_lsegreg_r(); + else + return v2_cfg_priv_ar2_lsegreg_r(); +} + +static inline u32 cfg_r5user_usegreg_r(int version) +{ + if (version == 1) + return v1_cfg_r5user_usegreg_r(); + else + return v2_cfg_r5user_usegreg_r(); +} + +static inline u32 cfg_priv_ar1_usegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_usegreg_r(); + else + return v2_cfg_priv_ar1_usegreg_r(); +} + +static inline u32 cfg_priv_ar2_usegreg_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_usegreg_r(); + else + return v2_cfg_priv_ar2_usegreg_r(); +} + +static inline u32 cfg_priv_ar1_start_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_start_r(); + else + return v2_cfg_priv_ar1_start_r(); +} + +static inline u32 cfg_priv_ar1_end_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar1_end_r(); + else + return v2_cfg_priv_ar1_end_r(); +} + +static inline u32 cfg_priv_ar2_start_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_start_r(); + else + return v2_cfg_priv_ar2_start_r(); +} + +static inline u32 cfg_priv_ar2_end_r(int version) +{ + if (version == 1) + return v1_cfg_priv_ar2_end_r(); + else + return v2_cfg_priv_ar2_end_r(); +} + +static inline u32 
sec_lic_intr_enable_r(int version) +{ + if (version == 1) + return v1_sec_lic_intr_enable_r(); + else + return v2_sec_lic_intr_enable_r(); +} + +static inline u32 hwpm_get_offset(void) +{ + return 0x200000; +} + +static inline u32 sec_ec_errslice0_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x30U); +} + +static inline u32 sec_ec_errslice1_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x60U); +} + +static inline u32 sec_ec_errslice2_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x90U); +} + +static inline u32 sec_ec_errslice3_missionerr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0xC0U); +} + +static inline u32 sec_ec_errslice0_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x40U); +} + +static inline u32 sec_ec_errslice1_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0x70U); +} + +static inline u32 sec_ec_errslice2_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0xA0U); +} + +static inline u32 sec_ec_errslice3_latenterr_enable_r(void) +{ + return (SEC_BASE_COMMON + 0xD0U); +} +#endif diff --git a/drivers/video/tegra/host/pva/pva_sec_ec.c b/drivers/video/tegra/host/pva/pva_sec_ec.c new file mode 100644 index 00000000..b32163b9 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sec_ec.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include "pva_regs.h" +#include "pva.h" + +static u32 pva_get_sec_ec_addrs(u32 index) +{ + u32 sec_ec_miss_addrs[] = { + sec_ec_errslice0_missionerr_enable_r(), + sec_ec_errslice0_latenterr_enable_r(), + sec_ec_errslice1_missionerr_enable_r(), + sec_ec_errslice1_latenterr_enable_r(), + sec_ec_errslice2_missionerr_enable_r(), + sec_ec_errslice2_latenterr_enable_r(), + sec_ec_errslice3_missionerr_enable_r(), + sec_ec_errslice3_latenterr_enable_r() + }; + + return sec_ec_miss_addrs[index]; +}; + +void pva_disable_ec_err_reporting(struct pva *pva) +{ + + u32 n_regs = (pva->version != PVA_HW_GEN1) ? 8 : 4; + u32 i; + + /* save current state */ + for (i = 0; i < n_regs; i++) + pva->ec_state[i] = host1x_readl(pva->pdev, + pva_get_sec_ec_addrs(i)); + + /* disable reporting */ + for (i = 0; i < n_regs; i++) + host1x_writel(pva->pdev, pva_get_sec_ec_addrs(i), 0); +} + +void pva_enable_ec_err_reporting(struct pva *pva) +{ + + u32 n_regs = (pva->version != PVA_HW_GEN1) ? 8 : 4; + u32 i; + + /* enable reporting */ + for (i = 0; i < n_regs; i++) + host1x_writel(pva->pdev, + pva_get_sec_ec_addrs(i), + pva->ec_state[i]); +} diff --git a/drivers/video/tegra/host/pva/pva_sec_ec.h b/drivers/video/tegra/host/pva/pva_sec_ec.h new file mode 100644 index 00000000..c779d97f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sec_ec.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023, NVIDIA Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _PVA_SEC_EC_H_ +#define _PVA_SEC_EC_H_ + +void pva_disable_ec_err_reporting(struct pva *pva); +void pva_enable_ec_err_reporting(struct pva *pva); +#endif diff --git a/drivers/video/tegra/host/pva/pva_sha256.c b/drivers/video/tegra/host/pva/pva_sha256.c new file mode 100644 index 00000000..35bb5900 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sha256.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "pva_sha256.h" + +#define ROTLEFT(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) +#define ROTRIGHT(a, b) (((a) >> (b)) | ((a) << (32 - (b)))) + +#define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define SHA_EP0(x) (ROTRIGHT(x, 2) ^ ROTRIGHT(x, 13) ^ ROTRIGHT(x, 22)) +#define SHA_EP1(x) (ROTRIGHT(x, 6) ^ ROTRIGHT(x, 11) ^ ROTRIGHT(x, 25)) +#define SIG0(x) (ROTRIGHT(x, 7) ^ ROTRIGHT(x, 18) ^ ((x) >> 3)) +#define SIG1(x) (ROTRIGHT(x, 17) ^ ROTRIGHT(x, 19) ^ ((x) >> 10)) + +#define SWAP32(x) __builtin_bswap32(x) +#define SWAP64(x) __builtin_bswap64(x) + +/** + * This variable is used internally by \ref sha256_transform() + */ +static const uint32_t k[64] = { + U32(0x428a2f98U), U32(0x71374491U), U32(0xb5c0fbcfU), U32(0xe9b5dba5U), + U32(0x3956c25bU), U32(0x59f111f1U), U32(0x923f82a4U), U32(0xab1c5ed5U), + U32(0xd807aa98U), U32(0x12835b01U), U32(0x243185beU), U32(0x550c7dc3U), + U32(0x72be5d74U), U32(0x80deb1feU), U32(0x9bdc06a7U), U32(0xc19bf174U), + U32(0xe49b69c1U), U32(0xefbe4786U), U32(0x0fc19dc6U), U32(0x240ca1ccU), + U32(0x2de92c6fU), U32(0x4a7484aaU), U32(0x5cb0a9dcU), U32(0x76f988daU), + U32(0x983e5152U), U32(0xa831c66dU), U32(0xb00327c8U), U32(0xbf597fc7U), + U32(0xc6e00bf3U), U32(0xd5a79147U), U32(0x06ca6351U), U32(0x14292967U), + U32(0x27b70a85U), U32(0x2e1b2138U), U32(0x4d2c6dfcU), U32(0x53380d13U), + U32(0x650a7354U), U32(0x766a0abbU), U32(0x81c2c92eU), U32(0x92722c85U), + U32(0xa2bfe8a1U), U32(0xa81a664bU), U32(0xc24b8b70U), U32(0xc76c51a3U), + U32(0xd192e819U), U32(0xd6990624U), U32(0xf40e3585U), U32(0x106aa070U), + U32(0x19a4c116U), U32(0x1e376c08U), U32(0x2748774cU), U32(0x34b0bcb5U), + U32(0x391c0cb3U), U32(0x4ed8aa4aU), U32(0x5b9cca4fU), U32(0x682e6ff3U), + U32(0x748f82eeU), U32(0x78a5636fU), U32(0x84c87814U), U32(0x8cc70208U), + U32(0x90befffaU), U32(0xa4506cebU), 
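+ /* last pair of the 64 standard SHA-256 round constants (first 32 bits of
+  * the fractional parts of the cube roots of the first 64 primes) */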
U32(0xbef9a3f7U), U32(0xc67178f2U) +}; + +/** + * \brief + * This function is a helper function used by \ref pva_sha256_update + * to hash 512-bit blocks and forms the core of the algorithm. + * Use \ref sha256_init(), \ref pva_sha256_update(), and + * \ref sha256_finalize() instead of calling sha256_transform() directly. + * \param[in] ctx pointer of struct sha256_ctx_s context. + * \param[in] data_in pointer to the data block to be hashed. + * \return Void + */ +static void +sha256_transform(struct sha256_ctx_s *ctx, + const void *data_in) +{ + uint32_t a, b, c, d, e, f, g, h, t1, t2, m[64]; + const uint32_t * const data = data_in; + size_t i; + + for (i = 0; i < U32(16); i++) + m[i] = SWAP32(data[i]); + + for (i = 0; i < U32(64) - U32(16); ++i) + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + m[i + U32(16)] = SIG1(m[U32(14) + i]) + m[U32(9) + i] + + SIG0(m[U32(1) + i]) + m[i]; + + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + + for (i = 0; i < U32(64); ++i) { + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + t1 = h + SHA_EP1(e) + CH(e, f, g) + k[i] + m[i]; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + t2 = SHA_EP0(a) + MAJ(a, b, c); + h = g; + g = f; + f = e; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + e = d + t1; + d = c; + c = b; + b = a; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + a = t1 + t2; + } + + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[0] += a; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[1] += b; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[2] += c; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[3] += d; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[4] += e; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[5] += f; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[6] += g; + /* coverity[cert_int30_c_violation]; Deviation-MOD32_DEVIATION_ID */ + ctx->state[7] += h; +} + +void +sha256_init(struct sha256_ctx_s *ctx) +{ + ctx->bitlen = 0; + ctx->state[0] = U32(0x6a09e667); + ctx->state[1] = U32(0xbb67ae85); + ctx->state[2] = U32(0x3c6ef372); + ctx->state[3] = U32(0xa54ff53a); + ctx->state[4] = U32(0x510e527f); + ctx->state[5] = U32(0x9b05688c); + ctx->state[6] = U32(0x1f83d9ab); + ctx->state[7] = U32(0x5be0cd19); +} + +void +pva_sha256_update(struct sha256_ctx_s *ctx, + const void *data, + size_t len) +{ + uint i; + + /*assert(len % 64 == 0); */ + + for (i = 0; i < len; i += U32(64)) { + ctx->bitlen &= U32(0xffffffff); + sha256_transform(ctx, ((const uint8_t *)data) + i); + ctx->bitlen += U32(512); + } +} + +void +sha256_copy(const struct sha256_ctx_s *ctx_in, + struct sha256_ctx_s *ctx_out) +{ + *ctx_out = *ctx_in; +} + +void +sha256_finalize(struct sha256_ctx_s *ctx, + const void *input, + size_t input_size, + uint32_t out[8]) +{ + uint8_t data[64]; + void *p = data; + uint32_t t; + + input_size &= U32(0xffffffff); + ctx->bitlen &= U32(0xffffffff); + + /* the false of this condition is illegal for this API agreement */ + /* this check is here only for Coverity INT30-C */ + ctx->bitlen += input_size * U32(8); + (void)memcpy(p, input, input_size); + data[input_size] = 
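+ /*
+  * Standard SHA-256 padding: append a single 0x80 byte (a 1 bit), zero
+  * fill, and store the byte-swapped message bit length at the end of the
+  * final 64-byte block before the closing transform.
+  */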
0x80; + + /* can we fit an 8-byte counter? */ + if (input_size < U32(56)) { + /* Pad whatever data is left in the buffer. */ + (void)memset(data + input_size + U32(1), 0, + U32(56) - input_size - U32(1)); + } else { + /* Go into another block. We are here only for message hashing */ + if (input_size + U32(1) < U32(64)) + (void)memset(data + input_size + U32(1), 0, + U32(64) - input_size - U32(1)); + + sha256_transform(ctx, data); + (void)memset(data, 0, 56); + } + + t = ctx->bitlen_low; + + *(uint32_t *)(void *)(data + 56) = 0; + *(uint32_t *)(void *)(data + 60) = SWAP32(t); + + sha256_transform(ctx, data); + + out[0] = SWAP32(ctx->state[0]); + out[1] = SWAP32(ctx->state[1]); + out[2] = SWAP32(ctx->state[2]); + out[3] = SWAP32(ctx->state[3]); + out[4] = SWAP32(ctx->state[4]); + out[5] = SWAP32(ctx->state[5]); + out[6] = SWAP32(ctx->state[6]); + out[7] = SWAP32(ctx->state[7]); +} diff --git a/drivers/video/tegra/host/pva/pva_sha256.h b/drivers/video/tegra/host/pva/pva_sha256.h new file mode 100644 index 00000000..62b9880d --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_sha256.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_SHA256_H +#define PVA_SHA256_H + +#define U32(x) ((uint32_t)(x)) + +struct sha256_ctx_s { + /* + * On bitlen: + * + * While we don't exceed 2^32 bit (2^29 byte) length for the input buffer, + * size_t is more efficient at least on RISC-V. This particular + * structure is needed to make Coverity happy. + */ + union { + size_t bitlen; + uint32_t bitlen_low; + }; + uint32_t state[8]; +}; + +/** + * Initializes struct sha256_ctx_s + * + * \param[in] ctx pointer of struct sha256_ctx_s context + * + * \return void + */ +void sha256_init(struct sha256_ctx_s *ctx); + +/** + * \brief + * Hash full blocks, in units of 64 bytes + * can be called repeatedly with chunks of the message + * to be hashed (len bytes at data). + * + * \param[in] ctx pointer of struct sha256_ctx_s context + * \param[in] data pointer to the data block to be hashed + * \param[in] len length (in units of 64 bytes) of the data to be hashed. + * + * \return void + */ +void +pva_sha256_update(struct sha256_ctx_s *ctx, + const void *data, + size_t len); + +/** + * \brief + * Finalize the hash and keep the calcualted hash in out. + * Required: input_size < 64. Call pva_sha256_update() first otherwise. + * + * \param[in] ctx pointer of struct sha256_ctx_s context + * \param[in] input pointer to the data block + * (left over from \ref pva_sha256_update) to be hashed + * \param[in] input_size size of the data block to hashed + * (left over from \ref pva_sha256_update to be hashed) + * \param[out] out places the calcuated sha256 key in out. 
+ * + * \return void + */ +void +sha256_finalize(struct sha256_ctx_s *ctx, + const void *input, + size_t input_size, + uint32_t out[8]); + +/** + * \brief + * copy state information to ctx_out from ctx_in + * \param[in] ctx_in input struct sha256_ctx_s + * \param[out] ctx_out output struct sha256_ctx_s + * \return void + */ +void sha256_copy(const struct sha256_ctx_s *ctx_in, + struct sha256_ctx_s *ctx_out); + +#endif /* PVA_SHA256_H */ diff --git a/drivers/video/tegra/host/pva/pva_status_regs.h b/drivers/video/tegra/host/pva/pva_status_regs.h new file mode 100644 index 00000000..6d7da9dc --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_status_regs.h @@ -0,0 +1,44 @@ +/* + * PVA Command header + * + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef __PVA_STATUS_REGS_H__ +#define __PVA_STATUS_REGS_H__ + +#define PVA_CMD_STATUS_REGS 5 + +#define PVA_CMD_STATUS3_INDEX 0u +#define PVA_CMD_STATUS4_INDEX 1u +#define PVA_CMD_STATUS5_INDEX 2u +#define PVA_CMD_STATUS6_INDEX 3u +#define PVA_CMD_STATUS7_INDEX 4u + +enum pva_cmd_status { + PVA_CMD_STATUS_INVALID = 0, + PVA_CMD_STATUS_WFI = 1, + PVA_CMD_STATUS_DONE = 2, + PVA_CMD_STATUS_ABORTED = 3, +}; + +struct pva_cmd_status_regs { + uint32_t status[PVA_CMD_STATUS_REGS]; + uint32_t error; + uint32_t cmd; +}; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_system_allow_list.c b/drivers/video/tegra/host/pva/pva_system_allow_list.c new file mode 100644 index 00000000..28451b0c --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_system_allow_list.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +unsigned char pva_auth_allow_list_sys[] = { +0x08, 0x00, 0x00, 0x00, 0xcd, 0xdb, 0x32, 0x3b, 0xf3, 0x07, 0x1c, 0x33, 0x53, 0x86, 0xfa, 0x41, +0x5e, 0x9b, 0xab, 0x9a, 0x54, 0x0b, 0x8f, 0x24, 0xa3, 0x79, 0xb2, 0x5d, 0xdf, 0xbf, 0x4a, 0x10, +0xfa, 0x08, 0xd3, 0x7c, 0xca, 0xe7, 0x78, 0xb8, 0x19, 0xb1, 0x60, 0xdc, 0xd1, 0xd3, 0xd9, 0x83, +0x10, 0xaa, 0x49, 0xb1, 0x0b, 0x95, 0x28, 0xff, 0x00, 0x0d, 0x9a, 0x50, 0x4d, 0x9b, 0x26, 0x56, +0x85, 0x05, 0x73, 0xac, 0x4f, 0x06, 0xde, 0x93, 0x27, 0xb5, 0xec, 0x16, 0x4e, 0x6d, 0xb4, 0x86, +0x46, 0xac, 0x48, 0xb2, 0x69, 0xdd, 0x45, 0xdd, 0x9b, 0x7c, 0xbe, 0x9d, 0x86, 0xea, 0x29, 0xda, +0x58, 0x7e, 0x62, 0x66, 0x8d, 0x6e, 0xef, 0x80, 0x25, 0xef, 0xbc, 0x46, 0xa5, 0x86, 0x12, 0x2f, +0x97, 0x0b, 0xcc, 0xe5, 0xfa, 0xb8, 0xa4, 0x1d, 0x4d, 0x0f, 0x89, 0xd8, 0xc1, 0xa0, 0xe1, 0x5f, +0xae, 0x41, 0xce, 0x58, 0xe3, 0x70, 0x04, 0xf2, 0x35, 0x9c, 0x06, 0xc5, 0x9d, 0x8b, 0x51, 0x14, +0x8c, 0x4a, 0x18, 0x72, 0xc9, 0xdb, 0xa4, 0x84, 0xd4, 0xeb, 0xed, 0xa8, 0x74, 0xa1, 0x3f, 0x9d, +0x80, 0xae, 0xb3, 0xc6, 0xac, 0x96, 0xb6, 0xb6, 0x8e, 0x39, 0xd4, 0x86, 0xf6, 0x5f, 0xfa, 0x15, +0x16, 0x90, 0x45, 0x5f, 0xfd, 0x2d, 0x70, 0xf5, 0x5a, 0xa7, 0xe9, 0x10, 0x85, 0x10, 0x6a, 0xa1, +0x3f, 0x3f, 0x0e, 0x85, 0x47, 0x76, 0xd5, 0xf4, 0xcf, 0xa1, 0xa3, 0xe2, 0x29, 0xad, 0x07, 0x0f, +0xda, 0x60, 0xdd, 0x6c, 0x42, 0x95, 0xf3, 0xd3, 0x5a, 0xbf, 0xc4, 0x2b, 0x5b, 0x30, 0x73, 0x1f, +0x0b, 0x58, 0xab, 0x9d, 0x88, 0xf5, 0x8f, 0x90, 0x43, 0xb2, 0xe6, 0x99, 0xe3, 0x27, 0xaf, 0xf3, +0x29, 0x60, 0xda, 0xc3, 0x35, 0xe2, 0x33, 0x5e, 0x5a, 0xe3, 0xf5, 0x9c, 0xe5, 0x21, 0xb0, 0xd7, +0x71, 0xd7, 0x5d, 0x89, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x9a, 0xc6, 0x68, 0x10, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xaa, 0x81, 0xa0, 0x39, +0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xe2, 0xae, 0x3b, 0x6f, 0x01, 0x00, 0x00, 0x00, +0x03, 0x00, 0x00, 0x00, 0x03, 0xfc, 0xc3, 0x9b, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, +0x61, 0x00, 0x27, 0xa1, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xde, 0xdb, 0xd2, 0xc1, +0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0xfd, 0x45, 0x08, 0xc5, 0x01, 0x00, 0x00, 0x00, +0x07, 0x00, 0x00, 0x00, 0x05, 0xc0, 0xf1, 0xd7 +}; + +unsigned int pva_auth_allow_list_sys_len = 360; diff --git a/drivers/video/tegra/host/pva/pva_system_allow_list.h b/drivers/video/tegra/host/pva/pva_system_allow_list.h new file mode 100644 index 00000000..b9c21bc2 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_system_allow_list.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef PVA_ALLOW_LIST_SYS_H +#define PVA_ALLOW_LIST_SYS_H + +extern unsigned char pva_auth_allow_list_sys[]; +extern unsigned int pva_auth_allow_list_sys_len; +#endif diff --git a/drivers/video/tegra/host/pva/pva_trace.c b/drivers/video/tegra/host/pva/pva_trace.c new file mode 100644 index 00000000..eaca3220 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_trace.c @@ -0,0 +1,107 @@ +/* + * PVA trace log + * + * Copyright (c) 2017-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define CREATE_TRACE_POINTS +#include + +#include "pva.h" +#include "pva_trace.h" + +static void read_linear(struct pva *pva, struct pva_trace_log *trace, u32 toff) +{ + struct pva_trace_header *th = NULL; + struct pva_trace_block_hdr *bh = NULL; + struct pva_trace_point *tp = NULL; + u64 dt; + u32 i; + + const char *name = pva->pdev->name; + + th = (struct pva_trace_header *)trace->addr; + bh = (struct pva_trace_block_hdr *)((u8 *)th + th->head_offset); + while (th->head_offset < toff) { + tp = (struct pva_trace_point *) ((u8 *)bh + sizeof(*bh)); + dt = bh->start_time; + for (i = 0 ; i < bh->n_entries ; i++) { + dt = dt + tp->delta_time; + nvpva_dbg_info(pva, "delta_time: %llu\t %s\t major: %u\t" + "minor: %u\t flags: %u\tsequence: %u\targ1:" + " %u\targ2: %u\n", + dt, name, tp->major, tp->minor, tp->flags, + tp->sequence, tp->arg1, tp->arg2); + + trace_nvhost_pva_write(dt, name, tp->major, + tp->minor, tp->flags, tp->sequence, + tp->arg1, tp->arg2); + tp = tp + 1; + } + + th->head_offset += th->block_size; + + /* head reached end of trace log buffer, break */ + if (th->head_offset >= trace->size) { + th->head_offset = sizeof(*th); + break; + } + bh = (struct pva_trace_block_hdr *) ((u8 *)th + + th->head_offset); + } +} + +/* Read trace points from head to tail pointer */ +void pva_trace_copy_to_ftrace(struct pva *pva) +{ + struct pva_trace_log *trace; + struct pva_trace_header *th; + u32 toff; + + trace = &pva->pva_trace; + th = (struct pva_trace_header *)trace->addr; + + /* + * Read from current head to tail offset. Though tail offset might + * get change in background by FW. Read till current tail ONLY. + */ + if ((th == NULL) || !th->block_size || !th->head_offset + || !th->tail_offset) + return; + + nvpva_dbg_info(pva, "th->block_size: %u\tth->head_offset: %u\tth->tail_offset: %u\n", + th->block_size, th->head_offset, th->tail_offset); + + /* + * If head_offset and tail_offset are same, nothing to read. 
+ */ + if (th->head_offset == th->tail_offset) + return; + + toff = th->tail_offset; + + if (th->head_offset < toff) { + /* No circular read */ + read_linear(pva, trace, toff); + } else { + /* + * Circular read + * Read from head to trace_log buffer size + */ + read_linear(pva, trace, trace->size); + /* Read from head to tail */ + read_linear(pva, trace, toff); + } +} diff --git a/drivers/video/tegra/host/pva/pva_trace.h b/drivers/video/tegra/host/pva/pva_trace.h new file mode 100644 index 00000000..2965263f --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_trace.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _PVA_TRACE_H_ +#define _PVA_TRACE_H_ + +/* + * Individual Trace point + * + * The delta time recorded in each trace point is the time from the previous + * trace point. The first trace point in a block of trace points will have + * a delta time of 0 (it is referencing the absolute time of the block). + */ +struct pva_trace_point { + u32 delta_time; + u8 major; + u8 minor; + u8 flags; + u8 sequence; + u32 arg1; + u32 arg2; +}; + +/* + * Trace block header that is written to DRAM, the indicated number of + * trace points immediately follows the header. + */ +struct pva_trace_block_hdr { + u64 start_time; + u16 n_entries; + u16 reserved_1; + u32 reserved_2; + u8 align[48]; +}; + +struct pva_trace_header { + u32 block_size; + u32 head_offset; + u32 tail_offset; + u8 align[52]; + +}; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_version_config_t19x.c b/drivers/video/tegra/host/pva/pva_version_config_t19x.c new file mode 100644 index 00000000..101288e1 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t19x.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016-2021, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "pva_mailbox_t19x.h" +#include "pva_interface_regs_t19x.h" +#include "pva_version_config_t19x.h" +#include "pva_ccq_t19x.h" + +static int submit_sync_t19x(struct pva *pva, struct pva_cmd_s *cmd, u32 nregs, + u32 queue_id, + struct pva_cmd_status_regs *cmd_status_out) +{ + (void)queue_id; + return pva_mailbox_send_cmd_sync(pva, cmd, nregs, cmd_status_out); +} + +static int submit_sync_locked_t19x(struct pva *pva, struct pva_cmd_s *cmd, + u32 nregs, u32 queue_id, + struct pva_cmd_status_regs *cmd_status_out) +{ + (void)queue_id; + return pva_mailbox_send_cmd_sync_locked(pva, cmd, nregs, + cmd_status_out); +} + +struct pva_version_config pva_t19x_config = { + .read_mailbox = pva_read_mailbox_t19x, + .write_mailbox = pva_write_mailbox_t19x, + .read_status_interface = read_status_interface_t19x, + .ccq_send_task = pva_ccq_send_task_t19x, + .submit_cmd_sync_locked = submit_sync_locked_t19x, + .submit_cmd_sync = submit_sync_t19x, + .irq_count = 1, +}; diff --git a/drivers/video/tegra/host/pva/pva_version_config_t19x.h b/drivers/video/tegra/host/pva/pva_version_config_t19x.h new file mode 100644 index 00000000..0d85b882 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t19x.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_VERSION_CONFIG_T19x_H__ +#define __PVA_VERSION_CONFIG_T19x_H__ + +#include "pva.h" + +extern struct pva_version_config pva_t19x_config; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_version_config_t23x.c b/drivers/video/tegra/host/pva/pva_version_config_t23x.c new file mode 100644 index 00000000..825d7794 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t23x.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "pva_mailbox_t23x.h" +#include "pva_interface_regs_t23x.h" +#include "pva_ccq_t23x.h" + +struct pva_version_config pva_t23x_config = { + .read_mailbox = pva_read_mailbox_t23x, + .write_mailbox = pva_write_mailbox_t23x, + .read_status_interface = read_status_interface_t23x, + .ccq_send_task = pva_ccq_send_task_t23x, + .submit_cmd_sync_locked = pva_send_cmd_sync_locked, + .submit_cmd_sync = pva_send_cmd_sync, + .irq_count = 9, +}; diff --git a/drivers/video/tegra/host/pva/pva_version_config_t23x.h b/drivers/video/tegra/host/pva/pva_version_config_t23x.h new file mode 100644 index 00000000..dada8520 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_version_config_t23x.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __PVA_VERSION_CONFIG_T23X_H__ +#define __PVA_VERSION_CONFIG_T23X_H__ + + +#include "pva.h" + +extern struct pva_version_config pva_t23x_config; + +#endif diff --git a/drivers/video/tegra/host/pva/pva_vpu_app_auth.c b/drivers/video/tegra/host/pva/pva_vpu_app_auth.c new file mode 100644 index 00000000..9c60e2e3 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_app_auth.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "pva.h" +#include "pva_bit_helpers.h" +#include "pva_vpu_app_auth.h" +#include "pva_sha256.h" + +struct pva_buff_s { + const uint8_t *buff; + uint32_t pos; + uint32_t size; +}; + +s32 read_buff(struct pva_buff_s *src_buf, void *dst, u32 size) +{ + u32 pos = src_buf->pos + size; + + if (pos > src_buf->size) + return -1; + + memcpy(dst, (src_buf->buff + src_buf->pos), size); + src_buf->pos = pos; + + return size; +} + +static int +pva_auth_allow_list_parse_pva_buff(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth, + struct pva_buff_s *auth_list_buf) +{ + int err = 0; + + ssize_t read_bytes = 0; + struct vpu_hash_key_pair_s *vhashk; + size_t vkey_size = 0; + size_t vhash_size = 0; + + //Destroy previously parsed allowlist data + pva_auth_allow_list_destroy(pva_auth); + vhashk = kzalloc(sizeof(struct vpu_hash_key_pair_s), GFP_KERNEL); + if (vhashk == NULL) { + nvpva_warn(&pdev->dev, "ERROR: Unable to allocate memory"); + err = -ENOMEM; + goto out; + } + + read_bytes = read_buff(auth_list_buf, + &(vhashk->num_keys), + sizeof(vhashk->num_keys)); + if (read_bytes != (ssize_t)(sizeof(vhashk->num_keys))) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -EINVAL; + goto free_vhashk; + } + + vkey_size = sizeof(struct shakey_s)*(vhashk->num_keys); + vhashk->psha_key = kzalloc(vkey_size, GFP_KERNEL); + if (vhashk->psha_key == NULL) { + nvpva_warn(&pdev->dev, "ERROR: Unable to allocate memory"); + err = -ENOMEM; + goto free_vhashk; + } + + read_bytes = read_buff(auth_list_buf, vhashk->psha_key, vkey_size); + if (read_bytes != (ssize_t)vkey_size) { + err = -EINVAL; + goto free_shakeys; + } + + read_bytes = read_buff(auth_list_buf, + &(vhashk->num_hashes), + sizeof(vhashk->num_hashes)); + if (read_bytes != (ssize_t)(sizeof(vhashk->num_hashes))) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -EINVAL; + goto free_shakeys; + } + + vhash_size = sizeof(struct vpu_hash_vector_s)*(vhashk->num_hashes); + vhashk->pvpu_hash_vector = kzalloc(vhash_size, GFP_KERNEL); + if (vhashk->pvpu_hash_vector == NULL) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -ENOMEM; + goto free_shakeys; + } + + read_bytes = read_buff(auth_list_buf, + vhashk->pvpu_hash_vector, + vhash_size); + if (read_bytes != (ssize_t)vhash_size) { + nvpva_warn(&pdev->dev, "ERROR: read failed"); + err = -EINVAL; + goto free_hashes; + } + + pva_auth->pva_auth_allow_list_parsed = true; + pva_auth->pva_auth_enable = true; + pva_auth->vpu_hash_keys = vhashk; + goto out; + +free_hashes: + kfree(vhashk->pvpu_hash_vector); + vhashk->pvpu_hash_vector = NULL; + +free_shakeys: + kfree(vhashk->psha_key); + vhashk->psha_key = NULL; + +free_vhashk: + + kfree(vhashk); + vhashk = NULL; + +out: + return err; +} + +int +pva_auth_allow_list_parse_buf(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth, + u8 *buffer, + u32 length) +{ + int err = 0; + struct pva_buff_s auth_list_buf = {0}; + + auth_list_buf.buff = buffer; + auth_list_buf.size = length; + auth_list_buf.pos = 0; + + err = pva_auth_allow_list_parse_pva_buff(pdev, + pva_auth, + &auth_list_buf); + return err; +} + +int +pva_auth_allow_list_parse(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pdev); + struct pva *pva = pdata->private_data; + const struct firmware *pallow_list; + struct pva_buff_s auth_list_buf = {0}; + int err = 0; + + err = nvpva_request_firmware(pdev, + PVA_AUTH_ALLOW_LIST_DEFAULT, 
&pallow_list); + + if (err) { + nvpva_dbg_fn(pva, "pva allow list request failed"); + nvpva_warn(&pdev->dev, + "Failed to load the allow list\n"); + err = -ENOENT; + goto out; + } + + auth_list_buf.buff = pallow_list->data; + auth_list_buf.size = pallow_list->size; + auth_list_buf.pos = 0; + err = pva_auth_allow_list_parse_pva_buff(pdev, + pva_auth, + &auth_list_buf); + release_firmware(pallow_list); +out: + return err; +} + +void +pva_auth_allow_list_destroy(struct pva_vpu_auth_s *pva_auth) +{ + if (pva_auth->vpu_hash_keys == NULL) + return; + + kfree(pva_auth->vpu_hash_keys->pvpu_hash_vector); + kfree(pva_auth->vpu_hash_keys->psha_key); + kfree(pva_auth->vpu_hash_keys); + pva_auth->vpu_hash_keys = NULL; +} + +/** + * \brief + * is_key_match calculates the sha256 key of ELF and checks if it matches with key. + * \param[in] dataptr Pointer to the data to which sha256 to ba calculated + * \param[in] size length in bytes of the data to which sha256 to be calculated. + * \param[in] key the key with which calculated key would be compared for match. + * \return The completion status of the operation. Possible values are: + * \ref 0 Success. Passed in key matched wth calculated key. + * \ref -EINVAL. Passed in Key doesn't match with calcualted key. + */ +static int32_t +is_key_match(uint8_t *dataptr, + size_t size, + struct shakey_s key) +{ + int32_t err = 0; + uint32_t calc_key[8]; + size_t off; + struct sha256_ctx_s ctx1; + struct sha256_ctx_s ctx2; + + sha256_init(&ctx1); + off = (size / 64U) * 64U; + if (off > 0U) + pva_sha256_update(&ctx1, dataptr, off); + + /* clone */ + sha256_copy(&ctx1, &ctx2); + + /* finalize with leftover, if any */ + sha256_finalize(&ctx2, dataptr + off, size % 64U, calc_key); + + err = memcmp((void *)&(key.sha_key), + (void *)calc_key, + NVPVA_SHA256_DIGEST_SIZE); + if (err != 0) + err = -EINVAL; + + return err; +} + +/** + * \brief + * Keeps checking all the keys accociated with match_hash + * against the calculated sha256 key for dataptr, until it finds a match. + * \param[in] pva Pointer to PVA driver context structure struct \ref nvpva_drv_ctx + * \param[in] dataptr pointer to ELF data + * \param[in] size length (in bytes) of ELF data + * \param[in] match_hash pointer to matching hash structure, \ref struct vpu_hash_vector_s. + * \return Matching status of the calculated key + * against the keys asscociated with match_hash. possible values: + * - 0 Success, one of the keys associated with match_hash + * matches with the calculated sha256 key. + * - -EINVAL, None matches. + */ +static int +check_all_keys_for_match(struct shakey_s *pallkeys, + uint8_t *dataptr, + size_t size, + const struct vpu_hash_vector_s *match_hash) +{ + int32_t err = 0; + uint32_t idx; + uint32_t count; + struct shakey_s key; + uint32_t i; + + idx = match_hash->index; + count = match_hash->count; + if (idx > UINT_MAX - count) { + err = -ERANGE; + goto fail; + } + + for (i = 0; i < count; i++) { + key = pallkeys[idx+i]; + err = is_key_match(dataptr, size, key); + if (err == 0) + break; + } +fail: + return err; +} +/** + * @brief + * Helper function for \ref binary_search. + * Uses a specific field in @ref pkey to compare with the same filed in @ref pbase. + * @param[in] pkey pointer to the object that needs to be compared. + * @param[in] pbase pointer to the starting element of the array. + * @retval + * - -1 when @ref pkey is less than starting element of array pointed to by @ref pbase. + * - 1 when @ref pkey is greater than starting element of array pointed to by @ref pbase. 
+ * - 0 when @ref pkey is equal to starting element of array pointed to by @ref pbase. + */ +static int +compare_hash_value(const void *pkey, + const void *pbase) +{ + int ret; + + if ((((const struct vpu_hash_vector_s *)pkey)->crc32_hash) < + (((const struct vpu_hash_vector_s *)pbase)->crc32_hash)) + ret = -1; + else if ((((const struct vpu_hash_vector_s *)pkey)->crc32_hash) > + (((const struct vpu_hash_vector_s *)pbase)->crc32_hash)) + ret = 1; + else + ret = 0; + + return ret; +} + +/** + * @brief + * calculates crc32. + * @param[in] crc initial crc value. usually 0. + * @param[in] buf pointer to the buffer whose crc32 to be calculated. + * @param[in] len length (in bytes) of data at @ref buf. + * @retval value of calculated crc32. + */ +static uint32_t +pva_crc32(uint32_t crc, + unsigned char *buf, + size_t len) +{ + int k; + + crc = ~crc; + while (len != 0U) { + crc ^= *buf++; + for (k = 0; k < 8; k++) + crc = ((crc & 1U) == 1U) ? + (crc >> 1U) ^ 0xedb88320U : crc >> 1U; + + len--; + } + + return ~crc; +} + +const void +*binary_search(const void *key, + const void *base, + size_t num_elems, + size_t size, + int (*compare)(const void *pkey, const void *pbase)) +{ + size_t low = 0U; + size_t high; + + if (num_elems == 0U || size == 0U) + return NULL; + + high = num_elems - 1U; + for (;;) { + const void *mid_elem; + int r; + size_t mid = low + ((high - low) / 2U); + + /* coverity CERT INT30-C Unsigned integer */ + /* operation mid * size may wrap. */ + if (mid > UINT_MAX/size) + return NULL; + + mid_elem = ((const unsigned char *) base) + + mid * size; + r = compare(key, mid_elem); + + if (r < 0) { + if (mid == 0U) + return NULL; + + high = mid - 1U; + } else if (r > 0) { + low = mid + 1U; + if (low < mid || low > high) + return NULL; + } else { + return mid_elem; + } + } +} + +int +pva_vpu_check_sha256_key(struct pva *pva, + struct vpu_hash_key_pair_s *vpu_hash_keys, + uint8_t *dataptr, + size_t size) +{ + int err = 0; + struct vpu_hash_vector_s cal_Hash; + const struct vpu_hash_vector_s *match_Hash; + + cal_Hash.crc32_hash = pva_crc32(0L, dataptr, size); + + match_Hash = (const struct vpu_hash_vector_s *) + binary_search(&cal_Hash, + vpu_hash_keys->pvpu_hash_vector, + vpu_hash_keys->num_hashes, + sizeof(struct vpu_hash_vector_s), + compare_hash_value); + if (match_Hash == NULL) { + nvpva_dbg_info(pva, "ERROR: No Hash Match Found"); + err = -EINVAL; + goto fail; + } + + err = check_all_keys_for_match(vpu_hash_keys->psha_key, + dataptr, + size, + match_Hash); + if (err != 0) + nvpva_dbg_info(pva, "Error: Match key not found"); +fail: + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_vpu_app_auth.h b/drivers/video/tegra/host/pva/pva_vpu_app_auth.h new file mode 100644 index 00000000..e2804a49 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_app_auth.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
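pva_vpu_check_sha256_key() therefore verifies an ELF in two stages: a cheap CRC32 over the whole image selects a bucket by binary-searching the crc32_hash-sorted vector, and only the SHA-256 keys of that bucket, psha_key[index] through psha_key[index + count - 1], are compared against the full digest. In the built-in pva_auth_allow_list_sys blob every bucket has count = 1 and index = 0..7, so each CRC32 maps to exactly one key. A minimal sketch of the bucket-to-key mapping (the helper is illustrative):

/* Illustrative: the SHA-256 keys tried for one matched hash bucket. */
static const struct shakey_s *
bucket_keys(const struct shakey_s *pallkeys,
	    const struct vpu_hash_vector_s *bucket, uint32_t *nkeys)
{
	*nkeys = bucket->count;
	return &pallkeys[bucket->index];
}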
+ */ + +#ifndef NVPVA_VPU_HASH_H +#define NVPVA_VPU_HASH_H + +#include "pva_vpu_exe.h" + +/** + * Size of sha256 keys in bytes. + */ +#define NVPVA_SHA256_DIGEST_SIZE 32U +/** + * Maximum length of allowlist file path + */ +#define ALLOWLIST_FILE_LEN 128U +/** + * Default path (including filename) of pva vpu elf authentication allowlist file + */ +#define PVA_AUTH_ALLOW_LIST_DEFAULT "pva_auth_allowlist" +/** + * Array of all VPU Hash'es + */ +struct vpu_hash_vector_s { + /*! Number of Keys for this crc32_hash */ + uint32_t count; + /*! Starting Index into Keys Array */ + uint32_t index; + /*! CRC32 hash value */ + uint32_t crc32_hash; +}; + +/** + * Stores sha256 key + */ +struct shakey_s { + /** 256-bit (32 Bytes) SHA Key */ + uint8_t sha_key[NVPVA_SHA256_DIGEST_SIZE]; +}; + +/** + * Stores Hash Vector and Keys vector + */ +struct vpu_hash_key_pair_s { + /*! Total number of Keys in binary file */ + uint32_t num_keys; + /*! pointer to SHA keys Array. */ + struct shakey_s *psha_key; + /*! Total number of Hashes in binary file */ + uint32_t num_hashes; + /*! pointer to Array of Hash'es */ + struct vpu_hash_vector_s *pvpu_hash_vector; +}; + +/** + * Stores all the information related to pva vpu elf authentication. + */ +struct pva_vpu_auth_s { + /** Stores crc32-sha256 of ELFs */ + struct vpu_hash_key_pair_s *vpu_hash_keys; + struct mutex allow_list_lock; + /** Flag to check if allowlist is enabled */ + bool pva_auth_enable; + /** Flag to track if the allow list is already parsed */ + bool pva_auth_allow_list_parsed; +}; + +struct nvpva_drv_ctx; + +/** + * \brief checks if the sha256 key of ELF has a match in allowlist. + * + * It first checks if the allowlist is available. + * If its not available it returns error code. + * If allowlist is available, then it first calculates the crc32 hash of the elf + * and compares the calculated hash with the available hashes in allowlist. + * If it doesn't find a match of hash in allowlist it returns error code. + * If it finds a matched hash, then it goes ahead and calculates the sha256 key of elf + * and compares it with the keys asscociated with the hash in the allowlist file. + * If there is a key match then it returns successfully. Else it returs error code. + * + * \param[in] vpu_hash_keys Pointer to PVA vpu elf sha256 authentication + * keys structure \ref struct vpu_hash_key_pair_s + * \param[in] dataptr data pointer of ELF to be validate SHA + * \param[in] size 32-bit unsigned int ELF size in number of bytes + * + * \return The completion status of the operation. Possible values are: + * - 0 when there exists a match key for the elf data pointed by dataptr. + * - -EINVAL when allowlist file doesn't exists OR + * when the hash of ELF has no match in allowlist file OR + * when the sha256 key has no match in the list of keys + * associated with the hash of ELF + */ +int pva_vpu_check_sha256_key(struct pva *pva, + struct vpu_hash_key_pair_s *vpu_hash_keys, + uint8_t *dataptr, + size_t size); + + +/** + * Parse binary file containing authentication list stored in firmware dir + * This binary file has + 32-bit num_hashes, + 32-bit num_keys, + Array of {32-bit(4 byte) CRC32 as Hash, 32-bit index into Array of keys, + 32-bit count of keys for this hash} + Array of 256-bit keys. + * Allocate memory for all the fileds and Store them. + * Parse Hash Array and Store in memory + * Parse Keys Array and Store in memory. 
+ * + * \param[in] pva_auth Pointer to PVA vpu elf authentication data struct \ref pva_vpu_auth + * \return + * - 0, if everything is successful. + * - -ENOENT, if allowlist file is not found at /proc/boot/ + * - negative of error code from fstat() if fstat fails. + * - -ERANGE, if file size is less than 0 or greater than NVPVA_VPU_ELF_MAX_SZ. + * - -ENOMEM, if any memory allocation fails. + * - negative of error code return from read() + * - -EINVAL, if read() doesn't read expected number of bytes from the file. + */ +int +pva_auth_allow_list_parse(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth); + +/** + * Parse allow list stored in memory + * This binary file has + 32-bit num_hashes, + 32-bit num_keys, + Array of {32-bit(4 byte) CRC32 as Hash, 32-bit index into Array of keys, + 32-bit count of keys for this hash} + Array of 256-bit keys. + * Allocate memory for all the fileds and Store them. + * Parse Hash Array and Store in memory + * Parse Keys Array and Store in memory. + * + * \param[in] pva_auth Pointer to PVA vpu elf authentication data struct \ref pva_vpu_auth + * \return + * - 0, if everything is successful. + * - -ENOENT, if allowlist file is not found at /proc/boot/ + * - negative of error code from fstat() if fstat fails. + * - -ERANGE, if file size is less than 0 or greater than NVPVA_VPU_ELF_MAX_SZ. + * - -ENOMEM, if any memory allocation fails. + * - negative of error code return from read() + * - -EINVAL, if read() doesn't read expected number of bytes from the file. + */ +int pva_auth_allow_list_parse_buf(struct platform_device *pdev, + struct pva_vpu_auth_s *pva_auth, + u8 *buffer, + u32 length); + +/** + * @brief Frees all the memory utilized for storing elf authentication data. + * @param[in] pva_auth Pointer to PVA vpu elf authentication data struct \ref pva_vpu_auth + */ +void pva_auth_allow_list_destroy(struct pva_vpu_auth_s *pva_auth); + +/** + * The binary_search() function performs a binary search + * on the sorted array of num elements pointed to by base, + * for an item that matches the object pointed to by key. + * + * \param[in] key The object to search for. + * \param[in] base A pointer to the first element in the array + * \param[in] num_elems The number of elements in the array. + * \param[in] size The size of an element, in bytes. + * \param[in] compare A pointer to a user-supplied function + * that lfind() calls to compare an array element with the key. + * \param[in] pkey the same pointer as key + * \param[in] pbase a pointer to an element in the array. + * + * \return A pointer to a matching member of the array, + * or NULL if a matching object couldn't be found + */ +const void *binary_search(const void *key, + const void *base, + size_t num_elems, + size_t size, + int (*compare)(const void *pkey, + const void *pbase)); +#endif diff --git a/drivers/video/tegra/host/pva/pva_vpu_exe.c b/drivers/video/tegra/host/pva/pva_vpu_exe.c new file mode 100644 index 00000000..e3ec28f3 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_exe.c @@ -0,0 +1,1172 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "nvpva_elf_parser.h" +#include "pva_bit_helpers.h" +#include "pva.h" +#include "hw_vmem_pva.h" +#include "pva_vpu_exe.h" + +#define ELF_MAXIMUM_SECTION_NAME 64 +#define ELF_EXPORTS_SECTION "EXPORTS" +#define DATA_SECTION_ALIGNMENT 32 +#define CODE_SECTION_ALIGNMENT 32 + +#define MAX_VPU_SEGMENTS 4 + +#define SWAP_DATA(OUT, IN) \ + do { \ + OUT = PVA_INSERT(PVA_EXTRACT(IN, 31, 24, uint32_t), 7, 0); \ + OUT |= PVA_INSERT(PVA_EXTRACT(IN, 23, 16, uint32_t), 15, 8); \ + OUT |= PVA_INSERT(PVA_EXTRACT(IN, 15, 8, uint32_t), 23, 16); \ + OUT |= PVA_INSERT(PVA_EXTRACT(IN, 7, 0, uint32_t), 31, 24); \ + } while (0) + +/* + * Define mapping from VPU data, rodata and program sections into + * corresponding segment types. + */ + +static const struct pack_rule { + const char *elf_section_name; + int32_t pva_type; +} pack_rules[] = { + { + .elf_section_name = ".data", + .pva_type = PVA_SEG_VPU_DATA, + }, { + .elf_section_name = ".rodata", + .pva_type = PVA_SEG_VPU_DATA, + }, { + .elf_section_name = ".text", + .pva_type = PVA_SEG_VPU_CODE, + } +}; + +static int32_t find_pva_ucode_segment_type(const char *section_name, uint32_t addr) +{ + uint32_t i; + int32_t ret = PVA_SEG_VPU_MAX_TYPE; + + for (i = 0; i < ARRAY_SIZE(pack_rules); i += 1) { + /* Ignore the suffix of the section name */ + if (strncmp(section_name, pack_rules[i].elf_section_name, + strlen(pack_rules[i].elf_section_name)) == 0) { + ret = pack_rules[i].pva_type; + break; + } + } + if (ret == PVA_SEG_VPU_DATA) { + int section_name_len = + strnlen(section_name, ELF_MAXIMUM_SECTION_NAME); + int exports_section_name_len = + strnlen(ELF_EXPORTS_SECTION, ELF_MAXIMUM_SECTION_NAME); + if (section_name_len >= exports_section_name_len && + strncmp((section_name + + (section_name_len - exports_section_name_len)), + ELF_EXPORTS_SECTION, + exports_section_name_len) == 0) { + ret = PVA_SEG_VPU_IN_PARAMS; + } else if (addr == 0xc0000U) { + ret = PVA_SEG_VPU_IN_PARAMS; + } + } + + return ret; +} + +void print_segments_info(struct pva_elf_image *elf_img) +{ + pr_info("PVA_SEG_VPU_CODE =%d", PVA_SEG_VPU_CODE); + pr_info("PVA_SEG_VPU_DATA =%d", PVA_SEG_VPU_DATA); + pr_info("Code Buffer"); + pr_info("vpu_segments_buffer[PVA_SEG_VPU_CODE]"); + pr_info("code_size = %u", + elf_img->vpu_segments_buffer[PVA_SEG_VPU_CODE].localsize); + pr_info("vpu_segments_buffer[PVA_SEG_VPU_DATA]"); + pr_info("data_size = %u", + elf_img->vpu_segments_buffer[PVA_SEG_VPU_DATA].localsize); +} + +int32_t pva_get_sym_offset(struct nvpva_elf_context *d, uint16_t exe_id, + uint32_t sym_id, uint32_t *addr, uint32_t *size) +{ + if ((!pva_vpu_elf_is_registered(d, exe_id)) + || (addr == NULL) + || (size == NULL) + || (sym_id >= get_elf_image(d, exe_id)->num_symbols) + || (sym_id == NVPVA_INVALID_SYMBOL_ID)) + return -EINVAL; + + *addr = get_elf_image(d, exe_id)->sym[sym_id].addr; + *size = get_elf_image(d, exe_id)->sym[sym_id].size; + + return 0; +} + +dma_addr_t phys_get_bin_info(struct nvpva_elf_context *d, uint16_t exe_id) +{ + dma_addr_t addr = 0LL; + + if (pva_vpu_elf_is_registered(d, exe_id)) + addr = get_elf_image(d, exe_id)->vpu_bin_buffer.pa; + + return addr; +} + +static int32_t pva_vpu_elf_alloc_mem(struct pva *pva, + struct platform_device *pdev, + struct pva_elf_buffer *buffer, size_t size) +{ + dma_addr_t pa = 0U; + void *va = NULL; + + va = 
dma_alloc_coherent(&pdev->dev, size, &pa, GFP_KERNEL); + if (va == NULL) + goto fail; + + nvpva_dbg_info(pva, "vpu app addr = %llx", (u64)pa); + + buffer->size = size; + buffer->va = va; + buffer->pa = pa; + + buffer->alloc_size = size; + buffer->alloc_va = va; + buffer->alloc_pa = pa; + + return 0; +fail: + return -ENOMEM; +} + +static int32_t pva_vpu_bin_info_allocate(struct pva *dev, + struct pva_elf_image *elf_img) +{ + int32_t ret = 0; + size_t aligned_size; + size_t size = sizeof(struct pva_bin_info_s); + + aligned_size = ALIGN(size + 128, 128); + + ret = pva_vpu_elf_alloc_mem(dev, + dev->aux_pdev, + &elf_img->vpu_bin_buffer, + aligned_size); + if (ret) { + pr_err("Memory allocation failed"); + goto fail; + } + + elf_img->vpu_bin_buffer.va = + (void *)ALIGN((uintptr_t)elf_img->vpu_bin_buffer.va, 128); + elf_img->vpu_bin_buffer.pa = ALIGN(elf_img->vpu_bin_buffer.pa, 128); + + (void)memcpy(elf_img->vpu_bin_buffer.va, (void *)&elf_img->info, size); + +fail: + + return ret; +} + +static int32_t pva_vpu_allocate_segment_memory(struct nvpva_elf_context *d, + struct pva_elf_image *elf_img) +{ + int32_t err = 0; + int32_t i; + uint32_t segment_size = 0; + + for (i = 0; i < PVA_SEG_VPU_MAX_TYPE; i++) { + if (i == PVA_SEG_VPU_IN_PARAMS) + continue; + + segment_size = elf_img->vpu_segments_buffer[i].localsize; + if (i == PVA_SEG_VPU_CODE) { + const u32 cache_size = (d->dev->version == PVA_HW_GEN1) ? + (8 * 1024) : + (16 * 1024); + + segment_size += cache_size; + } + segment_size = ALIGN(segment_size + 128, 128); + if (segment_size == 0) + continue; + + err = pva_vpu_elf_alloc_mem(d->dev, + d->cntxt_dev, + &elf_img->vpu_segments_buffer[i], + segment_size); + if (err) { + pr_err("Memory allocation failed"); + break; + } + elf_img->vpu_segments_buffer[i].va = (void *)ALIGN( + (uintptr_t)elf_img->vpu_segments_buffer[i].va, 128); + + elf_img->vpu_segments_buffer[i].pa = + ALIGN(elf_img->vpu_segments_buffer[i].pa, 128); + + memcpy(elf_img->vpu_segments_buffer[i].va, + elf_img->vpu_segments_buffer[i].localbuffer, + elf_img->vpu_segments_buffer[i].localsize); + + kfree(elf_img->vpu_segments_buffer[i].localbuffer); + elf_img->vpu_segments_buffer[i].localbuffer = NULL; + elf_img->vpu_segments_buffer[i].localsize = 0; + } + + return err; +} + +static int32_t +pva_allocate_data_section_info(struct pva *dev, + struct pva_elf_image *elf_img) +{ + int32_t err = 0; + + if (elf_img->vpu_data_segment_info.localsize == 0U) + goto out; + + err = pva_vpu_elf_alloc_mem(dev, + dev->aux_pdev, + &elf_img->vpu_data_segment_info, + elf_img->vpu_data_segment_info.localsize); + if (err != 0) { + pr_err("Failed to allocate data segment info memory"); + goto out; + } + + (void)memset(elf_img->vpu_data_segment_info.va, 0, + elf_img->vpu_data_segment_info.size); + + (void)memcpy(elf_img->vpu_data_segment_info.va, + (void *)elf_img->vpu_data_segment_info.localbuffer, + elf_img->vpu_data_segment_info.localsize); + + kfree(elf_img->vpu_data_segment_info.localbuffer); + elf_img->vpu_data_segment_info.localbuffer = NULL; + elf_img->vpu_data_segment_info.localsize = 0; + +out: + + return err; + +} + +static int32_t write_bin_info(struct nvpva_elf_context *d, + struct pva_elf_image *elf_img) +{ + struct pva_bin_info_s *curr_bin_info; + int32_t err = 0; + + err = pva_vpu_allocate_segment_memory(d, elf_img); + if (err < 0) { + pr_err("pva: failed to allocate segment memory"); + goto fail; + } + + err = pva_allocate_data_section_info(d->dev, elf_img); + if (err < 0) { + pr_err("Failed to allocate data segment info memory"); + goto fail; 
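The allocation idiom used in this and the surrounding helpers is: ask dma_alloc_coherent() for ALIGN(size + 128, 128) bytes, remember the raw return values in alloc_va/alloc_pa/alloc_size (pva_elf_free_buffer() later hands exactly those back to dma_free_coherent()), then round the working va/pa up to the next 128-byte boundary. A compact sketch of the same pattern, assuming only the struct pva_elf_buffer fields from pva_vpu_exe.h:

/* Illustrative: over-allocate by 128 B, then align the usable window. */
static int alloc_buf_aligned_128(struct device *dev,
				 struct pva_elf_buffer *buf, size_t payload)
{
	size_t alloc_size = ALIGN(payload + 128, 128);
	dma_addr_t pa;
	void *va = dma_alloc_coherent(dev, alloc_size, &pa, GFP_KERNEL);

	if (va == NULL)
		return -ENOMEM;

	buf->alloc_va = va;		/* kept for dma_free_coherent() */
	buf->alloc_pa = pa;
	buf->alloc_size = alloc_size;

	buf->va = (void *)ALIGN((uintptr_t)va, 128);
	buf->pa = ALIGN(pa, 128);
	buf->size = alloc_size;

	return 0;
}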
+ } + + curr_bin_info = &elf_img->info; + + curr_bin_info->bin_info_size = sizeof(struct pva_bin_info_s); + curr_bin_info->bin_info_version = PVA_BIN_INFO_VERSION_ID; + curr_bin_info->code_base = + elf_img->vpu_segments_buffer[PVA_SEG_VPU_CODE].pa; + curr_bin_info->data_sec_base = + elf_img->vpu_data_segment_info.pa; + curr_bin_info->data_sec_count = + elf_img->vpu_data_segment_info.num_segments; + curr_bin_info->data_base = + elf_img->vpu_segments_buffer[PVA_SEG_VPU_DATA].pa; + +fail: + + return err; +} + +static int32_t copy_to_elf_buffer_code(struct pva_elf_buffer *buffer, + const void *src, size_t src_size, + uint32_t addr) +{ + uint32_t addr_bytes = addr * 4; + uint32_t *dst_size = NULL; + + dst_size = &buffer->localsize; + if (addr_bytes + src_size > *dst_size) { + size_t aligned_size = addr_bytes + src_size; + + if (aligned_size % DATA_SECTION_ALIGNMENT) + aligned_size += + (DATA_SECTION_ALIGNMENT - + (aligned_size % DATA_SECTION_ALIGNMENT)); + + if (buffer->localbuffer == NULL) { + /* First .text section must load at 0 */ + if (addr_bytes != 0) { + pr_err("First .text section does not start at 0"); + return -EINVAL; + } + buffer->localbuffer = kzalloc(aligned_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) + return -ENOMEM; + + } else { + uint8_t *new_buffer = kzalloc(aligned_size, GFP_KERNEL); + + if (new_buffer == NULL) + return -ENOMEM; + + memcpy(new_buffer, buffer->localbuffer, *dst_size); + kfree(buffer->localbuffer); + buffer->localbuffer = new_buffer; + } + *dst_size = aligned_size; + } + memcpy((void *)((uintptr_t)buffer->localbuffer + addr_bytes), src, + src_size); + return 0; +} + +static int32_t copy_to_elf_buffer(struct pva_elf_buffer *buffer, + const void *src, size_t src_size) +{ + uint8_t *resize_buffer = NULL; + uint32_t *dst_size = NULL; + + dst_size = &buffer->localsize; + if (src != NULL) { + size_t aligned_size = src_size; + + if (src_size % DATA_SECTION_ALIGNMENT) + aligned_size += (DATA_SECTION_ALIGNMENT - + (src_size % DATA_SECTION_ALIGNMENT)); + + if (buffer->localbuffer == NULL) { + buffer->localbuffer = kzalloc(aligned_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) + return -ENOMEM; + + } else { + resize_buffer = kzalloc(*dst_size, GFP_KERNEL); + if (resize_buffer == NULL) + return -ENOMEM; + + memcpy(resize_buffer, buffer->localbuffer, *dst_size); + kfree(buffer->localbuffer); + buffer->localbuffer = NULL; + buffer->localbuffer = + kzalloc((*dst_size) + aligned_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) { + kfree(resize_buffer); + return -ENOMEM; + } + memcpy(buffer->localbuffer, resize_buffer, *dst_size); + kfree(resize_buffer); + } + memcpy((void *)((uintptr_t)buffer->localbuffer + *dst_size), + src, src_size); + *dst_size += aligned_size; + } else { + return -EINVAL; + } + + return 0; +} + +static int32_t +copy_to_elf_data_sec_buffer(struct pva_elf_buffer *buffer, + const void *src, + uint32_t src_size) +{ + uint8_t *resize_buffer = NULL; + uint32_t *dst_size = NULL; + + dst_size = &buffer->localsize; + + if ((src == NULL) || (src_size == 0U)) + return -EINVAL; + + if (buffer->localbuffer == NULL) { + buffer->localbuffer = kzalloc(src_size, GFP_KERNEL); + if (buffer->localbuffer == NULL) + return -ENOMEM; + } else { + resize_buffer = kzalloc((*dst_size) + src_size, GFP_KERNEL); + if (resize_buffer == NULL) + return -ENOMEM; + + (void) memcpy(resize_buffer, buffer->localbuffer, *dst_size); + kfree(buffer->localbuffer); + buffer->localbuffer = resize_buffer; + } + + (void) memcpy((void *)(buffer->localbuffer + *dst_size), src, 
src_size); + if ((UINT_MAX - *dst_size) < src_size) + return -EINVAL; + + *dst_size += src_size; + + return 0; +} + +static int32_t +copy_segments(void *elf, struct pva_elf_image *elf_img, + const struct elf_section_header *section_header, + const char *section_name, int hw_gen) +{ + int32_t segment_type = 0U; + int32_t ret = 0; + + const u8 *elf_data; + uint32_t *data; + uint32_t sw_data; + uint32_t dst_buffer_size_old = 0; + struct pva_bin_info_s *bin_info = NULL; + struct pva_elf_buffer *buffer = NULL; + struct pva_vpu_data_section_s data_sec_info = {0}; + + if ((section_header == NULL) || (section_name == NULL)) { + ret = -EINVAL; + goto out; + } + + segment_type = find_pva_ucode_segment_type(section_name, + section_header->addr); + + bin_info = &elf_img->info; + if (!(segment_type == PVA_SEG_VPU_DATA) && + !(segment_type == PVA_SEG_VPU_CODE)) { + ret = 0; + goto out; + } + + buffer = &elf_img->vpu_segments_buffer[segment_type]; + dst_buffer_size_old = buffer->localsize; + elf_data = elf_section_contents(elf, section_header); + if (elf_data == NULL) + goto inc_num_segments; + + + + if (segment_type == (int32_t)PVA_SEG_VPU_CODE) { + unsigned int idx; + + for (idx = 0; idx < (section_header->size / 4); idx++) { + data = ((uint32_t *)elf_data) + idx; + SWAP_DATA(sw_data, *data); + *data = sw_data; + } + + ret = copy_to_elf_buffer_code(buffer, + elf_data, + section_header->size, + section_header->addr); + if (ret != 0) + goto out; + + bin_info->code_size = buffer->localsize; + + } else { + ret = copy_to_elf_buffer(buffer, + elf_data, + section_header->size); + if (ret != 0) + goto out; + } + + if (segment_type == (int32_t)PVA_SEG_VPU_DATA) { + struct pva_vpu_data_section_s *pdsec; + struct pva_elf_buffer *buffer_temp; + u32 size_temp; + + pdsec = &data_sec_info; + pdsec->offset = dst_buffer_size_old; + pdsec->addr = section_header->addr; + if (buffer->localsize < dst_buffer_size_old) { + pr_err("Invalid buffer size"); + ret = -EINVAL; + goto out; + } + + pdsec->size = (buffer->localsize - dst_buffer_size_old); + ret = nvpva_validate_vmem_offset(pdsec->addr, + pdsec->size, + hw_gen); + if (ret != 0) + goto out; + + buffer_temp = &elf_img->vpu_data_segment_info; + size_temp = (uint32_t)sizeof(struct pva_vpu_data_section_s); + ret = copy_to_elf_data_sec_buffer(buffer_temp, + &data_sec_info, + size_temp); + if (ret != 0) + goto out; + + if (buffer_temp->num_segments >= (UINT_MAX - 1U)) { + ret = -EINVAL; + pr_err("Number of data segments exceeds UINT_MAX"); + goto out; + } + + buffer_temp->num_segments++; + } + +inc_num_segments: + + buffer->num_segments++; + +out: + return ret; +} + + +static int32_t +populate_segments(void *elf, struct pva_elf_image *elf_img, + int hw_gen) +{ + const struct elf_section_header *section_header; + int32_t ret = 0; + uint32_t index = 0; + const char *section_name; + const u32 sectionCount = elf_shnum(elf); + + for (index = 0; index < sectionCount; index++) { + section_header = elf_section_header(elf, index); + if (section_header == NULL) { + ret = -EINVAL; + goto out; + } + section_name = elf_section_name(elf, section_header); + if (section_header->type == SHT_PROGBITS) { + ret = copy_segments(elf, elf_img, section_header, + section_name, hw_gen); + if (ret) + goto out; + } + } +out: + return ret; +} + +/** + * Data about symbol information in EXPORTS sections is present as follows + * typedef struct { + * uint32_t type; From VMEM_TYPE enums + * uint32_t addr_offset; Offset from VMEM base + * uint32_t size; Size of VMEM region in bytes + * } 
vmem_symbol_metadata_t; + */ +static int32_t update_exports_symbol(void *elf, const struct elf_section_header *section_header, + struct pva_elf_symbol *symID) +{ + const u8 *data; + const char *section_name; + int32_t section_type; + + section_name = elf_section_name(elf, section_header); + if (section_name == NULL) + return -EINVAL; + + section_type = find_pva_ucode_segment_type(section_name, section_header->addr); + if (section_type == PVA_SEG_VPU_IN_PARAMS) { + uint32_t symOffset = symID->addr - section_header->addr; + data = elf_section_contents(elf, section_header); + if (data == NULL) + return -EINVAL; + symID->type = *(uint32_t *)&data[symOffset]; + if ((symID->type == (uint8_t)VMEM_TYPE_INVALID) + || (symID->type >= (uint8_t)VMEM_TYPE_MAX)) + return -EINVAL; + symID->addr = *(uint32_t *)&data[symOffset + sizeof(uint32_t)]; + symID->size = *(uint32_t *)&data[symOffset + (2UL * sizeof(uint32_t))]; + } + + return 0; +} + +static int32_t +populate_symtab(void *elf, struct nvpva_elf_context *d, + uint16_t exe_id, int hw_gen) +{ + const struct elf_section_header *section_header; + const struct elf_section_header *sym_scn; + int32_t ret = 0; + const struct elf_symbol *sym; + uint32_t i, count; + struct pva_elf_symbol *symID; + uint32_t num_symbols = 0; + uint32_t num_sys_symbols = 0; + uint32_t total_sym_size = 0; + const char *symname = NULL; + const char *section_name; + uint32_t stringsize = 0; + int32_t sec_type; + struct pva_elf_image *image; + + section_header = + (const struct elf_section_header *)elf_named_section_header( + elf, ".symtab"); + if (section_header == NULL) + goto update_elf_info; + + count = section_header->size / section_header->entsize; + for (i = 0; i < count; i++) { + if (num_symbols >= NVPVA_TASK_MAX_SYMBOLS) { + ret = -EINVAL; + goto fail; + } + + sym = elf_symbol(elf, i); + if ((sym == NULL) + || (ELF_ST_BIND(sym) != STB_GLOBAL) + || (ELF_ST_TYPE(sym) == STT_FUNC) + || sym->size <= 0) + continue; + + sym_scn = elf_section_header(elf, sym->shndx); + if (sym_scn == NULL) { + ret = -EINVAL; + goto fail; + } + + section_name = elf_section_name(elf, sym_scn); + if (section_name == NULL) { + ret = -EINVAL; + goto fail; + } + + sec_type = find_pva_ucode_segment_type(section_name, + sym_scn->addr); + if (sec_type != PVA_SEG_VPU_IN_PARAMS) + continue; + + symname = elf_symbol_name(elf, section_header, i); + if (symname == NULL) { + ret = -EINVAL; + goto fail; + } + + stringsize = strnlen(symname, (ELF_MAX_SYMBOL_LENGTH - 1)); + symID = &get_elf_image(d, exe_id)->sym[num_symbols]; + symID->symbol_name = + kcalloc(ELF_MAX_SYMBOL_LENGTH, + sizeof(char), GFP_KERNEL); + if (symID->symbol_name == NULL) { + ret = -ENOMEM; + goto fail; + } + + (void)strncpy(symID->symbol_name, symname, stringsize); + symID->symbol_name[stringsize] = '\0'; + if (strncmp(symID->symbol_name, + PVA_SYS_INSTANCE_DATA_V1_SYMBOL, + ELF_MAX_SYMBOL_LENGTH) == 0) { + ++num_sys_symbols; + symID->is_sys = true; + } else + symID->is_sys = false; + + symID->symbolID = num_symbols; + symID->size = sym->size; + symID->addr = sym->value; + ret = update_exports_symbol(elf, sym_scn, symID); + if (ret != 0) { + kfree(symID->symbol_name); + goto fail; + } + + num_symbols++; + total_sym_size += symID->size; + ret = nvpva_validate_vmem_offset(symID->addr, + symID->size, + hw_gen); + if (ret != 0) + goto fail; + } + +update_elf_info: + get_elf_image(d, exe_id)->num_symbols = num_symbols; + get_elf_image(d, exe_id)->num_sys_symbols = num_sys_symbols; + get_elf_image(d, exe_id)->symbol_size_total = total_sym_size; + + 
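For symbols that resolve into an EXPORTS section, the ELF symbol's own value and size are replaced by the three 32-bit words stored at the symbol's offset inside that section, i.e. the vmem_symbol_metadata_t triple described in the comment above. A minimal sketch of that decode (field order as documented; the helper is illustrative):

/* Illustrative: decode one EXPORTS entry {type, addr_offset, size}. */
static void decode_exports_entry(const u8 *section_data, u32 sym_offset,
				 struct pva_elf_symbol *sym)
{
	const u32 *meta = (const u32 *)&section_data[sym_offset];

	sym->type = meta[0];	/* one of the VMEM_TYPE_* values    */
	sym->addr = meta[1];	/* offset from the VMEM base        */
	sym->size = meta[2];	/* size of the VMEM region in bytes */
}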
return ret; +fail: + image = get_elf_image(d, exe_id); + for (i = 0; i < image->num_symbols; i++) { + kfree(image->sym[i].symbol_name); + image->sym[i].symbolID = 0; + image->sym[i].size = 0; + image->sym[i].offset = 0; + } + + return ret; +} + +/** + * + * Validate if elf file passed is valid + * + * @param elf Buffer containing elf file + * @param size Size of buffer containing elf file + * + * @return 0 if everything is correct else return error + */ + +static int32_t validate_vpu(const void *elf, size_t size) +{ + int32_t err = 0; + + if (!image_is_elf(elf) || !elf_is_32bit(elf)) { + pr_err("pva: Invalid 32 bit VPU ELF"); + err = -EINVAL; + } + return err; +} + +static void pva_elf_free_buffer(struct platform_device *pdev, + struct pva_elf_buffer *buf) +{ + if (buf->localbuffer != NULL) { + kfree(buf->localbuffer); + buf->localbuffer = NULL; + buf->localsize = 0; + buf->num_segments = 0; + } + if (buf->pa != 0U) { + dma_free_coherent(&pdev->dev, + buf->alloc_size, buf->alloc_va, + buf->alloc_pa); + } +} + +static void +vpu_bin_clean(struct nvpva_elf_context *d, + struct pva_elf_image *elf_img) +{ + size_t i; + + if (elf_img == NULL) + return; + + /* Initialize vpu_bin_buffer */ + pva_elf_free_buffer(d->dev->aux_pdev, &elf_img->vpu_bin_buffer); + + pva_elf_free_buffer(d->dev->aux_pdev, &elf_img->vpu_data_segment_info); + + /* Initiaize VPU segments buffer */ + for (i = 0; i < PVA_SEG_VPU_MAX_TYPE; i++) + pva_elf_free_buffer(d->cntxt_dev, + &elf_img->vpu_segments_buffer[i]); + + /* clean up symbols */ + for (i = 0; i < elf_img->num_symbols; i++) + kfree(elf_img->sym[i].symbol_name); + + /* Clean elf img and set everything to 0 */ + memset(elf_img, 0, sizeof(struct pva_elf_image)); +} + +static int32_t pva_get_vpu_app_id(struct nvpva_elf_context *d, + uint16_t *exe_id, + bool is_system_app) +{ + int32_t ret = 0; + uint16_t index = 0; + struct pva_elf_images *images; + struct pva_elf_image **image; + int32_t alloc_size; + + mutex_lock(&d->elf_mutex); + images = d->elf_images; + image = &images->elf_img[images->num_allocated / ALOC_SEGMENT_SIZE]; + + if (images->num_assigned >= MAX_NUM_VPU_EXE) { + pr_err("No space for more VPU binaries"); + ret = -ENOMEM; + goto out; + } + + if (images->num_assigned >= images->num_allocated) { + alloc_size = ALOC_SEGMENT_SIZE * sizeof(struct pva_elf_image); + *image = kzalloc(alloc_size, GFP_KERNEL); + if (*image == NULL) { + ret = -ENOMEM; + goto out; + } + + images->num_allocated += ALOC_SEGMENT_SIZE; + } + + + index = rmos_find_first_zero_bit(d->elf_images->alloctable, + MAX_NUM_VPU_EXE); + if (index == MAX_NUM_VPU_EXE) { + pr_err("No space for more VPU binaries"); + ret = -ENOMEM; + goto out; + } + + *exe_id = index; + rmos_set_bit32((index%32), &d->elf_images->alloctable[index/32U]); + ++(images->num_assigned); + get_elf_image(d, *exe_id)->elf_id = *exe_id; + get_elf_image(d, *exe_id)->is_system_app = is_system_app; +out: + mutex_unlock(&d->elf_mutex); + return ret; +} + +int32_t +pva_unload_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked) +{ + int32_t err = 0; + struct pva_elf_images *images; + + images = d->elf_images; + + if (!locked) + mutex_lock(&d->elf_mutex); + + if (exe_id >= MAX_NUM_VPU_EXE) { + err = -EINVAL; + goto out; + } + + if (!rmos_test_bit32((exe_id%32), &images->alloctable[exe_id/32])) { + err = -EINVAL; + goto out; + } + + vpu_bin_clean(d, get_elf_image(d, exe_id)); + rmos_clear_bit32((exe_id%32), &images->alloctable[exe_id/32]); + --(images->num_assigned); +out: + if (!locked) + mutex_unlock(&d->elf_mutex); + + 
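Executable slots are tracked two ways at once: the alloctable bitmap (32 IDs per u32 word) records which exe_ids are in use, and the backing struct pva_elf_image storage grows lazily in ALOC_SEGMENT_SIZE-sized arrays, so an exe_id maps to (segment, slot) = (id / 32, id % 32); get_elf_image() in pva_vpu_exe.h performs exactly this lookup. A small worked example (the exe_id value is arbitrary):

/* Illustrative: exe_id 70 -> image segment 2, slot 6; bitmap word 2, bit 6. */
static bool exe_id_in_use(struct nvpva_elf_context *d, u16 exe_id)
{
	u32 word = exe_id / 32U;	/* 70 / 32 = 2 */
	u32 bit  = exe_id % 32U;	/* 70 % 32 = 6 */

	return rmos_test_bit32(bit, &d->elf_images->alloctable[word]);
}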
return err; +} + +int32_t +pva_get_sym_tab_size(struct nvpva_elf_context *d, + uint16_t exe_id, + u64 *tab_size) +{ + struct pva_elf_image *image; + u32 number_of_symbols; + + image = get_elf_image(d, exe_id); + if (image == NULL) + return -EINVAL; + + number_of_symbols = image->num_symbols - image->num_sys_symbols; + *tab_size = number_of_symbols * sizeof(struct nvpva_sym_info); + + return 0; +} + +int32_t +pva_get_sym_tab(struct nvpva_elf_context *d, + uint16_t exe_id, + struct nvpva_sym_info *sym_tab) + +{ + u32 i; + struct pva_elf_image *image; + + image = get_elf_image(d, exe_id); + if (image == NULL) + return -EINVAL; + + for (i = 0; i < image->num_symbols; i++) { + if (image->sym[i].is_sys) + continue; + memcpy(sym_tab->sym_name, + image->sym[i].symbol_name, + NVPVA_SYM_NAME_MAX_LEN); + sym_tab->sym_size = image->sym[i].size; + sym_tab->sym_type = image->sym[i].type; + sym_tab->sym_id = image->sym[i].symbolID; + ++sym_tab; + } + + return 0; +} +int32_t pva_get_sym_info(struct nvpva_elf_context *d, uint16_t vpu_exe_id, + const char *sym_name, struct pva_elf_symbol *symbol) +{ + struct pva_elf_image *elf; + uint32_t i; + int32_t err = 0; + size_t strSize = strnlen(sym_name, ELF_MAX_SYMBOL_LENGTH); + + elf = get_elf_image(d, vpu_exe_id); + for (i = 0; i < elf->num_symbols; i++) { + if (strncmp(sym_name, elf->sym[i].symbol_name, strSize) == 0) { + symbol->symbolID = elf->sym[i].symbolID; + symbol->size = elf->sym[i].size; + symbol->type = elf->sym[i].type; + break; + } + } + if (i == elf->num_symbols) + err = -EINVAL; + + return err; +} + +int32_t +pva_release_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked) +{ + int32_t err = 0; + struct pva_elf_image *image = NULL; + + image = get_elf_image(d, exe_id); + if (image != NULL && image->user_registered == true) { + image->user_registered = false; + if (atomic_read(&image->submit_refcount) <= 0) + (void)pva_unload_vpu_app(d, exe_id, locked); + } else { + err = -EINVAL; + } + + return err; +} + +int32_t pva_task_release_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id) +{ + int32_t err = 0; + struct pva_elf_image *image = NULL; + + if (exe_id == NVPVA_NOOP_EXE_ID) + goto out; + + image = get_elf_image(d, exe_id); + if (image == NULL) { + err = -EINVAL; + goto out_err; + } + + atomic_sub(1, &image->submit_refcount); + if ((atomic_read(&image->submit_refcount) <= 0) && + (image->user_registered == false)) + (void)pva_unload_vpu_app(d, exe_id, false); +out_err: +out: + return err; +} + +int32_t pva_task_acquire_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id) +{ + int32_t err = 0; + struct pva_elf_image *image = get_elf_image(d, exe_id); + + if (image != NULL) + (void)atomic_add(1, &image->submit_refcount); + else + err = -EINVAL; + + return err; +} + +int32_t pva_load_vpu_app(struct nvpva_elf_context *d, uint8_t *buffer, + size_t size, uint16_t *exe_id, + bool is_system_app, int hw_gen) +{ + void *elf = NULL; + int32_t err = 0; + uint16_t assigned_exe_id = 0; + struct pva_elf_image *image = NULL; + struct pva *pva = d->dev; + struct device *dev = &pva->pdev->dev; + + err = validate_vpu((void *)buffer, size); + if (err < 0) { + dev_err(dev, "Not valid elf or null elf"); + goto out; + } + err = pva_get_vpu_app_id(d, &assigned_exe_id, is_system_app); + if (err) { + dev_err(dev, "Unable to get valid VPU id"); + goto out; + } + elf = (void *)buffer; + image = get_elf_image(d, assigned_exe_id); + err = populate_symtab(elf, d, assigned_exe_id, pva->version); + if (err) { + dev_err(dev, "Populating symbol table 
failed"); + err = -EINVAL; + goto out_elf_end; + } + err = populate_segments(elf, image, hw_gen); + if (err) { + dev_err(dev, "Populating segments failed"); + err = -EINVAL; + goto out_elf_end; + } + err = write_bin_info(d, image); + if (err) { + dev_err(dev, "Writing bin_info failed"); + err = -EINVAL; + goto out_elf_end; + } + err = pva_vpu_bin_info_allocate(pva, image); + if (err) { + dev_err(dev, "Allocating bin info failed"); + err = -EINVAL; + goto out_elf_end; + } + *exe_id = assigned_exe_id; + image->user_registered = true; + (void)atomic_set(&image->submit_refcount, 0); +out_elf_end: + if (err) + pva_unload_vpu_app(d, assigned_exe_id, false); + +out: + return err; +} + +void pva_unload_all_apps(struct nvpva_elf_context *d) +{ + uint32_t elf_alloc_table = 0U; + uint32_t id = 0U; + uint32_t i; + + mutex_lock(&d->elf_mutex); + for (i = 0; i < NUM_ALLOC_SEGMENTS; i++) { + elf_alloc_table = d->elf_images->alloctable[i]; + while (elf_alloc_table != 0U) { + id = rmos_get_first_set_bit(elf_alloc_table); + (void)pva_release_vpu_app(d, (i * 32 + id), true); + rmos_clear_bit32(id, &elf_alloc_table); + } + + d->elf_images->alloctable[i] = 0; + } + mutex_unlock(&d->elf_mutex); +} + +void pva_vpu_deinit(struct nvpva_elf_context *d) +{ + int32_t i; + int32_t allocated_segments; + struct pva_elf_images *images = d->elf_images; + + if (d->elf_images == NULL) + goto out; + + pva_unload_all_apps(d); + allocated_segments = (images->num_allocated/ALOC_SEGMENT_SIZE); + for (i = 0; i < allocated_segments; i++) { + if (images->elf_img[i] != NULL) { + kfree(images->elf_img[i]); + images->elf_img[i] = NULL; + } + } + + d->elf_images->num_allocated = 0; + d->elf_images->num_assigned = 0; + + kfree(d->elf_images); + d->elf_images = NULL; + mutex_destroy(&d->elf_mutex); +out: + return; +} + +int32_t pva_vpu_init(struct pva *dev, struct nvpva_elf_context *d) +{ + int32_t err = 0; + int32_t alloc_size; + + d->dev = dev; + d->elf_images = kzalloc(sizeof(struct pva_elf_images), GFP_KERNEL); + if (d->elf_images == NULL) { + err = -ENOMEM; + goto fail_elf_img_init; + } + + d->elf_images->num_allocated = 0; + d->elf_images->num_assigned = 0; + memset(d->elf_images->elf_img, 0, sizeof(d->elf_images->elf_img)); + alloc_size = ALOC_SEGMENT_SIZE * sizeof(struct pva_elf_image); + d->elf_images->elf_img[0] = kzalloc(alloc_size, GFP_KERNEL); + if (d->elf_images->elf_img[0] == NULL) { + err = -ENOMEM; + kfree(d->elf_images); + goto fail_elf_img_init; + } + + d->elf_images->num_allocated = ALOC_SEGMENT_SIZE; + mutex_init(&d->elf_mutex); + +fail_elf_img_init: + + return err; +} +struct vmem_region { + uint32_t start; + uint32_t end; +}; + +struct vmem_region vmem_regions_tab[NUM_HEM_GEN + 1][VMEM_REGION_COUNT] = { + {{.start = 0, .end = 0}, + {.start = 0, .end = 0}, + {.start = 0, .end = 0}}, + {{.start = T19X_VMEM0_START, .end = T19X_VMEM0_END}, + {.start = T19X_VMEM1_START, .end = T19X_VMEM1_END}, + {.start = T19X_VMEM2_START, .end = T19X_VMEM2_END}}, + {{.start = T23x_VMEM0_START, .end = T23x_VMEM0_END}, + {.start = T23x_VMEM1_START, .end = T23x_VMEM1_END}, + {.start = T23x_VMEM2_START, .end = T23x_VMEM2_END}}, +}; + +int32_t +nvpva_validate_vmem_offset(const uint32_t vmem_offset, + const uint32_t size, + const int hw_gen) +{ + + int i; + int32_t err = -EINVAL; + + if (hw_gen < 0 || hw_gen > NUM_HEM_GEN) { + pr_err("invalid hw_gen index: %d", hw_gen); + return err; + } + + for (i = VMEM_REGION_COUNT; i > 0; i--) { + if (vmem_offset >= vmem_regions_tab[hw_gen][i-1].start) + break; + } + + if ((i > 0) && ((vmem_offset + 
size) <= vmem_regions_tab[hw_gen][i-1].end)) + err = 0; + + return err; +} diff --git a/drivers/video/tegra/host/pva/pva_vpu_exe.h b/drivers/video/tegra/host/pva/pva_vpu_exe.h new file mode 100644 index 00000000..2ef06734 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_exe.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef NVPVA_VPU_APP_H +#define NVPVA_VPU_APP_H +#include +#include "pva-ucode-header.h" +#include "pva-sys-params.h" +#include "pva-task.h" +#include +#include +#include "pva-bit.h" + +#define ELF_MAX_SYMBOL_LENGTH 64 +#define MAX_NUM_VPU_EXE 65535U +#define ALOC_SEGMENT_SIZE 32U +#define NUM_ALLOC_SEGMENTS ((MAX_NUM_VPU_EXE + 1)/ALOC_SEGMENT_SIZE) + +/** + * enum to identify different types of symbols + */ +enum pva_elf_symbol_type { + /**< Symbol type Invalid */ + VMEM_TYPE_INVALID, + /**< Symbol type Data */ + VMEM_TYPE_DATA, + /**< Symbol type VPU Config Table */ + VMEM_TYPE_VPUC_TABLE, + /**< Symbol type Pointer */ + VMEM_TYPE_POINTER, + /**< Symbol type System */ + VMEM_TYPE_SYSTEM, + /** Symbol type Pointer Extension */ + VMEM_TYPE_POINTER_EX, + /** Symbol type Invalid */ + VMEM_TYPE_MAX +}; + +/** + * enum to identify different segments of VPU ELF + */ +enum pva_elf_seg_type { + /**< Code segment in VPU ELF */ + PVA_SEG_VPU_CODE = 0U, + /**< DATA segment in VPU ELF */ + PVA_SEG_VPU_DATA, + /**< DATA segment in VPU ELF containing symbol information*/ + PVA_SEG_VPU_IN_PARAMS, + /**< Not a valid segment in VPU ELF */ + PVA_SEG_VPU_MAX_TYPE +}; + +/** + * Structure that holds buffer and handles shared with FW + */ +struct pva_elf_buffer { + /**< Aligned size of allocated buffer */ + size_t size; + /**< IOVA address if allocated buffer */ + dma_addr_t pa; + /**< Virtual address of allocated buffer */ + void *va; + + /*original value came out of allocator*/ + size_t alloc_size; + dma_addr_t alloc_pa; + void *alloc_va; + + /*< Local buffer holding data to be copied to allocated buffer. 
+ * May undergo resizing + */ + uint8_t *localbuffer; + /**< Unaligned size of local buffer */ + uint32_t localsize; + /**< Number of segments */ + uint32_t num_segments; +}; + +/* + * Store elf symbols information + */ +struct pva_elf_symbol { + char *symbol_name; + /**elf_images->alloctable[(exe_id/32)] >> (exe_id%32)) & 1U); +} + +static inline +struct pva_elf_image *get_elf_image(struct nvpva_elf_context *d, + uint16_t exe_id) +{ + struct pva_elf_image *image = NULL; + u32 segment; + u32 index; + + segment = exe_id / ALOC_SEGMENT_SIZE; + index = exe_id % ALOC_SEGMENT_SIZE; + + if ((d->elf_images->elf_img[segment] != NULL) + && (pva_vpu_elf_is_registered(d, exe_id))) + image = &d->elf_images->elf_img[segment][index]; + + return image; +} + +/** + * Load VPU APP elf file + * + * @param d Pointer to the Elf Context + * @param *buffer Buffer containing the VPU APP elf file + * @param size Size of the VPU APP elf file + * @param *exe_id ID assigned to the VPU APP in KMD filled + * by this function + * @param hwid HWID associated with the VPU APP used for + * allocation + * + * @return 0 if everything is valid and VPU APP is + * loaded successfully + */ +int32_t +pva_load_vpu_app(struct nvpva_elf_context *d, uint8_t *buffer, + size_t size, uint16_t *exe_id, + bool is_system_app, int hw_gen); + +/** + * Unload VPU APP elf file + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be unloaded + * + * @return 0 if successful + */ +int32_t +pva_unload_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked); + +/** + * Unload all VPU APP elf files associated with the given ELF context + * + * @param d Pointer to the Elf Context + * + */ +void pva_unload_all_apps(struct nvpva_elf_context *d); + +/** + * Get reference to a vpu app for task + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be referenced + * + * @return 0 if successful + */ +int32_t pva_task_acquire_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id); + +/** + * Unref VPU APP elf file from user side + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be unreferenced + * + * @return 0 if successful + */ +int32_t +pva_release_vpu_app(struct nvpva_elf_context *d, uint16_t exe_id, bool locked); + +/** + * Unref VPU APP elf file from task side + * + * @param d Pointer to the Elf Context + * @param exe_id Unique ID of VPU APP to be unreferenced + * + * @return 0 if successful + */ +int32_t pva_task_release_ref_vpu_app(struct nvpva_elf_context *d, + uint16_t exe_id); + +/** + * Deinitialize and Deallocate memory for VPU parsing + * + * @param Pointer to the Elf Context + * + * @return Void + */ +void pva_vpu_deinit(struct nvpva_elf_context *d); + +/** + * Initialize memory for VPU Parsing + * + * @param Pointer to the Elf Context + * + * @return 0 if no errors encountered + */ +int32_t pva_vpu_init(struct pva *dev, struct nvpva_elf_context *d); + +void print_segments_info(struct pva_elf_image *elf_img); + +int32_t +nvpva_validate_vmem_offset(const uint32_t vmem_offset, + const uint32_t size, + const int hw_gen); +int32_t +pva_get_sym_tab_size(struct nvpva_elf_context *d, + uint16_t exe_id, + u64 *tab_size); +int32_t +pva_get_sym_tab(struct nvpva_elf_context *d, + uint16_t exe_id, + struct nvpva_sym_info *sym_tab); +#endif diff --git a/drivers/video/tegra/host/pva/pva_vpu_ocd.c b/drivers/video/tegra/host/pva/pva_vpu_ocd.c new file mode 100644 index 00000000..7eeb7e32 --- /dev/null +++ 
b/drivers/video/tegra/host/pva/pva_vpu_ocd.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include "pva.h" +#include "pva_vpu_ocd.h" + +#define PVA_DEBUG_APERTURE_INDEX 1 +#define VPU_OCD_MAX_NUM_DATA_ACCESS 7 + +static void block_writel(struct pva_vpu_dbg_block *block, u32 offset, u32 val) +{ + void __iomem *addr = block->vbase + offset; + + writel(val, addr); +} + +static u32 block_readl(struct pva_vpu_dbg_block *block, u32 offset) +{ + void __iomem *addr = block->vbase + offset; + + return readl(addr); +} + +static int init_vpu_dbg_block(struct pva *pva, struct pva_vpu_dbg_block *block, + u32 offset) +{ + struct nvhost_device_data *pdata = platform_get_drvdata(pva->pdev); + void __iomem *aperture = pdata->aperture[PVA_DEBUG_APERTURE_INDEX]; + + if (aperture == NULL) + return -EINVAL; + + block->vbase = aperture + offset; + return 0; +} + +int pva_vpu_ocd_init(struct pva *pva) +{ + u32 i; + int err; + const phys_addr_t vpu_dbg_offsets[NUM_VPU_BLOCKS] = { 0x00050000, + 0x00070000 }; + + for (i = 0; i < NUM_VPU_BLOCKS; i++) { + err = init_vpu_dbg_block(pva, &pva->vpu_dbg_blocks[i], + vpu_dbg_offsets[i]); + if (err != 0) + return err; + } + return 0; +} + +int pva_vpu_ocd_io(struct pva_vpu_dbg_block *block, u32 instr, const u32 *wdata, + u32 nw, u32 *rdata, u32 nr) +{ + u32 i; + + if ((nr > VPU_OCD_MAX_NUM_DATA_ACCESS) || + (nw > VPU_OCD_MAX_NUM_DATA_ACCESS)) { + pr_err("pva: too many vpu dbg reg read (%u) or write (%u)\n", + nr, nw); + return -EINVAL; + } + + /* write instruction first */ + block_writel(block, 0, instr); + + /* + * write data + * if there's 1 word, write to addr 0x4, + * if there's 2 words, write to addr 2 * 0x4, + * ... + */ + for (i = 0; i < nw; i++) + block_writel(block, nw * sizeof(u32), wdata[i]); + + /* + * read data + * if there's 1 word, read from addr 0x4, + * if there's 2 words, read from addr 2 * 0x4, + * ... + */ + for (i = 0; i < nr; i++) + rdata[i] = block_readl(block, nr * sizeof(u32)); + + return 0; +} diff --git a/drivers/video/tegra/host/pva/pva_vpu_ocd.h b/drivers/video/tegra/host/pva/pva_vpu_ocd.h new file mode 100644 index 00000000..99d18ab4 --- /dev/null +++ b/drivers/video/tegra/host/pva/pva_vpu_ocd.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
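As an aside (illustrative only, not part of the patch): the addressing scheme described in the comments of pva_vpu_ocd_io() above can be exercised with a small driver-side helper. In the sketch below the instruction word 0x1, the payload value, and the choice of VPU debug block 0 are placeholders, not values taken from the driver.

#include "pva.h"
#include "pva_vpu_ocd.h"

/*
 * Illustrative sketch: issue one OCD instruction with a single data word
 * and read one word back through a debug block mapped by pva_vpu_ocd_init().
 */
static int example_ocd_roundtrip(struct pva *pva)
{
	struct pva_vpu_dbg_block *blk = &pva->vpu_dbg_blocks[0];
	u32 wdata = 0xdeadbeef;	/* placeholder payload for the write phase */
	u32 rdata = 0;		/* filled by the read phase */
	int err;

	/* one instruction word, one data write, one data read */
	err = pva_vpu_ocd_io(blk, 0x1, &wdata, 1, &rdata, 1);
	if (err)
		return err;

	pr_info("pva: ocd readback 0x%08x\n", rdata);
	return 0;
}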
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PVA_VPU_OCD_H +#define PVA_VPU_OCD_H +#include +#include "pva.h" + +int pva_vpu_ocd_init(struct pva *pva); +int pva_vpu_ocd_io(struct pva_vpu_dbg_block *block, u32 instr, const u32 *wdata, + u32 nw, u32 *rdata, u32 nr); +#endif // PVA_VPU_OCD_H diff --git a/include/trace/events/nvhost_pva.h b/include/trace/events/nvhost_pva.h new file mode 100644 index 00000000..3a243dd5 --- /dev/null +++ b/include/trace/events/nvhost_pva.h @@ -0,0 +1,254 @@ +/* + * Nvhost event logging to ftrace. + * + * Copyright (c) 2017-2022, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nvhost_pva + +#if !defined(_TRACE_NVHOST_PVA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NVHOST_PVA_H + +#include + + +TRACE_EVENT(nvhost_pva_write, + + TP_PROTO( + u64 delta_time, + const char *name, + u8 major, + u8 minor, + u8 flags, + u8 sequence, + u32 arg1, + u32 arg2 + ), + + TP_ARGS( + delta_time, + name, + major, + minor, + flags, + sequence, + arg1, + arg2 + ), + + TP_STRUCT__entry( + __field(u64, delta_time) + __field(const char *, name) + __field(u8, major) + __field(u8, minor) + __field(u8, flags) + __field(u8, sequence) + __field(u32, arg1) + __field(u32, arg2) + ), + + TP_fast_assign( + __entry->delta_time = delta_time; + __entry->name = name; + __entry->major = major; + __entry->minor = minor; + __entry->flags = flags; + __entry->sequence = sequence; + __entry->arg1 = arg1; + __entry->arg2 = arg2; + ), + + TP_printk("time: %llu\t %s\t major: 0x%x\tminor: 0x%x\tflags: 0x%x\t" + "sequence: 0x%x\targ1: %u\targ2: %u", + __entry->delta_time, __entry->name, __entry->major, + __entry->minor, __entry->flags, __entry->sequence, + __entry->arg1, __entry->arg2) +); + +TRACE_EVENT(nvhost_pva_task_stats, + + TP_PROTO( + const char *name, + u64 queued_time, + u64 head_time, + u64 input_actions_complete, + u64 vpu_assigned_time, + u64 vpu_start_time, + u64 vpu_complete_time, + u64 complete_time, + u8 vpu_assigned, + u64 r5_overhead + ), + + TP_ARGS( + name, + queued_time, + head_time, + input_actions_complete, + vpu_assigned_time, + vpu_start_time, + vpu_complete_time, + complete_time, + vpu_assigned, + r5_overhead + ), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u64, queued_time) + __field(u64, head_time) + __field(u64, input_actions_complete) + __field(u64, vpu_assigned_time) + __field(u64, vpu_start_time) + __field(u64, vpu_complete_time) + __field(u64, complete_time) + __field(u8, vpu_assigned) + __field(u64, r5_overhead) + ), + + TP_fast_assign( + __entry->name = name; + __entry->queued_time = queued_time; + __entry->head_time = head_time; + __entry->input_actions_complete = input_actions_complete; + 
__entry->vpu_assigned_time = vpu_assigned_time; + __entry->vpu_start_time = vpu_start_time; + __entry->vpu_complete_time = vpu_complete_time; + __entry->complete_time = complete_time; + __entry->vpu_assigned = vpu_assigned; + __entry->r5_overhead = r5_overhead; + ), + + TP_printk("%s\tqueued_time: %llu\thead_time: %llu\t" + "input_actions_complete: %llu\tvpu_assigned_time: %llu\t" + "vpu_start_time: %llu\tvpu_complete_time: %llu\t" + "complete_time: %llu\tvpu_assigned: %d\t" + "r5_overhead: %llu us", + __entry->name, __entry->queued_time, __entry->head_time, + __entry->input_actions_complete, __entry->vpu_assigned_time, + __entry->vpu_start_time, __entry->vpu_complete_time, + __entry->complete_time, __entry->vpu_assigned, + __entry->r5_overhead) +); + +TRACE_EVENT(nvhost_pva_task_vpu_perf, + + TP_PROTO( + const char *name, + u32 index, + u32 count, + u32 sum, + u64 sum_squared, + u32 min, + u32 max + ), + + TP_ARGS( + name, + index, + count, + sum, + sum_squared, + min, + max + ), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, index) + __field(u32, count) + __field(u32, sum) + __field(u64, sum_squared) + __field(u32, min) + __field(u32, max) + ), + + TP_fast_assign( + __entry->name = name; + __entry->index = index; + __entry->count = count; + __entry->sum = sum; + __entry->sum_squared = sum_squared; + __entry->min = min; + __entry->max = max; + ), + + TP_printk("%s\tindex: %u\tcount: %u\taverage: %u\t" + "variance: %llu\tminimum: %u\t" + "maximum: %u", + __entry->name, __entry->index, __entry->count, + __entry->sum / __entry->count, + ((u64)__entry->count * __entry->sum_squared - + (u64)__entry->sum * (u64)__entry->sum) + / __entry->count / __entry->count, + __entry->min, __entry->max) +); + +TRACE_EVENT(nvhost_pva_task_timestamp, + + TP_PROTO( + const char *name, + u32 class, + u32 syncpoint_id, + u32 syncpoint_thresh, + u64 start_time, + u64 end_time + ), + + TP_ARGS( + name, + class, + syncpoint_id, + syncpoint_thresh, + start_time, + end_time + ), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, class) + __field(u32, syncpoint_id) + __field(u32, syncpoint_thresh) + __field(u64, start_time) + __field(u64, end_time) + ), + + TP_fast_assign( + __entry->name = name; + __entry->class = class; + __entry->syncpoint_id = syncpoint_id; + __entry->syncpoint_thresh = syncpoint_thresh; + __entry->start_time = start_time; + __entry->end_time = end_time; + ), + + TP_printk("name=%s, class=0x%02x, syncpoint_id=%u, syncpoint_thresh=%u, start_time=%llu, end_time=%llu", + __entry->name, __entry->class, __entry->syncpoint_id, __entry->syncpoint_thresh, + __entry->start_time, __entry->end_time) +); + +#endif /* _TRACE_NVHOST_PVA_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../include/trace/events + +#define TRACE_INCLUDE_FILE nvhost_pva +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/nvpva_ioctl.h b/include/uapi/linux/nvpva_ioctl.h new file mode 100644 index 00000000..6e53abee --- /dev/null +++ b/include/uapi/linux/nvpva_ioctl.h @@ -0,0 +1,611 @@ +/* + * Tegra PVA Driver ioctls + * + * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
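A note on the arithmetic in the nvhost_pva_task_vpu_perf event above (illustrative, not part of the patch): the printed variance is the population variance recovered from running sums, var = (count * sum_squared - sum^2) / count^2, evaluated with the same integer-division order as the TP_printk(). For example, samples {2, 4, 6} give count = 3, sum = 12, sum_squared = 56, so 3 * 56 - 144 = 24 and 24 / 3 / 3 = 2 after truncation (the exact value is 8/3). The helper below simply mirrors that computation.

#include <linux/types.h>

/*
 * Illustrative only -- mirrors the arithmetic used by the TP_printk() of
 * nvhost_pva_task_vpu_perf: population variance from running sums, with
 * the same integer-division order as the trace output.
 */
static inline u64 pva_perf_variance(u32 count, u32 sum, u64 sum_squared)
{
	if (count == 0)
		return 0;	/* the trace macro assumes a non-zero count */

	return ((u64)count * sum_squared - (u64)sum * (u64)sum)
		/ count / count;
}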
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; If not, see . + */ + +#ifndef __NVPVA_IOCTL_H__ +#define __NVPVA_IOCTL_H__ + +#include +#include + +#define NVPVA_DEVICE_NODE "/dev/nvhost-ctrl-pva" +/** + * Maximum length of the name of a symbol in a VPU ELF + */ +#define NVPVA_SYM_NAME_MAX_LEN 64U + +/** + * Invalid symbol ID + */ +#define NVPVA_INVALID_SYMBOL_ID 0xFFFF + +/* + * PVA specific error code + */ + +#define NVPVA_ENOSLOT 102 + +struct nvpva_ioctl_part { + uint64_t addr; + uint64_t size; +}; + +/* + * VPU REGISTER UNREGISTER command details + */ + +struct nvpva_vpu_exe_register_in_arg { + struct nvpva_ioctl_part exe_data; +}; + +/* IOCTL magic number - seen available in ioctl-number.txt */ +struct nvpva_vpu_exe_register_out_arg { + /* Exe id assigned by KMD for the executable */ + uint16_t exe_id; + /* Number of symbols */ + uint32_t num_of_symbols; + /* Total size of symbols in executable */ + uint32_t symbol_size_total; +}; + +union nvpva_vpu_exe_register_args { + struct nvpva_vpu_exe_register_in_arg in; + struct nvpva_vpu_exe_register_out_arg out; +}; + +struct nvpva_vpu_exe_unregister_in_arg { + /* Exe id assigned by KMD for the executable */ + uint16_t exe_id; +}; + +union nvpva_vpu_exe_unregister_args { + struct nvpva_vpu_exe_unregister_in_arg in; +}; + +enum nvpva_vpu_elf_symbol_type_e { + /** Symbol type Invalid */ + NVPVA_SYMBOL_TYPE_INVALID = 0U, + /** Symbol type Data */ + NVPVA_SYMBOL_TYPE_DATA = 1U, + /** Symbol type VPU Config Table */ + NVPVA_SYMBOL_TYPE_VPUC_TABLE = 2U, + /** Symbol type Pointer */ + NVPVA_SYMBOL_TYPE_POINTER = 3U, + /** Symbol type System */ + NVPVA_SYMBOL_TYPE_SYSTEM = 4U, + /** Symbol type Pointer which uses extended address apace */ + NVPVA_SYMBOL_TYPE_POINTER_EX = 5U, + /** Symbol type upper limit */ + NVPVA_SYMBOL_TYPE_MAX = 6U +}; +/* + * VPU SYMBOL command details + */ + +struct nvpva_symbol { + uint32_t size; + uint16_t id; + /* 1 = true; 0 = false */ + uint8_t isPointer; +}; + +struct nvpva_sym_info { + /** Null-terminated string indicating the name of the symbol */ + char sym_name[NVPVA_SYM_NAME_MAX_LEN]; + /** Size (in bytes) of the symbol */ + uint32_t sym_size; + /** Registered ID of the symbol*/ + uint16_t sym_id; + /** Type of the symbol */ + uint8_t sym_type; +}; + +struct nvpva_get_symbol_in_arg { + uint16_t exe_id; + struct nvpva_ioctl_part name; /*size including null*/ +}; + +struct nvpva_get_symbol_out_arg { + struct nvpva_symbol symbol; +}; + +union nvpva_get_symbol_args { + struct nvpva_get_symbol_in_arg in; + struct nvpva_get_symbol_out_arg out; +}; + +struct nvpva_get_sym_tab_in_arg { + uint16_t exe_id; + struct nvpva_ioctl_part tab; +}; + +union nvpva_get_sym_tab_args { + struct nvpva_get_sym_tab_in_arg in; +}; + +/* + * PIN UNPIN command details + */ + +enum nvpva_pin_segment { + NVPVA_SEGMENT_PRIV = 1U, + NVPVA_SEGMENT_USER = 2U, + NVPVA_SEGMENT_CVSRAM = 3U, + NVPVA_SEGMENT_MAX +}; + +enum nvpva_pin_buf { + NVPVA_BUFFER_GEN = 0U, + NVPVA_BUFFER_SEM = 1U, +}; + +enum nvpva_pin_access { + NVPVA_ACCESS_RD = 1U, + NVPVA_ACCESS_WR = 2U, + NVPVA_ACCESS_RW = 3U, +}; + +struct nvpva_pin_handle { + uint64_t offset; + uint64_t size; + int32_t handle; + uint32_t access; + uint32_t segment; + uint32_t type; +}; + +struct 
nvpva_pin_in_arg { + struct nvpva_pin_handle pin; +}; + +struct nvpva_pin_out_arg { + uint32_t pin_id; /* Unique ID assigned by KMD for the Pin */ + uint32_t error_code; +}; + +union nvpva_pin_args { + struct nvpva_pin_in_arg in; + struct nvpva_pin_out_arg out; +}; + +struct nvpva_unpin_in_arg { + uint32_t pin_id; +}; + +union nvpva_unpin_args { + struct nvpva_unpin_in_arg in; +}; + +/* + * TASK SUBMIT command details + */ + +enum nvpva_flags { + NVPVA_AFFINITY_VPU0 = 1U, + NVPVA_AFFINITY_VPU1 = 1U << 1U, + NVPVA_AFFINITY_VPU_ANY = NVPVA_AFFINITY_VPU0 | NVPVA_AFFINITY_VPU1, + NVPVA_PRE_BARRIER_TASK_TRUE = 1U << 2U, + NVPVA_ERR_MASK_ILLEGAL_INSTR = 1U << 3U, + NVPVA_ERR_MASK_DIVIDE_BY_0 = 1U << 4U, + NVPVA_ERR_MASK_FP_NAN = 1U << 5U, + NVPVA_GR_CHECK_EXE_FLAG = 1U << 6U +}; + +enum nvpva_fence_action_type { + NVPVA_FENCE_PRE = 1U, + NVPVA_FENCE_SOT_R5 = 2U, + NVPVA_FENCE_SOT_VPU = 3U, + NVPVA_FENCE_EOT_VPU = 4U, + NVPVA_FENCE_EOT_R5 = 5U, + NVPVA_FENCE_POST = 6U, + NVPVA_MAX_FENCE_TYPES = 7U, +}; + +enum nvpva_fence_obj_type { + NVPVA_FENCE_OBJ_SYNCPT = 0U, + NVPVA_FENCE_OBJ_SEM = 1U, + /* Below types are not being used in QNX KMD for now */ + NVPVA_FENCE_OBJ_SEMAPHORE_TS = 2U, + NVPVA_FENCE_OBJ_SYNC_FD = 3U, +}; + +enum nvpva_symbol_config { + NVPVA_SYMBOL_PARAM = 0U, + NVPVA_SYMBOL_POINTER = 1U, + NVPVA_SYMBOL_POINTER_EX = 2U, +}; + +enum nvpva_hwseq_trigger_mode { + NVPVA_HWSEQTM_VPUTRIG = 0U, + NVPVA_HWSEQTM_DMATRIG = 1U, +}; + +enum nvpva_system_test_id { + NVPVA_STRESS_POWER = 0U, + NVPVA_STRESS_POWER_DIDT = 1U, + NVPVA_STRESS_TIMING = 2U, + NVPVA_MAX_TEST_ID = 2U, +}; + +#define NVPVA_MEM_REGISTERED_SIZE (0U) +struct nvpva_mem { + uint32_t pin_id; + uint32_t offset; + /* size=NVPVA_MEM_REGISTERED_SIZE + *considered as entire pinned area + */ + uint32_t size; +}; + +struct nvpva_fence_obj_syncpt { + uint32_t id; + uint32_t value; +}; + +struct nvpva_fence_obj_sem { + struct nvpva_mem mem; + uint32_t value; +}; + +struct nvpva_fence_obj_syncfd { + uint32_t fd; +}; + +union nvpva_fence_obj { + struct nvpva_fence_obj_syncpt syncpt; + struct nvpva_fence_obj_sem sem; + struct nvpva_fence_obj_syncfd syncfd; +}; + +struct nvpva_submit_fence { + uint32_t type; + uint32_t reserved; + union nvpva_fence_obj obj; +}; + +struct nvpva_fence_action { + uint32_t type; + uint32_t reserved; + /* For syncpt, ID is the per-queue ID allocated by KMD */ + struct nvpva_submit_fence fence; + /* Buffer to capture event timestamp */ + struct nvpva_mem timestamp_buf; +}; + +struct nvpva_pointer_symbol { + /* Base address of pinned area, where + * lower 32bits filled with pin_id by UMD and + * at KMD will replace it with actual base address. + */ + uint64_t base; + /* Offset in pinned area */ + uint32_t offset; + /* Size of pinned area, filled by KMD */ + uint32_t size; +}; + +struct nvpva_pointer_symbol_ex { + /* Base address of pinned area, where + * lower 32bits filled with pin_id by UMD and + * at KMD will replace it with actual base address. + */ + uint64_t base; + /* Offset in pinned area */ + uint64_t offset; + /* Size of pinned area, filled by KMD */ + uint64_t size; +}; + +/* Used to pass both param and pointer type symbols. + * Based on nvpva_symbol_config selection the data in payload + * pointed by offset will differ. + * For NVPVA_SYMBOL_PARAM, payload data is raw data. + * For NVPVA_SYMBOL_POINTER, data is of type nvpva_pointer_symbol. 
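To make the base/pin_id convention described above concrete, here is a hedged user-mode sketch (not part of the patch): the UMD places the pin ID returned by NVPVA_IOCTL_PIN in the lower 32 bits of nvpva_pointer_symbol::base, and the KMD is expected to substitute the real pinned base address and size at submit time. The helper name and the assumption that the size is left for the KMD to fill are illustrative.

#include <stdint.h>
#include <linux/nvpva_ioctl.h>	/* assumed installed UAPI header path */

/* Illustrative sketch of filling a pointer symbol from a pin ID. */
static void fill_pointer_symbol(struct nvpva_pointer_symbol *sym,
				uint32_t pin_id, uint32_t offset_in_pin)
{
	sym->base = (uint64_t)pin_id;	/* low 32 bits carry the pin ID */
	sym->offset = offset_in_pin;	/* offset within the pinned area */
	sym->size = 0;			/* actual size is filled in by the KMD */
}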
+ */ +struct nvpva_symbol_param { + uint32_t config; /* Type of symbol configuration */ + uint32_t offset; /* Offset of symbol data in payload */ + struct nvpva_symbol symbol; /* Symbol to be configured */ +}; + +/* NOTE: Redefining the user side structure here + * This is done to allow UMD to pass the descriptor as it is and + * to handle the (user struct -> hw struct) coversion at KMD side. + * KMD needs redefinition to avoid circular dependency. + * + * An update in user structure would need corresponding change here + */ +struct nvpva_dma_descriptor { + uint32_t srcPtr; + uint32_t dstPtr; + uint32_t dst2Ptr; + uint64_t src_offset; + uint64_t dst_offset; + uint64_t dst2Offset; + uint64_t surfBLOffset; + uint16_t tx; + uint16_t ty; + uint16_t srcLinePitch; + uint16_t dstLinePitch; + int32_t srcAdv1; + int32_t dstAdv1; + int32_t srcAdv2; + int32_t dstAdv2; + int32_t srcAdv3; + int32_t dstAdv3; + uint8_t srcRpt1; + uint8_t dstRpt1; + uint8_t srcRpt2; + uint8_t dstRpt2; + uint8_t srcRpt3; + uint8_t dstRpt3; + uint8_t linkDescId; + uint8_t px; + uint32_t py; + uint8_t srcCbEnable; + uint8_t dstCbEnable; + uint32_t srcCbStart; + uint32_t dstCbStart; + uint32_t srcCbSize; + uint32_t dstCbSize; + uint8_t trigEventMode; + uint8_t trigVpuEvents; + uint8_t descReloadEnable; + uint8_t srcTransferMode; + uint8_t dstTransferMode; + uint8_t srcFormat; + uint8_t dstFormat; + uint8_t bytePerPixel; + uint8_t pxDirection; + uint8_t pyDirection; + uint8_t boundaryPixelExtension; + uint8_t transTrueCompletion; + uint8_t prefetchEnable; +}; + +/* NOTE: Redefining the user side structure here + * This is done to allow UMD to pass the channel info as it is and + * to handle the (user struct -> hw struct) coversion at KMD side. + * KMD needs redefinition to avoid circular dependency. + * + * An update in user structure would need corresponding change here + */ +struct nvpva_dma_channel { + uint8_t descIndex; + uint8_t blockHeight; + uint16_t adbSize; + uint8_t vdbSize; + uint16_t adbOffset; + uint8_t vdbOffset; + uint32_t outputEnableMask; + uint32_t padValue; + uint8_t reqPerGrant; + uint8_t prefetchEnable; + uint8_t chRepFactor; + uint8_t hwseqStart; + uint8_t hwseqEnd; + uint8_t hwseqEnable; + uint8_t hwseqTraversalOrder; + uint8_t hwseqTxSelect; + uint8_t hwseqTriggerDone; +}; + +/** + * + * @brief DMA MISR configuration information. This information is used by R5 + * to program MISR registers if a task requests MISR computation on its + * output DMA channels. + * + */ +struct nvpva_dma_misr { + /* Enable flag for MISR. Set to 0 if MISR check + * is not needed for the task, non-zero otherwise + */ + uint32_t enable; + /* Reference value for CRC computed on write + * addresses, i.e., MISR 1 + */ + uint32_t ref_addr; + /* Seed value for address CRC */ + uint32_t seed_crc0; + /* Reference value for CRC computed on first + * 256-bits of AXI write data + */ + uint32_t ref_data_1; + /* Seed value for write data CRC*/ + uint32_t seed_crc1; + /* Reference value for CRC computed on + * second 256-bits of AXI write data + */ + uint32_t ref_data_2; + /* Bitmap indicating channels participating + * in MISR checks + */ + uint32_t channel_mask; + /* Bitmap indicating descriptors participating + * in MISR checks. These are the descriptors on + * channels identified by the channel_mask field + * that perform write through AXI interface to + * MC or L2SRAM + */ + uint64_t descriptor_mask; + /* + * MISR timeout value configured in DMA common register + * @ref PVA_DMA_COMMON_MISR_ENABLE. 
Timeout is caclutated as + * number of AXI clock cycles. + */ + uint32_t misr_timeout; +}; + +/** + * Used to pass config for Hardware Sequencer (HWSeq). + * For HWSeq operations, all DMA channels will be configured + * based on the selection of hardware sequencer trigger mode. + * For NVPVA_HWSEQTM_VPUTRIG, VPU trigger mode will be used. + * For NVPVA_HWSEQTM_DMATRIG, DMA trigger mode will be used. + */ +struct nvpva_hwseq_config { + uint32_t hwseqTrigMode; + uint32_t reserved; + struct nvpva_mem hwseqBuf; +}; + +struct nvpva_ioctl_task { + uint16_t exe_id; + uint32_t flags; + uint32_t l2_alloc_size; /* Not applicable for Xavier */ + struct nvpva_ioctl_part prefences; + struct nvpva_ioctl_part user_fence_actions; + struct nvpva_ioctl_part input_task_status; + struct nvpva_ioctl_part output_task_status; + struct nvpva_ioctl_part dma_descriptors; + struct nvpva_ioctl_part dma_channels; + struct nvpva_ioctl_part dma_misr_config; + struct nvpva_ioctl_part hwseq_config; + struct nvpva_ioctl_part symbols; + struct nvpva_ioctl_part symbol_payload; +}; + +struct nvpva_ioctl_submit_in_arg { + uint32_t version; + uint64_t submission_timeout_us; + uint64_t execution_timeout_us; + struct nvpva_ioctl_part tasks; +}; + +struct nvpva_submit_in_arg_s { + uint32_t version; + uint16_t num_tasks; + uint64_t submission_timeout_us; + uint64_t execution_timeout_us; +}; + +union nvpva_ioctl_submit_args { + struct nvpva_ioctl_submit_in_arg in; +}; + +struct nvpva_set_vpu_print_buffer_size_in_arg { + uint32_t size; +}; + +union nvpva_set_vpu_print_buffer_size_args { + struct nvpva_set_vpu_print_buffer_size_in_arg in; +}; + +/* There are 64 DMA descriptors in T19x and T23x. But R5 FW reserves + * 4 DMA descriptors for internal use. + */ +#define NVPVA_TASK_MAX_DMA_DESCRIPTORS (60U) +/*TODO: Remove NVPVA_TASK_MAX_DMA_CHANNELS */ +/*There are 14 DMA channels in T19x and 16 DMA channels in T23X. + * R5 FW reserves one DMA channel for internal use. + */ +#define NVPVA_TASK_MAX_DMA_CHANNELS 16U +#define NVPVA_TASK_MAX_DMA_CHANNELS_T19X (13U) +#define NVPVA_TASK_MAX_DMA_CHANNELS_T23X (15U) +#define NVPVA_NOOP_EXE_ID 65535 +#define NVPVA_SUBMIT_MAX_TASKS 256U + +#define NVPVA_IOCTL_MAGIC 'Q' + +#define NVPVA_IOCTL_REGISTER_VPU_EXEC \ + _IOWR(NVPVA_IOCTL_MAGIC, 1, union nvpva_vpu_exe_register_args) + +#define NVPVA_IOCTL_UNREGISTER_VPU_EXEC \ + _IOW(NVPVA_IOCTL_MAGIC, 2, union nvpva_vpu_exe_unregister_args) + +#define NVPVA_IOCTL_GET_SYMBOL_ID \ + _IOWR(NVPVA_IOCTL_MAGIC, 3, union nvpva_get_symbol_args) + +#define NVPVA_IOCTL_PIN \ + _IOWR(NVPVA_IOCTL_MAGIC, 4, union nvpva_pin_args) + +#define NVPVA_IOCTL_UNPIN \ + _IOW(NVPVA_IOCTL_MAGIC, 5, union nvpva_unpin_args) + +#define NVPVA_IOCTL_SUBMIT \ + _IOW(NVPVA_IOCTL_MAGIC, 6, union nvpva_ioctl_submit_args) + +#define NVPVA_IOCTL_NOP \ + _IOW(NVPVA_IOCTL_MAGIC, 7) + +#define NVPVA_IOCTL_ACQUIRE_QUEUE \ + _IOW(NVPVA_IOCTL_MAGIC, 8) + +#define NVPVA_IOCTL_RELEASE_QUEUE \ + _IOW(NVPVA_IOCTL_MAGIC, 9) + +#define NVPVA_IOCTL_GET_SYM_TAB \ + _IOWR(NVPVA_IOCTL_MAGIC, 10, union nvpva_get_sym_tab_args) + +#define NVPVA_IOCTL_SET_VPU_PRINT_BUFFER_SIZE \ + _IOW(NVPVA_IOCTL_MAGIC, 11, union nvpva_set_vpu_print_buffer_size_args) + +#define NVPVA_IOCTL_NUMBER_MAX 11 + +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) +#define NVPVA_IOCTL_MAX_SIZE \ + MAX(sizeof(union nvpva_vpu_exe_register_args), \ + MAX(sizeof(union nvpva_vpu_exe_unregister_args), \ + MAX(sizeof(union nvpva_get_symbol_args), \ + MAX(sizeof(union nvpva_pin_args), \ + MAX(sizeof(union nvpva_unpin_args), \ + MAX(sizeof(union nvpva_ioctl_submit_args), \ + MAX(sizeof(union nvpva_get_sym_tab_args), \ + MAX(sizeof(union nvpva_set_vpu_print_buffer_size_args), \ + 0)))))))) + +/* NvPva Task param limits */ +#define NVPVA_TASK_MAX_PREFENCES 8U +#define NVPVA_TASK_MAX_FENCEACTIONS 4U +#define NVPVA_TASK_MAX_INPUT_STATUS 8U +#define NVPVA_TASK_MAX_OUTPUT_STATUS 8U +#define NVPVA_TASK_MAX_SYMBOLS 128U +/* VMEM configurable size */ +#define NVPVA_TASK_MAX_PAYLOAD_SIZE 8192U +#define NVPVA_TASK_MAX_SIZE \ + (sizeof(struct nvpva_submit_task_header) + \ + NVPVA_TASK_MAX_PREFENCES * sizeof(struct nvpva_submit_fence) + \ + NVPVA_TASK_MAX_FENCEACTIONS * \ + NVPVA_MAX_FENCE_TYPES * sizeof(struct nvpva_fence_action) + \ + NVPVA_TASK_MAX_INPUT_STATUS * sizeof(struct nvpva_mem) + \ + NVPVA_TASK_MAX_OUTPUT_STATUS * sizeof(struct nvpva_mem) + \ + NVPVA_TASK_MAX_DMA_DESCRIPTORS * \ + sizeof(struct nvpva_dma_descriptor) + \ + NVPVA_TASK_MAX_DMA_CHANNELS * sizeof(struct nvpva_dma_channel) + \ + sizeof(struct nvpva_hwseq_config) + \ + NVPVA_TASK_MAX_SYMBOLS * sizeof(struct nvpva_symbol_param) + \ + NVPVA_TASK_MAX_PAYLOAD_SIZE) + +/* NvPva submit param limits */ +#define NVPVA_SUBMIT_MAX_SIZE \ + (NVPVA_SUBMIT_MAX_TASKS * NVPVA_TASK_MAX_SIZE + \ + sizeof(struct nvpva_submit_in_arg_s)) + +struct pva_ocd_ioctl_vpu_io_param { + uint32_t instr; + uint32_t n_write; + uint32_t n_read; + uint32_t data[7]; +}; + +#define PVA_OCD_MAGIC 'V' + +#define PVA_OCD_IOCTL_VPU_IO \ + _IOWR(PVA_OCD_MAGIC, 1, struct pva_ocd_ioctl_vpu_io_param) + +#endif /* __NVPVA_IOCTL_H__ */ diff --git a/kernel-src-files-copy-list.txt b/kernel-src-files-copy-list.txt index 01be0fa5..3aefe972 100644 --- a/kernel-src-files-copy-list.txt +++ b/kernel-src-files-copy-list.txt @@ -2,7 +2,3 @@ nvidia/drivers/platform/tegra/cvnas drivers/platform/tegra nvidia/include/linux/cvnas.h include/linux/cvnas.h -# Files/directories for NVPVA -nvidia/drivers/video/tegra/host/pva drivers/video/tegra/host -nvidia/include/trace/events/nvhost_pva.h include/trace/events/nvhost_pva.h -nvidia/include/uapi/linux/nvpva_ioctl.h include/uapi/linux/nvpva_ioctl.h
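For completeness, a hedged user-space sketch of the executable registration flow defined by this UAPI header (illustrative only, not part of the patch; it assumes the header is installed as <linux/nvpva_ioctl.h> and that the VPU ELF image is already in memory):

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvpva_ioctl.h>

/* Register a VPU executable with the PVA KMD, then unregister it again. */
static int example_register_vpu_exe(const void *elf, uint64_t elf_size)
{
	union nvpva_vpu_exe_register_args reg;
	union nvpva_vpu_exe_unregister_args unreg;
	int fd, err;

	fd = open(NVPVA_DEVICE_NODE, O_RDWR);
	if (fd < 0)
		return -1;

	memset(&reg, 0, sizeof(reg));
	reg.in.exe_data.addr = (uintptr_t)elf;	/* user VA of the ELF image */
	reg.in.exe_data.size = elf_size;

	err = ioctl(fd, NVPVA_IOCTL_REGISTER_VPU_EXEC, &reg);
	if (err == 0) {
		uint16_t exe_id = reg.out.exe_id;	/* ID assigned by KMD */

		/* ... pin buffers and submit tasks referencing exe_id ... */

		memset(&unreg, 0, sizeof(unreg));
		unreg.in.exe_id = exe_id;
		err = ioctl(fd, NVPVA_IOCTL_UNREGISTER_VPU_EXEC, &unreg);
	}

	close(fd);
	return err;
}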