From 1e36fa1086b25e045aa28e890f263f6536fcab16 Mon Sep 17 00:00:00 2001 From: nanwa Date: Fri, 2 May 2025 19:58:13 +0000 Subject: [PATCH] pva: mirror from gitlab cv/pva-sys-sw Gitlab commit 52e76e5680447 ("Map only PVA's syncpoints as R...") Changes since last deployment: - Map only PVA's syncpoints as RW - fw: treat unregister resource ID 0 as noop - Fix tegrastats issue that mixes two reads together - Deploy VPU ELF files to tegra tree - kmd: bugfix: check if ctx inited before async ops Change-Id: Ife641b1a27580e6f74f2b6e806eebc6750f3f846 Signed-off-by: nanwa Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3354736 Reviewed-by: Mohnish Jain GVS: buildbot_gerritrpt --- .../host/pva/src/kmd/common/pva_kmd_debugfs.c | 6 + .../pva/src/kmd/common/pva_kmd_op_handler.c | 7 + .../pva/src/kmd/linux/pva_kmd_linux_device.c | 157 +++++++++++++----- .../pva/src/kmd/linux/pva_kmd_linux_device.h | 6 +- .../pva/src/kmd/linux/pva_kmd_linux_driver.c | 6 +- .../pva/src/kmd/linux/pva_kmd_linux_smmu.c | 12 +- 6 files changed, 149 insertions(+), 45 deletions(-) diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c index f9066016..26e7798e 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c @@ -100,6 +100,12 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats, char kernel_buffer[256]; int64_t formatted_len; + // We don't support partial reads for vpu stats because we cannot mix two + // reads at different times together. 
+ if (offset != 0) { + return 0; + } + formatted_len = snprintf( kernel_buffer, sizeof(kernel_buffer), "%llu\n%llu\n%llu\n%llu\n", diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c index 30f20479..cdae9e47 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c @@ -329,6 +329,13 @@ static enum pva_error pva_kmd_async_ops_handler( uint32_t wait_time = 0; enum pva_error submit_error = PVA_SUCCESS; + if (ctx->inited == false) { + pva_kmd_log_err( + "pva_kmd_async_ops_handler: Context is not initialized"); + err = PVA_INVAL; + goto out; + } + //first check if we have space in queue while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) { pva_kmd_sleep_us(PVA_KMD_WAIT_FW_POLL_INTERVAL_US); diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c index b9e7c97d..6d8ca6bd 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -53,68 +54,142 @@ void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id, } } -void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva) +int pva_kmd_linux_host1x_init(struct pva_kmd_device *pva) { - phys_addr_t base; - size_t size; + phys_addr_t syncpt_phys_base; + size_t all_syncpt_size; int err = 0; uint32_t stride, num_syncpts; uint32_t syncpt_page_size; dma_addr_t sp_start; - struct device *dev; + int count; struct pva_kmd_linux_device_data *device_data = pva_kmd_linux_device_get_data(pva); struct nvpva_device_data *props = device_data->pva_device_properties; + struct device *dev = + &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; + + if (iommu_get_domain_for_dev(dev) == 
NULL) { + dev_err(dev, "Cannot use syncpt without IOMMU"); + err = -EFAULT; + goto err_out; + } + props->host1x = nvpva_device_to_host1x(props->pdev); err = nvpva_syncpt_unit_interface_init(props->pdev); if (err < 0) { - FAULT("Failed syncpt unit interface init\n"); + dev_err(dev, "Failed syncpt unit interface init"); + goto err_out; } - err = host1x_syncpt_get_shim_info(props->host1x, &base, &stride, - &num_syncpts); + err = host1x_syncpt_get_shim_info(props->host1x, &syncpt_phys_base, + &stride, &num_syncpts); if (err < 0) { - FAULT("Failed to get syncpt shim_info\n"); + dev_err(dev, "Failed to get syncpt shim_info"); + goto err_out; } - size = stride * num_syncpts; - /** Get page size of a syncpoint */ + + all_syncpt_size = stride * num_syncpts; syncpt_page_size = nvpva_syncpt_unit_interface_get_byte_offset_ext(1); - dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; - if (iommu_get_domain_for_dev(dev)) { - sp_start = dma_map_resource(dev, base, size, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, sp_start)) { - FAULT("Failed to pin syncpoints\n"); - } - } else { - FAULT("Failed to pin syncpoints\n"); + sp_start = dma_map_resource(dev, syncpt_phys_base, all_syncpt_size, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, sp_start)) { + dev_err(dev, "Failed to map RO syncpoints"); + goto err_out; } + pva->ro_syncpt_base_iova = sp_start; pva->syncpt_page_size = syncpt_page_size; pva->num_ro_syncpts = num_syncpts; - // The same region is also used for RW syncpts... - pva->rw_syncpt_base_iova = sp_start; - pva->rw_syncpt_region_size = size; + dev_info(dev, "PVA RO syncpt iova: %llx, size: %lx\n", + pva->ro_syncpt_base_iova, all_syncpt_size); + + // Create a scatterlist to store all physical addresses of syncpts. + // They may be non-contiguous so we prepare one scatterlist entry per syncpt. + // Later, we map the scatterlist into a contiguous IOVA region. 
+ sg_init_table(device_data->syncpt_sg, PVA_NUM_RW_SYNCPTS); for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { uint32_t syncpt_id; - uint64_t syncpt_iova; + phys_addr_t syncpt_phys_addr; syncpt_id = nvpva_get_syncpt_client_managed(props->pdev, "pva_syncpt"); if (syncpt_id == 0) { - FAULT("Failed to get syncpt\n"); + dev_err(dev, "Failed to allocate RW syncpt"); + err = -EFAULT; + goto free_syncpts; } - syncpt_iova = safe_addu64( - sp_start, + + pva->rw_syncpts[i].syncpt_id = syncpt_id; + + syncpt_phys_addr = safe_addu64( + syncpt_phys_base, nvpva_syncpt_unit_interface_get_byte_offset_ext( syncpt_id)); - - pva->rw_syncpts[i].syncpt_iova = syncpt_iova; - pva->rw_syncpts[i].syncpt_id = syncpt_id; + //Store the syncpt physical address in the scatterlist. Since the + //scatterlist API only takes struct page as input, so we first convert + //the physical address to a struct page address. + sg_set_page(&device_data->syncpt_sg[i], + phys_to_page(syncpt_phys_addr), syncpt_page_size, + 0); } + + count = dma_map_sg_attrs(dev, device_data->syncpt_sg, + PVA_NUM_RW_SYNCPTS, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + ASSERT(count > 0); + { + //Validate that syncpt IOVAs are contiguous + //This is an assertion and should never fail + uint64_t prev_iova = 0; + uint64_t prev_len = 0; + for (uint32_t i = 0; i < count; i++) { + if (prev_iova != 0) { + if (safe_addu64(prev_iova, prev_len) != + sg_dma_address( + &device_data->syncpt_sg[i])) { + dev_err(dev, + "RW syncpt IOVAs are not contiguous. 
This should never happen!"); + err = -EFAULT; + goto free_syncpts; + } + prev_iova = sg_dma_address( + &device_data->syncpt_sg[i]); + prev_len = + sg_dma_len(&device_data->syncpt_sg[i]); + } + } + } + + pva->rw_syncpt_base_iova = sg_dma_address(&device_data->syncpt_sg[0]); + pva->rw_syncpt_region_size = + safe_mulu32(syncpt_page_size, PVA_NUM_RW_SYNCPTS); + + for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { + pva->rw_syncpts[i].syncpt_iova = + safe_addu64(pva->rw_syncpt_base_iova, + safe_mulu32(i, syncpt_page_size)); + } + + dev_info(dev, "PVA RW syncpt iova: %llx, size: %x\n", + pva->rw_syncpt_base_iova, pva->rw_syncpt_region_size); + + return 0; + +free_syncpts: + for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { + if (pva->rw_syncpts[i].syncpt_id != 0) { + nvpva_syncpt_put_ref_ext(props->pdev, + pva->rw_syncpts[i].syncpt_id); + pva->rw_syncpts[i].syncpt_id = 0; + } + } + +err_out: + return err; } void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva) @@ -127,25 +202,31 @@ void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva) phys_addr_t base; size_t size; uint32_t stride, num_syncpts; - struct device *dev; struct pva_kmd_linux_device_data *device_data = pva_kmd_linux_device_get_data(pva); struct nvpva_device_data *props = device_data->pva_device_properties; + struct device *dev = + &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; + + if (iommu_get_domain_for_dev(dev) == NULL) { + dev_err(dev, "Cannot use syncpt without IOMMU"); + return; + } err = host1x_syncpt_get_shim_info(props->host1x, &base, &stride, &num_syncpts); if (err < 0) { - FAULT("Failed to get syncpt shim_info\n"); + dev_err(dev, "Failed to get syncpt shim_info when deiniting"); + return; } size = stride * num_syncpts; - dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev; - if (iommu_get_domain_for_dev(dev)) { - dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size, - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); - } else { - FAULT("Failed to unmap syncpts\n"); - 
} + dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + + dma_unmap_sg_attrs(dev, device_data->syncpt_sg, PVA_NUM_RW_SYNCPTS, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) { nvpva_syncpt_put_ref_ext(props->pdev, pva->rw_syncpts[i].syncpt_id); diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h index c4a2c50b..8e77c503 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h @@ -4,9 +4,11 @@ #ifndef PVA_KMD_LINUX_DEVICE_H #define PVA_KMD_LINUX_DEVICE_H +#include "pva_kmd_constants.h" #include #include #include +#include #include "pva_api.h" #include "pva_kmd_device.h" @@ -26,6 +28,8 @@ struct pva_kmd_linux_device_data { /* Global states required by a PVA device instance go here */ struct platform_device *smmu_contexts[PVA_MAX_NUM_SMMU_CONTEXTS]; struct pva_kmd_isr_data isr[PVA_KMD_INTR_LINE_COUNT]; + + struct scatterlist syncpt_sg[PVA_NUM_RW_SYNCPTS]; }; struct pva_kmd_linux_device_data * @@ -34,7 +38,7 @@ pva_kmd_linux_device_get_data(struct pva_kmd_device *device); void pva_kmd_linux_device_set_data(struct pva_kmd_device *device, struct pva_kmd_linux_device_data *data); -void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva); +int pva_kmd_linux_host1x_init(struct pva_kmd_device *pva); void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva); struct nvpva_device_data * diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c index c819d02e..ed1dfce4 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c @@ -402,7 +402,11 @@ static int pva_probe(struct platform_device *pdev) goto 
err_cdev_init; } - pva_kmd_linux_host1x_init(pva_device); + err = pva_kmd_linux_host1x_init(pva_device); + if (err < 0) { + dev_err(dev, "pva_kmd_linux_host1x_init failed\n"); + goto err_cdev_init; + } err = pva_kmd_debugfs_create_nodes(pva_device); if (err != PVA_SUCCESS) { diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c index 5b944141..0474f789 100644 --- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c +++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_smmu.c @@ -120,6 +120,7 @@ bool pva_kmd_linux_smmu_contexts_initialized(enum pva_chip_id chip_id) void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device) { uint32_t sid_idx; + struct device *dev; struct pva_kmd_linux_device_data *device_data = pva_kmd_linux_device_get_data(pva_device); @@ -134,14 +135,14 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device) uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U); struct pva_kmd_linux_smmu_ctx *smmu_ctx = &g_smmu_ctxs[sid_idx]; + dev = &smmu_ctx->pdev->dev; pva_device->stream_ids[smmu_ctx_idx] = smmu_ctx->sid; device_data->smmu_contexts[smmu_ctx_idx] = smmu_ctx->pdev; - dma_set_mask_and_coherent(&smmu_ctx->pdev->dev, - DMA_BIT_MASK(39)); + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39)); //set max segment size to UINT_MAX to avoid creating scatterlist >= 4GB //during IOVA mapping, which will overflow the scatterlist length field, //causing IOVA leak - dma_set_max_seg_size(&smmu_ctx->pdev->dev, UINT_MAX); + dma_set_max_seg_size(dev, UINT_MAX); } /* Configure SMMU contexts for privileged operations */ @@ -153,9 +154,10 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device) // Question: Is it necessary that priv SID is the last one? 
pva_device->stream_ids[0] = g_smmu_ctxs[sid_idx].sid; device_data->smmu_contexts[0] = g_smmu_ctxs[sid_idx].pdev; + dev = &device_data->smmu_contexts[0]->dev; dma_set_mask_and_coherent( - &device_data->smmu_contexts[0]->dev, - DMA_BIT_MASK(31)); //only 2GB R5 space is accessible + dev, DMA_BIT_MASK(31)); //only 2GB R5 space is accessible + dma_set_max_seg_size(dev, UINT_MAX); } struct platform_driver pva_kmd_linux_smmu_context_driver = {