pva: mirror from gitlab cv/pva-sys-sw

Gitlab commit 52e76e5680447 ("Map only PVA's syncpoints as R...")

Changes since last deployment:

- Map only PVA's syncpoints as RW
- fw: treat unregister resource ID 0 as noop (see the sketch after this list)
- Fix tegrastats issue that mixes two reads together
- Deploy VPU ELF files to tegra tree
- kmd: bugfix: check if ctx inited before async ops
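
For illustration, a minimal self-contained sketch of the fw-side convention named in the second item; the names here (demo_table, demo_unregister_resource) are hypothetical and not taken from the firmware sources, which are not part of this diff:

#include <stdbool.h>
#include <stdint.h>

enum demo_error { DEMO_SUCCESS = 0, DEMO_INVAL = 1 };

struct demo_table {
        bool in_use[64];        /* slot 0 is reserved and never handed out */
};

static enum demo_error demo_unregister_resource(struct demo_table *tbl,
                                                uint32_t res_id)
{
        /* ID 0 means "nothing was registered": succeed without touching
         * the table instead of reporting an error. */
        if (res_id == 0U)
                return DEMO_SUCCESS;
        if (res_id >= 64U || !tbl->in_use[res_id])
                return DEMO_INVAL;
        tbl->in_use[res_id] = false;
        return DEMO_SUCCESS;
}

Treating ID 0 as a successful noop lets teardown paths unregister optional resources unconditionally, without first checking whether they were ever allocated.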

Change-Id: Ife641b1a27580e6f74f2b6e806eebc6750f3f846
Signed-off-by: nanwa <nanwa@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3354736
Reviewed-by: Mohnish Jain <mohnishj@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Author: nanwa
Date: 2025-05-02 19:58:13 +00:00
Committed by: Jon Hunter
Parent: c85efbe2a7
Commit: 1e36fa1086
6 changed files with 149 additions and 45 deletions


@@ -100,6 +100,12 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
char kernel_buffer[256];
int64_t formatted_len;
// We don't support partial reads for vpu stats because we cannot mix
// data from two reads taken at different times.
if (offset != 0) {
return 0;
}
formatted_len = snprintf(
kernel_buffer, sizeof(kernel_buffer),
"%llu\n%llu\n%llu\n%llu\n",

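For context on the tegrastats fix, here is a hedged sketch of the no-partial-read pattern the hunk above applies, written as a generic debugfs-style read callback; demo_stats_read and demo_stats are illustrative names, not the driver's actual symbols. Returning 0 for any non-zero offset makes a second read() call see EOF, so userspace can never stitch together counters sampled at two different times.

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/types.h>

static struct {
        u64 busy_ns;
        u64 total_ns;
} demo_stats;

/* Format the whole snapshot on the first read; refuse partial reads. */
static ssize_t demo_stats_read(struct file *file, char __user *ubuf,
                               size_t count, loff_t *ppos)
{
        char buf[64];
        int len;

        if (*ppos != 0)
                return 0;       /* EOF: no second/partial reads */

        len = scnprintf(buf, sizeof(buf), "%llu\n%llu\n",
                        demo_stats.busy_ns, demo_stats.total_ns);
        return simple_read_from_buffer(ubuf, count, ppos, buf, len);
}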

@@ -329,6 +329,13 @@ static enum pva_error pva_kmd_async_ops_handler(
uint32_t wait_time = 0;
enum pva_error submit_error = PVA_SUCCESS;
if (ctx->inited == false) {
pva_kmd_log_err(
"pva_kmd_async_ops_handler: Context is not initialized");
err = PVA_INVAL;
goto out;
}
//first check if we have space in queue
while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) {
pva_kmd_sleep_us(PVA_KMD_WAIT_FW_POLL_INTERVAL_US);

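The queue-space loop is only partially visible in this hunk (wait_time above suggests the wait is bounded); as a hedged sketch, a generic bounded poll-and-sleep wait looks roughly like the following, where every name and constant is a placeholder rather than one of the driver's real symbols:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

#define DEMO_POLL_INTERVAL_US 100U
#define DEMO_TIMEOUT_US       100000U

/* Poll a caller-supplied condition until it holds or the timeout expires. */
static int demo_poll_until(bool (*has_space)(void *ctx), void *ctx)
{
        uint32_t waited_us = 0;

        while (!has_space(ctx)) {
                if (waited_us >= DEMO_TIMEOUT_US)
                        return -ETIMEDOUT;
                usleep_range(DEMO_POLL_INTERVAL_US, 2 * DEMO_POLL_INTERVAL_US);
                waited_us += DEMO_POLL_INTERVAL_US;
        }
        return 0;
}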

@@ -7,6 +7,7 @@
#include <linux/debugfs.h>
#include <linux/firmware.h>
#include <linux/version.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/dma-mapping.h>
#include <soc/tegra/virt/syscalls.h>
@@ -53,68 +54,142 @@ void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
}
}
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
int pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
{
phys_addr_t base;
size_t size;
phys_addr_t syncpt_phys_base;
size_t all_syncpt_size;
int err = 0;
uint32_t stride, num_syncpts;
uint32_t syncpt_page_size;
dma_addr_t sp_start;
struct device *dev;
int count;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
struct device *dev =
&device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev) == NULL) {
dev_err(dev, "Cannot use syncpt without IOMMU");
err = -EFAULT;
goto err_out;
}
props->host1x = nvpva_device_to_host1x(props->pdev);
err = nvpva_syncpt_unit_interface_init(props->pdev);
if (err < 0) {
FAULT("Failed syncpt unit interface init\n");
dev_err(dev, "Failed syncpt unit interface init");
goto err_out;
}
err = host1x_syncpt_get_shim_info(props->host1x, &base, &stride,
&num_syncpts);
err = host1x_syncpt_get_shim_info(props->host1x, &syncpt_phys_base,
&stride, &num_syncpts);
if (err < 0) {
FAULT("Failed to get syncpt shim_info\n");
dev_err(dev, "Failed to get syncpt shim_info");
goto err_out;
}
size = stride * num_syncpts;
/** Get page size of a syncpoint */
all_syncpt_size = stride * num_syncpts;
syncpt_page_size = nvpva_syncpt_unit_interface_get_byte_offset_ext(1);
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
sp_start = dma_map_resource(dev, base, size, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
sp_start = dma_map_resource(dev, syncpt_phys_base, all_syncpt_size,
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, sp_start)) {
FAULT("Failed to pin syncpoints\n");
}
} else {
FAULT("Failed to pin syncpoints\n");
dev_err(dev, "Failed to map RO syncpoints");
goto err_out;
}
pva->ro_syncpt_base_iova = sp_start;
pva->syncpt_page_size = syncpt_page_size;
pva->num_ro_syncpts = num_syncpts;
// The same region is also used for RW syncpts...
pva->rw_syncpt_base_iova = sp_start;
pva->rw_syncpt_region_size = size;
dev_info(dev, "PVA RO syncpt iova: %llx, size: %lx\n",
pva->ro_syncpt_base_iova, all_syncpt_size);
// Create a scatterlist to store all physical addresses of syncpts.
// They may be non-contiguous so we prepare one scatterlist entry per syncpt.
// Later, we map the scatterlist into a contiguous IOVA region.
sg_init_table(device_data->syncpt_sg, PVA_NUM_RW_SYNCPTS);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
uint32_t syncpt_id;
uint64_t syncpt_iova;
phys_addr_t syncpt_phys_addr;
syncpt_id = nvpva_get_syncpt_client_managed(props->pdev,
"pva_syncpt");
if (syncpt_id == 0) {
FAULT("Failed to get syncpt\n");
dev_err(dev, "Failed to allocate RW syncpt");
err = -EFAULT;
goto free_syncpts;
}
syncpt_iova = safe_addu64(
sp_start,
pva->rw_syncpts[i].syncpt_id = syncpt_id;
syncpt_phys_addr = safe_addu64(
syncpt_phys_base,
nvpva_syncpt_unit_interface_get_byte_offset_ext(
syncpt_id));
pva->rw_syncpts[i].syncpt_iova = syncpt_iova;
pva->rw_syncpts[i].syncpt_id = syncpt_id;
//Store the syncpt physical address in the scatterlist. Since the
//scatterlist API only takes struct page as input, we first convert
//the physical address to a struct page pointer.
sg_set_page(&device_data->syncpt_sg[i],
phys_to_page(syncpt_phys_addr), syncpt_page_size,
0);
}
count = dma_map_sg_attrs(dev, device_data->syncpt_sg,
PVA_NUM_RW_SYNCPTS, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
ASSERT(count > 0);
{
//Validate that the mapped RW syncpt IOVAs are contiguous.
//This is an assertion and should never fail.
uint64_t prev_iova = 0;
uint64_t prev_len = 0;
for (uint32_t i = 0; i < count; i++) {
if ((prev_iova != 0) &&
(safe_addu64(prev_iova, prev_len) !=
sg_dma_address(&device_data->syncpt_sg[i]))) {
dev_err(dev,
"RW syncpt IOVAs are not contiguous. This should never happen!");
err = -EFAULT;
goto free_syncpts;
}
prev_iova = sg_dma_address(&device_data->syncpt_sg[i]);
prev_len = sg_dma_len(&device_data->syncpt_sg[i]);
}
}
pva->rw_syncpt_base_iova = sg_dma_address(&device_data->syncpt_sg[0]);
pva->rw_syncpt_region_size =
safe_mulu32(syncpt_page_size, PVA_NUM_RW_SYNCPTS);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
pva->rw_syncpts[i].syncpt_iova =
safe_addu64(pva->rw_syncpt_base_iova,
safe_mulu32(i, syncpt_page_size));
}
dev_info(dev, "PVA RW syncpt iova: %llx, size: %x\n",
pva->rw_syncpt_base_iova, pva->rw_syncpt_region_size);
return 0;
free_syncpts:
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
if (pva->rw_syncpts[i].syncpt_id != 0) {
nvpva_syncpt_put_ref_ext(props->pdev,
pva->rw_syncpts[i].syncpt_id);
pva->rw_syncpts[i].syncpt_id = 0;
}
}
err_out:
return err;
}
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva)
@@ -127,25 +202,31 @@ void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva)
phys_addr_t base;
size_t size;
uint32_t stride, num_syncpts;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
struct device *dev =
&device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev) == NULL) {
dev_err(dev, "Cannot use syncpt without IOMMU");
return;
}
err = host1x_syncpt_get_shim_info(props->host1x, &base, &stride,
&num_syncpts);
if (err < 0) {
FAULT("Failed to get syncpt shim_info\n");
dev_err(dev, "Failed to get syncpt shim_info when deiniting");
return;
}
size = stride * num_syncpts;
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size,
dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
dma_unmap_sg_attrs(dev, device_data->syncpt_sg, PVA_NUM_RW_SYNCPTS,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
} else {
FAULT("Failed to unmap syncpts\n");
}
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
nvpva_syncpt_put_ref_ext(props->pdev,
pva->rw_syncpts[i].syncpt_id);

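As a standalone illustration of the scatterlist comment above (one entry per syncpoint page, one dma_map_sg_attrs() call, then a single contiguous IOVA range addressed from entry 0), here is a hedged sketch; demo_map_pages, DEMO_NUM_PAGES and the error handling are assumptions for the sketch, not the driver's real code:

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>

#define DEMO_NUM_PAGES 8

static struct scatterlist demo_sg[DEMO_NUM_PAGES];

static dma_addr_t demo_map_pages(struct device *dev,
                                 const phys_addr_t phys[DEMO_NUM_PAGES],
                                 unsigned int page_len)
{
        int i, nents;

        sg_init_table(demo_sg, DEMO_NUM_PAGES);
        for (i = 0; i < DEMO_NUM_PAGES; i++)
                sg_set_page(&demo_sg[i], phys_to_page(phys[i]), page_len, 0);

        nents = dma_map_sg_attrs(dev, demo_sg, DEMO_NUM_PAGES,
                                 DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
        if (nents <= 0)
                return DMA_MAPPING_ERROR;

        /* With a large enough DMA max segment size, the IOMMU DMA path
         * typically allocates one contiguous IOVA range for the list;
         * entry 0 then provides the base address for all mapped pages. */
        return sg_dma_address(&demo_sg[0]);
}

A contiguous range is what lets firmware reach every PVA-owned syncpoint through a single base IOVA plus a per-syncpoint page offset, which is exactly what the contiguity check over sg_dma_address() above asserts.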

@@ -4,9 +4,11 @@
#ifndef PVA_KMD_LINUX_DEVICE_H
#define PVA_KMD_LINUX_DEVICE_H
#include "pva_kmd_constants.h"
#include <linux/types.h>
#include <linux/platform_device.h>
#include <linux/cdev.h>
#include <linux/scatterlist.h>
#include "pva_api.h"
#include "pva_kmd_device.h"
@@ -26,6 +28,8 @@ struct pva_kmd_linux_device_data {
/* Global states required by a PVA device instance go here */
struct platform_device *smmu_contexts[PVA_MAX_NUM_SMMU_CONTEXTS];
struct pva_kmd_isr_data isr[PVA_KMD_INTR_LINE_COUNT];
struct scatterlist syncpt_sg[PVA_NUM_RW_SYNCPTS];
};
struct pva_kmd_linux_device_data *
@@ -34,7 +38,7 @@ pva_kmd_linux_device_get_data(struct pva_kmd_device *device);
void pva_kmd_linux_device_set_data(struct pva_kmd_device *device,
struct pva_kmd_linux_device_data *data);
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva);
int pva_kmd_linux_host1x_init(struct pva_kmd_device *pva);
void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva);
struct nvpva_device_data *


@@ -402,7 +402,11 @@ static int pva_probe(struct platform_device *pdev)
goto err_cdev_init;
}
pva_kmd_linux_host1x_init(pva_device);
err = pva_kmd_linux_host1x_init(pva_device);
if (err < 0) {
dev_err(dev, "pva_kmd_linux_host1x_init failed\n");
goto err_cdev_init;
}
err = pva_kmd_debugfs_create_nodes(pva_device);
if (err != PVA_SUCCESS) {


@@ -120,6 +120,7 @@ bool pva_kmd_linux_smmu_contexts_initialized(enum pva_chip_id chip_id)
void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
{
uint32_t sid_idx;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva_device);
@@ -134,14 +135,14 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U);
struct pva_kmd_linux_smmu_ctx *smmu_ctx = &g_smmu_ctxs[sid_idx];
dev = &smmu_ctx->pdev->dev;
pva_device->stream_ids[smmu_ctx_idx] = smmu_ctx->sid;
device_data->smmu_contexts[smmu_ctx_idx] = smmu_ctx->pdev;
dma_set_mask_and_coherent(&smmu_ctx->pdev->dev,
DMA_BIT_MASK(39));
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
//set max segment size to UINT_MAX to avoid creating scatterlist entries
//>= 4GB during IOVA mapping, which would overflow the scatterlist length
//field, causing an IOVA leak
dma_set_max_seg_size(&smmu_ctx->pdev->dev, UINT_MAX);
dma_set_max_seg_size(dev, UINT_MAX);
}
/* Configure SMMU contexts for privileged operations */
@@ -153,9 +154,10 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
// Question: Is it necessary that priv SID is the last one?
pva_device->stream_ids[0] = g_smmu_ctxs[sid_idx].sid;
device_data->smmu_contexts[0] = g_smmu_ctxs[sid_idx].pdev;
dev = &device_data->smmu_contexts[0]->dev;
dma_set_mask_and_coherent(
&device_data->smmu_contexts[0]->dev,
DMA_BIT_MASK(31)); //only 2GB R5 space is accessible
dev, DMA_BIT_MASK(31)); //only 2GB R5 space is accessible
dma_set_max_seg_size(dev, UINT_MAX);
}
struct platform_driver pva_kmd_linux_smmu_context_driver = {
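
For reference, a hedged sketch of the per-context DMA setup the loop above performs (the helper name and the 39-bit mask are illustrative here; the privileged R5 context in the real code uses a 31-bit mask). Capping the max segment size at UINT_MAX keeps every scatterlist segment below 4 GiB, so the 32-bit length field in struct scatterlist cannot overflow when segments are merged during IOVA mapping.

#include <linux/dma-mapping.h>
#include <linux/limits.h>
#include <linux/platform_device.h>

/* Hypothetical helper: configure DMA limits for one SMMU context device. */
static int demo_configure_smmu_ctx_dma(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        int err;

        /* 39-bit IOVA space for regular contexts. */
        err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
        if (err)
                return err;

        /* Segments capped just under 4 GiB so sg->length (u32) cannot wrap. */
        return dma_set_max_seg_size(dev, UINT_MAX);
}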