pva: mirror from gitlab cv/pva-sys-sw

Gitlab commit 52e76e5680447 ("Map only PVA's syncpoints as R...")

Changes since last deployment:

- Map only PVA's syncpoints as RW
- fw: treat unregister resource ID 0 as noop (see the sketch after this list)
- Fix tegrastats issue that mixes two reads together
- Deploy VPU ELF files to tegra tree
- kmd: bugfix: check if ctx inited before async ops
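
For illustration, a minimal self-contained sketch of the fw-side convention named in the second item; the names here (demo_table, demo_unregister_resource) are hypothetical and not taken from the firmware sources, which are not part of this diff:

#include <stdbool.h>
#include <stdint.h>

enum demo_error { DEMO_SUCCESS = 0, DEMO_INVAL = 1 };

struct demo_table {
        bool in_use[64];        /* slot 0 is reserved and never handed out */
};

static enum demo_error demo_unregister_resource(struct demo_table *tbl,
                                                uint32_t res_id)
{
        /* ID 0 means "nothing was registered": succeed without touching
         * the table instead of reporting an error. */
        if (res_id == 0U)
                return DEMO_SUCCESS;
        if (res_id >= 64U || !tbl->in_use[res_id])
                return DEMO_INVAL;
        tbl->in_use[res_id] = false;
        return DEMO_SUCCESS;
}

Treating ID 0 as a successful noop lets teardown paths unregister optional resources unconditionally, without first checking whether they were ever allocated.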

Change-Id: Ife641b1a27580e6f74f2b6e806eebc6750f3f846
Signed-off-by: nanwa <nanwa@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3354736
Reviewed-by: Mohnish Jain <mohnishj@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Author: nanwa
Date: 2025-05-02 19:58:13 +00:00
Committed by: Jon Hunter
Parent: c85efbe2a7
Commit: 1e36fa1086
6 changed files with 149 additions and 45 deletions


@@ -100,6 +100,12 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats,
char kernel_buffer[256];
int64_t formatted_len;
// We don't support partial reads for vpu stats because we cannot mix
// data from two reads taken at different times.
if (offset != 0) {
return 0;
}
formatted_len = snprintf(
kernel_buffer, sizeof(kernel_buffer),
"%llu\n%llu\n%llu\n%llu\n",

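For context on the tegrastats fix, here is a hedged sketch of the no-partial-read pattern the hunk above applies, written as a generic debugfs-style read callback; demo_stats_read and demo_stats are illustrative names, not the driver's actual symbols. Returning 0 for any non-zero offset makes a second read() call see EOF, so userspace can never stitch together counters sampled at two different times.

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/types.h>

static struct {
        u64 busy_ns;
        u64 total_ns;
} demo_stats;

/* Format the whole snapshot on the first read; refuse partial reads. */
static ssize_t demo_stats_read(struct file *file, char __user *ubuf,
                               size_t count, loff_t *ppos)
{
        char buf[64];
        int len;

        if (*ppos != 0)
                return 0;       /* EOF: no second/partial reads */

        len = scnprintf(buf, sizeof(buf), "%llu\n%llu\n",
                        demo_stats.busy_ns, demo_stats.total_ns);
        return simple_read_from_buffer(ubuf, count, ppos, buf, len);
}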

@@ -329,6 +329,13 @@ static enum pva_error pva_kmd_async_ops_handler(
uint32_t wait_time = 0;
enum pva_error submit_error = PVA_SUCCESS;
if (ctx->inited == false) {
pva_kmd_log_err(
"pva_kmd_async_ops_handler: Context is not initialized");
err = PVA_INVAL;
goto out;
}
//first check if we have space in queue
while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) {
pva_kmd_sleep_us(PVA_KMD_WAIT_FW_POLL_INTERVAL_US);

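The queue-space loop is only partially visible in this hunk (wait_time above suggests the wait is bounded); as a hedged sketch, a generic bounded poll-and-sleep wait looks roughly like the following, where every name and constant is a placeholder rather than one of the driver's real symbols:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

#define DEMO_POLL_INTERVAL_US 100U
#define DEMO_TIMEOUT_US       100000U

/* Poll a caller-supplied condition until it holds or the timeout expires. */
static int demo_poll_until(bool (*has_space)(void *ctx), void *ctx)
{
        uint32_t waited_us = 0;

        while (!has_space(ctx)) {
                if (waited_us >= DEMO_TIMEOUT_US)
                        return -ETIMEDOUT;
                usleep_range(DEMO_POLL_INTERVAL_US, 2 * DEMO_POLL_INTERVAL_US);
                waited_us += DEMO_POLL_INTERVAL_US;
        }
        return 0;
}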

@@ -7,6 +7,7 @@
#include <linux/debugfs.h>
#include <linux/firmware.h>
#include <linux/version.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/dma-mapping.h>
#include <soc/tegra/virt/syscalls.h>
@@ -53,68 +54,142 @@ void pva_kmd_read_syncpt_val(struct pva_kmd_device *pva, uint32_t syncpt_id,
}
}
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
int pva_kmd_linux_host1x_init(struct pva_kmd_device *pva)
{
phys_addr_t base;
size_t size;
phys_addr_t syncpt_phys_base;
size_t all_syncpt_size;
int err = 0;
uint32_t stride, num_syncpts;
uint32_t syncpt_page_size;
dma_addr_t sp_start;
struct device *dev;
int count;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
struct device *dev =
&device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev) == NULL) {
dev_err(dev, "Cannot use syncpt without IOMMU");
err = -EFAULT;
goto err_out;
}
props->host1x = nvpva_device_to_host1x(props->pdev);
err = nvpva_syncpt_unit_interface_init(props->pdev);
if (err < 0) {
FAULT("Failed syncpt unit interface init\n");
dev_err(dev, "Failed syncpt unit interface init");
goto err_out;
}
err = host1x_syncpt_get_shim_info(props->host1x, &base, &stride,
&num_syncpts);
err = host1x_syncpt_get_shim_info(props->host1x, &syncpt_phys_base,
&stride, &num_syncpts);
if (err < 0) {
FAULT("Failed to get syncpt shim_info\n");
dev_err(dev, "Failed to get syncpt shim_info");
goto err_out;
}
size = stride * num_syncpts;
/** Get page size of a syncpoint */
all_syncpt_size = stride * num_syncpts;
syncpt_page_size = nvpva_syncpt_unit_interface_get_byte_offset_ext(1);
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
sp_start = dma_map_resource(dev, base, size, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
sp_start = dma_map_resource(dev, syncpt_phys_base, all_syncpt_size,
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(dev, sp_start)) {
FAULT("Failed to pin syncpoints\n");
}
} else {
FAULT("Failed to pin syncpoints\n");
dev_err(dev, "Failed to map RO syncpoints");
goto err_out;
}
pva->ro_syncpt_base_iova = sp_start;
pva->syncpt_page_size = syncpt_page_size;
pva->num_ro_syncpts = num_syncpts;
// The same region is also used for RW syncpts...
pva->rw_syncpt_base_iova = sp_start;
pva->rw_syncpt_region_size = size;
dev_info(dev, "PVA RO syncpt iova: %llx, size: %lx\n",
pva->ro_syncpt_base_iova, all_syncpt_size);
// Create a scatterlist to store all physical addresses of syncpts.
// They may be non-contiguous so we prepare one scatterlist entry per syncpt.
// Later, we map the scatterlist into a contiguous IOVA region.
sg_init_table(device_data->syncpt_sg, PVA_NUM_RW_SYNCPTS);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
uint32_t syncpt_id;
uint64_t syncpt_iova;
phys_addr_t syncpt_phys_addr;
syncpt_id = nvpva_get_syncpt_client_managed(props->pdev,
"pva_syncpt");
if (syncpt_id == 0) {
FAULT("Failed to get syncpt\n");
dev_err(dev, "Failed to allocate RW syncpt");
err = -EFAULT;
goto free_syncpts;
}
syncpt_iova = safe_addu64(
sp_start,
pva->rw_syncpts[i].syncpt_id = syncpt_id;
syncpt_phys_addr = safe_addu64(
syncpt_phys_base,
nvpva_syncpt_unit_interface_get_byte_offset_ext(
syncpt_id));
pva->rw_syncpts[i].syncpt_iova = syncpt_iova;
pva->rw_syncpts[i].syncpt_id = syncpt_id;
//Store the syncpt physical address in the scatterlist. Since the
//scatterlist API only takes struct page as input, we first convert
//the physical address to a struct page pointer.
sg_set_page(&device_data->syncpt_sg[i],
phys_to_page(syncpt_phys_addr), syncpt_page_size,
0);
}
count = dma_map_sg_attrs(dev, device_data->syncpt_sg,
PVA_NUM_RW_SYNCPTS, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
ASSERT(count > 0);
{
//Validate that the mapped RW syncpt IOVAs are contiguous.
//This is an assertion and should never fail.
uint64_t prev_iova = 0;
uint64_t prev_len = 0;
for (uint32_t i = 0; i < count; i++) {
if ((prev_iova != 0) &&
(safe_addu64(prev_iova, prev_len) !=
sg_dma_address(&device_data->syncpt_sg[i]))) {
dev_err(dev,
"RW syncpt IOVAs are not contiguous. This should never happen!");
err = -EFAULT;
goto free_syncpts;
}
prev_iova = sg_dma_address(&device_data->syncpt_sg[i]);
prev_len = sg_dma_len(&device_data->syncpt_sg[i]);
}
}
pva->rw_syncpt_base_iova = sg_dma_address(&device_data->syncpt_sg[0]);
pva->rw_syncpt_region_size =
safe_mulu32(syncpt_page_size, PVA_NUM_RW_SYNCPTS);
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
pva->rw_syncpts[i].syncpt_iova =
safe_addu64(pva->rw_syncpt_base_iova,
safe_mulu32(i, syncpt_page_size));
}
dev_info(dev, "PVA RW syncpt iova: %llx, size: %x\n",
pva->rw_syncpt_base_iova, pva->rw_syncpt_region_size);
return 0;
free_syncpts:
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
if (pva->rw_syncpts[i].syncpt_id != 0) {
nvpva_syncpt_put_ref_ext(props->pdev,
pva->rw_syncpts[i].syncpt_id);
pva->rw_syncpts[i].syncpt_id = 0;
}
}
err_out:
return err;
}
void pva_kmd_allocate_syncpts(struct pva_kmd_device *pva)
@@ -127,25 +202,31 @@ void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva)
phys_addr_t base;
size_t size;
uint32_t stride, num_syncpts;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
struct device *dev =
&device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev) == NULL) {
dev_err(dev, "Cannot use syncpt without IOMMU");
return;
}
err = host1x_syncpt_get_shim_info(props->host1x, &base, &stride,
&num_syncpts);
if (err < 0) {
FAULT("Failed to get syncpt shim_info\n");
dev_err(dev, "Failed to get syncpt shim_info when deiniting");
return;
}
size = stride * num_syncpts;
dev = &device_data->smmu_contexts[PVA_R5_SMMU_CONTEXT_ID]->dev;
if (iommu_get_domain_for_dev(dev)) {
dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size,
dma_unmap_resource(dev, pva->ro_syncpt_base_iova, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
dma_unmap_sg_attrs(dev, device_data->syncpt_sg, PVA_NUM_RW_SYNCPTS,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
} else {
FAULT("Failed to unmap syncpts\n");
}
for (uint32_t i = 0; i < PVA_NUM_RW_SYNCPTS; i++) {
nvpva_syncpt_put_ref_ext(props->pdev,
pva->rw_syncpts[i].syncpt_id);

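As a standalone illustration of the scatterlist comment above (one entry per syncpoint page, one dma_map_sg_attrs() call, then a single contiguous IOVA range addressed from entry 0), here is a hedged sketch; demo_map_pages, DEMO_NUM_PAGES and the error handling are assumptions for the sketch, not the driver's real code:

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>

#define DEMO_NUM_PAGES 8

static struct scatterlist demo_sg[DEMO_NUM_PAGES];

static dma_addr_t demo_map_pages(struct device *dev,
                                 const phys_addr_t phys[DEMO_NUM_PAGES],
                                 unsigned int page_len)
{
        int i, nents;

        sg_init_table(demo_sg, DEMO_NUM_PAGES);
        for (i = 0; i < DEMO_NUM_PAGES; i++)
                sg_set_page(&demo_sg[i], phys_to_page(phys[i]), page_len, 0);

        nents = dma_map_sg_attrs(dev, demo_sg, DEMO_NUM_PAGES,
                                 DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
        if (nents <= 0)
                return DMA_MAPPING_ERROR;

        /* With a large enough DMA max segment size, the IOMMU DMA path
         * typically allocates one contiguous IOVA range for the list;
         * entry 0 then provides the base address for all mapped pages. */
        return sg_dma_address(&demo_sg[0]);
}

A contiguous range is what lets firmware reach every PVA-owned syncpoint through a single base IOVA plus a per-syncpoint page offset, which is exactly what the contiguity check over sg_dma_address() above asserts.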

@@ -4,9 +4,11 @@
#ifndef PVA_KMD_LINUX_DEVICE_H
#define PVA_KMD_LINUX_DEVICE_H
#include "pva_kmd_constants.h"
#include <linux/types.h>
#include <linux/platform_device.h>
#include <linux/cdev.h>
#include <linux/scatterlist.h>
#include "pva_api.h"
#include "pva_kmd_device.h"
@@ -26,6 +28,8 @@ struct pva_kmd_linux_device_data {
/* Global states required by a PVA device instance go here */
struct platform_device *smmu_contexts[PVA_MAX_NUM_SMMU_CONTEXTS];
struct pva_kmd_isr_data isr[PVA_KMD_INTR_LINE_COUNT];
struct scatterlist syncpt_sg[PVA_NUM_RW_SYNCPTS];
};
struct pva_kmd_linux_device_data *
@@ -34,7 +38,7 @@ pva_kmd_linux_device_get_data(struct pva_kmd_device *device);
void pva_kmd_linux_device_set_data(struct pva_kmd_device *device,
struct pva_kmd_linux_device_data *data);
void pva_kmd_linux_host1x_init(struct pva_kmd_device *pva);
int pva_kmd_linux_host1x_init(struct pva_kmd_device *pva);
void pva_kmd_linux_host1x_deinit(struct pva_kmd_device *pva);
struct nvpva_device_data *


@@ -402,7 +402,11 @@ static int pva_probe(struct platform_device *pdev)
goto err_cdev_init;
}
pva_kmd_linux_host1x_init(pva_device);
err = pva_kmd_linux_host1x_init(pva_device);
if (err < 0) {
dev_err(dev, "pva_kmd_linux_host1x_init failed\n");
goto err_cdev_init;
}
err = pva_kmd_debugfs_create_nodes(pva_device);
if (err != PVA_SUCCESS) {


@@ -120,6 +120,7 @@ bool pva_kmd_linux_smmu_contexts_initialized(enum pva_chip_id chip_id)
void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
{
uint32_t sid_idx;
struct device *dev;
struct pva_kmd_linux_device_data *device_data =
pva_kmd_linux_device_get_data(pva_device);
@@ -134,14 +135,14 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
uint32_t smmu_ctx_idx = safe_addu32(sid_idx, 1U);
struct pva_kmd_linux_smmu_ctx *smmu_ctx = &g_smmu_ctxs[sid_idx];
dev = &smmu_ctx->pdev->dev;
pva_device->stream_ids[smmu_ctx_idx] = smmu_ctx->sid;
device_data->smmu_contexts[smmu_ctx_idx] = smmu_ctx->pdev;
dma_set_mask_and_coherent(&smmu_ctx->pdev->dev,
DMA_BIT_MASK(39));
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
//set max segment size to UINT_MAX to avoid creating scatterlist entries
//>= 4GB during IOVA mapping, which would overflow the scatterlist length
//field, causing an IOVA leak
dma_set_max_seg_size(&smmu_ctx->pdev->dev, UINT_MAX);
dma_set_max_seg_size(dev, UINT_MAX);
}
/* Configure SMMU contexts for privileged operations */
@@ -153,9 +154,10 @@ void pva_kmd_linux_device_smmu_contexts_init(struct pva_kmd_device *pva_device)
// Question: Is it necessary that priv SID is the last one?
pva_device->stream_ids[0] = g_smmu_ctxs[sid_idx].sid;
device_data->smmu_contexts[0] = g_smmu_ctxs[sid_idx].pdev;
dev = &device_data->smmu_contexts[0]->dev;
dma_set_mask_and_coherent(
&device_data->smmu_contexts[0]->dev,
DMA_BIT_MASK(31)); //only 2GB R5 space is accessible
dev, DMA_BIT_MASK(31)); //only 2GB R5 space is accessible
dma_set_max_seg_size(dev, UINT_MAX);
}
struct platform_driver pva_kmd_linux_smmu_context_driver = {
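
For reference, a hedged sketch of the per-context DMA setup the loop above performs (the helper name and the 39-bit mask are illustrative here; the privileged R5 context in the real code uses a 31-bit mask). Capping the max segment size at UINT_MAX keeps every scatterlist segment below 4 GiB, so the 32-bit length field in struct scatterlist cannot overflow when segments are merged during IOVA mapping.

#include <linux/dma-mapping.h>
#include <linux/limits.h>
#include <linux/platform_device.h>

/* Hypothetical helper: configure DMA limits for one SMMU context device. */
static int demo_configure_smmu_ctx_dma(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        int err;

        /* 39-bit IOVA space for regular contexts. */
        err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
        if (err)
                return err;

        /* Segments capped just under 4 GiB so sg->length (u32) cannot wrap. */
        return dma_set_max_seg_size(dev, UINT_MAX);
}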