PCI: Add T264 PCIe DMA support in PCIe EPF and client driver

Replace the existing T234-specific DMA APIs with the common APIs, and
add chip-specific changes such as the BAR assignments.

Bug 4549851

Change-Id: I021e1e93a4fa4ff1d4429bd9db852e0e707ba879
Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3116906
Reviewed-by: Nagarjuna Kristam <nkristam@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
3 changed files with 591 additions and 245 deletions
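For reference, a minimal sketch of the API migration performed in the diff
below; the signatures are inferred from the call sites in this change and the
helper name is hypothetical:

#include <linux/tegra-pcie-dma.h>	/* replaces <linux/tegra-pcie-edma.h> */

/* Mirrors the init/submit/deinit flow of edmalib_common_test(); error
 * handling trimmed for brevity. */
static int pcie_dma_flow_sketch(struct tegra_pcie_dma_init_info *info,
				struct tegra_pcie_dma_xfer_info *tx,
				void **cookie)
{
	tegra_pcie_dma_status_t ret;

	/* was: *cookie = tegra_pcie_edma_initialize(info); */
	ret = tegra_pcie_dma_initialize(info, cookie);
	if (ret != TEGRA_PCIE_DMA_SUCCESS)
		return -1;

	/* was: ret = tegra_pcie_edma_submit_xfer(*cookie, tx); */
	ret = tegra_pcie_dma_submit_xfer(*cookie, tx);
	if (ret != TEGRA_PCIE_DMA_SUCCESS && ret != TEGRA_PCIE_DMA_FAIL_NOMEM)
		return -1;

	/* was: tegra_pcie_edma_deinit(*cookie); the common API takes &cookie
	 * and clears it. */
	tegra_pcie_dma_deinit(cookie);
	return 0;
}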


@@ -8,52 +8,63 @@
#include <nvidia/conftest.h>
#include <linux/aer.h>
#include <linux/delay.h>
#include <linux/crc32.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pcie_dma.h>
#include <linux/random.h>
#include <linux/types.h>
#include <linux/tegra-pcie-edma-test-common.h>
#include <linux/types.h>
#include <soc/tegra/fuse-helper.h>
#define MODULENAME "pcie_dma_host"
struct ep_pvt {
struct pci_dev *pdev;
void __iomem *bar0_virt;
void __iomem *dma_base;
u32 dma_size;
void *dma_virt;
dma_addr_t dma_phy;
dma_addr_t bar0_phy;
/* Configurable BAR0/BAR2 virt and phy base addresses */
void __iomem *bar_virt;
dma_addr_t bar_phy;
/* DMA register BAR virt and phy base addresses */
void __iomem *dma_virt;
phys_addr_t dma_phy_base;
u32 dma_phy_size;
/* dma_alloc_coherent() using RP pci_dev */
void *rp_dma_virt;
dma_addr_t rp_dma_phy;
/* dma_alloc_coherent() using EP pci_dev */
void *ep_dma_virt;
dma_addr_t ep_dma_phy;
struct dentry *debugfs;
void *cookie;
u32 dma_size;
u32 stress_count;
u32 edma_ch;
u32 prev_edma_ch;
u32 msi_irq;
u64 msi_addr;
u16 msi_data;
phys_addr_t dma_phy_base;
u32 dma_phy_size;
u64 tsz;
ktime_t edma_start_time[DMA_WR_CHNL_NUM];
u32 msi_data;
u32 pmsi_irq;
u64 pmsi_addr;
u32 pmsi_data;
u8 chip_id;
struct edmalib_common edma;
};
static irqreturn_t ep_isr(int irq, void *arg)
{
struct ep_pvt *ep = (struct ep_pvt *)arg;
struct pcie_epf_bar0 *epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt;
struct pcie_epf_bar *epf_bar = (__force struct pcie_epf_bar *)ep->bar_virt;
struct sanity_data *wr_data = &epf_bar->wr_data[0];
epf_bar0->wr_data[0].crc = crc32_le(~0, ep->dma_virt + BAR0_DMA_BUF_OFFSET,
epf_bar0->wr_data[0].size);
wr_data->crc = crc32_le(~0, ep->ep_dma_virt + BAR0_DMA_BUF_OFFSET + wr_data->dst_offset,
wr_data->size);
return IRQ_HANDLED;
}
@@ -61,64 +72,44 @@ static irqreturn_t ep_isr(int irq, void *arg)
static void tegra_pcie_dma_raise_irq(void *p)
{
pr_err("%s: donot support raise IRQ from RP. CRC test if any started may fail.\n",
__func__);
}
static struct device *tegra_pci_dma_get_host_bridge_device(struct pci_dev *dev)
{
struct pci_bus *bus = dev->bus;
struct device *bridge;
while (bus->parent)
bus = bus->parent;
bridge = bus->bridge;
kobject_get(&bridge->kobj);
return bridge;
}
static void tegra_pci_dma_put_host_bridge_device(struct device *dev)
{
kobject_put(&dev->kobj);
__func__);
}
/* debugfs to perform eDMA lib transfers */
static int edmalib_test(struct seq_file *s, void *data)
{
struct ep_pvt *ep = (struct ep_pvt *)dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt;
/* RP uses 128M(used by EP) + 1M(reserved) offset for source and dest data transfers */
dma_addr_t ep_dma_addr = epf_bar0->ep_phy_addr + SZ_128M + SZ_1M;
dma_addr_t bar0_dma_addr = ep->bar0_phy + SZ_128M + SZ_1M;
dma_addr_t rp_dma_addr = ep->dma_phy + SZ_128M + SZ_1M;
struct pcie_epf_bar *epf_bar = (__force struct pcie_epf_bar *)ep->bar_virt;
struct pci_dev *pdev = ep->pdev;
struct device *bridge, *rdev;
struct edmalib_common *edma = &ep->edma;
struct pci_dev *ppdev = pcie_find_root_port(pdev);
ep->edma.src_dma_addr = rp_dma_addr;
ep->edma.src_virt = ep->dma_virt + SZ_128M + SZ_1M;
ep->edma.fdev = &ep->pdev->dev;
ep->edma.epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt;
ep->edma.bar0_phy = ep->bar0_phy;
ep->edma.dma_base = ep->dma_base;
ep->edma.epf_bar = epf_bar;
ep->edma.bar_phy = ep->bar_phy;
ep->edma.dma_virt = ep->dma_virt;
ep->edma.priv = (void *)ep;
ep->edma.raise_irq = tegra_pcie_dma_raise_irq;
/* RP uses "Base + SZ_16M + 1M(reserved)" offset for DMA data transfers */
if (REMOTE_EDMA_TEST_EN) {
ep->edma.dst_dma_addr = ep_dma_addr;
ep->edma.edma_remote.msi_addr = ep->msi_addr;
ep->edma.edma_remote.msi_data = ep->msi_data;
ep->edma.edma_remote.msi_irq = ep->msi_irq;
ep->edma.edma_remote.dma_phy_base = ep->dma_phy_base;
ep->edma.edma_remote.dma_size = ep->dma_phy_size;
ep->edma.edma_remote.dev = &pdev->dev;
ep->edma.src_virt = ep->ep_dma_virt + SZ_16M + SZ_1M;
ep->edma.src_dma_addr = ep->ep_dma_phy + SZ_16M + SZ_1M;
ep->edma.dst_dma_addr = epf_bar->ep_phy_addr + SZ_16M + SZ_1M;
ep->edma.msi_addr = ep->msi_addr;
ep->edma.msi_data = ep->msi_data;
ep->edma.msi_irq = ep->msi_irq;
ep->edma.cdev = &pdev->dev;
ep->edma.remote.dma_phy_base = ep->dma_phy_base;
ep->edma.remote.dma_size = ep->dma_phy_size;
} else {
bridge = tegra_pci_dma_get_host_bridge_device(pdev);
rdev = bridge->parent;
tegra_pci_dma_put_host_bridge_device(bridge);
ep->edma.of_node = rdev->of_node;
ep->edma.dst_dma_addr = bar0_dma_addr;
ep->edma.src_dma_addr = ep->rp_dma_phy + SZ_16M + SZ_1M;
ep->edma.src_virt = ep->rp_dma_virt + SZ_16M + SZ_1M;
ep->edma.dst_dma_addr = ep->bar_phy + SZ_16M + SZ_1M;
ep->edma.msi_addr = ep->pmsi_addr;
ep->edma.msi_data = ep->pmsi_data;
ep->edma.msi_irq = ep->pmsi_irq;
ep->edma.cdev = &ppdev->dev;
}
return edmalib_common_test(&ep->edma);
@@ -144,13 +135,13 @@ static void init_debugfs(struct ep_pvt *ep)
ep->edma.nents = DMA_LL_DEFAULT_SIZE;
}
static int ep_test_dma_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
static int ep_test_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ep_pvt *ep;
struct pcie_epf_bar0 *epf_bar0;
struct pcie_epf_bar *epf_bar;
struct pci_dev *ppdev = pcie_find_root_port(pdev);
int ret = 0;
u32 val, i;
u32 val, i, bar, dma_bar;
u16 val_16;
char *name;
@@ -158,6 +149,12 @@ static int ep_test_dma_probe(struct pci_dev *pdev,
if (!ep)
return -ENOMEM;
ep->chip_id = __tegra_get_chip_id();
if (ep->chip_id == TEGRA234)
ep->edma.chip_id = NVPCIE_DMA_SOC_T234;
else
ep->edma.chip_id = NVPCIE_DMA_SOC_T264;
ep->edma.ll_desc = devm_kzalloc(&pdev->dev, sizeof(*ep->edma.ll_desc) * NUM_EDMA_DESC,
GFP_KERNEL);
if (!ep->edma.ll_desc)
@@ -184,49 +181,65 @@ static int ep_test_dma_probe(struct pci_dev *pdev,
goto fail_region_request;
}
ep->bar0_phy = pci_resource_start(pdev, 0);
ep->bar0_virt = devm_ioremap(&pdev->dev, ep->bar0_phy, pci_resource_len(pdev, 0));
if (!ep->bar0_virt) {
dev_err(&pdev->dev, "Failed to IO remap BAR0\n");
if (ep->chip_id == TEGRA234)
bar = 0;
else
bar = 2;
ep->bar_phy = pci_resource_start(pdev, bar);
ep->bar_virt = devm_ioremap_wc(&pdev->dev, ep->bar_phy, pci_resource_len(pdev, bar));
if (!ep->bar_virt) {
dev_err(&pdev->dev, "Failed to IO remap BAR%d\n", bar);
ret = -ENOMEM;
goto fail_region_remap;
}
ep->dma_base = devm_ioremap(&pdev->dev, pci_resource_start(pdev, 4),
pci_resource_len(pdev, 4));
if (!ep->dma_base) {
dev_err(&pdev->dev, "Failed to IO remap BAR4\n");
if (ep->chip_id == TEGRA234)
dma_bar = 4;
else
dma_bar = 0;
ep->dma_phy_base = pci_resource_start(pdev, dma_bar);
ep->dma_phy_size = pci_resource_len(pdev, dma_bar);
ep->dma_virt = devm_ioremap(&pdev->dev, ep->dma_phy_base, ep->dma_phy_size);
if (!ep->dma_virt) {
dev_err(&pdev->dev, "Failed to IO remap BAR%d\n", dma_bar);
ret = -ENOMEM;
goto fail_region_remap;
}
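/*
 * BAR assignment implied by the two remap blocks above: the data BAR holds
 * struct pcie_epf_bar plus the DMA buffer, while the DMA BAR maps the DMA
 * engine registers.
 *
 *          data BAR    DMA register BAR
 *   T234   BAR0        BAR4
 *   T264   BAR2        BAR0
 */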
ret = pci_alloc_irq_vectors(pdev, 2, 2, PCI_IRQ_MSI);
ret = pci_alloc_irq_vectors(pdev, 16, 16, PCI_IRQ_MSI);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to enable MSI interrupt\n");
ret = -ENODEV;
goto fail_region_remap;
}
ret = request_irq(pci_irq_vector(pdev, 1), ep_isr, IRQF_SHARED,
"pcie_ep_isr", ep);
ret = request_irq(pci_irq_vector(pdev, 1), ep_isr, IRQF_SHARED, "pcie_ep_isr", ep);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to register isr\n");
goto fail_isr;
}
ep->dma_virt = dma_alloc_coherent(&pdev->dev, BAR0_SIZE, &ep->dma_phy,
GFP_KERNEL);
if (!ep->dma_virt) {
ep->rp_dma_virt = dma_alloc_coherent(&ppdev->dev, BAR0_SIZE, &ep->rp_dma_phy, GFP_KERNEL);
if (!ep->rp_dma_virt) {
dev_err(&pdev->dev, "Failed to allocate DMA memory\n");
ret = -ENOMEM;
goto fail_dma_alloc;
goto fail_rp_dma_alloc;
}
get_random_bytes(ep->dma_virt, BAR0_SIZE);
get_random_bytes(ep->rp_dma_virt, BAR0_SIZE);
dev_info(&ppdev->dev, "DMA mem ppdev, IOVA: 0x%llx size: %d\n", ep->rp_dma_phy, BAR0_SIZE);
/* Update RP DMA system memory base address in BAR0 */
epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt;
epf_bar0->rp_phy_addr = ep->dma_phy;
dev_info(&pdev->dev, "DMA mem, IOVA: 0x%llx size: %d\n", ep->dma_phy, BAR0_SIZE);
ep->ep_dma_virt = dma_alloc_coherent(&pdev->dev, BAR0_SIZE, &ep->ep_dma_phy, GFP_KERNEL);
if (!ep->ep_dma_virt) {
dev_err(&pdev->dev, "Failed to allocate DMA memory for EP\n");
ret = -ENOMEM;
goto fail_ep_dma_alloc;
}
get_random_bytes(ep->ep_dma_virt, BAR0_SIZE);
dev_info(&pdev->dev, "DMA mem pdev, IOVA: 0x%llx size: %d\n", ep->ep_dma_phy, BAR0_SIZE);
/* Update RP DMA system memory base address allocated with EP pci_dev in BAR0 */
epf_bar = (__force struct pcie_epf_bar *)ep->bar_virt;
epf_bar->rp_phy_addr = ep->ep_dma_phy;
pci_read_config_word(pdev, pdev->msi_cap + PCI_MSI_FLAGS, &val_16);
if (val_16 & PCI_MSI_FLAGS_64BIT) {
@@ -242,8 +255,22 @@ static int ep_test_dma_probe(struct pci_dev *pdev,
pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, &val);
ep->msi_addr = (ep->msi_addr << 32) | val;
ep->msi_irq = pci_irq_vector(pdev, 0);
ep->dma_phy_base = pci_resource_start(pdev, 4);
ep->dma_phy_size = pci_resource_len(pdev, 4);
pci_read_config_word(ppdev, ppdev->msi_cap + PCI_MSI_FLAGS, &val_16);
if (val_16 & PCI_MSI_FLAGS_64BIT) {
pci_read_config_dword(ppdev, ppdev->msi_cap + PCI_MSI_ADDRESS_HI, &val);
ep->pmsi_addr = val;
pci_read_config_word(ppdev, ppdev->msi_cap + PCI_MSI_DATA_64, &val_16);
ep->pmsi_data = val_16;
} else {
pci_read_config_word(ppdev, ppdev->msi_cap + PCI_MSI_DATA_32, &val_16);
ep->pmsi_data = val_16;
}
pci_read_config_dword(ppdev, ppdev->msi_cap + PCI_MSI_ADDRESS_LO, &val);
ep->pmsi_addr = (ep->pmsi_addr << 32) | val;
ep->pmsi_irq = pci_irq_vector(ppdev, 0);
ep->pmsi_data += 0;
name = devm_kasprintf(&ep->pdev->dev, GFP_KERNEL, "%s_pcie_dma_test", dev_name(&pdev->dev));
if (!name) {
@@ -252,20 +279,19 @@ static int ep_test_dma_probe(struct pci_dev *pdev,
goto fail_name;
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++)
for (i = 0; i < TEGRA_PCIE_DMA_WR_CHNL_NUM; i++)
init_waitqueue_head(&ep->edma.wr_wq[i]);
for (i = 0; i < DMA_RD_CHNL_NUM; i++)
init_waitqueue_head(&ep->edma.rd_wq[i]);
ep->debugfs = debugfs_create_dir(name, NULL);
init_debugfs(ep);
return ret;
fail_name:
dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->dma_virt, ep->dma_phy);
fail_dma_alloc:
dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->ep_dma_virt, ep->ep_dma_phy);
fail_ep_dma_alloc:
dma_free_coherent(&ppdev->dev, BAR0_SIZE, ep->rp_dma_virt, ep->rp_dma_phy);
fail_rp_dma_alloc:
free_irq(pci_irq_vector(pdev, 1), ep);
fail_isr:
pci_free_irq_vectors(pdev);
@@ -280,10 +306,12 @@ fail_region_request:
static void ep_test_dma_remove(struct pci_dev *pdev)
{
struct ep_pvt *ep = pci_get_drvdata(pdev);
struct pci_dev *ppdev = pcie_find_root_port(pdev);
debugfs_remove_recursive(ep->debugfs);
tegra_pcie_edma_deinit(ep->edma.cookie);
dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->dma_virt, ep->dma_phy);
tegra_pcie_dma_deinit(&ep->edma.cookie);
dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->ep_dma_virt, ep->ep_dma_phy);
dma_free_coherent(&ppdev->dev, BAR0_SIZE, ep->rp_dma_virt, ep->rp_dma_phy);
free_irq(pci_irq_vector(pdev, 1), ep);
pci_free_irq_vectors(pdev);
pci_release_regions(pdev);
@@ -293,6 +321,7 @@ static void ep_test_dma_remove(struct pci_dev *pdev)
static const struct pci_device_id ep_pci_tbl[] = {
{ PCI_DEVICE(0x10DE, 0x1AD4)},
{ PCI_DEVICE(0x10DE, 0x1AD5)},
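/* 0x229a: presumably the T264 endpoint function ID added by this change */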
{ PCI_DEVICE(0x10DE, 0x229a)},
{},
};


@@ -14,10 +14,10 @@
#include <linux/of_platform.h>
#include <linux/pci-epc.h>
#include <linux/pci-epf.h>
#include <linux/pcie_dma.h>
#include <linux/platform_device.h>
#include <linux/kthread.h>
#include <linux/tegra-pcie-edma-test-common.h>
#include <linux/version.h>
#include <soc/tegra/fuse-helper.h>
#include "pci-epf-wrapper.h"
static struct pcie_epf_dma *gepfnv;
@@ -28,43 +28,21 @@ struct pcie_epf_dma {
struct pci_epc *epc;
struct device *fdev;
struct device *cdev;
void *bar0_virt;
void *bar_virt;
struct dentry *debugfs;
void __iomem *dma_base;
void __iomem *dma_virt;
int irq;
u8 chip_id;
u32 dma_size;
u32 stress_count;
u32 async_count;
struct task_struct *wr0_task;
struct task_struct *wr1_task;
struct task_struct *wr2_task;
struct task_struct *wr3_task;
struct task_struct *rd0_task;
struct task_struct *rd1_task;
u8 task_done;
wait_queue_head_t task_wq;
void *cookie;
wait_queue_head_t wr_wq[DMA_WR_CHNL_NUM];
wait_queue_head_t rd_wq[DMA_RD_CHNL_NUM];
unsigned long wr_busy;
unsigned long rd_busy;
ktime_t wr_start_time[DMA_WR_CHNL_NUM];
ktime_t wr_end_time[DMA_WR_CHNL_NUM];
ktime_t rd_start_time[DMA_RD_CHNL_NUM];
ktime_t rd_end_time[DMA_RD_CHNL_NUM];
u32 wr_cnt[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM];
u32 rd_cnt[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM];
bool pcs[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM];
bool async_dma;
ktime_t edma_start_time[DMA_WR_CHNL_NUM];
u64 tsz;
u32 edma_ch;
u32 prev_edma_ch;
u32 nents;
struct tegra_pcie_edma_desc *ll_desc;
struct edmalib_common edma;
};
@@ -82,27 +60,54 @@ static void edma_lib_test_raise_irq(void *p)
/* debugfs to perform eDMA lib transfers and do CRC check */
static int edmalib_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)
dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)dev_get_drvdata(s->private);
struct pcie_epf_bar *epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt;
if (!epf_bar0->rp_phy_addr) {
if (!epf_bar->rp_phy_addr) {
dev_err(epfnv->fdev, "RP DMA address is null\n");
return -1;
}
epfnv->edma.src_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dst_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.src_dma_addr = epf_bar->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dst_dma_addr = epf_bar->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.fdev = epfnv->fdev;
epfnv->edma.epf_bar0 = (struct pcie_epf_bar0 *)epfnv->bar0_virt;
epfnv->edma.src_virt = epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dma_base = epfnv->dma_base;
epfnv->edma.cdev = epfnv->cdev;
epfnv->edma.epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt;
epfnv->edma.src_virt = epfnv->bar_virt + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dma_virt = epfnv->dma_virt;
epfnv->edma.dma_size = epfnv->dma_size;
epfnv->edma.stress_count = epfnv->stress_count;
epfnv->edma.edma_ch = epfnv->edma_ch;
epfnv->edma.nents = epfnv->nents;
epfnv->edma.priv = (void *)epfnv;
epfnv->edma.raise_irq = edma_lib_test_raise_irq;
return edmalib_common_test(&epfnv->edma);
}
/* debugfs to perform eDMA lib read transfers and do CRC check */
static int edmalib_read_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)dev_get_drvdata(s->private);
struct pcie_epf_bar *epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt;
if (!epf_bar->rp_phy_addr) {
dev_err(epfnv->fdev, "RP DMA address is null\n");
return -1;
}
epfnv->edma.src_dma_addr = epf_bar->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dst_dma_addr = epf_bar->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.fdev = epfnv->fdev;
epfnv->edma.cdev = epfnv->cdev;
epfnv->edma.epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt;
epfnv->edma.src_virt = epfnv->bar_virt + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dma_virt = epfnv->dma_virt;
epfnv->edma.dma_size = epfnv->dma_size;
epfnv->edma.stress_count = epfnv->stress_count;
epfnv->edma.edma_ch = epfnv->edma_ch;
epfnv->edma.nents = epfnv->nents;
epfnv->edma.of_node = epfnv->cdev->of_node;
epfnv->edma.priv = (void *)epfnv;
epfnv->edma.raise_irq = edma_lib_test_raise_irq;
@@ -111,8 +116,9 @@ static int edmalib_test(struct seq_file *s, void *data)
static void init_debugfs(struct pcie_epf_dma *epfnv)
{
debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_test", epfnv->debugfs,
edmalib_test);
debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_test", epfnv->debugfs, edmalib_test);
debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_read_test", epfnv->debugfs,
edmalib_read_test);
debugfs_create_u32("dma_size", 0644, epfnv->debugfs, &epfnv->dma_size);
epfnv->dma_size = SZ_1M;
@@ -132,16 +138,17 @@ static void init_debugfs(struct pcie_epf_dma *epfnv)
/* Set DMA_LL_DEFAULT_SIZE as default nents, Max NUM_EDMA_DESC */
epfnv->nents = DMA_LL_DEFAULT_SIZE;
debugfs_create_u32("stress_count", 0644, epfnv->debugfs,
&epfnv->stress_count);
debugfs_create_u32("stress_count", 0644, epfnv->debugfs, &epfnv->stress_count);
epfnv->stress_count = DEFAULT_STRESS_COUNT;
}
static int pcie_dma_epf_core_init(struct pci_epf *epf)
{
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
struct pci_epc *epc = epf->epc;
struct device *fdev = &epf->dev;
struct pci_epf_bar *epf_bar;
enum pci_barno bar;
int ret;
ret = lpci_epc_write_header(epc, epf->func_no, epf->header);
@@ -150,15 +157,24 @@ static int pcie_dma_epf_core_init(struct pci_epf *epf)
return ret;
}
epf_bar = &epf->bar[BAR_0];
if (epfnv->chip_id == TEGRA234)
bar = BAR_0;
else
bar = BAR_1;
epf_bar = &epf->bar[bar];
ret = lpci_epc_set_bar(epc, epf->func_no, epf_bar);
if (ret < 0) {
dev_err(fdev, "PCIe set BAR0 failed: %d\n", ret);
return ret;
}
dev_info(fdev, "BAR0 phy_addr: %llx size: %lx\n",
epf_bar->phys_addr, epf_bar->size);
dev_info(fdev, "BAR0 phy_addr: %llx size: %lx\n", epf_bar->phys_addr, epf_bar->size);
if (epf->msi_interrupts == 0) {
dev_err(fdev, "pci_epc_set_msi() failed: %d\n", epf->msi_interrupts);
epf->msi_interrupts = 16;
}
ret = lpci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts);
if (ret) {
@@ -166,6 +182,22 @@ static int pcie_dma_epf_core_init(struct pci_epf *epf)
return ret;
}
if (epfnv->chip_id == TEGRA234)
return 0;
/* Expose MSI address as BAR2 to allow RP to send MSI to EP. */
epf_bar = &epf->bar[BAR_2];
epf_bar->phys_addr = gepfnv->edma.msi_addr & ~(SZ_32M - 1);
epf_bar->addr = NULL;
epf_bar->size = SZ_32M;
epf_bar->barno = BAR_2;
epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64;
ret = lpci_epc_set_bar(epc, epf->func_no, epf_bar);
if (ret < 0) {
dev_err(fdev, "PCIe set BAR2 failed: %d\n", ret);
return ret;
}
return 0;
}
@@ -174,14 +206,22 @@ static int pcie_dma_epf_core_deinit(struct pci_epf *epf)
{
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
void *cookie = epfnv->edma.cookie;
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) epfnv->bar0_virt;
struct pcie_epf_bar *epf_bar_virt = (struct pcie_epf_bar *)epfnv->bar_virt;
struct pci_epc *epc = epf->epc;
struct pci_epf_bar *epf_bar = &epf->bar[BAR_0];
struct pci_epf_bar *epf_bar;
enum pci_barno bar;
if (epfnv->chip_id == TEGRA234)
bar = BAR_0;
else
bar = BAR_1;
epf_bar = &epf->bar[bar];
epfnv->edma.cookie = NULL;
epf_bar0->rp_phy_addr = 0;
tegra_pcie_edma_deinit(cookie);
epf_bar_virt->rp_phy_addr = 0;
tegra_pcie_dma_deinit(&cookie);
lpci_epc_clear_bar(epc, epf->func_no, epf_bar);
if (epfnv->chip_id == TEGRA264)
lpci_epc_clear_bar(epc, epf->func_no, &epf->bar[BAR_2]);
return 0;
}
@@ -192,16 +232,54 @@ static void pcie_dma_epf_unbind(struct pci_epf *epf)
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
struct pci_epc *epc = epf->epc;
void *cookie = epfnv->edma.cookie;
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) epfnv->bar0_virt;
struct pcie_epf_bar *epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt;
struct device *cdev = epc->dev.parent;
struct platform_device *pdev = of_find_device_by_node(cdev->of_node);
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
struct msi_desc *desc;
#endif
enum pci_barno bar;
u32 irq;
debugfs_remove_recursive(epfnv->debugfs);
epfnv->edma.cookie = NULL;
epf_bar0->rp_phy_addr = 0;
tegra_pcie_edma_deinit(cookie);
epf_bar->rp_phy_addr = 0;
tegra_pcie_dma_deinit(&cookie);
if (epfnv->chip_id == TEGRA264) {
platform_msi_domain_free_irqs(&pdev->dev);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0)
irq = msi_get_virq(&pdev->dev, 0);
#else
for_each_msi_entry(desc, cdev) {
if (desc->platform.msi_index == 0)
irq = desc->irq;
}
#endif
free_irq(irq, epfnv);
}
pci_epc_stop(epc);
lpci_epf_free_space(epf, epfnv->bar0_virt, BAR_0);
if (epfnv->chip_id == TEGRA234)
bar = BAR_0;
else
bar = BAR_1;
lpci_epf_free_space(epf, epfnv->bar_virt, bar);
}
static void pcie_dma_epf_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
if (gepfnv->edma.msi_addr == 0) {
gepfnv->edma.msi_addr = msg->address_hi;
gepfnv->edma.msi_addr <<= 32;
gepfnv->edma.msi_addr |= msg->address_lo;
gepfnv->edma.msi_data = msg->data + 1;
}
}
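/*
 * Note (inferred from the code, not authoritative): only the first MSI
 * message is captured (msi_addr == 0 guard), and msi_data is stored as
 * msg->data + 1, i.e. the data value of vector 1, matching
 * edma.msi_irq = msi_get_virq(&pdev->dev, 1) in pcie_dma_epf_bind();
 * vector 0 is serviced by pcie_dma_epf_irq(). The 32 MB-aligned msi_addr
 * is what pcie_dma_epf_core_init() later exposes through BAR2.
 */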
static irqreturn_t pcie_dma_epf_irq(int irq, void *arg)
{
return IRQ_HANDLED;
}
static int pcie_dma_epf_bind(struct pci_epf *epf)
@@ -212,11 +290,26 @@ static int pcie_dma_epf_bind(struct pci_epf *epf)
struct device *fdev = &epf->dev;
struct device *cdev = epc->dev.parent;
struct platform_device *pdev = of_find_device_by_node(cdev->of_node);
struct pci_epf_bar *epf_bar = &epf->bar[BAR_0];
struct pcie_epf_bar0 *epf_bar0;
struct pcie_epf_bar *epf_bar_virt;
struct pci_epf_bar *epf_bar;
struct irq_domain *domain;
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
struct msi_desc *desc;
#endif
enum pci_barno bar;
char *name;
int ret, i;
u32 irq;
epfnv->chip_id = __tegra_get_chip_id();
if (epfnv->chip_id == TEGRA234) {
bar = BAR_0;
epfnv->edma.chip_id = NVPCIE_DMA_SOC_T234;
} else {
bar = BAR_1;
epfnv->edma.chip_id = NVPCIE_DMA_SOC_T264;
}
epf_bar = &epf->bar[bar];
epfnv->fdev = fdev;
epfnv->cdev = cdev;
epfnv->epf = epf;
@@ -229,25 +322,22 @@ static int pcie_dma_epf_bind(struct pci_epf *epf)
}
#if defined(NV_PCI_EPF_ALLOC_SPACE_HAS_EPC_FEATURES_ARG) /* Linux v6.9 */
epfnv->bar0_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0,
epc_features);
epfnv->bar_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0, epc_features);
#else
epfnv->bar0_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0,
epc_features->align);
epfnv->bar_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0, epc_features->align);
#endif
if (!epfnv->bar0_virt) {
if (!epfnv->bar_virt) {
dev_err(fdev, "Failed to allocate memory for BAR0\n");
return -ENOMEM;
}
get_random_bytes(epfnv->bar0_virt, BAR0_SIZE);
memset(epfnv->bar0_virt, 0, BAR0_HEADER_SIZE);
get_random_bytes(epfnv->bar_virt, BAR0_SIZE);
memset(epfnv->bar_virt, 0, BAR0_HEADER_SIZE);
/* Update BAR header with EP DMA PHY addr */
epf_bar0 = (struct pcie_epf_bar0 *)epfnv->bar0_virt;
epf_bar0->ep_phy_addr = epf_bar->phys_addr;
epf_bar_virt = (struct pcie_epf_bar *)epfnv->bar_virt;
epf_bar_virt->ep_phy_addr = epf_bar->phys_addr;
/* Set BAR0 mem type as 64-bit */
epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_MEM_PREFETCH;
epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH;
name = devm_kasprintf(fdev, GFP_KERNEL, "%s_epf_dma_test", pdev->name);
if (!name) {
@@ -255,21 +345,73 @@ static int pcie_dma_epf_bind(struct pci_epf *epf)
goto fail_atu_dma;
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
for (i = 0; i < TEGRA_PCIE_DMA_WR_CHNL_NUM; i++)
init_waitqueue_head(&epfnv->edma.wr_wq[i]);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
init_waitqueue_head(&epfnv->edma.rd_wq[i]);
if (epfnv->chip_id == TEGRA264) {
domain = dev_get_msi_domain(&pdev->dev);
if (!domain) {
dev_err(fdev, "failed to get MSI domain\n");
ret = -ENOMEM;
goto fail_kasnprintf;
}
ret = platform_msi_domain_alloc_irqs(&pdev->dev, 2, pcie_dma_epf_write_msi_msg);
if (ret < 0) {
dev_err(fdev, "failed to allocate MSIs: %d\n", ret);
goto fail_kasnprintf;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0)
epfnv->edma.msi_irq = msi_get_virq(&pdev->dev, 1);
irq = msi_get_virq(&pdev->dev, 0);
#else
for_each_msi_entry(desc, cdev) {
if (desc->platform.msi_index == 0)
irq = desc->irq;
else if (desc->platform.msi_index == 1)
epfnv->edma.msi_irq = desc->irq;
}
#endif
ret = request_irq(irq, pcie_dma_epf_irq, IRQF_SHARED, "pcie_dma_epf_isr", epfnv);
if (ret < 0) {
dev_err(fdev, "failed to request irq: %d\n", ret);
goto fail_msi_alloc;
}
}
epfnv->debugfs = debugfs_create_dir(name, NULL);
init_debugfs(epfnv);
epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no);
if (!epc_features) {
dev_err(fdev, "epc_features not implemented\n");
ret = -EOPNOTSUPP;
goto fail_get_features;
}
if (!epc_features->core_init_notifier) {
ret = pcie_dma_epf_core_init(epf);
if (ret) {
dev_err(fdev, "EPF core init failed with err: %d\n", ret);
goto fail_core_init;
}
}
return 0;
fail_core_init:
fail_get_features:
debugfs_remove_recursive(epfnv->debugfs);
if (epfnv->chip_id == TEGRA264)
free_irq(irq, epfnv);
fail_msi_alloc:
if (epfnv->chip_id == TEGRA264)
platform_msi_domain_free_irqs(&pdev->dev);
fail_kasnprintf:
devm_kfree(fdev, name);
fail_atu_dma:
lpci_epf_free_space(epf, epfnv->bar0_virt, BAR_0);
lpci_epf_free_space(epf, epfnv->bar_virt, bar);
return ret;
}
@@ -301,10 +443,10 @@ static int pcie_dma_epf_probe(struct pci_epf *epf)
if (!epfnv)
return -ENOMEM;
epfnv->edma.ll_desc = devm_kzalloc(dev, sizeof(*epfnv->ll_desc) * NUM_EDMA_DESC,
epfnv->edma.ll_desc = devm_kzalloc(dev, sizeof(*epfnv->edma.ll_desc) * NUM_EDMA_DESC,
GFP_KERNEL);
gepfnv = epfnv;
epf_set_drvdata(epf, epfnv);
gepfnv = epfnv;
epf->event_ops = &pci_epf_dma_test_event_ops;


@@ -9,62 +9,123 @@
#define TEGRA_PCIE_EDMA_TEST_COMMON_H
#include <linux/pci-epf.h>
#include <linux/pcie_dma.h>
#include <linux/tegra-pcie-edma.h>
#include <linux/tegra-pcie-dma.h>
#define DMA_WRITE_DOORBELL_OFF 0x10
#define DMA_WRITE_DOORBELL_OFF_WR_STOP BIT(31)
#define DMA_READ_DOORBELL_OFF 0x30
static inline void dma_common_wr(void __iomem *p, u32 val, u32 offset)
{
writel(val, offset + p);
}
#define REMOTE_EDMA_TEST_EN (edma->edma_ch & 0x80000000)
#define EDMA_ABORT_TEST_EN (edma->edma_ch & 0x40000000)
#define EDMA_STOP_TEST_EN (edma->edma_ch & 0x20000000)
#define EDMA_CRC_TEST_EN (edma->edma_ch & 0x10000000)
#define EDMA_READ_TEST_EN (edma->edma_ch & 0x08000000)
#define EDMA_SANITY_TEST_EN (edma->edma_ch & 0x04000000)
#define EDMA_UNALIGN_SRC_TEST_EN (edma->edma_ch & 0x02000000)
#define EDMA_UNALIGN_DST_TEST_EN (edma->edma_ch & 0x01000000)
#define EDMA_UNALIGN_SRC_DST_TEST_EN (edma->edma_ch & 0x00800000)
#define IS_EDMA_CH_ENABLED(i) (edma->edma_ch & ((BIT(i) << 4)))
#define IS_EDMA_CH_ASYNC(i) (edma->edma_ch & BIT(i))
#define REMOTE_EDMA_TEST_EN (edma->edma_ch & 0x80000000)
#define EDMA_PERF (edma->tsz / (diff / 1000))
#define EDMA_CPERF ((edma->tsz * (edma->nents / edma->nents_per_ch)) / (diff / 1000))
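/*
 * Worked example of the edma_ch encoding above (an assumption drawn only
 * from these macros): edma_ch = 0x100000F5 selects the CRC test (bit 28),
 * enables channels 0-3 (bits 4-7 = 0xF), and runs channels 0 and 2 async
 * (bits 0-3 = 0x5); the other enabled channels run sync.
 */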
#define NUM_EDMA_DESC 4096
#define TEGRA234_PCIE_DMA_RD_CHNL_NUM 2
#define EDMA_PRIV_CH_OFF 32
#define EDMA_PRIV_LR_OFF (EDMA_PRIV_CH_OFF + 2)
#define EDMA_PRIV_XF_OFF (EDMA_PRIV_LR_OFF + 1)
/* Update DMA_DD_BUF_SIZE and DMA_LL_BUF_SIZE when changing BAR0_SIZE */
#define BAR0_SIZE SZ_256M
/* Header includes RP/EP DMA addresses, EP MSI, LL, etc. */
#define BAR0_HEADER_OFFSET 0x0
#define BAR0_HEADER_SIZE SZ_1M
#define DMA_LL_DEFAULT_SIZE 8
#define BAR0_MSI_OFFSET SZ_64K
/* DMA'able memory range */
#define BAR0_DMA_BUF_OFFSET SZ_1M
#define BAR0_DMA_BUF_SIZE (BAR0_SIZE - SZ_1M)
#define DEFAULT_STRESS_COUNT 10
#define MAX_DMA_ELE_SIZE SZ_16M
/* DMA base offset starts at 0x20000 from ATU_DMA base */
#define DMA_OFFSET 0x20000
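/*
 * Resulting data-BAR layout, assuming the offsets above plus the remote-test
 * descriptor placement in edmalib_common_test():
 *
 *   0x0000000  BAR0_HEADER_OFFSET   struct pcie_epf_bar control header
 *   0x0010000  BAR0_MSI_OFFSET      MSI region
 *   0x0020000  SZ_128K..SZ_512K     remote-mode rx[0..3] descriptor rings
 *   0x0100000  BAR0_DMA_BUF_OFFSET  DMA buffer (BAR0_SIZE - SZ_1M)
 */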
struct sanity_data {
u32 size;
u32 src_offset;
u32 dst_offset;
u32 crc;
};
/* First 1MB of BAR0 is reserved for control data */
struct pcie_epf_bar {
/* RP system memory allocated for EP DMA operations */
u64 rp_phy_addr;
/* EP system memory allocated as BAR */
u64 ep_phy_addr;
/* MSI data for RP -> EP interrupts */
u32 msi_data[TEGRA_PCIE_DMA_WR_CHNL_NUM + TEGRA_PCIE_DMA_RD_CHNL_NUM];
struct sanity_data wr_data[TEGRA_PCIE_DMA_WR_CHNL_NUM];
struct sanity_data rd_data[TEGRA_PCIE_DMA_RD_CHNL_NUM];
};
struct edmalib_common {
struct device *fdev;
struct device *cdev;
void (*raise_irq)(void *p);
void *priv;
struct pcie_epf_bar0 *epf_bar0;
struct pcie_epf_bar *epf_bar;
void *src_virt;
void __iomem *dma_base;
void __iomem *dma_virt;
u32 dma_size;
dma_addr_t src_dma_addr;
dma_addr_t dst_dma_addr;
dma_addr_t bar0_phy;
dma_addr_t bar_phy;
u32 stress_count;
void *cookie;
struct device_node *of_node;
wait_queue_head_t wr_wq[DMA_WR_CHNL_NUM];
wait_queue_head_t rd_wq[DMA_RD_CHNL_NUM];
wait_queue_head_t wr_wq[TEGRA_PCIE_DMA_WR_CHNL_NUM];
unsigned long wr_busy;
unsigned long rd_busy;
ktime_t edma_start_time[DMA_WR_CHNL_NUM];
ktime_t edma_start_time[TEGRA_PCIE_DMA_WR_CHNL_NUM];
u64 tsz;
u32 edma_ch;
u32 prev_edma_ch;
u32 nents;
struct tegra_pcie_edma_desc *ll_desc;
u64 priv_iter[DMA_WR_CHNL_NUM];
struct pcie_tegra_edma_remote_info edma_remote;
struct tegra_pcie_dma_desc *ll_desc;
u64 priv_iter[TEGRA_PCIE_DMA_WR_CHNL_NUM];
struct tegra_pcie_dma_remote_info remote;
u32 nents_per_ch;
u32 st_as_ch;
u32 ls_as_ch;
u64 msi_addr;
u32 msi_data;
u32 msi_irq;
nvpcie_dma_soc_t chip_id;
};
static struct edmalib_common *l_edma;
static void edma_final_complete(void *priv, edma_xfer_status_t status,
struct tegra_pcie_edma_desc *desc)
static void edma_final_complete(void *priv, tegra_pcie_dma_status_t status)
{
struct edmalib_common *edma = l_edma;
u64 cb = *(u64 *)priv;
u32 ch = (cb >> EDMA_PRIV_CH_OFF) & 0x3;
edma_xfer_type_t xfer_type = (cb >> EDMA_PRIV_XF_OFF) & 0x1;
tegra_pcie_dma_xfer_type_t xfer_type = (cb >> EDMA_PRIV_XF_OFF) & 0x1;
char *xfer_str[2] = {"WR", "RD"};
u32 l_r = (cb >> EDMA_PRIV_LR_OFF) & 0x1;
char *l_r_str[2] = {"local", "remote"};
@@ -72,9 +133,12 @@ static void edma_final_complete(void *priv, edma_xfer_status_t status,
u64 cdiff = ktime_to_ns(ktime_get()) - ktime_to_ns(edma->edma_start_time[edma->st_as_ch]);
cb = cb & 0xFFFFFFFF;
if (EDMA_ABORT_TEST_EN && status == EDMA_XFER_SUCCESS)
dma_common_wr(edma->dma_base, DMA_WRITE_DOORBELL_OFF_WR_STOP | (ch + 1),
DMA_WRITE_DOORBELL_OFF);
/* TODO support abort test case for T264 */
if (edma->chip_id == NVPCIE_DMA_SOC_T234) {
if (EDMA_ABORT_TEST_EN && status == TEGRA_PCIE_DMA_SUCCESS)
dma_common_wr(edma->dma_virt, DMA_WRITE_DOORBELL_OFF_WR_STOP | (ch + 1),
DMA_WRITE_DOORBELL_OFF);
}
dev_info(edma->fdev, "%s: %s-%s-Async complete for chan %d with status %d. Total desc %llu of Sz %d Bytes done in time %llu nsec. Perf is %llu Mbps\n",
__func__, xfer_str[xfer_type], l_r_str[l_r], ch, status, edma->nents_per_ch*(cb+1),
@@ -85,7 +149,7 @@ static void edma_final_complete(void *priv, edma_xfer_status_t status,
__func__, EDMA_CPERF, cdiff);
}
static void edma_complete(void *priv, edma_xfer_status_t status, struct tegra_pcie_edma_desc *desc)
static void edma_complete(void *priv, tegra_pcie_dma_status_t status)
{
struct edmalib_common *edma = l_edma;
u64 cb = *(u64 *)priv;
@@ -103,24 +167,26 @@ static void edma_complete(void *priv, edma_xfer_status_t status, struct tegra_pc
/* debugfs to perform eDMA lib transfers and do CRC check */
static int edmalib_common_test(struct edmalib_common *edma)
{
struct tegra_pcie_edma_desc *ll_desc = edma->ll_desc;
struct tegra_pcie_dma_desc *ll_desc = edma->ll_desc;
dma_addr_t src_dma_addr = edma->src_dma_addr;
dma_addr_t dst_dma_addr = edma->dst_dma_addr;
u32 nents = edma->nents, num_chans = 0, nents_per_ch = 0, nent_id = 0, chan_count;
u32 i, j, k, max_size, db_off, num_descriptors;
edma_xfer_status_t ret;
struct tegra_pcie_edma_init_info info = {};
struct tegra_pcie_edma_chans_info *chan_info;
struct tegra_pcie_edma_xfer_info tx_info = {};
u32 i, j, k, max_size, num_descriptors;
u32 db_off;
tegra_pcie_dma_status_t ret;
struct tegra_pcie_dma_init_info info = {};
struct tegra_pcie_dma_chans_info *chan_info;
struct tegra_pcie_dma_xfer_info tx_info = {};
u64 diff;
edma_xfer_type_t xfer_type;
tegra_pcie_dma_xfer_type_t xfer_type;
char *xfer_str[2] = {"WR", "RD"};
u32 l_r;
char *l_r_str[2] = {"local", "remote"};
struct pcie_epf_bar0 *epf_bar0 = edma->epf_bar0;
struct pcie_epf_bar *epf_bar = edma->epf_bar;
u32 crc;
if (!edma->stress_count) {
tegra_pcie_edma_deinit(edma->cookie);
tegra_pcie_dma_deinit(&edma->cookie);
edma->cookie = NULL;
return 0;
}
@@ -133,52 +199,133 @@ static int edmalib_common_test(struct edmalib_common *edma)
edma->edma_ch |= 0xF5;
}
/* FIXME This is causing crash for remote dma when BAR MMIO virt address is used. */
#if 0
epf_bar->wr_data[0].src_offset = 0;
epf_bar->wr_data[0].dst_offset = 0;
#endif
if (EDMA_CRC_TEST_EN) {
/* 4 channels in sync mode */
edma->edma_ch = (EDMA_CRC_TEST_EN | 0xF0);
edma->edma_ch = (0x10000000 | 0xF0);
/* Single SZ_4K packet on each channel, so total SZ_16K of data */
edma->stress_count = 1;
edma->dma_size = SZ_4K;
edma->nents = nents = 4;
epf_bar0->wr_data[0].size = edma->dma_size * edma->nents;
epf_bar->wr_data[0].size = edma->dma_size * edma->nents;
}
if (EDMA_UNALIGN_SRC_TEST_EN) {
/* 4 channels in sync mode */
edma->edma_ch = (0x02000000 | 0x10000000 | 0x10);
/* Single SZ_4K packet on each channel, so total SZ_16K of data */
edma->stress_count = 1;
edma->dma_size = SZ_4K;
edma->nents = nents = 4;
epf_bar->wr_data[0].size = edma->dma_size * edma->nents;
src_dma_addr += 11;
epf_bar->wr_data[0].src_offset = 11;
}
if (EDMA_UNALIGN_DST_TEST_EN) {
/* 4 channels in sync mode */
edma->edma_ch = (0x01000000 | 0x10000000 | 0x10);
/* Single SZ_4K packet on each channel, so total SZ_16K of data */
edma->stress_count = 1;
edma->dma_size = SZ_4K;
edma->nents = nents = 4;
epf_bar->wr_data[0].size = edma->dma_size * edma->nents;
dst_dma_addr += 7;
epf_bar->wr_data[0].dst_offset = 7;
}
if (EDMA_UNALIGN_SRC_DST_TEST_EN) {
/* 4 channels in sync mode */
edma->edma_ch = (0x00800000 | 0x10000000 | 0x10);
/* Single SZ_4K packet on each channel, so total SZ_16K of data */
edma->stress_count = 1;
edma->dma_size = SZ_4K;
edma->nents = nents = 4;
epf_bar->wr_data[0].size = edma->dma_size * edma->nents;
src_dma_addr += 7;
dst_dma_addr += 13;
epf_bar->wr_data[0].src_offset = 7;
epf_bar->wr_data[0].dst_offset = 13;
}
if (EDMA_SANITY_TEST_EN) {
edma->dma_size = SZ_1K;
edma->nents = nents = 128;
edma->stress_count = 2;
}
if (edma->cookie && edma->prev_edma_ch != edma->edma_ch) {
edma->st_as_ch = -1;
dev_info(edma->fdev, "edma_ch changed from 0x%x != 0x%x, deinit\n",
edma->prev_edma_ch, edma->edma_ch);
tegra_pcie_edma_deinit(edma->cookie);
edma->prev_edma_ch, edma->edma_ch);
tegra_pcie_dma_deinit(&edma->cookie);
edma->cookie = NULL;
}
info.np = edma->of_node;
info.dev = edma->cdev;
info.soc = edma->chip_id;
if (REMOTE_EDMA_TEST_EN) {
num_descriptors = 1024;
info.rx[0].desc_phy_base = edma->bar0_phy + SZ_512K;
info.rx[0].desc_iova = 0xf0000000 + SZ_512K;
info.rx[1].desc_phy_base = edma->bar0_phy + SZ_512K + SZ_256K;
info.rx[1].desc_iova = 0xf0000000 + SZ_512K + SZ_256K;
info.edma_remote = &edma->edma_remote;
chan_count = DMA_RD_CHNL_NUM;
info.rx[0].desc_phy_base = edma->bar_phy + SZ_128K;
info.rx[0].desc_iova = epf_bar->ep_phy_addr + SZ_128K;
info.rx[1].desc_phy_base = edma->bar_phy + SZ_256K;
info.rx[1].desc_iova = epf_bar->ep_phy_addr + SZ_256K;
info.rx[2].desc_phy_base = edma->bar_phy + SZ_256K + SZ_128K;
info.rx[2].desc_iova = epf_bar->ep_phy_addr + SZ_256K + SZ_128K;
info.rx[3].desc_phy_base = edma->bar_phy + SZ_512K;
info.rx[3].desc_iova = epf_bar->ep_phy_addr + SZ_512K;
info.remote = &edma->remote;
info.msi_irq = edma->msi_irq;
info.msi_data = edma->msi_data;
info.msi_addr = edma->msi_addr;
if (edma->chip_id == NVPCIE_DMA_SOC_T234)
chan_count = TEGRA234_PCIE_DMA_RD_CHNL_NUM;
else
chan_count = TEGRA_PCIE_DMA_RD_CHNL_NUM;
chan_info = &info.rx[0];
xfer_type = EDMA_XFER_READ;
db_off = DMA_WRITE_DOORBELL_OFF;
xfer_type = TEGRA_PCIE_DMA_READ;
/* TODO support abort test case for T264 */
if (edma->chip_id == NVPCIE_DMA_SOC_T234)
db_off = DMA_WRITE_DOORBELL_OFF;
l_r = 1;
} else {
chan_count = DMA_WR_CHNL_NUM;
chan_count = TEGRA_PCIE_DMA_WR_CHNL_NUM;
num_descriptors = 4096;
chan_info = &info.tx[0];
xfer_type = EDMA_XFER_WRITE;
db_off = DMA_READ_DOORBELL_OFF;
xfer_type = TEGRA_PCIE_DMA_WRITE;
/* TODO support abort test case for T264 */
if (edma->chip_id == NVPCIE_DMA_SOC_T234)
db_off = DMA_READ_DOORBELL_OFF;
l_r = 0;
info.msi_irq = edma->msi_irq;
info.msi_data = edma->msi_data;
info.msi_addr = edma->msi_addr;
}
if (EDMA_READ_TEST_EN) {
if (edma->chip_id == NVPCIE_DMA_SOC_T234)
chan_count = TEGRA234_PCIE_DMA_RD_CHNL_NUM;
else
chan_count = TEGRA_PCIE_DMA_RD_CHNL_NUM;
num_descriptors = 4096;
chan_info = &info.rx[0];
xfer_type = TEGRA_PCIE_DMA_READ;
/* TODO support abort test case for T264 */
if (edma->chip_id == NVPCIE_DMA_SOC_T234)
db_off = DMA_READ_DOORBELL_OFF;
l_r = 1;
}
for (i = 0; i < chan_count; i++) {
struct tegra_pcie_edma_chans_info *ch = chan_info + i;
struct tegra_pcie_dma_chans_info *ch = chan_info + i;
ch->ch_type = IS_EDMA_CH_ASYNC(i) ? EDMA_CHAN_XFER_ASYNC :
EDMA_CHAN_XFER_SYNC;
ch->ch_type = IS_EDMA_CH_ASYNC(i) ? TEGRA_PCIE_DMA_CHAN_XFER_ASYNC :
TEGRA_PCIE_DMA_CHAN_XFER_SYNC;
if (IS_EDMA_CH_ENABLED(i)) {
if (edma->st_as_ch == -1)
edma->st_as_ch = i;
@@ -200,15 +347,19 @@ static int edmalib_common_test(struct edmalib_common *edma)
nents_per_ch = nents / num_chans;
if (nents_per_ch == 0) {
dev_err(edma->fdev, "%s: nents(%d) < enabled chanes(%d)\n",
dev_err(edma->fdev, "%s: nents(%d) < enabled channels(%d)\n",
__func__, nents, num_chans);
return 0;
}
for (j = 0; j < nents; j++) {
ll_desc->src = src_dma_addr + (j * edma->dma_size);
ll_desc->dst = dst_dma_addr + (j * edma->dma_size);
dev_dbg(edma->fdev, "src %llx, dst %llx at %d\n", ll_desc->src, ll_desc->dst, j);
if (EDMA_READ_TEST_EN) {
ll_desc->dst = src_dma_addr + (j * edma->dma_size);
ll_desc->src = dst_dma_addr + (j * edma->dma_size);
} else {
ll_desc->src = src_dma_addr + (j * edma->dma_size);
ll_desc->dst = dst_dma_addr + (j * edma->dma_size);
}
ll_desc->sz = edma->dma_size;
ll_desc++;
}
@@ -219,14 +370,33 @@ static int edmalib_common_test(struct edmalib_common *edma)
if (!edma->cookie || (edma->prev_edma_ch != edma->edma_ch)) {
dev_info(edma->fdev, "%s: re-init edma lib prev_ch(%x) != current chans(%x)\n",
__func__, edma->prev_edma_ch, edma->edma_ch);
edma->cookie = tegra_pcie_edma_initialize(&info);
ret = tegra_pcie_dma_initialize(&info, &edma->cookie);
if (ret != TEGRA_PCIE_DMA_SUCCESS) {
dev_info(edma->fdev, "%s: tegra_pcie_dma_initialize() fail: %d\n",
__func__, ret);
return -1;
}
edma->prev_edma_ch = edma->edma_ch;
if (edma->chip_id == NVPCIE_DMA_SOC_T264) {
ret = tegra_pcie_dma_set_msi(edma->cookie, edma->msi_addr, edma->msi_data);
if (ret != TEGRA_PCIE_DMA_SUCCESS) {
dev_info(edma->fdev, "%s: tegra_pcie_dma_set_msi() fail: %d\n",
__func__, ret);
return -1;
}
}
}
edma->nents_per_ch = nents_per_ch;
/* generate random bytes to transfer */
get_random_bytes(edma->src_virt, edma->dma_size * nents_per_ch);
if (EDMA_SANITY_TEST_EN) {
for (j = 0; j < num_descriptors; j++)
memset((u8 *)edma->src_virt + (j * SZ_1K), j, SZ_1K);
} else {
get_random_bytes(edma->src_virt, edma->dma_size * nents_per_ch);
}
dev_info(edma->fdev, "%s: EDMA LIB %s started for %d chans, size %d Bytes, iterations: %d of descriptors %d\n",
__func__, xfer_str[xfer_type], num_chans, edma->dma_size, edma->stress_count,
nents_per_ch);
@@ -234,7 +404,7 @@ static int edmalib_common_test(struct edmalib_common *edma)
/* LL DMA with size epfnv->dma_size per desc */
for (i = 0; i < chan_count; i++) {
int ch = i;
struct tegra_pcie_edma_chans_info *ch_info = chan_info + i;
struct tegra_pcie_dma_chans_info *ch_info = chan_info + i;
if (ch_info->num_descriptors == 0)
continue;
@@ -245,20 +415,20 @@ static int edmalib_common_test(struct edmalib_common *edma)
tx_info.channel_num = ch;
tx_info.type = xfer_type;
tx_info.nents = nents_per_ch;
if (ch_info->ch_type == EDMA_CHAN_XFER_ASYNC) {
if (ch_info->ch_type == TEGRA_PCIE_DMA_CHAN_XFER_ASYNC) {
if (k == edma->stress_count - 1)
tx_info.complete = edma_final_complete;
else
tx_info.complete = edma_complete;
}
edma->priv_iter[ch] = k | (((u64)xfer_type) << EDMA_PRIV_XF_OFF) |
(((u64)l_r) << EDMA_PRIV_LR_OFF) |
(((u64)ch) << EDMA_PRIV_CH_OFF);
(((u64)l_r) << EDMA_PRIV_LR_OFF) |
(((u64)ch) << EDMA_PRIV_CH_OFF);
tx_info.priv = &edma->priv_iter[ch];
ret = tegra_pcie_edma_submit_xfer(edma->cookie, &tx_info);
if (ret == EDMA_XFER_FAIL_NOMEM) {
ret = tegra_pcie_dma_submit_xfer(edma->cookie, &tx_info);
if (ret == TEGRA_PCIE_DMA_FAIL_NOMEM) {
/** Retry after 20 msec */
dev_dbg(edma->fdev, "%s: EDMA_XFER_FAIL_NOMEM stress count %d on channel %d iter %d\n",
dev_dbg(edma->fdev, "%s: TEGRA_PCIE_DMA_FAIL_NOMEM stress count %d on channel %d iter %d\n",
__func__, edma->stress_count, i, k);
ret = wait_event_timeout(edma->wr_wq[i],
!(edma->wr_busy & (1 << i)),
@@ -272,7 +442,8 @@ static int edmalib_common_test(struct edmalib_common *edma)
}
k--;
continue;
} else if ((ret != EDMA_XFER_SUCCESS) && (ret != EDMA_XFER_FAIL_NOMEM)) {
} else if ((ret != TEGRA_PCIE_DMA_SUCCESS) &&
(ret != TEGRA_PCIE_DMA_FAIL_NOMEM)) {
dev_err(edma->fdev, "%s: LL %d, SZ: %u B CH: %d failed. %d at iter %d ret: %d\n",
__func__, xfer_type, edma->dma_size, ch, ret, k, ret);
if (EDMA_STOP_TEST_EN) {
@@ -289,44 +460,48 @@ static int edmalib_common_test(struct edmalib_common *edma)
if (i == 0) {
if (EDMA_ABORT_TEST_EN) {
msleep(edma->stress_count);
dma_common_wr(edma->dma_base, DMA_WRITE_DOORBELL_OFF_WR_STOP,
db_off);
/* TODO support abort test case for T264 */
if (edma->chip_id == NVPCIE_DMA_SOC_T234)
dma_common_wr(edma->dma_virt,
DMA_WRITE_DOORBELL_OFF_WR_STOP, db_off);
} else if (EDMA_STOP_TEST_EN) {
bool stop_status;
msleep(edma->stress_count);
stop_status = tegra_pcie_edma_stop(edma->cookie);
stop_status = tegra_pcie_dma_stop(edma->cookie);
dev_info(edma->fdev, "%s: EDMA LIB, status of stop DMA is %d",
__func__, stop_status);
}
}
diff = ktime_to_ns(ktime_get()) - ktime_to_ns(edma->edma_start_time[i]);
if (ch_info->ch_type == EDMA_CHAN_XFER_SYNC) {
if (ret == EDMA_XFER_SUCCESS)
if (ch_info->ch_type == TEGRA_PCIE_DMA_CHAN_XFER_SYNC) {
if (ret == TEGRA_PCIE_DMA_SUCCESS)
dev_info(edma->fdev, "%s: EDMA LIB %s-%s-SYNC done for %d iter on channel %d. Total Size %llu bytes, time %llu nsec. Perf is %llu Mbps\n",
__func__, xfer_str[xfer_type], l_r_str[l_r], edma->stress_count, i,
edma->tsz, diff, EDMA_PERF);
__func__, xfer_str[xfer_type], l_r_str[l_r],
edma->stress_count, i, edma->tsz, diff, EDMA_PERF);
}
}
if (EDMA_CRC_TEST_EN && !REMOTE_EDMA_TEST_EN) {
u32 crc;
if (EDMA_SANITY_TEST_EN)
edma->raise_irq(edma->priv);
crc = crc32_le(~0, edma->src_virt, epf_bar0->wr_data[0].size);
if (EDMA_CRC_TEST_EN && !REMOTE_EDMA_TEST_EN) {
edma->raise_irq(edma->priv);
crc = crc32_le(~0, edma->src_virt + epf_bar->wr_data[0].src_offset,
epf_bar->wr_data[0].size);
msleep(100);
if (crc != epf_bar0->wr_data[0].crc)
if (crc != epf_bar->wr_data[0].crc)
dev_err(edma->fdev, "CRC check failed, LCRC: 0x%x RCRC: 0x%x\n",
crc, epf_bar0->wr_data[0].crc);
crc, epf_bar->wr_data[0].crc);
else
dev_err(edma->fdev, "CRC check pass\n");
}
dev_info(edma->fdev, "%s: EDMA LIB submit done\n", __func__);
return 0;
fail:
if (ret != EDMA_XFER_DEINIT) {
tegra_pcie_edma_deinit(edma->cookie);
if (ret != TEGRA_PCIE_DMA_DEINIT) {
tegra_pcie_dma_deinit(&edma->cookie);
edma->cookie = NULL;
}
return -1;