diff --git a/drivers/misc/tegra-pcie-dma-test.c b/drivers/misc/tegra-pcie-dma-test.c index 3bf21153..eec13218 100644 --- a/drivers/misc/tegra-pcie-dma-test.c +++ b/drivers/misc/tegra-pcie-dma-test.c @@ -8,52 +8,63 @@ #include #include -#include #include #include +#include #include #include #include #include #include #include -#include #include -#include #include +#include +#include #define MODULENAME "pcie_dma_host" struct ep_pvt { struct pci_dev *pdev; - void __iomem *bar0_virt; - void __iomem *dma_base; - u32 dma_size; - void *dma_virt; - dma_addr_t dma_phy; - dma_addr_t bar0_phy; + /* Configurable BAR0/BAR2 virt and phy base addresses */ + void __iomem *bar_virt; + dma_addr_t bar_phy; + /* DMA register BAR virt and phy base addresses */ + void __iomem *dma_virt; + phys_addr_t dma_phy_base; + u32 dma_phy_size; + + /* dma_alloc_coherent() using RP pci_dev */ + void *rp_dma_virt; + dma_addr_t rp_dma_phy; + /* dma_alloc_coherent() using EP pci_dev */ + void *ep_dma_virt; + dma_addr_t ep_dma_phy; + struct dentry *debugfs; - void *cookie; + + u32 dma_size; u32 stress_count; u32 edma_ch; u32 prev_edma_ch; u32 msi_irq; u64 msi_addr; - u16 msi_data; - phys_addr_t dma_phy_base; - u32 dma_phy_size; - u64 tsz; - ktime_t edma_start_time[DMA_WR_CHNL_NUM]; + u32 msi_data; + u32 pmsi_irq; + u64 pmsi_addr; + u32 pmsi_data; + u8 chip_id; struct edmalib_common edma; }; static irqreturn_t ep_isr(int irq, void *arg) { struct ep_pvt *ep = (struct ep_pvt *)arg; - struct pcie_epf_bar0 *epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt; + struct pcie_epf_bar *epf_bar = (__force struct pcie_epf_bar *)ep->bar_virt; + struct sanity_data *wr_data = &epf_bar->wr_data[0]; - epf_bar0->wr_data[0].crc = crc32_le(~0, ep->dma_virt + BAR0_DMA_BUF_OFFSET, - epf_bar0->wr_data[0].size); + wr_data->crc = crc32_le(~0, ep->ep_dma_virt + BAR0_DMA_BUF_OFFSET + wr_data->dst_offset, + wr_data->size); return IRQ_HANDLED; } @@ -61,64 +72,44 @@ static irqreturn_t ep_isr(int irq, void *arg) static void 
tegra_pcie_dma_raise_irq(void *p) { pr_err("%s: donot support raise IRQ from RP. CRC test if any started may fail.\n", - __func__); -} - -static struct device *tegra_pci_dma_get_host_bridge_device(struct pci_dev *dev) -{ - struct pci_bus *bus = dev->bus; - struct device *bridge; - - while (bus->parent) - bus = bus->parent; - - bridge = bus->bridge; - kobject_get(&bridge->kobj); - - return bridge; -} - -static void tegra_pci_dma_put_host_bridge_device(struct device *dev) -{ - kobject_put(&dev->kobj); + __func__); } /* debugfs to perform eDMA lib transfers */ static int edmalib_test(struct seq_file *s, void *data) { struct ep_pvt *ep = (struct ep_pvt *)dev_get_drvdata(s->private); - struct pcie_epf_bar0 *epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt; - /* RP uses 128M(used by EP) + 1M(reserved) offset for source and dest data transfers */ - dma_addr_t ep_dma_addr = epf_bar0->ep_phy_addr + SZ_128M + SZ_1M; - dma_addr_t bar0_dma_addr = ep->bar0_phy + SZ_128M + SZ_1M; - dma_addr_t rp_dma_addr = ep->dma_phy + SZ_128M + SZ_1M; + struct pcie_epf_bar *epf_bar = (__force struct pcie_epf_bar *)ep->bar_virt; struct pci_dev *pdev = ep->pdev; - struct device *bridge, *rdev; struct edmalib_common *edma = &ep->edma; + struct pci_dev *ppdev = pcie_find_root_port(pdev); - ep->edma.src_dma_addr = rp_dma_addr; - ep->edma.src_virt = ep->dma_virt + SZ_128M + SZ_1M; ep->edma.fdev = &ep->pdev->dev; - ep->edma.epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt; - ep->edma.bar0_phy = ep->bar0_phy; - ep->edma.dma_base = ep->dma_base; + ep->edma.epf_bar = epf_bar; + ep->edma.bar_phy = ep->bar_phy; + ep->edma.dma_virt = ep->dma_virt; ep->edma.priv = (void *)ep; ep->edma.raise_irq = tegra_pcie_dma_raise_irq; + /* RP uses "Base + SZ_16M + 1M(reserved)" offset for DMA data transfers */ if (REMOTE_EDMA_TEST_EN) { - ep->edma.dst_dma_addr = ep_dma_addr; - ep->edma.edma_remote.msi_addr = ep->msi_addr; - ep->edma.edma_remote.msi_data = ep->msi_data; - ep->edma.edma_remote.msi_irq = 
ep->msi_irq; - ep->edma.edma_remote.dma_phy_base = ep->dma_phy_base; - ep->edma.edma_remote.dma_size = ep->dma_phy_size; - ep->edma.edma_remote.dev = &pdev->dev; + ep->edma.src_virt = ep->ep_dma_virt + SZ_16M + SZ_1M; + ep->edma.src_dma_addr = ep->ep_dma_phy + SZ_16M + SZ_1M; + ep->edma.dst_dma_addr = epf_bar->ep_phy_addr + SZ_16M + SZ_1M; + ep->edma.msi_addr = ep->msi_addr; + ep->edma.msi_data = ep->msi_data; + ep->edma.msi_irq = ep->msi_irq; + ep->edma.cdev = &pdev->dev; + ep->edma.remote.dma_phy_base = ep->dma_phy_base; + ep->edma.remote.dma_size = ep->dma_phy_size; } else { - bridge = tegra_pci_dma_get_host_bridge_device(pdev); - rdev = bridge->parent; - tegra_pci_dma_put_host_bridge_device(bridge); - ep->edma.of_node = rdev->of_node; - ep->edma.dst_dma_addr = bar0_dma_addr; + ep->edma.src_dma_addr = ep->rp_dma_phy + SZ_16M + SZ_1M; + ep->edma.src_virt = ep->rp_dma_virt + SZ_16M + SZ_1M; + ep->edma.dst_dma_addr = ep->bar_phy + SZ_16M + SZ_1M; + ep->edma.msi_addr = ep->pmsi_addr; + ep->edma.msi_data = ep->pmsi_data; + ep->edma.msi_irq = ep->pmsi_irq; + ep->edma.cdev = &ppdev->dev; } return edmalib_common_test(&ep->edma); @@ -144,13 +135,13 @@ static void init_debugfs(struct ep_pvt *ep) ep->edma.nents = DMA_LL_DEFAULT_SIZE; } -static int ep_test_dma_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int ep_test_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct ep_pvt *ep; - struct pcie_epf_bar0 *epf_bar0; + struct pcie_epf_bar *epf_bar; + struct pci_dev *ppdev = pcie_find_root_port(pdev); int ret = 0; - u32 val, i; + u32 val, i, bar, dma_bar; u16 val_16; char *name; @@ -158,6 +149,12 @@ static int ep_test_dma_probe(struct pci_dev *pdev, if (!ep) return -ENOMEM; + ep->chip_id = __tegra_get_chip_id(); + if (ep->chip_id == TEGRA234) + ep->edma.chip_id = NVPCIE_DMA_SOC_T234; + else + ep->edma.chip_id = NVPCIE_DMA_SOC_T264; + ep->edma.ll_desc = devm_kzalloc(&pdev->dev, sizeof(*ep->edma.ll_desc) * NUM_EDMA_DESC, GFP_KERNEL); if 
(!ep->edma.ll_desc) @@ -184,49 +181,65 @@ static int ep_test_dma_probe(struct pci_dev *pdev, goto fail_region_request; } - ep->bar0_phy = pci_resource_start(pdev, 0); - ep->bar0_virt = devm_ioremap(&pdev->dev, ep->bar0_phy, pci_resource_len(pdev, 0)); - if (!ep->bar0_virt) { - dev_err(&pdev->dev, "Failed to IO remap BAR0\n"); + if (ep->chip_id == TEGRA234) + bar = 0; + else + bar = 2; + ep->bar_phy = pci_resource_start(pdev, bar); + ep->bar_virt = devm_ioremap_wc(&pdev->dev, ep->bar_phy, pci_resource_len(pdev, bar)); + if (!ep->bar_virt) { + dev_err(&pdev->dev, "Failed to IO remap BAR%d\n", bar); ret = -ENOMEM; goto fail_region_remap; } - ep->dma_base = devm_ioremap(&pdev->dev, pci_resource_start(pdev, 4), - pci_resource_len(pdev, 4)); - if (!ep->dma_base) { - dev_err(&pdev->dev, "Failed to IO remap BAR4\n"); + if (ep->chip_id == TEGRA234) + dma_bar = 4; + else + dma_bar = 0; + ep->dma_phy_base = pci_resource_start(pdev, dma_bar); + ep->dma_phy_size = pci_resource_len(pdev, dma_bar); + ep->dma_virt = devm_ioremap(&pdev->dev, ep->dma_phy_base, ep->dma_phy_size); + if (!ep->dma_virt) { + dev_err(&pdev->dev, "Failed to IO remap BAR%d\n", dma_bar); ret = -ENOMEM; goto fail_region_remap; } - ret = pci_alloc_irq_vectors(pdev, 2, 2, PCI_IRQ_MSI); + ret = pci_alloc_irq_vectors(pdev, 16, 16, PCI_IRQ_MSI); if (ret < 0) { dev_err(&pdev->dev, "Failed to enable MSI interrupt\n"); ret = -ENODEV; goto fail_region_remap; } - ret = request_irq(pci_irq_vector(pdev, 1), ep_isr, IRQF_SHARED, - "pcie_ep_isr", ep); + ret = request_irq(pci_irq_vector(pdev, 1), ep_isr, IRQF_SHARED, "pcie_ep_isr", ep); if (ret < 0) { dev_err(&pdev->dev, "Failed to register isr\n"); goto fail_isr; } - ep->dma_virt = dma_alloc_coherent(&pdev->dev, BAR0_SIZE, &ep->dma_phy, - GFP_KERNEL); - if (!ep->dma_virt) { + ep->rp_dma_virt = dma_alloc_coherent(&ppdev->dev, BAR0_SIZE, &ep->rp_dma_phy, GFP_KERNEL); + if (!ep->rp_dma_virt) { dev_err(&pdev->dev, "Failed to allocate DMA memory\n"); ret = -ENOMEM; - goto 
fail_dma_alloc; + goto fail_rp_dma_alloc; } - get_random_bytes(ep->dma_virt, BAR0_SIZE); + get_random_bytes(ep->rp_dma_virt, BAR0_SIZE); + dev_info(&ppdev->dev, "DMA mem ppdev, IOVA: 0x%llx size: %d\n", ep->rp_dma_phy, BAR0_SIZE); - /* Update RP DMA system memory base address in BAR0 */ - epf_bar0 = (__force struct pcie_epf_bar0 *)ep->bar0_virt; - epf_bar0->rp_phy_addr = ep->dma_phy; - dev_info(&pdev->dev, "DMA mem, IOVA: 0x%llx size: %d\n", ep->dma_phy, BAR0_SIZE); + ep->ep_dma_virt = dma_alloc_coherent(&pdev->dev, BAR0_SIZE, &ep->ep_dma_phy, GFP_KERNEL); + if (!ep->ep_dma_virt) { + dev_err(&pdev->dev, "Failed to allocate DMA memory for EP\n"); + ret = -ENOMEM; + goto fail_ep_dma_alloc; + } + get_random_bytes(ep->ep_dma_virt, BAR0_SIZE); + dev_info(&pdev->dev, "DMA mem pdev, IOVA: 0x%llx size: %d\n", ep->ep_dma_phy, BAR0_SIZE); + + /* Update RP DMA system memory base address allocated with EP pci_dev in BAR0 */ + epf_bar = (__force struct pcie_epf_bar *)ep->bar_virt; + epf_bar->rp_phy_addr = ep->ep_dma_phy; pci_read_config_word(pdev, pdev->msi_cap + PCI_MSI_FLAGS, &val_16); if (val_16 & PCI_MSI_FLAGS_64BIT) { @@ -242,8 +255,22 @@ static int ep_test_dma_probe(struct pci_dev *pdev, pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, &val); ep->msi_addr = (ep->msi_addr << 32) | val; ep->msi_irq = pci_irq_vector(pdev, 0); - ep->dma_phy_base = pci_resource_start(pdev, 4); - ep->dma_phy_size = pci_resource_len(pdev, 4); + + pci_read_config_word(ppdev, ppdev->msi_cap + PCI_MSI_FLAGS, &val_16); + if (val_16 & PCI_MSI_FLAGS_64BIT) { + pci_read_config_dword(ppdev, ppdev->msi_cap + PCI_MSI_ADDRESS_HI, &val); + ep->pmsi_addr = val; + + pci_read_config_word(ppdev, ppdev->msi_cap + PCI_MSI_DATA_64, &val_16); + ep->pmsi_data = val_16; + } else { + pci_read_config_word(ppdev, ppdev->msi_cap + PCI_MSI_DATA_32, &val_16); + ep->pmsi_data = val_16; + } + pci_read_config_dword(ppdev, ppdev->msi_cap + PCI_MSI_ADDRESS_LO, &val); + ep->pmsi_addr = (ep->pmsi_addr << 32) | val; 
+ ep->pmsi_irq = pci_irq_vector(ppdev, 0); + ep->pmsi_data += 0; name = devm_kasprintf(&ep->pdev->dev, GFP_KERNEL, "%s_pcie_dma_test", dev_name(&pdev->dev)); if (!name) { @@ -252,20 +279,19 @@ static int ep_test_dma_probe(struct pci_dev *pdev, goto fail_name; } - for (i = 0; i < DMA_WR_CHNL_NUM; i++) + for (i = 0; i < TEGRA_PCIE_DMA_WRITE; i++) init_waitqueue_head(&ep->edma.wr_wq[i]); - for (i = 0; i < DMA_RD_CHNL_NUM; i++) - init_waitqueue_head(&ep->edma.rd_wq[i]); - ep->debugfs = debugfs_create_dir(name, NULL); init_debugfs(ep); return ret; fail_name: - dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->dma_virt, ep->dma_phy); -fail_dma_alloc: + dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->ep_dma_virt, ep->ep_dma_phy); +fail_ep_dma_alloc: + dma_free_coherent(&ppdev->dev, BAR0_SIZE, ep->rp_dma_virt, ep->rp_dma_phy); +fail_rp_dma_alloc: free_irq(pci_irq_vector(pdev, 1), ep); fail_isr: pci_free_irq_vectors(pdev); @@ -280,10 +306,12 @@ fail_region_request: static void ep_test_dma_remove(struct pci_dev *pdev) { struct ep_pvt *ep = pci_get_drvdata(pdev); + struct pci_dev *ppdev = pcie_find_root_port(pdev); debugfs_remove_recursive(ep->debugfs); - tegra_pcie_edma_deinit(ep->edma.cookie); - dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->dma_virt, ep->dma_phy); + tegra_pcie_dma_deinit(&ep->edma.cookie); + dma_free_coherent(&pdev->dev, BAR0_SIZE, ep->ep_dma_virt, ep->ep_dma_phy); + dma_free_coherent(&ppdev->dev, BAR0_SIZE, ep->rp_dma_virt, ep->rp_dma_phy); free_irq(pci_irq_vector(pdev, 1), ep); pci_free_irq_vectors(pdev); pci_release_regions(pdev); @@ -293,6 +321,7 @@ static void ep_test_dma_remove(struct pci_dev *pdev) static const struct pci_device_id ep_pci_tbl[] = { { PCI_DEVICE(0x10DE, 0x1AD4)}, { PCI_DEVICE(0x10DE, 0x1AD5)}, + { PCI_DEVICE(0x10DE, 0x229a)}, {}, }; diff --git a/drivers/pci/endpoint/functions/pci-epf-dma-test.c b/drivers/pci/endpoint/functions/pci-epf-dma-test.c index 232879aa..a0b453ac 100644 --- a/drivers/pci/endpoint/functions/pci-epf-dma-test.c +++ 
b/drivers/pci/endpoint/functions/pci-epf-dma-test.c @@ -14,10 +14,10 @@ #include #include #include -#include #include -#include #include +#include +#include #include "pci-epf-wrapper.h" static struct pcie_epf_dma *gepfnv; @@ -28,43 +28,21 @@ struct pcie_epf_dma { struct pci_epc *epc; struct device *fdev; struct device *cdev; - void *bar0_virt; + void *bar_virt; struct dentry *debugfs; - void __iomem *dma_base; + void __iomem *dma_virt; int irq; + u8 chip_id; + u32 dma_size; u32 stress_count; u32 async_count; - struct task_struct *wr0_task; - struct task_struct *wr1_task; - struct task_struct *wr2_task; - struct task_struct *wr3_task; - struct task_struct *rd0_task; - struct task_struct *rd1_task; - u8 task_done; - wait_queue_head_t task_wq; - void *cookie; - - wait_queue_head_t wr_wq[DMA_WR_CHNL_NUM]; - wait_queue_head_t rd_wq[DMA_RD_CHNL_NUM]; - unsigned long wr_busy; - unsigned long rd_busy; - ktime_t wr_start_time[DMA_WR_CHNL_NUM]; - ktime_t wr_end_time[DMA_WR_CHNL_NUM]; - ktime_t rd_start_time[DMA_RD_CHNL_NUM]; - ktime_t rd_end_time[DMA_RD_CHNL_NUM]; - u32 wr_cnt[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM]; - u32 rd_cnt[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM]; - bool pcs[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM]; - bool async_dma; - ktime_t edma_start_time[DMA_WR_CHNL_NUM]; u64 tsz; u32 edma_ch; u32 prev_edma_ch; u32 nents; - struct tegra_pcie_edma_desc *ll_desc; struct edmalib_common edma; }; @@ -82,27 +60,54 @@ static void edma_lib_test_raise_irq(void *p) /* debugfs to perform eDMA lib transfers and do CRC check */ static int edmalib_test(struct seq_file *s, void *data) { - struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *) - dev_get_drvdata(s->private); - struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) - epfnv->bar0_virt; + struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)dev_get_drvdata(s->private); + struct pcie_epf_bar *epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt; - if (!epf_bar0->rp_phy_addr) { + if (!epf_bar->rp_phy_addr) { dev_err(epfnv->fdev, "RP DMA 
address is null\n"); return -1; } - epfnv->edma.src_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET; - epfnv->edma.dst_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET; + epfnv->edma.src_dma_addr = epf_bar->ep_phy_addr + BAR0_DMA_BUF_OFFSET; + epfnv->edma.dst_dma_addr = epf_bar->rp_phy_addr + BAR0_DMA_BUF_OFFSET; epfnv->edma.fdev = epfnv->fdev; - epfnv->edma.epf_bar0 = (struct pcie_epf_bar0 *)epfnv->bar0_virt; - epfnv->edma.src_virt = epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET; - epfnv->edma.dma_base = epfnv->dma_base; + epfnv->edma.cdev = epfnv->cdev; + epfnv->edma.epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt; + epfnv->edma.src_virt = epfnv->bar_virt + BAR0_DMA_BUF_OFFSET; + epfnv->edma.dma_virt = epfnv->dma_virt; + epfnv->edma.dma_size = epfnv->dma_size; + epfnv->edma.stress_count = epfnv->stress_count; + epfnv->edma.edma_ch = epfnv->edma_ch; + epfnv->edma.nents = epfnv->nents; + epfnv->edma.priv = (void *)epfnv; + epfnv->edma.raise_irq = edma_lib_test_raise_irq; + + return edmalib_common_test(&epfnv->edma); +} + +/* debugfs to perform eDMA lib transfers and do CRC check */ +static int edmalib_read_test(struct seq_file *s, void *data) +{ + struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)dev_get_drvdata(s->private); + struct pcie_epf_bar *epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt; + + if (!epf_bar->rp_phy_addr) { + dev_err(epfnv->fdev, "RP DMA address is null\n"); + return -1; + } + + epfnv->edma.src_dma_addr = epf_bar->ep_phy_addr + BAR0_DMA_BUF_OFFSET; + epfnv->edma.dst_dma_addr = epf_bar->rp_phy_addr + BAR0_DMA_BUF_OFFSET; + epfnv->edma.fdev = epfnv->fdev; + epfnv->edma.cdev = epfnv->cdev; + + epfnv->edma.epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt; + epfnv->edma.src_virt = epfnv->bar_virt + BAR0_DMA_BUF_OFFSET; + epfnv->edma.dma_virt = epfnv->dma_virt; epfnv->edma.dma_size = epfnv->dma_size; epfnv->edma.stress_count = epfnv->stress_count; epfnv->edma.edma_ch = epfnv->edma_ch; epfnv->edma.nents = epfnv->nents; - epfnv->edma.of_node
= epfnv->cdev->of_node; epfnv->edma.priv = (void *)epfnv; epfnv->edma.raise_irq = edma_lib_test_raise_irq; @@ -111,8 +116,9 @@ static int edmalib_test(struct seq_file *s, void *data) static void init_debugfs(struct pcie_epf_dma *epfnv) { - debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_test", epfnv->debugfs, - edmalib_test); + debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_test", epfnv->debugfs, edmalib_test); + debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_read_test", epfnv->debugfs, + edmalib_read_test); debugfs_create_u32("dma_size", 0644, epfnv->debugfs, &epfnv->dma_size); epfnv->dma_size = SZ_1M; @@ -132,16 +138,17 @@ static void init_debugfs(struct pcie_epf_dma *epfnv) /* Set DMA_LL_DEFAULT_SIZE as default nents, Max NUM_EDMA_DESC */ epfnv->nents = DMA_LL_DEFAULT_SIZE; - debugfs_create_u32("stress_count", 0644, epfnv->debugfs, - &epfnv->stress_count); + debugfs_create_u32("stress_count", 0644, epfnv->debugfs, &epfnv->stress_count); epfnv->stress_count = DEFAULT_STRESS_COUNT; } static int pcie_dma_epf_core_init(struct pci_epf *epf) { + struct pcie_epf_dma *epfnv = epf_get_drvdata(epf); struct pci_epc *epc = epf->epc; struct device *fdev = &epf->dev; struct pci_epf_bar *epf_bar; + enum pci_barno bar; int ret; ret = lpci_epc_write_header(epc, epf->func_no, epf->header); @@ -150,15 +157,24 @@ static int pcie_dma_epf_core_init(struct pci_epf *epf) return ret; } - epf_bar = &epf->bar[BAR_0]; + if (epfnv->chip_id == TEGRA234) + bar = BAR_0; + else + bar = BAR_1; + + epf_bar = &epf->bar[bar]; ret = lpci_epc_set_bar(epc, epf->func_no, epf_bar); if (ret < 0) { dev_err(fdev, "PCIe set BAR0 failed: %d\n", ret); return ret; } - dev_info(fdev, "BAR0 phy_addr: %llx size: %lx\n", - epf_bar->phys_addr, epf_bar->size); + dev_info(fdev, "BAR0 phy_addr: %llx size: %lx\n", epf_bar->phys_addr, epf_bar->size); + + if (epf->msi_interrupts == 0) { + dev_err(fdev, "pci_epc_set_msi() failed: %d\n", epf->msi_interrupts); + epf->msi_interrupts = 16; + } ret = 
lpci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts); if (ret) { @@ -166,6 +182,22 @@ return ret; } + if (epfnv->chip_id == TEGRA234) + return 0; + + /* Expose MSI address as BAR2 to allow RP to send MSI to EP. */ + epf_bar = &epf->bar[BAR_2]; + epf_bar->phys_addr = gepfnv->edma.msi_addr & ~(SZ_32M - 1); + epf_bar->addr = NULL; + epf_bar->size = SZ_32M; + epf_bar->barno = BAR_2; + epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; + ret = lpci_epc_set_bar(epc, epf->func_no, epf_bar); + if (ret < 0) { + dev_err(fdev, "PCIe set BAR2 failed: %d\n", ret); + return ret; + } + return 0; } @@ -174,14 +206,22 @@ static int pcie_dma_epf_core_deinit(struct pci_epf *epf) { struct pcie_epf_dma *epfnv = epf_get_drvdata(epf); void *cookie = epfnv->edma.cookie; - struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) epfnv->bar0_virt; + struct pcie_epf_bar *epf_bar_virt = (struct pcie_epf_bar *)epfnv->bar_virt; struct pci_epc *epc = epf->epc; - struct pci_epf_bar *epf_bar = &epf->bar[BAR_0]; + struct pci_epf_bar *epf_bar; + enum pci_barno bar; + if (epfnv->chip_id == TEGRA234) + bar = BAR_0; + else + bar = BAR_1; + epf_bar = &epf->bar[bar]; epfnv->edma.cookie = NULL; - epf_bar0->rp_phy_addr = 0; - tegra_pcie_edma_deinit(cookie); + epf_bar_virt->rp_phy_addr = 0; + tegra_pcie_dma_deinit(&cookie); lpci_epc_clear_bar(epc, epf->func_no, epf_bar); + if (epfnv->chip_id == TEGRA264) + lpci_epc_clear_bar(epc, epf->func_no, &epf->bar[BAR_2]); return 0; } @@ -192,16 +232,54 @@ static void pcie_dma_epf_unbind(struct pci_epf *epf) { struct pcie_epf_dma *epfnv = epf_get_drvdata(epf); struct pci_epc *epc = epf->epc; void *cookie = epfnv->edma.cookie; - struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) epfnv->bar0_virt; + struct pcie_epf_bar *epf_bar = (struct pcie_epf_bar *)epfnv->bar_virt; + struct device *cdev = epc->dev.parent; + struct platform_device *pdev = of_find_device_by_node(cdev->of_node); +#if 
LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) + struct msi_desc *desc; +#endif + enum pci_barno bar; + u32 irq; debugfs_remove_recursive(epfnv->debugfs); epfnv->edma.cookie = NULL; - epf_bar0->rp_phy_addr = 0; - tegra_pcie_edma_deinit(cookie); + epf_bar->rp_phy_addr = 0; + tegra_pcie_dma_deinit(&cookie); + if (epfnv->chip_id == TEGRA264) { + platform_msi_domain_free_irqs(&pdev->dev); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + irq = msi_get_virq(&pdev->dev, 0); +#else + for_each_msi_entry(desc, cdev) { + if (desc->platform.msi_index == 0) + irq = desc->irq; + } +#endif + free_irq(irq, epfnv); + } pci_epc_stop(epc); - lpci_epf_free_space(epf, epfnv->bar0_virt, BAR_0); + if (epfnv->chip_id == TEGRA234) + bar = BAR_0; + else + bar = BAR_1; + lpci_epf_free_space(epf, epfnv->bar_virt, bar); +} + +static void pcie_dma_epf_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + if (gepfnv->edma.msi_addr == 0) { + gepfnv->edma.msi_addr = msg->address_hi; + gepfnv->edma.msi_addr <<= 32; + gepfnv->edma.msi_addr |= msg->address_lo; + gepfnv->edma.msi_data = msg->data + 1; + } +} + +static irqreturn_t pcie_dma_epf_irq(int irq, void *arg) +{ + return IRQ_HANDLED; } static int pcie_dma_epf_bind(struct pci_epf *epf) @@ -212,11 +290,26 @@ static int pcie_dma_epf_bind(struct pci_epf *epf) struct device *fdev = &epf->dev; struct device *cdev = epc->dev.parent; struct platform_device *pdev = of_find_device_by_node(cdev->of_node); - struct pci_epf_bar *epf_bar = &epf->bar[BAR_0]; - struct pcie_epf_bar0 *epf_bar0; + struct pcie_epf_bar *epf_bar_virt; + struct pci_epf_bar *epf_bar; + struct irq_domain *domain; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) + struct msi_desc *desc; +#endif + enum pci_barno bar; char *name; int ret, i; + u32 irq; + epfnv->chip_id = __tegra_get_chip_id(); + if (epfnv->chip_id == TEGRA234) { + bar = BAR_0; + epfnv->edma.chip_id = NVPCIE_DMA_SOC_T234; + } else { + bar = BAR_1; + epfnv->edma.chip_id = NVPCIE_DMA_SOC_T264; + } + epf_bar = 
&epf->bar[bar]; epfnv->fdev = fdev; epfnv->cdev = cdev; epfnv->epf = epf; @@ -229,25 +322,22 @@ } #if defined(NV_PCI_EPF_ALLOC_SPACE_HAS_EPC_FEATURES_ARG) /* Linux v6.9 */ - epfnv->bar0_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0, - epc_features); + epfnv->bar_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, bar, epc_features); #else - epfnv->bar0_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0, - epc_features->align); + epfnv->bar_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, bar, epc_features->align); #endif - if (!epfnv->bar0_virt) { + if (!epfnv->bar_virt) { dev_err(fdev, "Failed to allocate memory for BAR0\n"); return -ENOMEM; } - get_random_bytes(epfnv->bar0_virt, BAR0_SIZE); - memset(epfnv->bar0_virt, 0, BAR0_HEADER_SIZE); + get_random_bytes(epfnv->bar_virt, BAR0_SIZE); + memset(epfnv->bar_virt, 0, BAR0_HEADER_SIZE); /* Update BAR header with EP DMA PHY addr */ - epf_bar0 = (struct pcie_epf_bar0 *)epfnv->bar0_virt; - epf_bar0->ep_phy_addr = epf_bar->phys_addr; + epf_bar_virt = (struct pcie_epf_bar *)epfnv->bar_virt; + epf_bar_virt->ep_phy_addr = epf_bar->phys_addr; /* Set BAR0 mem type as 64-bit */ - epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH; + epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH; name = devm_kasprintf(fdev, GFP_KERNEL, "%s_epf_dma_test", pdev->name); if (!name) { @@ -255,21 +345,73 @@ ret = -ENOMEM; goto fail_atu_dma; } - for (i = 0; i < DMA_WR_CHNL_NUM; i++) { + for (i = 0; i < TEGRA_PCIE_DMA_WRITE; i++) init_waitqueue_head(&epfnv->edma.wr_wq[i]); - } - for (i = 0; i < DMA_RD_CHNL_NUM; i++) { - init_waitqueue_head(&epfnv->edma.rd_wq[i]); + if (epfnv->chip_id == TEGRA264) { + domain = dev_get_msi_domain(&pdev->dev); + if (!domain) { + dev_err(fdev, "failed to get MSI domain\n"); + ret = -ENOMEM; + goto fail_kasnprintf; + } + + ret = platform_msi_domain_alloc_irqs(&pdev->dev, 2, 
pcie_dma_epf_write_msi_msg); + if (ret < 0) { + dev_err(fdev, "failed to allocate MSIs: %d\n", ret); + goto fail_kasnprintf; + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + epfnv->edma.msi_irq = msi_get_virq(&pdev->dev, 1); + irq = msi_get_virq(&pdev->dev, 0); +#else + for_each_msi_entry(desc, cdev) { + if (desc->platform.msi_index == 0) + irq = desc->irq; + else if (desc->platform.msi_index == 1) + epfnv->edma.msi_irq = desc->irq; + } +#endif + + ret = request_irq(irq, pcie_dma_epf_irq, IRQF_SHARED, "pcie_dma_epf_isr", epfnv); + if (ret < 0) { + dev_err(fdev, "failed to request irq: %d\n", ret); + goto fail_msi_alloc; + } } epfnv->debugfs = debugfs_create_dir(name, NULL); init_debugfs(epfnv); + epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no); + if (!epc_features) { + dev_err(fdev, "epc_features not implemented\n"); + ret = -EOPNOTSUPP; + goto fail_get_features; + } + + if (!epc_features->core_init_notifier) { + ret = pcie_dma_epf_core_init(epf); + if (ret) { + dev_err(fdev, "EPF core init failed with err: %d\n", ret); + goto fail_core_init; + } + } + return 0; +fail_core_init: +fail_get_features: + debugfs_remove_recursive(epfnv->debugfs); + if (epfnv->chip_id == TEGRA264) + free_irq(irq, epfnv); +fail_msi_alloc: + if (epfnv->chip_id == TEGRA264) + platform_msi_domain_free_irqs(&pdev->dev); +fail_kasnprintf: + devm_kfree(fdev, name); fail_atu_dma: - lpci_epf_free_space(epf, epfnv->bar0_virt, BAR_0); + lpci_epf_free_space(epf, epfnv->bar_virt, bar); return ret; } @@ -301,10 +443,10 @@ static int pcie_dma_epf_probe(struct pci_epf *epf) if (!epfnv) return -ENOMEM; - epfnv->edma.ll_desc = devm_kzalloc(dev, sizeof(*epfnv->ll_desc) * NUM_EDMA_DESC, + epfnv->edma.ll_desc = devm_kzalloc(dev, sizeof(*epfnv->edma.ll_desc) * NUM_EDMA_DESC, GFP_KERNEL); - gepfnv = epfnv; epf_set_drvdata(epf, epfnv); + gepfnv = epfnv; epf->event_ops = &pci_epf_dma_test_event_ops; diff --git a/include/linux/tegra-pcie-edma-test-common.h 
b/include/linux/tegra-pcie-edma-test-common.h index 12b693ac..83dc1c7a 100644 --- a/include/linux/tegra-pcie-edma-test-common.h +++ b/include/linux/tegra-pcie-edma-test-common.h @@ -9,62 +9,123 @@ #define TEGRA_PCIE_EDMA_TEST_COMMON_H #include -#include -#include +#include +#define DMA_WRITE_DOORBELL_OFF 0x10 +#define DMA_WRITE_DOORBELL_OFF_WR_STOP BIT(31) + +#define DMA_READ_DOORBELL_OFF 0x30 + +static inline void dma_common_wr(void __iomem *p, u32 val, u32 offset) +{ + writel(val, offset + p); +} + +#define REMOTE_EDMA_TEST_EN (edma->edma_ch & 0x80000000) #define EDMA_ABORT_TEST_EN (edma->edma_ch & 0x40000000) #define EDMA_STOP_TEST_EN (edma->edma_ch & 0x20000000) #define EDMA_CRC_TEST_EN (edma->edma_ch & 0x10000000) +#define EDMA_READ_TEST_EN (edma->edma_ch & 0x08000000) +#define EDMA_SANITY_TEST_EN (edma->edma_ch & 0x04000000) +#define EDMA_UNALIGN_SRC_TEST_EN (edma->edma_ch & 0x02000000) +#define EDMA_UNALIGN_DST_TEST_EN (edma->edma_ch & 0x01000000) +#define EDMA_UNALIGN_SRC_DST_TEST_EN (edma->edma_ch & 0x00800000) #define IS_EDMA_CH_ENABLED(i) (edma->edma_ch & ((BIT(i) << 4))) #define IS_EDMA_CH_ASYNC(i) (edma->edma_ch & BIT(i)) -#define REMOTE_EDMA_TEST_EN (edma->edma_ch & 0x80000000) #define EDMA_PERF (edma->tsz / (diff / 1000)) #define EDMA_CPERF ((edma->tsz * (edma->nents / edma->nents_per_ch)) / (diff / 1000)) +#define NUM_EDMA_DESC 4096 + +#define TEGRA234_PCIE_DMA_RD_CHNL_NUM 2 + #define EDMA_PRIV_CH_OFF 32 #define EDMA_PRIV_LR_OFF (EDMA_PRIV_CH_OFF + 2) #define EDMA_PRIV_XF_OFF (EDMA_PRIV_LR_OFF + 1) +/* Update DMA_DD_BUF_SIZE and DMA_LL_BUF_SIZE when changing BAR0_SIZE */ +#define BAR0_SIZE SZ_256M + +/* Header includes RP/EP DMA addresses, EP MSI, LL, etc. 
*/ +#define BAR0_HEADER_OFFSET 0x0 +#define BAR0_HEADER_SIZE SZ_1M +#define DMA_LL_DEFAULT_SIZE 8 + +#define BAR0_MSI_OFFSET SZ_64K + +/* DMA'able memory range */ +#define BAR0_DMA_BUF_OFFSET SZ_1M +#define BAR0_DMA_BUF_SIZE (BAR0_SIZE - SZ_1M) + +#define DEFAULT_STRESS_COUNT 10 + +#define MAX_DMA_ELE_SIZE SZ_16M + +/* DMA base offset starts at 0x20000 from ATU_DMA base */ +#define DMA_OFFSET 0x20000 + +struct sanity_data { + u32 size; + u32 src_offset; + u32 dst_offset; + u32 crc; +}; + +/* First 1MB of BAR0 is reserved for control data */ +struct pcie_epf_bar { + /* RP system memory allocated for EP DMA operations */ + u64 rp_phy_addr; + /* EP system memory allocated as BAR */ + u64 ep_phy_addr; + /* MSI data for RP -> EP interrupts */ + u32 msi_data[TEGRA_PCIE_DMA_WR_CHNL_NUM + TEGRA_PCIE_DMA_RD_CHNL_NUM]; + struct sanity_data wr_data[TEGRA_PCIE_DMA_WR_CHNL_NUM]; + struct sanity_data rd_data[TEGRA_PCIE_DMA_RD_CHNL_NUM]; +}; + struct edmalib_common { struct device *fdev; + struct device *cdev; void (*raise_irq)(void *p); void *priv; - struct pcie_epf_bar0 *epf_bar0; + struct pcie_epf_bar *epf_bar; void *src_virt; - void __iomem *dma_base; + void __iomem *dma_virt; u32 dma_size; dma_addr_t src_dma_addr; dma_addr_t dst_dma_addr; - dma_addr_t bar0_phy; + dma_addr_t bar_phy; u32 stress_count; void *cookie; struct device_node *of_node; - wait_queue_head_t wr_wq[DMA_WR_CHNL_NUM]; - wait_queue_head_t rd_wq[DMA_RD_CHNL_NUM]; + wait_queue_head_t wr_wq[TEGRA_PCIE_DMA_WR_CHNL_NUM]; unsigned long wr_busy; unsigned long rd_busy; - ktime_t edma_start_time[DMA_WR_CHNL_NUM]; + ktime_t edma_start_time[TEGRA_PCIE_DMA_WR_CHNL_NUM]; u64 tsz; u32 edma_ch; u32 prev_edma_ch; u32 nents; - struct tegra_pcie_edma_desc *ll_desc; - u64 priv_iter[DMA_WR_CHNL_NUM]; - struct pcie_tegra_edma_remote_info edma_remote; + struct tegra_pcie_dma_desc *ll_desc; + u64 priv_iter[TEGRA_PCIE_DMA_WR_CHNL_NUM]; + struct tegra_pcie_dma_remote_info remote; u32 nents_per_ch; u32 st_as_ch; u32 ls_as_ch; + u64 
msi_addr; + u32 msi_data; + u32 msi_irq; + nvpcie_dma_soc_t chip_id; }; static struct edmalib_common *l_edma; -static void edma_final_complete(void *priv, edma_xfer_status_t status, - struct tegra_pcie_edma_desc *desc) +static void edma_final_complete(void *priv, tegra_pcie_dma_status_t status) { struct edmalib_common *edma = l_edma; u64 cb = *(u64 *)priv; u32 ch = (cb >> EDMA_PRIV_CH_OFF) & 0x3; - edma_xfer_type_t xfer_type = (cb >> EDMA_PRIV_XF_OFF) & 0x1; + tegra_pcie_dma_xfer_type_t xfer_type = (cb >> EDMA_PRIV_XF_OFF) & 0x1; char *xfer_str[2] = {"WR", "RD"}; u32 l_r = (cb >> EDMA_PRIV_LR_OFF) & 0x1; char *l_r_str[2] = {"local", "remote"}; @@ -72,9 +133,12 @@ static void edma_final_complete(void *priv, edma_xfer_status_t status, u64 cdiff = ktime_to_ns(ktime_get()) - ktime_to_ns(edma->edma_start_time[edma->st_as_ch]); cb = cb & 0xFFFFFFFF; - if (EDMA_ABORT_TEST_EN && status == EDMA_XFER_SUCCESS) - dma_common_wr(edma->dma_base, DMA_WRITE_DOORBELL_OFF_WR_STOP | (ch + 1), - DMA_WRITE_DOORBELL_OFF); + /* TODO support abort test case for T264 */ + if (edma->chip_id == NVPCIE_DMA_SOC_T234) { + if (EDMA_ABORT_TEST_EN && status == TEGRA_PCIE_DMA_SUCCESS) + dma_common_wr(edma->dma_virt, DMA_WRITE_DOORBELL_OFF_WR_STOP | (ch + 1), + DMA_WRITE_DOORBELL_OFF); + } dev_info(edma->fdev, "%s: %s-%s-Async complete for chan %d with status %d. Total desc %llu of Sz %d Bytes done in time %llu nsec. 
Perf is %llu Mbps\n", __func__, xfer_str[xfer_type], l_r_str[l_r], ch, status, edma->nents_per_ch*(cb+1), @@ -85,7 +149,7 @@ static void edma_final_complete(void *priv, edma_xfer_status_t status, __func__, EDMA_CPERF, cdiff); } -static void edma_complete(void *priv, edma_xfer_status_t status, struct tegra_pcie_edma_desc *desc) +static void edma_complete(void *priv, tegra_pcie_dma_status_t status) { struct edmalib_common *edma = l_edma; u64 cb = *(u64 *)priv; @@ -103,24 +167,26 @@ static void edma_complete(void *priv, edma_xfer_status_t status, struct tegra_pc /* debugfs to perform eDMA lib transfers and do CRC check */ static int edmalib_common_test(struct edmalib_common *edma) { - struct tegra_pcie_edma_desc *ll_desc = edma->ll_desc; + struct tegra_pcie_dma_desc *ll_desc = edma->ll_desc; dma_addr_t src_dma_addr = edma->src_dma_addr; dma_addr_t dst_dma_addr = edma->dst_dma_addr; u32 nents = edma->nents, num_chans = 0, nents_per_ch = 0, nent_id = 0, chan_count; - u32 i, j, k, max_size, db_off, num_descriptors; - edma_xfer_status_t ret; - struct tegra_pcie_edma_init_info info = {}; - struct tegra_pcie_edma_chans_info *chan_info; - struct tegra_pcie_edma_xfer_info tx_info = {}; + u32 i, j, k, max_size, num_descriptors; + u32 db_off; + tegra_pcie_dma_status_t ret; + struct tegra_pcie_dma_init_info info = {}; + struct tegra_pcie_dma_chans_info *chan_info; + struct tegra_pcie_dma_xfer_info tx_info = {}; u64 diff; - edma_xfer_type_t xfer_type; + tegra_pcie_dma_xfer_type_t xfer_type; char *xfer_str[2] = {"WR", "RD"}; u32 l_r; char *l_r_str[2] = {"local", "remote"}; - struct pcie_epf_bar0 *epf_bar0 = edma->epf_bar0; + struct pcie_epf_bar *epf_bar = edma->epf_bar; + u32 crc; if (!edma->stress_count) { - tegra_pcie_edma_deinit(edma->cookie); + tegra_pcie_dma_deinit(&edma->cookie); edma->cookie = NULL; return 0; } @@ -133,52 +199,133 @@ static int edmalib_common_test(struct edmalib_common *edma) edma->edma_ch |= 0xF5; } + /* FIXME This is causing crash for remote dma when BAR 
MMIO virt address is used. */ +#if 0 + epf_bar->wr_data[0].src_offset = 0; + epf_bar->wr_data[0].dst_offset = 0; +#endif if (EDMA_CRC_TEST_EN) { /* 4 channels in sync mode */ - edma->edma_ch = (EDMA_CRC_TEST_EN | 0xF0); + edma->edma_ch = (0x10000000 | 0xF0); /* Single SZ_4K packet on each channel, so total SZ_16K of data */ edma->stress_count = 1; edma->dma_size = SZ_4K; edma->nents = nents = 4; - epf_bar0->wr_data[0].size = edma->dma_size * edma->nents; + epf_bar->wr_data[0].size = edma->dma_size * edma->nents; + } + + if (EDMA_UNALIGN_SRC_TEST_EN) { + /* 4 channels in sync mode */ + edma->edma_ch = (0x02000000 | 0x10000000 | 0x10); + /* Single SZ_4K packet on each channel, so total SZ_16K of data */ + edma->stress_count = 1; + edma->dma_size = SZ_4K; + edma->nents = nents = 4; + epf_bar->wr_data[0].size = edma->dma_size * edma->nents; + src_dma_addr += 11; + epf_bar->wr_data[0].src_offset = 11; + } + + if (EDMA_UNALIGN_DST_TEST_EN) { + /* 4 channels in sync mode */ + edma->edma_ch = (0x01000000 | 0x10000000 | 0x10); + /* Single SZ_4K packet on each channel, so total SZ_16K of data */ + edma->stress_count = 1; + edma->dma_size = SZ_4K; + edma->nents = nents = 4; + epf_bar->wr_data[0].size = edma->dma_size * edma->nents; + dst_dma_addr += 7; + epf_bar->wr_data[0].dst_offset = 7; + } + + if (EDMA_UNALIGN_SRC_DST_TEST_EN) { + /* 4 channels in sync mode */ + edma->edma_ch = (0x00800000 | 0x10000000 | 0x10); + /* Single SZ_4K packet on each channel, so total SZ_16K of data */ + edma->stress_count = 1; + edma->dma_size = SZ_4K; + edma->nents = nents = 4; + epf_bar->wr_data[0].size = edma->dma_size * edma->nents; + src_dma_addr += 7; + dst_dma_addr += 13; + epf_bar->wr_data[0].src_offset = 7; + epf_bar->wr_data[0].dst_offset = 13; + } + + if (EDMA_SANITY_TEST_EN) { + edma->dma_size = SZ_1K; + edma->nents = nents = 128; + edma->stress_count = 2; } if (edma->cookie && edma->prev_edma_ch != edma->edma_ch) { edma->st_as_ch = -1; dev_info(edma->fdev, "edma_ch changed from 
0x%x != 0x%x, deinit\n", -				 edma->prev_edma_ch, edma->edma_ch); -		tegra_pcie_edma_deinit(edma->cookie); +			 edma->prev_edma_ch, edma->edma_ch); +		tegra_pcie_dma_deinit(&edma->cookie); 		edma->cookie = NULL; 	} -	info.np = edma->of_node; +	info.dev = edma->cdev; +	info.soc = edma->chip_id; if (REMOTE_EDMA_TEST_EN) { num_descriptors = 1024; - info.rx[0].desc_phy_base = edma->bar0_phy + SZ_512K; - info.rx[0].desc_iova = 0xf0000000 + SZ_512K; - info.rx[1].desc_phy_base = edma->bar0_phy + SZ_512K + SZ_256K; - info.rx[1].desc_iova = 0xf0000000 + SZ_512K + SZ_256K; - info.edma_remote = &edma->edma_remote; - chan_count = DMA_RD_CHNL_NUM; + info.rx[0].desc_phy_base = edma->bar_phy + SZ_128K; + info.rx[0].desc_iova = epf_bar->ep_phy_addr + SZ_128K; + info.rx[1].desc_phy_base = edma->bar_phy + SZ_256K; + info.rx[1].desc_iova = epf_bar->ep_phy_addr + SZ_256K; + info.rx[2].desc_phy_base = edma->bar_phy + SZ_256K + SZ_128K; + info.rx[2].desc_iova = epf_bar->ep_phy_addr + SZ_256K + SZ_128K; + info.rx[3].desc_phy_base = edma->bar_phy + SZ_512K; + info.rx[3].desc_iova = epf_bar->ep_phy_addr + SZ_512K; + info.remote = &edma->remote; + info.msi_irq = edma->msi_irq; + info.msi_data = edma->msi_data; + info.msi_addr = edma->msi_addr; + if (edma->chip_id == NVPCIE_DMA_SOC_T234) + chan_count = TEGRA234_PCIE_DMA_RD_CHNL_NUM; + else + chan_count = TEGRA_PCIE_DMA_RD_CHNL_NUM; chan_info = &info.rx[0]; - xfer_type = EDMA_XFER_READ; - db_off = DMA_WRITE_DOORBELL_OFF; + xfer_type = TEGRA_PCIE_DMA_READ; + /* TODO support abort test case for T264 */ + if (edma->chip_id == NVPCIE_DMA_SOC_T234) + db_off = DMA_WRITE_DOORBELL_OFF; l_r = 1; } else { - chan_count = DMA_WR_CHNL_NUM; + chan_count = TEGRA_PCIE_DMA_WR_CHNL_NUM; num_descriptors = 4096; chan_info = &info.tx[0]; - xfer_type = EDMA_XFER_WRITE; - db_off = DMA_READ_DOORBELL_OFF; + xfer_type = TEGRA_PCIE_DMA_WRITE; + /* TODO support abort test case for T264 */ + if (edma->chip_id == NVPCIE_DMA_SOC_T234) + db_off = DMA_READ_DOORBELL_OFF; l_r = 0; + 
info.msi_irq = edma->msi_irq; + info.msi_data = edma->msi_data; + info.msi_addr = edma->msi_addr; + } + + if (EDMA_READ_TEST_EN) { + if (edma->chip_id == NVPCIE_DMA_SOC_T234) + chan_count = TEGRA234_PCIE_DMA_RD_CHNL_NUM; + else + chan_count = TEGRA_PCIE_DMA_RD_CHNL_NUM; + num_descriptors = 4096; + chan_info = &info.rx[0]; + xfer_type = TEGRA_PCIE_DMA_READ; + /* TODO support abort test case for T264 */ + if (edma->chip_id == NVPCIE_DMA_SOC_T234) + db_off = DMA_READ_DOORBELL_OFF; + l_r = 1; } for (i = 0; i < chan_count; i++) { - struct tegra_pcie_edma_chans_info *ch = chan_info + i; + struct tegra_pcie_dma_chans_info *ch = chan_info + i; - ch->ch_type = IS_EDMA_CH_ASYNC(i) ? EDMA_CHAN_XFER_ASYNC : - EDMA_CHAN_XFER_SYNC; + ch->ch_type = IS_EDMA_CH_ASYNC(i) ? TEGRA_PCIE_DMA_CHAN_XFER_ASYNC : + TEGRA_PCIE_DMA_CHAN_XFER_SYNC; if (IS_EDMA_CH_ENABLED(i)) { if (edma->st_as_ch == -1) edma->st_as_ch = i; @@ -200,15 +347,19 @@ static int edmalib_common_test(struct edmalib_common *edma) nents_per_ch = nents / num_chans; if (nents_per_ch == 0) { - dev_err(edma->fdev, "%s: nents(%d) < enabled chanes(%d)\n", + dev_err(edma->fdev, "%s: nents(%d) < enabled channels(%d)\n", __func__, nents, num_chans); return 0; } for (j = 0; j < nents; j++) { - ll_desc->src = src_dma_addr + (j * edma->dma_size); - ll_desc->dst = dst_dma_addr + (j * edma->dma_size); - dev_dbg(edma->fdev, "src %llx, dst %llx at %d\n", ll_desc->src, ll_desc->dst, j); + if (EDMA_READ_TEST_EN) { + ll_desc->dst = src_dma_addr + (j * edma->dma_size); + ll_desc->src = dst_dma_addr + (j * edma->dma_size); + } else { + ll_desc->src = src_dma_addr + (j * edma->dma_size); + ll_desc->dst = dst_dma_addr + (j * edma->dma_size); + } ll_desc->sz = edma->dma_size; ll_desc++; } @@ -219,14 +370,33 @@ static int edmalib_common_test(struct edmalib_common *edma) if (!edma->cookie || (edma->prev_edma_ch != edma->edma_ch)) { dev_info(edma->fdev, "%s: re-init edma lib prev_ch(%x) != current chans(%x)\n", __func__, edma->prev_edma_ch, 
edma->edma_ch); - edma->cookie = tegra_pcie_edma_initialize(&info); + ret = tegra_pcie_dma_initialize(&info, &edma->cookie); + if (ret != TEGRA_PCIE_DMA_SUCCESS) { + dev_info(edma->fdev, "%s: tegra_pcie_dma_initialize() fail: %d\n", + __func__, ret); + return -1; + } edma->prev_edma_ch = edma->edma_ch; + + if (edma->chip_id == NVPCIE_DMA_SOC_T264) { + ret = tegra_pcie_dma_set_msi(edma->cookie, edma->msi_addr, edma->msi_data); + if (ret != TEGRA_PCIE_DMA_SUCCESS) { + dev_info(edma->fdev, "%s: tegra_pcie_dma_set_msi() fail: %d\n", + __func__, ret); + return -1; + } + } } edma->nents_per_ch = nents_per_ch; /* generate random bytes to transfer */ - get_random_bytes(edma->src_virt, edma->dma_size * nents_per_ch); + if (EDMA_SANITY_TEST_EN) { + for (j = 0; j < num_descriptors; j++) + memset((u8 *)edma->src_virt + (j * SZ_1K), j, SZ_1K); + } else { + get_random_bytes(edma->src_virt, edma->dma_size * nents_per_ch); + } dev_info(edma->fdev, "%s: EDMA LIB %s started for %d chans, size %d Bytes, iterations: %d of descriptors %d\n", __func__, xfer_str[xfer_type], num_chans, edma->dma_size, edma->stress_count, nents_per_ch); @@ -234,7 +404,7 @@ static int edmalib_common_test(struct edmalib_common *edma) /* LL DMA with size epfnv->dma_size per desc */ for (i = 0; i < chan_count; i++) { int ch = i; - struct tegra_pcie_edma_chans_info *ch_info = chan_info + i; + struct tegra_pcie_dma_chans_info *ch_info = chan_info + i; if (ch_info->num_descriptors == 0) continue; @@ -245,20 +415,20 @@ static int edmalib_common_test(struct edmalib_common *edma) tx_info.channel_num = ch; tx_info.type = xfer_type; tx_info.nents = nents_per_ch; - if (ch_info->ch_type == EDMA_CHAN_XFER_ASYNC) { + if (ch_info->ch_type == TEGRA_PCIE_DMA_CHAN_XFER_ASYNC) { if (k == edma->stress_count - 1) tx_info.complete = edma_final_complete; else tx_info.complete = edma_complete; } edma->priv_iter[ch] = k | (((u64)xfer_type) << EDMA_PRIV_XF_OFF) | - (((u64)l_r) << EDMA_PRIV_LR_OFF) | - (((u64)ch) << EDMA_PRIV_CH_OFF); 
+ (((u64)l_r) << EDMA_PRIV_LR_OFF) | + (((u64)ch) << EDMA_PRIV_CH_OFF); tx_info.priv = &edma->priv_iter[ch]; - ret = tegra_pcie_edma_submit_xfer(edma->cookie, &tx_info); - if (ret == EDMA_XFER_FAIL_NOMEM) { + ret = tegra_pcie_dma_submit_xfer(edma->cookie, &tx_info); + if (ret == TEGRA_PCIE_DMA_FAIL_NOMEM) { /** Retry after 20 msec */ - dev_dbg(edma->fdev, "%s: EDMA_XFER_FAIL_NOMEM stress count %d on channel %d iter %d\n", + dev_dbg(edma->fdev, "%s: TEGRA_PCIE_DMA_FAIL_NOMEM stress count %d on channel %d iter %d\n", __func__, edma->stress_count, i, k); ret = wait_event_timeout(edma->wr_wq[i], !(edma->wr_busy & (1 << i)), @@ -272,7 +442,8 @@ static int edmalib_common_test(struct edmalib_common *edma) } k--; continue; - } else if ((ret != EDMA_XFER_SUCCESS) && (ret != EDMA_XFER_FAIL_NOMEM)) { + } else if ((ret != TEGRA_PCIE_DMA_SUCCESS) && + (ret != TEGRA_PCIE_DMA_FAIL_NOMEM)) { dev_err(edma->fdev, "%s: LL %d, SZ: %u B CH: %d failed. %d at iter %d ret: %d\n", __func__, xfer_type, edma->dma_size, ch, ret, k, ret); if (EDMA_STOP_TEST_EN) { @@ -289,44 +460,48 @@ static int edmalib_common_test(struct edmalib_common *edma) if (i == 0) { if (EDMA_ABORT_TEST_EN) { msleep(edma->stress_count); - dma_common_wr(edma->dma_base, DMA_WRITE_DOORBELL_OFF_WR_STOP, - db_off); + /* TODO support abort test case for T264 */ + if (edma->chip_id == NVPCIE_DMA_SOC_T234) + dma_common_wr(edma->dma_virt, + DMA_WRITE_DOORBELL_OFF_WR_STOP, db_off); } else if (EDMA_STOP_TEST_EN) { bool stop_status; msleep(edma->stress_count); - stop_status = tegra_pcie_edma_stop(edma->cookie); + stop_status = tegra_pcie_dma_stop(edma->cookie); dev_info(edma->fdev, "%s: EDMA LIB, status of stop DMA is %d", __func__, stop_status); } } diff = ktime_to_ns(ktime_get()) - ktime_to_ns(edma->edma_start_time[i]); - if (ch_info->ch_type == EDMA_CHAN_XFER_SYNC) { - if (ret == EDMA_XFER_SUCCESS) + if (ch_info->ch_type == TEGRA_PCIE_DMA_CHAN_XFER_SYNC) { + if (ret == TEGRA_PCIE_DMA_SUCCESS) dev_info(edma->fdev, "%s: EDMA LIB 
%s-%s-SYNC done for %d iter on channel %d. Total Size %llu bytes, time %llu nsec. Perf is %llu Mbps\n", - __func__, xfer_str[xfer_type], l_r_str[l_r], edma->stress_count, i, - edma->tsz, diff, EDMA_PERF); + __func__, xfer_str[xfer_type], l_r_str[l_r], + edma->stress_count, i, edma->tsz, diff, EDMA_PERF); } } - if (EDMA_CRC_TEST_EN && !REMOTE_EDMA_TEST_EN) { - u32 crc; + if (EDMA_SANITY_TEST_EN) edma->raise_irq(edma->priv); - crc = crc32_le(~0, edma->src_virt, epf_bar0->wr_data[0].size); + + if (EDMA_CRC_TEST_EN && !REMOTE_EDMA_TEST_EN) { + edma->raise_irq(edma->priv); + crc = crc32_le(~0, edma->src_virt + epf_bar->wr_data[0].src_offset, + epf_bar->wr_data[0].size); msleep(100); - if (crc != epf_bar0->wr_data[0].crc) + if (crc != epf_bar->wr_data[0].crc) dev_err(edma->fdev, "CRC check failed, LCRC: 0x%x RCRC: 0x%x\n", - crc, epf_bar0->wr_data[0].crc); + crc, epf_bar->wr_data[0].crc); else dev_err(edma->fdev, "CRC check pass\n"); } - dev_info(edma->fdev, "%s: EDMA LIB submit done\n", __func__); return 0; fail: - if (ret != EDMA_XFER_DEINIT) { - tegra_pcie_edma_deinit(edma->cookie); + if (ret != TEGRA_PCIE_DMA_DEINIT) { + tegra_pcie_dma_deinit(&edma->cookie); edma->cookie = NULL; } return -1;