linux-nv-oot/drivers/pci/endpoint/functions/pci-epf-dma-test.c
Nagarjuna Kristam e1f7bf59ed PCI: EPF: dma-test: Use callback instead of notifier
The EPF framework no longer supports notifiers; it provides callbacks instead.
Register callbacks for init and deinit.
Enable DMA test driver only if EP support is enabled.

Bug 3790460

Change-Id: Ic1e14cdc1a0bd80d49991b5f94713b705085825b
Signed-off-by: Nagarjuna Kristam <nkristam@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2702389
Reviewed-by: Bitan Biswas <bbiswas@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2022-11-15 11:19:59 -08:00


// SPDX-License-Identifier: GPL-2.0+
/*
* PCIe DMA EPF test framework for Tegra PCIe
*
* Copyright (C) 2021-2022 NVIDIA Corporation. All rights reserved.
*/
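/*
 * Endpoint-side test driver for the Tegra PCIe DMA engine. BAR0 is shared
 * with the root port and holds the data buffers, the DMA linked lists and a
 * small header (struct pcie_epf_bar0) used to exchange addresses and CRCs.
 * The tests are triggered through debugfs nodes (perf_test, stress_test,
 * sanity_test, async_dma_test, edmalib_test) created at bind time.
 */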
#include <linux/crc32.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of_platform.h>
#include <linux/pci-epc.h>
#include <linux/pci-epf.h>
#include <linux/pcie_dma.h>
#include <linux/platform_device.h>
#include <linux/kthread.h>
#include <linux/tegra-pcie-edma-test-common.h>
#include "pci-epf-wrapper.h"
static struct pcie_epf_dma *gepfnv;
struct pcie_epf_dma {
struct pci_epf_header header;
struct device *fdev;
struct device *cdev;
void *bar0_virt;
struct dentry *debugfs;
void __iomem *dma_base;
int irq;
u32 dma_size;
u32 stress_count;
u32 async_count;
struct task_struct *wr0_task;
struct task_struct *wr1_task;
struct task_struct *wr2_task;
struct task_struct *wr3_task;
struct task_struct *rd0_task;
struct task_struct *rd1_task;
u8 task_done;
wait_queue_head_t task_wq;
void *cookie;
wait_queue_head_t wr_wq[DMA_WR_CHNL_NUM];
wait_queue_head_t rd_wq[DMA_RD_CHNL_NUM];
unsigned long wr_busy;
unsigned long rd_busy;
ktime_t wr_start_time[DMA_WR_CHNL_NUM];
ktime_t wr_end_time[DMA_WR_CHNL_NUM];
ktime_t rd_start_time[DMA_RD_CHNL_NUM];
ktime_t rd_end_time[DMA_RD_CHNL_NUM];
u32 wr_cnt[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM];
u32 rd_cnt[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM];
bool pcs[DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM];
bool async_dma;
ktime_t edma_start_time[DMA_WR_CHNL_NUM];
u64 tsz;
u32 edma_ch;
u32 prev_edma_ch;
u32 nents;
struct tegra_pcie_edma_desc *ll_desc;
struct edmalib_common edma;
};
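/* One DMA transfer: source and destination bus addresses plus its size. */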
struct edma_desc {
dma_addr_t src;
dma_addr_t dst;
size_t sz;
};
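/*
 * Per-channel MSI handlers: clear the channel's busy bit and wake the
 * submitter waiting on that channel's wait queue.
 */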
static irqreturn_t pcie_dma_epf_wr0_msi(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 0;
epfnv->wr_busy &= ~BIT(bit);
wake_up(&epfnv->wr_wq[bit]);
return IRQ_HANDLED;
}
static irqreturn_t pcie_dma_epf_wr1_msi(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 1;
epfnv->wr_busy &= ~BIT(bit);
wake_up(&epfnv->wr_wq[bit]);
return IRQ_HANDLED;
}
static irqreturn_t pcie_dma_epf_wr2_msi(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 2;
epfnv->wr_busy &= ~BIT(bit);
wake_up(&epfnv->wr_wq[bit]);
return IRQ_HANDLED;
}
static irqreturn_t pcie_dma_epf_wr3_msi(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 3;
epfnv->wr_busy &= ~BIT(bit);
wake_up(&epfnv->wr_wq[bit]);
return IRQ_HANDLED;
}
static irqreturn_t pcie_dma_epf_rd0_msi(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 0;
epfnv->rd_busy &= ~BIT(bit);
wake_up(&epfnv->rd_wq[bit]);
return IRQ_HANDLED;
}
static irqreturn_t pcie_dma_epf_rd1_msi(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 1;
epfnv->rd_busy &= ~BIT(bit);
wake_up(&epfnv->rd_wq[bit]);
return IRQ_HANDLED;
}
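/*
 * Async-mode completion handling: read each channel's current linked-list
 * pointer (LLP) back from the hardware and advance that channel's rd_cnt up
 * to the ring element the DMA engine has consumed.
 */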
static void pcie_async_dma_handler(struct pcie_epf_dma *epfnv)
{
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
u64 llp_base, llp_iova;
u32 llp_idx, ridx;
int i;
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
llp_iova = dma_channel_rd(epfnv->dma_base, i,
DMA_LLP_HIGH_OFF_WRCH);
llp_iova = (llp_iova << 32);
llp_iova |= dma_channel_rd(epfnv->dma_base, i,
DMA_LLP_LOW_OFF_WRCH);
llp_base = epf_bar0->ep_phy_addr + DMA_LL_WR_OFFSET(i);
llp_idx = ((llp_iova - llp_base) / sizeof(struct dma_ll));
llp_idx = llp_idx % DMA_ASYNC_LL_SIZE;
if (!llp_idx)
continue;
ridx = epfnv->rd_cnt[i] % DMA_ASYNC_LL_SIZE;
while (llp_idx != ridx) {
epfnv->rd_cnt[i]++;
ridx = epfnv->rd_cnt[i] % DMA_ASYNC_LL_SIZE;
}
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
llp_iova = dma_channel_rd(epfnv->dma_base, i,
DMA_LLP_HIGH_OFF_RDCH);
llp_iova = (llp_iova << 32);
llp_iova |= dma_channel_rd(epfnv->dma_base, i,
DMA_LLP_LOW_OFF_RDCH);
llp_base = epf_bar0->ep_phy_addr + DMA_LL_RD_OFFSET(i);
llp_idx = ((llp_iova - llp_base) / sizeof(struct dma_ll));
llp_idx = llp_idx % DMA_ASYNC_LL_SIZE;
if (!llp_idx)
continue;
ridx = epfnv->rd_cnt[DMA_WR_CHNL_NUM + i] % DMA_ASYNC_LL_SIZE;
while (llp_idx != ridx) {
epfnv->rd_cnt[DMA_WR_CHNL_NUM + i]++;
ridx = epfnv->rd_cnt[DMA_WR_CHNL_NUM + i] %
DMA_ASYNC_LL_SIZE;
}
}
}
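/*
 * The controller's "intr" interrupt is handled as a threaded IRQ: the hard
 * handler only wakes the thread, which clears the write/read interrupt
 * status, records the end timestamps and wakes the blocked submitters (and
 * runs the async handler when async_dma is set).
 */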
static irqreturn_t pcie_dma_epf_irq(int irq, void *arg)
{
return IRQ_WAKE_THREAD;
}
static irqreturn_t pcie_dma_epf_irq_handler(int irq, void *arg)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)arg;
int bit = 0;
u32 val;
val = dma_common_rd(epfnv->dma_base, DMA_WRITE_INT_STATUS_OFF);
for_each_set_bit(bit, &epfnv->wr_busy, DMA_WR_CHNL_NUM) {
if (BIT(bit) & val) {
dma_common_wr(epfnv->dma_base, BIT(bit),
DMA_WRITE_INT_CLEAR_OFF);
epfnv->wr_end_time[bit] = ktime_get();
epfnv->wr_busy &= ~(BIT(bit));
wake_up(&epfnv->wr_wq[bit]);
}
}
val = dma_common_rd(epfnv->dma_base, DMA_READ_INT_STATUS_OFF);
for_each_set_bit(bit, &epfnv->rd_busy, DMA_RD_CHNL_NUM) {
if (BIT(bit) & val) {
dma_common_wr(epfnv->dma_base, BIT(bit),
DMA_READ_INT_CLEAR_OFF);
epfnv->rd_end_time[bit] = ktime_get();
epfnv->rd_busy &= ~(BIT(bit));
wake_up(&epfnv->rd_wq[bit]);
}
}
if (epfnv->async_dma) {
val = dma_common_rd(epfnv->dma_base, DMA_WRITE_INT_STATUS_OFF);
dma_common_wr(epfnv->dma_base, val, DMA_WRITE_INT_CLEAR_OFF);
val = dma_common_rd(epfnv->dma_base, DMA_READ_INT_STATUS_OFF);
dma_common_wr(epfnv->dma_base, val, DMA_READ_INT_CLEAR_OFF);
pcie_async_dma_handler(epfnv);
}
return IRQ_HANDLED;
}
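/*
 * Prepare the DMA write/read engines for the register-level tests. When
 * lie is true the local interrupt masks are cleared so completions raise
 * the EP's "intr" interrupt; otherwise the local interrupts are masked and
 * RIE is additionally set in the channel control registers.
 */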
static int edma_init(struct pcie_epf_dma *epfnv, bool lie)
{
u32 val;
int i;
/* Enable LIE or RIE for all write channels */
if (lie) {
val = dma_common_rd(epfnv->dma_base, DMA_WRITE_INT_MASK_OFF);
val &= ~0xf;
val &= ~(0xf << 16);
dma_common_wr(epfnv->dma_base, val, DMA_WRITE_INT_MASK_OFF);
} else {
val = dma_common_rd(epfnv->dma_base, DMA_WRITE_INT_MASK_OFF);
val |= 0xf;
val |= (0xf << 16);
dma_common_wr(epfnv->dma_base, val, DMA_WRITE_INT_MASK_OFF);
}
val = DMA_CH_CONTROL1_OFF_WRCH_LIE;
if (!lie)
val |= DMA_CH_CONTROL1_OFF_WRCH_RIE;
for (i = 0; i < DMA_WR_CHNL_NUM; i++)
dma_channel_wr(epfnv->dma_base, i, val,
DMA_CH_CONTROL1_OFF_WRCH);
/* Enable LIE or RIE for all read channels */
if (lie) {
val = dma_common_rd(epfnv->dma_base, DMA_READ_INT_MASK_OFF);
val &= ~0x3;
val &= ~(0x3 << 16);
dma_common_wr(epfnv->dma_base, val, DMA_READ_INT_MASK_OFF);
} else {
val = dma_common_rd(epfnv->dma_base, DMA_READ_INT_MASK_OFF);
val |= 0x3;
val |= (0x3 << 16);
dma_common_wr(epfnv->dma_base, val, DMA_READ_INT_MASK_OFF);
}
val = DMA_CH_CONTROL1_OFF_RDCH_LIE;
if (!lie)
val |= DMA_CH_CONTROL1_OFF_RDCH_RIE;
for (i = 0; i < DMA_RD_CHNL_NUM; i++)
dma_channel_wr(epfnv->dma_base, i, val,
DMA_CH_CONTROL1_OFF_RDCH);
dma_common_wr(epfnv->dma_base, WRITE_ENABLE, DMA_WRITE_ENGINE_EN_OFF);
dma_common_wr(epfnv->dma_base, READ_ENABLE, DMA_READ_ENGINE_EN_OFF);
return 0;
}
static void edma_deinit(struct pcie_epf_dma *epfnv)
{
u32 val;
/* Mask channel interrupts */
val = dma_common_rd(epfnv->dma_base, DMA_WRITE_INT_MASK_OFF);
val |= 0xf;
val |= (0xf << 16);
dma_common_wr(epfnv->dma_base, val, DMA_WRITE_INT_MASK_OFF);
val = dma_common_rd(epfnv->dma_base, DMA_READ_INT_MASK_OFF);
val |= 0x3;
val |= (0x3 << 16);
dma_common_wr(epfnv->dma_base, val, DMA_READ_INT_MASK_OFF);
dma_common_wr(epfnv->dma_base, WRITE_DISABLE, DMA_WRITE_ENGINE_EN_OFF);
dma_common_wr(epfnv->dma_base, READ_DISABLE, DMA_READ_ENGINE_EN_OFF);
}
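/* Put every channel into linked-list mode (LLE) and set the consumer cycle state bit (CCS). */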
static int edma_ll_init(struct pcie_epf_dma *epfnv)
{
u32 val;
int i;
/* Enable linked list mode and set CCS */
val = DMA_CH_CONTROL1_OFF_WRCH_LLE | DMA_CH_CONTROL1_OFF_WRCH_CCS;
for (i = 0; i < DMA_WR_CHNL_NUM; i++)
dma_channel_wr(epfnv->dma_base, i, val,
DMA_CH_CONTROL1_OFF_WRCH);
val = DMA_CH_CONTROL1_OFF_RDCH_LLE | DMA_CH_CONTROL1_OFF_RDCH_CCS;
for (i = 0; i < DMA_RD_CHNL_NUM; i++)
dma_channel_wr(epfnv->dma_base, i, val,
DMA_CH_CONTROL1_OFF_RDCH);
return 0;
}
static void edma_ll_deinit(struct pcie_epf_dma *epfnv)
{
u32 val;
int i;
/* Disable linked list mode and clear CCS */
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
val = dma_channel_rd(epfnv->dma_base, i,
DMA_CH_CONTROL1_OFF_WRCH);
val &= ~(DMA_CH_CONTROL1_OFF_WRCH_LLE |
DMA_CH_CONTROL1_OFF_WRCH_CCS);
dma_channel_wr(epfnv->dma_base, i, val,
DMA_CH_CONTROL1_OFF_WRCH);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
val = dma_channel_rd(epfnv->dma_base, i,
DMA_CH_CONTROL1_OFF_RDCH);
val &= ~(DMA_CH_CONTROL1_OFF_RDCH_LLE |
DMA_CH_CONTROL1_OFF_RDCH_CCS);
dma_channel_wr(epfnv->dma_base, i, val,
DMA_CH_CONTROL1_OFF_RDCH);
}
}
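/*
 * Direct-mode transfers: program size, source and destination straight into
 * the channel registers, ring the doorbell and block until the completion
 * interrupt clears the channel's busy bit (5 s timeout).
 */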
static int edma_submit_direct_tx(struct pcie_epf_dma *epfnv,
struct edma_desc *desc, int ch)
{
int ret = 0;
epfnv->wr_busy |= 1 << ch;
/* Populate desc in DMA registers */
dma_channel_wr(epfnv->dma_base, ch, desc->sz,
DMA_TRANSFER_SIZE_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, ch, lower_32_bits(desc->src),
DMA_SAR_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, ch, upper_32_bits(desc->src),
DMA_SAR_HIGH_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, ch, lower_32_bits(desc->dst),
DMA_DAR_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, ch, upper_32_bits(desc->dst),
DMA_DAR_HIGH_OFF_WRCH);
epfnv->wr_start_time[ch] = ktime_get();
dma_common_wr(epfnv->dma_base, ch, DMA_WRITE_DOORBELL_OFF);
/* Wait 5 sec to get DMA done interrupt */
ret = wait_event_timeout(epfnv->wr_wq[ch],
!(epfnv->wr_busy & (1 << ch)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: DD WR CH: %d TO\n", __func__, ch);
ret = -ETIMEDOUT;
}
return ret;
}
static int edma_submit_direct_rx(struct pcie_epf_dma *epfnv,
struct edma_desc *desc, int ch)
{
int ret = 0;
epfnv->rd_busy |= 1 << ch;
/* Populate desc in DMA registers */
dma_channel_wr(epfnv->dma_base, ch, desc->sz,
DMA_TRANSFER_SIZE_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, ch, lower_32_bits(desc->src),
DMA_SAR_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, ch, upper_32_bits(desc->src),
DMA_SAR_HIGH_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, ch, lower_32_bits(desc->dst),
DMA_DAR_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, ch, upper_32_bits(desc->dst),
DMA_DAR_HIGH_OFF_RDCH);
epfnv->rd_start_time[ch] = ktime_get();
dma_common_wr(epfnv->dma_base, ch, DMA_READ_DOORBELL_OFF);
ret = wait_event_timeout(epfnv->rd_wq[ch],
!(epfnv->rd_busy & (1 << ch)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: DD RD CH: %d TO\n",
__func__, ch);
ret = -ETIMEDOUT;
}
return ret;
}
static int edma_submit_direct_txrx(struct pcie_epf_dma *epfnv,
struct edma_desc *desc_wr,
struct edma_desc *desc_rd)
{
int ret = 0, i;
/* Configure all DMA write and read channels */
epfnv->wr_busy = DMA_WR_CHNL_MASK;
epfnv->rd_busy = DMA_RD_CHNL_MASK;
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
dma_channel_wr(epfnv->dma_base, i, desc_wr[i].sz,
DMA_TRANSFER_SIZE_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, i,
lower_32_bits(desc_wr[i].src),
DMA_SAR_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, i,
upper_32_bits(desc_wr[i].src),
DMA_SAR_HIGH_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, i,
lower_32_bits(desc_wr[i].dst),
DMA_DAR_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, i,
upper_32_bits(desc_wr[i].dst),
DMA_DAR_HIGH_OFF_WRCH);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
dma_channel_wr(epfnv->dma_base, i, desc_rd[i].sz,
DMA_TRANSFER_SIZE_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, i,
lower_32_bits(desc_rd[i].src),
DMA_SAR_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, i,
upper_32_bits(desc_rd[i].src),
DMA_SAR_HIGH_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, i,
lower_32_bits(desc_rd[i].dst),
DMA_DAR_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, i,
upper_32_bits(desc_rd[i].dst),
DMA_DAR_HIGH_OFF_RDCH);
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
dma_common_wr(epfnv->dma_base, i, DMA_WRITE_DOORBELL_OFF);
if (i < DMA_RD_CHNL_NUM)
dma_common_wr(epfnv->dma_base, i,
DMA_READ_DOORBELL_OFF);
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
ret = wait_event_timeout(epfnv->wr_wq[i],
!(epfnv->wr_busy & (1 << i)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: DD WR CH: %d TO\n",
__func__, i);
ret = -ETIMEDOUT;
goto fail;
}
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
ret = wait_event_timeout(epfnv->rd_wq[i],
!(epfnv->rd_busy & (1 << i)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: DD RD CH: %d TO\n",
__func__, i);
ret = -ETIMEDOUT;
goto fail;
}
}
fail:
return ret;
}
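/*
 * Linked-list mode transfers: build nents descriptors in the BAR0-resident
 * linked list, point the channel's LLP register at it, set LIE on the last
 * element (and RIE when lie is false), ring the doorbell and wait for
 * completion.
 */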
static int edma_submit_sync_tx(struct pcie_epf_dma *epfnv,
struct edma_desc *desc,
int nents, int ch, bool lie)
{
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
dma_addr_t ll_phy_addr = epf_bar0->ep_phy_addr + DMA_LL_WR_OFFSET(ch);
struct dma_ll *dma_ll_virt;
int i, ret;
epfnv->wr_busy |= 1 << ch;
/* Program DMA LL base address in DMA LL pointer register */
dma_channel_wr(epfnv->dma_base, ch, lower_32_bits(ll_phy_addr),
DMA_LLP_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, ch, upper_32_bits(ll_phy_addr),
DMA_LLP_HIGH_OFF_WRCH);
/* Populate DMA descriptors in LL */
dma_ll_virt = (struct dma_ll *)
(epfnv->bar0_virt + DMA_LL_WR_OFFSET(ch));
for (i = 0; i < nents; i++) {
dma_ll_virt->size = desc[i].sz;
dma_ll_virt->src_low = lower_32_bits(desc[i].src);
dma_ll_virt->src_high = upper_32_bits(desc[i].src);
dma_ll_virt->dst_low = lower_32_bits(desc[i].dst);
dma_ll_virt->dst_high = upper_32_bits(desc[i].dst);
dma_ll_virt->ele.cb = 1;
dma_ll_virt++;
}
/* Set LIE or RIE in last element */
dma_ll_virt--;
dma_ll_virt->ele.lie = 1;
if (!lie)
dma_ll_virt->ele.rie = 1;
epfnv->wr_start_time[ch] = ktime_get();
dma_common_wr(epfnv->dma_base, ch, DMA_WRITE_DOORBELL_OFF);
ret = wait_event_timeout(epfnv->wr_wq[ch],
!(epfnv->wr_busy & (1 << ch)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: LL WR CH: %d TO\n", __func__, ch);
ret = -ETIMEDOUT;
}
return ret;
}
static int edma_submit_sync_rx(struct pcie_epf_dma *epfnv,
struct edma_desc *desc,
int nents, int ch, bool lie)
{
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
dma_addr_t ll_phy_addr = epf_bar0->ep_phy_addr + DMA_LL_RD_OFFSET(ch);
struct dma_ll *dma_ll_virt;
int i, ret;
epfnv->rd_busy |= 1 << ch;
/* Program DMA LL base address in DMA LL pointer register */
dma_channel_wr(epfnv->dma_base, ch, lower_32_bits(ll_phy_addr),
DMA_LLP_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, ch, upper_32_bits(ll_phy_addr),
DMA_LLP_HIGH_OFF_RDCH);
/* Populate DMA descriptors in LL */
dma_ll_virt = (struct dma_ll *)
(epfnv->bar0_virt + DMA_LL_RD_OFFSET(ch));
for (i = 0; i < nents; i++) {
dma_ll_virt->size = desc[i].sz;
dma_ll_virt->src_low = lower_32_bits(desc[i].src);
dma_ll_virt->src_high = upper_32_bits(desc[i].src);
dma_ll_virt->dst_low = lower_32_bits(desc[i].dst);
dma_ll_virt->dst_high = upper_32_bits(desc[i].dst);
dma_ll_virt->ele.cb = 1;
dma_ll_virt++;
}
/* Set LIE or RIE in last element */
dma_ll_virt--;
dma_ll_virt->ele.lie = 1;
if (!lie)
dma_ll_virt->ele.rie = 1;
epfnv->rd_start_time[ch] = ktime_get();
dma_common_wr(epfnv->dma_base, ch, DMA_READ_DOORBELL_OFF);
ret = wait_event_timeout(epfnv->rd_wq[ch],
!(epfnv->rd_busy & (1 << ch)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: LL RD CH: %d TO\n", __func__, ch);
ret = -ETIMEDOUT;
}
return ret;
}
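/*
 * Same as the single-channel helpers above, but queues linked lists on all
 * write and read channels at once and then waits for each to complete.
 */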
static int edma_submit_sync_txrx(struct pcie_epf_dma *epfnv,
struct edma_desc *desc_wr,
struct edma_desc *desc_rd, int nents)
{
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
dma_addr_t phy_addr = epf_bar0->ep_phy_addr;
struct dma_ll *dma_ll_virt;
int i, j, ret;
epfnv->wr_busy = DMA_WR_CHNL_MASK;
epfnv->rd_busy = DMA_RD_CHNL_MASK;
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
dma_channel_wr(epfnv->dma_base, i,
lower_32_bits(phy_addr + DMA_LL_WR_OFFSET(i)),
DMA_LLP_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, i,
upper_32_bits(phy_addr + DMA_LL_WR_OFFSET(i)),
DMA_LLP_HIGH_OFF_WRCH);
dma_ll_virt = (struct dma_ll *)
(epfnv->bar0_virt + DMA_LL_WR_OFFSET(i));
for (j = i * nents; j < ((i + 1) * nents); j++) {
dma_ll_virt->size = desc_wr[j].sz;
dma_ll_virt->src_low = lower_32_bits(desc_wr[j].src);
dma_ll_virt->src_high = upper_32_bits(desc_wr[j].src);
dma_ll_virt->dst_low = lower_32_bits(desc_wr[j].dst);
dma_ll_virt->dst_high = upper_32_bits(desc_wr[j].dst);
dma_ll_virt->ele.cb = 1;
dma_ll_virt++;
}
dma_ll_virt--;
dma_ll_virt->ele.lie = 1;
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
dma_channel_wr(epfnv->dma_base, i,
lower_32_bits(phy_addr + DMA_LL_RD_OFFSET(i)),
DMA_LLP_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, i,
upper_32_bits(phy_addr + DMA_LL_RD_OFFSET(i)),
DMA_LLP_HIGH_OFF_RDCH);
dma_ll_virt = (struct dma_ll *)
(epfnv->bar0_virt + DMA_LL_RD_OFFSET(i));
for (j = i * nents; j < ((i + 1) * nents); j++) {
dma_ll_virt->size = desc_rd[j].sz;
dma_ll_virt->src_low = lower_32_bits(desc_rd[j].src);
dma_ll_virt->src_high = upper_32_bits(desc_rd[j].src);
dma_ll_virt->dst_low = lower_32_bits(desc_rd[j].dst);
dma_ll_virt->dst_high = upper_32_bits(desc_rd[j].dst);
dma_ll_virt->ele.cb = 1;
dma_ll_virt++;
}
dma_ll_virt--;
dma_ll_virt->ele.lie = 1;
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
dma_common_wr(epfnv->dma_base, i, DMA_WRITE_DOORBELL_OFF);
if (i < DMA_RD_CHNL_NUM)
dma_common_wr(epfnv->dma_base, i,
DMA_READ_DOORBELL_OFF);
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
ret = wait_event_timeout(epfnv->wr_wq[i],
!(epfnv->wr_busy & (1 << i)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: LL WR CH: %d TO\n",
__func__, i);
ret = -ETIMEDOUT;
goto fail;
}
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
ret = wait_event_timeout(epfnv->rd_wq[i],
!(epfnv->rd_busy & (1 << i)),
msecs_to_jiffies(5000));
if (ret == 0) {
dev_err(epfnv->fdev, "%s: LL RD CH: %d TO\n",
__func__, i);
ret = -ETIMEDOUT;
goto fail;
}
}
fail:
return ret;
}
/* debugfs to measure direct and LL DMA read/write perf on channel 0 */
static int perf_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)
dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
struct edma_desc desc;
struct edma_desc ll_desc[DMA_LL_DEFAULT_SIZE];
dma_addr_t ep_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
dma_addr_t rp_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
long long time;
int ch = 0, nents = DMA_LL_MIN_SIZE, i, ret;
if (!rp_dma_addr) {
dev_err(epfnv->fdev, "RP DMA address is null\n");
return 0;
}
edma_init(epfnv, 1);
/* Direct DMA perf test with size BAR0_DMA_BUF_SIZE */
desc.src = ep_dma_addr;
desc.dst = rp_dma_addr;
desc.sz = BAR0_DMA_BUF_SIZE;
ret = edma_submit_direct_tx(epfnv, &desc, ch);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: DD WR, SZ: %lu B CH: %d failed\n",
__func__, desc.sz, ch);
goto fail;
}
time = ktime_to_ns(epfnv->wr_end_time[ch]) -
ktime_to_ns(epfnv->wr_start_time[ch]);
dev_info(epfnv->fdev, "%s: DD WR, CH: %d SZ: %lu B, time: %lld ns\n",
__func__, ch, desc.sz, time);
desc.src = rp_dma_addr;
desc.dst = ep_dma_addr;
desc.sz = BAR0_DMA_BUF_SIZE;
ret = edma_submit_direct_rx(epfnv, &desc, ch);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: DD RD, SZ: %lu B CH: %d failed\n",
__func__, desc.sz, ch);
goto fail;
}
time = ktime_to_ns(epfnv->rd_end_time[ch]) -
ktime_to_ns(epfnv->rd_start_time[ch]);
dev_info(epfnv->fdev, "%s: DD RD, CH: %d SZ: %lu B, time: %lld ns\n",
__func__, ch, desc.sz, time);
/* Clean DMA LL */
memset(epfnv->bar0_virt + DMA_LL_WR_OFFSET(0), 0, 6 * DMA_LL_SIZE);
edma_ll_init(epfnv);
/* LL DMA perf test with size BAR0_DMA_BUF_SIZE and one desc */
for (i = 0; i < nents; i++) {
ll_desc[i].src = ep_dma_addr + (i * BAR0_DMA_BUF_SIZE);
ll_desc[i].dst = rp_dma_addr + (i * BAR0_DMA_BUF_SIZE);
ll_desc[i].sz = BAR0_DMA_BUF_SIZE;
}
ret = edma_submit_sync_tx(epfnv, ll_desc, nents, ch, 1);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: LL WR, SZ: %u B CH: %d failed\n",
__func__, BAR0_DMA_BUF_SIZE * nents, ch);
goto fail;
}
time = ktime_to_ns(epfnv->wr_end_time[ch]) -
ktime_to_ns(epfnv->wr_start_time[ch]);
dev_info(epfnv->fdev, "%s: LL WR, CH: %d N: %d SZ: %d B, time: %lld ns\n",
__func__, ch, nents, BAR0_DMA_BUF_SIZE, time);
for (i = 0; i < nents; i++) {
ll_desc[i].src = rp_dma_addr + (i * BAR0_DMA_BUF_SIZE);
ll_desc[i].dst = ep_dma_addr + (i * BAR0_DMA_BUF_SIZE);
ll_desc[i].sz = BAR0_DMA_BUF_SIZE;
}
ret = edma_submit_sync_rx(epfnv, ll_desc, nents, ch, 1);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: LL RD, SZ: %u B CH: %d failed\n",
__func__, BAR0_DMA_BUF_SIZE * nents, ch);
goto fail;
}
time = ktime_to_ns(epfnv->rd_end_time[ch]) -
ktime_to_ns(epfnv->rd_start_time[ch]);
dev_info(epfnv->fdev, "%s: LL RD, CH: %d N: %d SZ: %d B, time: %lld ns\n",
__func__, ch, nents, BAR0_DMA_BUF_SIZE, time);
edma_ll_deinit(epfnv);
edma_deinit(epfnv);
fail:
return 0;
}
/* debugfs to stress direct and LL DMA on all wr & rd channels */
static int stress_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)
dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
struct edma_desc desc_wr[DMA_WR_CHNL_NUM], desc_rd[DMA_RD_CHNL_NUM];
struct edma_desc ll_desc_wr[DMA_WR_CHNL_NUM * DMA_LL_DEFAULT_SIZE];
struct edma_desc ll_desc_rd[DMA_RD_CHNL_NUM * DMA_LL_DEFAULT_SIZE];
dma_addr_t ep_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
dma_addr_t rp_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
int i, j, ret, nents = DMA_LL_DEFAULT_SIZE;
if (!rp_dma_addr) {
dev_err(epfnv->fdev, "RP DMA address is null\n");
return 0;
}
edma_init(epfnv, 1);
/* Direct DMA stress test with rand size < DMA_DD_BUF_SIZE */
for (j = 0; j < DMA_WR_CHNL_NUM; j++) {
desc_wr[j].src = ep_dma_addr + (j * DMA_DD_BUF_SIZE);
desc_wr[j].dst = rp_dma_addr + (j * DMA_DD_BUF_SIZE);
}
for (j = 0; j < DMA_RD_CHNL_NUM; j++) {
desc_rd[j].src = rp_dma_addr +
((j + DMA_WR_CHNL_NUM) * DMA_DD_BUF_SIZE);
desc_rd[j].dst = ep_dma_addr +
((j + DMA_WR_CHNL_NUM) * DMA_DD_BUF_SIZE);
}
for (i = 0; i < epfnv->stress_count; i++) {
for (j = 0; j < DMA_WR_CHNL_NUM; j++)
desc_wr[j].sz =
(get_random_u32() % DMA_DD_BUF_SIZE) + 1;
for (j = 0; j < DMA_RD_CHNL_NUM; j++)
desc_rd[j].sz =
(get_random_u32() % DMA_DD_BUF_SIZE) + 1;
ret = edma_submit_direct_txrx(epfnv, desc_wr, desc_rd);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: DD stress failed\n",
__func__);
goto fail;
}
dev_info(epfnv->fdev, "%s: DD stress test iteration %d done\n",
__func__, i);
}
dev_info(epfnv->fdev, "%s: DD stress: all CH, rand SZ, count: %d success\n",
__func__, epfnv->stress_count);
/* Clean DMA LL */
memset(epfnv->bar0_virt + DMA_LL_WR_OFFSET(0), 0, 6 * DMA_LL_SIZE);
edma_ll_init(epfnv);
/* LL DMA stress test with rand size < DMA_LL_BUF_SIZE per desc */
for (i = 0; i < DMA_WR_CHNL_NUM * nents; i++) {
ll_desc_wr[i].src = ep_dma_addr + (i * DMA_LL_BUF_SIZE);
ll_desc_wr[i].dst = rp_dma_addr + (i * DMA_LL_BUF_SIZE);
}
for (i = 0; i < DMA_RD_CHNL_NUM * nents; i++) {
ll_desc_rd[i].src = rp_dma_addr +
((i + DMA_WR_CHNL_NUM) * DMA_LL_BUF_SIZE);
ll_desc_rd[i].dst = ep_dma_addr +
((i + DMA_WR_CHNL_NUM) * DMA_LL_BUF_SIZE);
}
for (i = 0; i < epfnv->stress_count; i++) {
for (j = 0; j < DMA_WR_CHNL_NUM * nents; j++)
ll_desc_wr[j].sz =
(get_random_u32() % DMA_LL_BUF_SIZE) + 1;
for (j = 0; j < DMA_RD_CHNL_NUM * nents; j++)
ll_desc_rd[j].sz =
(get_random_u32() % DMA_LL_BUF_SIZE) + 1;
ret = edma_submit_sync_txrx(epfnv, ll_desc_wr, ll_desc_rd,
nents);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: DMA LL stress failed\n",
__func__);
goto fail;
}
dev_info(epfnv->fdev, "%s: LL stress test iteration %d done\n",
__func__, i);
}
dev_info(epfnv->fdev, "%s: LL stress: all CH, rand SZ, count: %d success\n",
__func__, epfnv->stress_count);
edma_ll_deinit(epfnv);
edma_deinit(epfnv);
fail:
return 0;
}
/* debugfs to perform eDMA lib transfers and do CRC check */
static int edmalib_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)
dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
if (!epf_bar0->rp_phy_addr) {
dev_err(epfnv->fdev, "RP DMA address is null\n");
return -1;
}
epfnv->edma.src_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dst_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
epfnv->edma.fdev = epfnv->fdev;
epfnv->edma.bar0_virt = epfnv->bar0_virt;
epfnv->edma.src_virt = epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET;
epfnv->edma.dma_base = epfnv->dma_base;
epfnv->edma.dma_size = epfnv->dma_size;
epfnv->edma.stress_count = epfnv->stress_count;
epfnv->edma.edma_ch = epfnv->edma_ch;
epfnv->edma.nents = epfnv->nents;
epfnv->edma.of_node = epfnv->cdev->of_node;
return edmalib_common_test(&epfnv->edma);
}
/* debugfs to perform direct & LL DMA and do CRC check */
static int sanity_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)
dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
struct edma_desc desc;
struct edma_desc ll_desc[DMA_LL_DEFAULT_SIZE];
dma_addr_t ep_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET;
dma_addr_t rp_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET;
int nents = DMA_LL_DEFAULT_SIZE;
int i, j, ret;
u32 crc;
if (epfnv->dma_size > MAX_DMA_ELE_SIZE) {
dev_err(epfnv->fdev, "%s: dma_size should be <= 0x%x\n",
__func__, MAX_DMA_ELE_SIZE);
goto fail;
}
if (!rp_dma_addr) {
dev_err(epfnv->fdev, "RP DMA address is null\n");
return 0;
}
edma_init(epfnv, 0);
/* Direct DMA of size epfnv->dma_size */
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
desc.src = ep_dma_addr;
desc.dst = rp_dma_addr;
desc.sz = epfnv->dma_size;
epf_bar0->wr_data[i].size = desc.sz;
/* generate random bytes to transfer */
get_random_bytes(epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET,
desc.sz);
ret = edma_submit_direct_tx(epfnv, &desc, i);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: DD WR CH: %d failed\n",
__func__, i);
goto fail;
}
crc = crc32_le(~0, epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET,
desc.sz);
if (crc != epf_bar0->wr_data[i].crc) {
dev_err(epfnv->fdev, "%s: DD WR, SZ: %lu B CH: %d CRC failed\n",
__func__, desc.sz, i);
goto fail;
}
dev_info(epfnv->fdev, "%s: DD WR, SZ: %lu B CH: %d success\n",
__func__, desc.sz, i);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
desc.src = rp_dma_addr;
desc.dst = ep_dma_addr;
desc.sz = epfnv->dma_size;
epf_bar0->rd_data[i].size = desc.sz;
/* Clear memory to receive data */
memset(epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET, 0, desc.sz);
ret = edma_submit_direct_rx(epfnv, &desc, i);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: DD RD CH: %d failed\n",
__func__, i);
goto fail;
}
crc = crc32_le(~0, epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET,
desc.sz);
if (crc != epf_bar0->rd_data[i].crc) {
dev_err(epfnv->fdev, "%s: DD RD, SZ: %lu B CH: %d CRC failed\n",
__func__, desc.sz, i);
goto fail;
}
dev_info(epfnv->fdev, "%s: DD RD, SZ: %lu B CH: %d success\n",
__func__, desc.sz, i);
}
/* Clean DMA LL all 6 channels */
memset(epfnv->bar0_virt + DMA_LL_WR_OFFSET(0), 0, 6 * DMA_LL_SIZE);
edma_ll_init(epfnv);
/* LL DMA with size epfnv->dma_size per desc */
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
for (j = 0; j < nents; j++) {
ll_desc[j].src = ep_dma_addr + (j * epfnv->dma_size);
ll_desc[j].dst = rp_dma_addr + (j * epfnv->dma_size);
ll_desc[j].sz = epfnv->dma_size;
}
epf_bar0->wr_data[i].size = epfnv->dma_size * nents;
/* generate random bytes to transfer */
get_random_bytes(epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET,
epf_bar0->wr_data[i].size);
ret = edma_submit_sync_tx(epfnv, ll_desc, nents, i, 0);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: LL WR CH: %d failed\n",
__func__, i);
goto fail;
}
crc = crc32_le(~0, epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET,
epfnv->dma_size * nents);
if (crc != epf_bar0->wr_data[i].crc) {
dev_err(epfnv->fdev, "%s: LL WR, SZ: %u B CH: %d CRC failed\n",
__func__, epfnv->dma_size, i);
goto fail;
}
dev_info(epfnv->fdev, "%s: LL WR, SZ: %u B CH: %d success\n",
__func__, epfnv->dma_size, i);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
for (j = 0; j < nents; j++) {
ll_desc[j].src = rp_dma_addr + (j * epfnv->dma_size);
ll_desc[j].dst = ep_dma_addr + (j * epfnv->dma_size);
ll_desc[j].sz = epfnv->dma_size;
}
epf_bar0->rd_data[i].size = epfnv->dma_size * nents;
/* Clear memory to receive data */
memset(epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET, 0,
epf_bar0->rd_data[i].size);
ret = edma_submit_sync_rx(epfnv, ll_desc, nents, i, 0);
if (ret < 0) {
dev_err(epfnv->fdev, "%s: LL RD failed\n", __func__);
goto fail;
}
crc = crc32_le(~0, epfnv->bar0_virt + BAR0_DMA_BUF_OFFSET,
epfnv->dma_size * nents);
if (crc != epf_bar0->rd_data[i].crc) {
dev_err(epfnv->fdev, "%s: LL RD, SZ: %u B CH: %d CRC failed\n",
__func__, epfnv->dma_size, i);
goto fail;
}
dev_info(epfnv->fdev, "%s: LL RD, SZ: %u B CH: %d success\n",
__func__, epfnv->dma_size, i);
}
edma_ll_deinit(epfnv);
edma_deinit(epfnv);
fail:
return 0;
}
#ifdef THREAD_ON_CPU
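/*
 * Async ring test, run as one kthread per channel: the channel's linked
 * list is treated as a circular ring of 64 KiB transfers. This producer
 * fills ring elements and toggles the cycle bit (pcs) on wrap-around, while
 * pcie_async_dma_handler() advances rd_cnt from the hardware LLP.
 */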
static int async_dma_test_fn(struct pcie_epf_dma *epfnv, int ch)
{
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
dma_addr_t ep_dma_addr, rp_dma_addr, phy_addr;
struct dma_ll *dma_ll_virt;
u32 nents = epfnv->async_count, count = 0, idx, i;
ep_dma_addr = epf_bar0->ep_phy_addr + BAR0_DMA_BUF_OFFSET +
(ch * DMA_ASYNC_LL_SIZE * SZ_64K);
rp_dma_addr = epf_bar0->rp_phy_addr + BAR0_DMA_BUF_OFFSET +
(ch * DMA_ASYNC_LL_SIZE * SZ_64K);
epfnv->wr_cnt[ch] = 0;
epfnv->rd_cnt[ch] = 0;
dma_ll_virt = (struct dma_ll *)
(epfnv->bar0_virt + DMA_LL_WR_OFFSET(ch));
phy_addr = epf_bar0->ep_phy_addr + DMA_LL_WR_OFFSET(ch);
dma_ll_virt[DMA_ASYNC_LL_SIZE].src_low = lower_32_bits(phy_addr);
dma_ll_virt[DMA_ASYNC_LL_SIZE].src_high = upper_32_bits(phy_addr);
dma_ll_virt[DMA_ASYNC_LL_SIZE].ele.llp = 1;
dma_ll_virt[DMA_ASYNC_LL_SIZE].ele.tcb = 1;
epfnv->pcs[ch] = 1;
dma_ll_virt[DMA_ASYNC_LL_SIZE].ele.cb = !epfnv->pcs[ch];
for (i = 0; i < nents; i++) {
while ((epfnv->wr_cnt[ch] - epfnv->rd_cnt[ch] + 2) >=
DMA_ASYNC_LL_SIZE) {
msleep(100);
if (++count == 100) {
dev_info(epfnv->fdev, "%s: CH: %d LL is full wr_cnt: %u rd_cnt: %u\n",
__func__, ch, epfnv->wr_cnt[ch],
epfnv->rd_cnt[ch]);
goto fail;
}
}
count = 0;
idx = i % DMA_ASYNC_LL_SIZE;
dma_ll_virt[idx].size = SZ_64K;
if (ch < DMA_WR_CHNL_NUM) {
phy_addr = ep_dma_addr +
(idx % DMA_ASYNC_LL_SIZE) * SZ_64K;
dma_ll_virt[idx].src_low = lower_32_bits(phy_addr);
dma_ll_virt[idx].src_high = upper_32_bits(phy_addr);
phy_addr = rp_dma_addr +
(idx % DMA_ASYNC_LL_SIZE) * SZ_64K;
dma_ll_virt[idx].dst_low = lower_32_bits(phy_addr);
dma_ll_virt[idx].dst_high = upper_32_bits(phy_addr);
} else {
phy_addr = rp_dma_addr +
(idx % DMA_ASYNC_LL_SIZE) * SZ_64K;
dma_ll_virt[idx].src_low = lower_32_bits(phy_addr);
dma_ll_virt[idx].src_high = upper_32_bits(phy_addr);
phy_addr = ep_dma_addr +
(idx % DMA_ASYNC_LL_SIZE) * SZ_64K;
dma_ll_virt[idx].dst_low = lower_32_bits(phy_addr);
dma_ll_virt[idx].dst_high = upper_32_bits(phy_addr);
}
dma_ll_virt[idx].ele.lie = 1;
/*
 * The descriptor must be completely written before its CB bit is set;
 * the write barrier below keeps the descriptor writes from being
 * reordered past the CB update.
 */
wmb();
dma_ll_virt[idx].ele.cb = epfnv->pcs[ch];
if (idx == (DMA_ASYNC_LL_SIZE - 1)) {
epfnv->pcs[ch] = !epfnv->pcs[ch];
dma_ll_virt[idx + 1].ele.cb = epfnv->pcs[ch];
}
if (ch < DMA_WR_CHNL_NUM)
dma_common_wr(epfnv->dma_base, ch,
DMA_WRITE_DOORBELL_OFF);
else
dma_common_wr(epfnv->dma_base, ch - DMA_WR_CHNL_NUM,
DMA_READ_DOORBELL_OFF);
epfnv->wr_cnt[ch]++;
/* Print status every 10000 iterations */
if ((i % 10000) == 0)
dev_info(epfnv->fdev, "%s: CH: %u async DMA test itr: %u done, wr_cnt: %u rd_cnt: %u\n",
__func__, ch, i, epfnv->wr_cnt[ch],
epfnv->rd_cnt[ch]);
}
count = 0;
while (epfnv->wr_cnt[ch] != epfnv->rd_cnt[ch]) {
msleep(20);
if (++count == 100) {
dev_info(epfnv->fdev, "%s: CH: %d async DMA test failed, wr_cnt: %u rd_cnt: %u\n",
__func__, ch, epfnv->wr_cnt[ch],
epfnv->rd_cnt[ch]);
goto fail;
}
}
dev_info(epfnv->fdev, "%s: CH: %d async DMA success\n", __func__, ch);
fail:
epfnv->wr_cnt[ch] = 0;
epfnv->rd_cnt[ch] = 0;
return 0;
}
static int async_wr0_work(void *data)
{
struct pcie_epf_dma *epfnv = data;
async_dma_test_fn(epfnv, 0);
epfnv->task_done++;
wake_up(&epfnv->task_wq);
return 0;
}
static int async_wr1_work(void *data)
{
struct pcie_epf_dma *epfnv = data;
async_dma_test_fn(epfnv, 1);
epfnv->task_done++;
wake_up(&epfnv->task_wq);
return 0;
}
static int async_wr2_work(void *data)
{
struct pcie_epf_dma *epfnv = data;
async_dma_test_fn(epfnv, 2);
epfnv->task_done++;
wake_up(&epfnv->task_wq);
return 0;
}
static int async_wr3_work(void *data)
{
struct pcie_epf_dma *epfnv = data;
async_dma_test_fn(epfnv, 3);
epfnv->task_done++;
wake_up(&epfnv->task_wq);
return 0;
}
static int async_rd0_work(void *data)
{
struct pcie_epf_dma *epfnv = data;
async_dma_test_fn(epfnv, 4);
epfnv->task_done++;
wake_up(&epfnv->task_wq);
return 0;
}
static int async_rd1_work(void *data)
{
struct pcie_epf_dma *epfnv = data;
async_dma_test_fn(epfnv, 5);
epfnv->task_done++;
wake_up(&epfnv->task_wq);
return 0;
}
#endif
static int async_dma_test(struct seq_file *s, void *data)
{
struct pcie_epf_dma *epfnv = (struct pcie_epf_dma *)
dev_get_drvdata(s->private);
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
epfnv->bar0_virt;
dma_addr_t phy_addr;
int i;
epfnv->task_done = 0;
epfnv->async_dma = true;
edma_init(epfnv, 1);
/* Clean DMA LL all 6 channels */
memset(epfnv->bar0_virt + DMA_LL_WR_OFFSET(0), 0, 6 * DMA_LL_SIZE);
edma_ll_init(epfnv);
/* Program DMA LL base address in DMA LL pointer register */
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
phy_addr = epf_bar0->ep_phy_addr + DMA_LL_WR_OFFSET(i);
dma_channel_wr(epfnv->dma_base, i, lower_32_bits(phy_addr),
DMA_LLP_LOW_OFF_WRCH);
dma_channel_wr(epfnv->dma_base, i, upper_32_bits(phy_addr),
DMA_LLP_HIGH_OFF_WRCH);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
phy_addr = epf_bar0->ep_phy_addr + DMA_LL_RD_OFFSET(i);
dma_channel_wr(epfnv->dma_base, i, lower_32_bits(phy_addr),
DMA_LLP_LOW_OFF_RDCH);
dma_channel_wr(epfnv->dma_base, i, upper_32_bits(phy_addr),
DMA_LLP_HIGH_OFF_RDCH);
}
#ifdef THREAD_ON_CPU
epfnv->wr0_task = kthread_create_on_cpu(async_wr0_work, epfnv, 0,
"async_wr0_work");
if (IS_ERR(epfnv->wr0_task)) {
dev_err(epfnv->fdev, "failed to create async_wr0 thread\n");
goto wr0_task;
}
epfnv->wr1_task = kthread_create_on_cpu(async_wr1_work, epfnv, 1,
"async_wr1_work");
if (IS_ERR(epfnv->wr1_task)) {
dev_err(epfnv->fdev, "failed to create async_wr1 thread\n");
goto wr1_task;
}
epfnv->wr2_task = kthread_create_on_cpu(async_wr2_work, epfnv, 2,
"async_wr2_work");
if (IS_ERR(epfnv->wr2_task)) {
dev_err(epfnv->fdev, "failed to create async_wr2 thread\n");
goto wr2_task;
}
epfnv->wr3_task = kthread_create_on_cpu(async_wr3_work, epfnv, 3,
"async_wr3_work");
if (IS_ERR(epfnv->wr3_task)) {
dev_err(epfnv->fdev, "failed to create async_wr3 thread\n");
goto wr3_task;
}
epfnv->rd0_task = kthread_create_on_cpu(async_rd0_work, epfnv, 4,
"async_rd0_work");
if (IS_ERR(epfnv->rd0_task)) {
dev_err(epfnv->fdev, "failed to create async_rd0 thread\n");
goto rd0_task;
}
epfnv->rd1_task = kthread_create_on_cpu(async_rd1_work, epfnv, 5,
"async_rd1_work");
if (IS_ERR(epfnv->rd1_task)) {
dev_err(epfnv->fdev, "failed to create async_rd1 thread\n");
goto rd1_task;
}
/* The per-channel kthreads exist only when THREAD_ON_CPU is defined */
init_waitqueue_head(&epfnv->task_wq);
wake_up_process(epfnv->wr0_task);
wake_up_process(epfnv->wr1_task);
wake_up_process(epfnv->wr2_task);
wake_up_process(epfnv->wr3_task);
wake_up_process(epfnv->rd0_task);
wake_up_process(epfnv->rd1_task);
wait_event(epfnv->task_wq,
(epfnv->task_done == (DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM)));
#endif
dev_info(epfnv->fdev, "%s: Async DMA test done\n", __func__);
edma_ll_deinit(epfnv);
edma_deinit(epfnv);
epfnv->async_dma = false;
epfnv->task_done = 0;
return 0;
#ifdef THREAD_ON_CPU
rd1_task:
kthread_stop(epfnv->rd0_task);
rd0_task:
kthread_stop(epfnv->wr3_task);
wr3_task:
kthread_stop(epfnv->wr2_task);
wr2_task:
kthread_stop(epfnv->wr1_task);
wr1_task:
kthread_stop(epfnv->wr0_task);
wr0_task:
#endif
epfnv->async_dma = false;
epfnv->task_done = 0;
return 0;
}
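/*
 * Create the debugfs controls: reading a *_test node runs that test, and
 * the u32 nodes tune it. Example usage (the path is illustrative; the
 * directory name is "<controller-device>_epf_dma_test" under the debugfs
 * mount point):
 *
 *   echo 0x80000 > /sys/kernel/debug/<controller-device>_epf_dma_test/dma_size
 *   cat /sys/kernel/debug/<controller-device>_epf_dma_test/sanity_test
 */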
static void init_debugfs(struct pcie_epf_dma *epfnv)
{
debugfs_create_devm_seqfile(epfnv->fdev, "perf_test", epfnv->debugfs,
perf_test);
debugfs_create_devm_seqfile(epfnv->fdev, "stress_test", epfnv->debugfs,
stress_test);
debugfs_create_devm_seqfile(epfnv->fdev, "sanity_test", epfnv->debugfs,
sanity_test);
debugfs_create_devm_seqfile(epfnv->fdev, "async_dma_test",
epfnv->debugfs, async_dma_test);
debugfs_create_devm_seqfile(epfnv->fdev, "edmalib_test", epfnv->debugfs,
edmalib_test);
debugfs_create_u32("dma_size", 0644, epfnv->debugfs, &epfnv->dma_size);
epfnv->dma_size = SZ_1M;
epfnv->edma.st_as_ch = -1;
debugfs_create_u32("edma_ch", 0644, epfnv->debugfs, &epfnv->edma_ch);
/* edma_ch bitmap:
 * BITS 0-3   - async (1) or sync (0) mode for the corresponding WR channel
 * BITS 4-7   - enable/disable the corresponding WR channel
 * BITS 8-9   - async (1) or sync (0) mode for the corresponding RD channel
 * BITS 10-11 - enable/disable the corresponding RD channel
 * BIT 12     - abort testing
 * Default 0xF1: all four WR channels enabled, with WR channel 0 in async mode.
 */
epfnv->edma_ch = 0xF1;
debugfs_create_u32("nents", 0644, epfnv->debugfs, &epfnv->nents);
/* Default nents is DMA_LL_DEFAULT_SIZE; the maximum is NUM_EDMA_DESC */
epfnv->nents = DMA_LL_DEFAULT_SIZE;
debugfs_create_u32("stress_count", 0644, epfnv->debugfs,
&epfnv->stress_count);
epfnv->stress_count = DEFAULT_STRESS_COUNT;
debugfs_create_u32("async_count", 0644, epfnv->debugfs,
&epfnv->async_count);
epfnv->async_count = 4096;
}
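/*
 * Callback passed to platform_msi_domain_alloc_irqs(): record each vector's
 * MSI message data in the BAR0 msi_data[] array so it is visible across the
 * link.
 */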
static void pcie_dma_epf_write_msi_msg(struct msi_desc *desc,
struct msi_msg *msg)
{
/* TODO get rid of global variable gepfnv */
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *)
gepfnv->bar0_virt;
struct device *cdev = msi_desc_to_dev(desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
int idx = desc->platform.msi_index;
#else
int idx = desc->msi_index;
#endif
epf_bar0->msi_data[idx] = msg->data;
dev_info(cdev, "%s: MSI idx: %d data: %d\n", __func__, idx, msg->data);
}
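/*
 * core_init callback, registered through pci_epc_event_ops (this replaces
 * the old notifier mechanism): write the configuration space header, map
 * BAR0 and configure the MSI capability once the controller core is up.
 */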
static int pcie_dma_epf_core_init(struct pci_epf *epf)
{
struct pci_epc *epc = epf->epc;
struct device *fdev = &epf->dev;
struct pci_epf_bar *epf_bar;
int ret;
ret = lpci_epc_write_header(epc, epf->func_no, epf->header);
if (ret < 0) {
dev_err(fdev, "Failed to write PCIe header: %d\n", ret);
return ret;
}
epf_bar = &epf->bar[BAR_0];
ret = lpci_epc_set_bar(epc, epf->func_no, epf_bar);
if (ret < 0) {
dev_err(fdev, "PCIe set BAR0 failed: %d\n", ret);
return ret;
}
dev_info(fdev, "BAR0 phy_addr: %llx size: %lx\n",
epf_bar->phys_addr, epf_bar->size);
ret = lpci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts);
if (ret) {
dev_err(fdev, "pci_epc_set_msi() failed: %d\n", ret);
return ret;
}
return 0;
}
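/*
 * Allocate one platform MSI per DMA channel (DMA_WR_CHNL_NUM write plus
 * DMA_RD_CHNL_NUM read) and attach the per-channel handlers. Without an MSI
 * domain the function only logs that interrupts are unavailable and returns.
 */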
static int pcie_dma_epf_msi_init(struct pci_epf *epf)
{
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
struct pci_epc *epc = epf->epc;
struct device *cdev = epc->dev.parent;
struct device *fdev = &epf->dev;
struct msi_desc *desc;
int ret;
/* LL DMA in sanity test will not work without MSI for EP */
if (!dev_get_msi_domain(cdev)) {
dev_info(fdev, "msi_domain absent, no interrupts\n");
return 0;
}
ret = platform_msi_domain_alloc_irqs(cdev,
DMA_WR_CHNL_NUM + DMA_RD_CHNL_NUM,
pcie_dma_epf_write_msi_msg);
if (ret < 0) {
dev_err(fdev, "failed to allocate MSIs\n");
return ret;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
for_each_msi_entry(desc, cdev) {
switch (desc->platform.msi_index) {
#else
msi_for_each_desc(desc, cdev, MSI_DESC_ALL) {
switch (desc->msi_index) {
#endif
case 0:
ret = request_irq(desc->irq, pcie_dma_epf_wr0_msi, 0,
"pcie_dma_wr0", epfnv);
if (ret < 0)
dev_err(fdev, "failed to register wr0 irq\n");
break;
case 1:
ret = request_irq(desc->irq, pcie_dma_epf_wr1_msi, 0,
"pcie_dma_wr1", epfnv);
if (ret < 0)
dev_err(fdev, "failed to register wr1 irq\n");
break;
case 2:
ret = request_irq(desc->irq, pcie_dma_epf_wr2_msi, 0,
"pcie_dma_wr2", epfnv);
if (ret < 0)
dev_err(fdev, "failed to register wr2 irq\n");
break;
case 3:
ret = request_irq(desc->irq, pcie_dma_epf_wr3_msi, 0,
"pcie_dma_wr3", epfnv);
if (ret < 0)
dev_err(fdev, "failed to register wr3 irq\n");
break;
case 4:
ret = request_irq(desc->irq, pcie_dma_epf_rd0_msi, 0,
"pcie_dma_rd0", epfnv);
if (ret < 0)
dev_err(fdev, "failed to register rd0 irq\n");
break;
case 5:
ret = request_irq(desc->irq, pcie_dma_epf_rd1_msi, 0,
"pcie_dma_rd1", epfnv);
if (ret < 0)
dev_err(fdev, "failed to register rd1 irq\n");
break;
default:
dev_err(fdev, "Unknown MSI irq: %d\n", desc->irq);
continue;
}
}
return 0;
}
static void pcie_dma_epf_msi_deinit(struct pci_epf *epf)
{
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
struct pci_epc *epc = epf->epc;
struct device *cdev = epc->dev.parent;
struct device *fdev = &epf->dev;
struct msi_desc *desc;
/* LL DMA in sanity test will not work without MSI for EP */
if (!dev_get_msi_domain(cdev)) {
dev_info(fdev, "msi_domain absent, no interrupts\n");
return;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)
for_each_msi_entry(desc, cdev)
#else
msi_for_each_desc(desc, cdev, MSI_DESC_ALL)
#endif
free_irq(desc->irq, epfnv);
platform_msi_domain_free_irqs(cdev);
}
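/*
 * core_deinit callback: release the eDMA library context, clear the RP
 * address published in BAR0 and clear BAR0 in the controller.
 */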
static int pcie_dma_epf_core_deinit(struct pci_epf *epf)
{
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
void *cookie = epfnv->edma.cookie;
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) epfnv->bar0_virt;
struct pci_epc *epc = epf->epc;
struct pci_epf_bar *epf_bar = &epf->bar[BAR_0];
epfnv->edma.cookie = NULL;
epf_bar0->rp_phy_addr = 0;
tegra_pcie_edma_deinit(cookie);
lpci_epc_clear_bar(epc, epf->func_no, epf_bar);
return 0;
}
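/* Unbind: remove debugfs, release the eDMA context and MSIs, stop the EPC and free BAR0. */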
static void pcie_dma_epf_unbind(struct pci_epf *epf)
{
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
struct pci_epc *epc = epf->epc;
struct pci_epf_bar *epf_bar = &epf->bar[BAR_0];
void *cookie = epfnv->edma.cookie;
struct pcie_epf_bar0 *epf_bar0 = (struct pcie_epf_bar0 *) epfnv->bar0_virt;
debugfs_remove_recursive(epfnv->debugfs);
epfnv->edma.cookie = NULL;
epf_bar0->rp_phy_addr = 0;
tegra_pcie_edma_deinit(cookie);
pcie_dma_epf_msi_deinit(epf);
pci_epc_stop(epc);
lpci_epf_free_space(epf, epfnv->bar0_virt, BAR_0);
}
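/*
 * Bind: allocate BAR0 (header, DMA linked lists and data buffers), publish
 * the EP DMA address in the BAR0 header, map the DMA registers from the
 * "atu_dma" resource, request the "intr" interrupt plus the per-channel
 * MSIs, and create the debugfs test nodes.
 */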
static int pcie_dma_epf_bind(struct pci_epf *epf)
{
struct pci_epc *epc = epf->epc;
struct pcie_epf_dma *epfnv = epf_get_drvdata(epf);
struct device *fdev = &epf->dev;
struct device *cdev = epc->dev.parent;
struct platform_device *pdev = of_find_device_by_node(cdev->of_node);
struct pci_epf_bar *epf_bar = &epf->bar[BAR_0];
struct pcie_epf_bar0 *epf_bar0;
struct resource *res;
char *name;
int ret, i;
epfnv->fdev = fdev;
epfnv->cdev = cdev;
epfnv->bar0_virt = lpci_epf_alloc_space(epf, BAR0_SIZE, BAR_0, SZ_64K);
if (!epfnv->bar0_virt) {
dev_err(fdev, "Failed to allocate memory for BAR0\n");
return -ENOMEM;
}
get_random_bytes(epfnv->bar0_virt, BAR0_SIZE);
memset(epfnv->bar0_virt, 0, BAR0_HEADER_SIZE);
/* Update BAR header with EP DMA PHY addr */
epf_bar0 = (struct pcie_epf_bar0 *)epfnv->bar0_virt;
epf_bar0->ep_phy_addr = epf_bar->phys_addr;
/* Set BAR0 mem type as 64-bit */
epf_bar->flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_MEM_PREFETCH;
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu_dma");
if (!res) {
dev_err(fdev, "missing atu_dma resource in DT\n");
ret = -ENODEV;
goto fail_atu_dma;
}
epfnv->dma_base = devm_ioremap(fdev, res->start + DMA_OFFSET,
resource_size(res) - DMA_OFFSET);
if (!epfnv->dma_base) {
ret = -ENOMEM;
dev_err(fdev, "dma region map failed: %d\n", ret);
goto fail_atu_dma;
}
epfnv->irq = platform_get_irq_byname(pdev, "intr");
if (epfnv->irq < 0) {
dev_err(fdev, "failed to get intr interrupt\n");
ret = epfnv->irq;
goto fail_atu_dma;
}
name = devm_kasprintf(fdev, GFP_KERNEL, "%s_epf_dma_test", pdev->name);
if (!name) {
ret = -ENOMEM;
goto fail_atu_dma;
}
ret = devm_request_threaded_irq(fdev, epfnv->irq, pcie_dma_epf_irq,
pcie_dma_epf_irq_handler,
IRQF_SHARED,
name, epfnv);
if (ret < 0) {
dev_err(fdev, "failed to request \"intr\" irq\n");
goto fail_atu_dma;
}
ret = pcie_dma_epf_msi_init(epf);
if (ret < 0) {
dev_err(fdev, "failed to init platform msi: %d\n", ret);
goto fail_atu_dma;
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
init_waitqueue_head(&epfnv->wr_wq[i]);
init_waitqueue_head(&epfnv->edma.wr_wq[i]);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
init_waitqueue_head(&epfnv->rd_wq[i]);
init_waitqueue_head(&epfnv->edma.rd_wq[i]);
}
epfnv->debugfs = debugfs_create_dir(name, NULL);
init_debugfs(epfnv);
return 0;
fail_atu_dma:
lpci_epf_free_space(epf, epfnv->bar0_virt, BAR_0);
return ret;
}
static const struct pci_epf_device_id pcie_dma_epf_ids[] = {
{
.name = "tegra_pcie_dma_epf",
},
{},
};
static const struct pci_epc_event_ops pci_epf_dma_test_event_ops = {
.core_init = pcie_dma_epf_core_init,
.core_deinit = pcie_dma_epf_core_deinit,
};
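/*
 * Probe: allocate the per-function state and the eDMA descriptor array,
 * hook up the core_init/core_deinit callbacks via event_ops and fill the
 * configuration header (NVIDIA vendor ID, device ID 0x1AD6).
 */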
static int pcie_dma_epf_probe(struct pci_epf *epf)
{
struct device *dev = &epf->dev;
struct pcie_epf_dma *epfnv;
epfnv = devm_kzalloc(dev, sizeof(*epfnv), GFP_KERNEL);
if (!epfnv)
return -ENOMEM;
epfnv->edma.ll_desc = devm_kzalloc(dev, sizeof(*epfnv->ll_desc) * NUM_EDMA_DESC,
GFP_KERNEL);
if (!epfnv->edma.ll_desc)
return -ENOMEM;
gepfnv = epfnv;
epf_set_drvdata(epf, epfnv);
epf->event_ops = &pci_epf_dma_test_event_ops;
epfnv->header.vendorid = PCI_VENDOR_ID_NVIDIA;
epfnv->header.deviceid = 0x1AD6;
epfnv->header.baseclass_code = PCI_BASE_CLASS_MEMORY;
epfnv->header.interrupt_pin = PCI_INTERRUPT_INTA;
epf->header = &epfnv->header;
return 0;
}
static struct pci_epf_ops ops = {
.unbind = pcie_dma_epf_unbind,
.bind = pcie_dma_epf_bind,
};
static struct pci_epf_driver test_driver = {
.driver.name = "pcie_dma_epf",
.probe = pcie_dma_epf_probe,
.id_table = pcie_dma_epf_ids,
.ops = &ops,
.owner = THIS_MODULE,
};
static int __init pcie_dma_epf_init(void)
{
int ret;
ret = pci_epf_register_driver(&test_driver);
if (ret < 0) {
pr_err("Failed to register PCIe DMA EPF driver: %d\n", ret);
return ret;
}
return 0;
}
module_init(pcie_dma_epf_init);
static void __exit pcie_dma_epf_exit(void)
{
pci_epf_unregister_driver(&test_driver);
}
module_exit(pcie_dma_epf_exit);
MODULE_DESCRIPTION("TEGRA PCIe DMA EPF driver");
MODULE_AUTHOR("Om Prakash Singh <omp@nvidia.com>");
MODULE_LICENSE("GPL v2");