PCI: tegra194: Add PCIe EDMA and its RP drivers

Add PCI EDMA drivers from nvidia repo.

Bug 3583632

Change-Id: Id54bec8d70424d56d4b91f781568edbfa27c5a82
Signed-off-by: Nagarjuna Kristam <nkristam@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2736242
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Nagarjuna Kristam
2022-06-28 11:47:02 +05:30
committed by mobile promotions
parent 5b33382ce2
commit 0d83600487
6 changed files with 1100 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ obj-m += hwmon/
obj-m += i2c/busses/
obj-m += mfd/
obj-m += misc/
obj-m += pci/
obj-m += pinctrl/
obj-m += platform/tegra/
obj-m += pwm/

4
drivers/pci/Makefile Normal file
View File

@@ -0,0 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
obj-m += controller/

View File

@@ -0,0 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
obj-m += tegra-pcie-edma.o

View File

@@ -0,0 +1,146 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* PCIe EDMA Framework
*
* Copyright (C) 2021-2022 NVIDIA Corporation. All rights reserved.
*/
#ifndef TEGRA_PCIE_DMA_OSI_H
#define TEGRA_PCIE_DMA_OSI_H
#define OSI_BIT(b) (1U << (b))
/** generates bit mask for 32 bit value */
#define OSI_GENMASK(h, l) (((~0U) << (l)) & (~0U >> (31U - (h))))
/* Channel specific registers */
#define DMA_CH_CONTROL1_OFF_WRCH 0x0
#define DMA_CH_CONTROL1_OFF_WRCH_LLE OSI_BIT(9)
#define DMA_CH_CONTROL1_OFF_WRCH_CCS OSI_BIT(8)
#define DMA_CH_CONTROL1_OFF_WRCH_CS_MASK OSI_GENMASK(6U, 5U)
#define DMA_CH_CONTROL1_OFF_WRCH_CS_SHIFT 5
#define DMA_CH_CONTROL1_OFF_WRCH_RIE OSI_BIT(4)
#define DMA_CH_CONTROL1_OFF_WRCH_LIE OSI_BIT(3)
#define DMA_CH_CONTROL1_OFF_WRCH_LLP OSI_BIT(2)
#define DMA_CH_CONTROL1_OFF_WRCH_CB OSI_BIT(0)
#define DMA_WRITE_ENGINE_EN_OFF 0xC
#define WRITE_ENABLE OSI_BIT(0)
#define WRITE_DISABLE 0x0
#define DMA_WRITE_DOORBELL_OFF 0x10
#define DMA_WRITE_DOORBELL_OFF_WR_STOP OSI_BIT(31)
#define DMA_READ_ENGINE_EN_OFF 0x2C
#define READ_ENABLE OSI_BIT(0)
#define READ_DISABLE 0x0
#define DMA_READ_DOORBELL_OFF 0x30
#define DMA_READ_DOORBELL_OFF_RD_STOP OSI_BIT(31)
#define DMA_TRANSFER_SIZE_OFF_WRCH 0x8
#define DMA_SAR_LOW_OFF_WRCH 0xC
#define DMA_SAR_HIGH_OFF_WRCH 0x10
#define DMA_DAR_LOW_OFF_WRCH 0x14
#define DMA_DAR_HIGH_OFF_WRCH 0x18
#define DMA_LLP_LOW_OFF_WRCH 0x1C
#define DMA_LLP_HIGH_OFF_WRCH 0x20
#define DMA_WRITE_DONE_IMWR_LOW_OFF 0x60
#define DMA_WRITE_DONE_IMWR_HIGH_OFF 0x64
#define DMA_WRITE_ABORT_IMWR_LOW_OFF 0x68
#define DMA_WRITE_ABORT_IMWR_HIGH_OFF 0x6c
#define DMA_WRITE_CH01_IMWR_DATA_OFF 0x70
#define DMA_WRITE_CH23_IMWR_DATA_OFF 0x74
#define DMA_WRITE_LINKED_LIST_ERR_EN_OFF 0x90
#define DMA_READ_LINKED_LIST_ERR_EN_OFF 0xC4
#define DMA_READ_DONE_IMWR_LOW_OFF 0xcc
#define DMA_READ_DONE_IMWR_HIGH_OFF 0xd0
#define DMA_READ_ABORT_IMWR_LOW_OFF 0xd4
#define DMA_READ_ABORT_IMWR_HIGH_OFF 0xd8
#define DMA_READ_CH01_IMWR_DATA_OFF 0xdc
#define DMA_CH_CONTROL1_OFF_RDCH 0x100
#define DMA_CH_CONTROL1_OFF_RDCH_LLE OSI_BIT(9)
#define DMA_CH_CONTROL1_OFF_RDCH_CCS OSI_BIT(8)
#define DMA_CH_CONTROL1_OFF_RDCH_CS_MASK OSI_GENMASK(6U, 5U)
#define DMA_CH_CONTROL1_OFF_RDCH_CS_SHIFT 5
#define DMA_CH_CONTROL1_OFF_RDCH_RIE OSI_BIT(4)
#define DMA_CH_CONTROL1_OFF_RDCH_LIE OSI_BIT(3)
#define DMA_CH_CONTROL1_OFF_RDCH_LLP OSI_BIT(2)
#define DMA_CH_CONTROL1_OFF_RDCH_CB OSI_BIT(0)
#define DMA_TRANSFER_SIZE_OFF_RDCH 0x108
#define DMA_SAR_LOW_OFF_RDCH 0x10c
#define DMA_SAR_HIGH_OFF_RDCH 0x110
#define DMA_DAR_LOW_OFF_RDCH 0x114
#define DMA_DAR_HIGH_OFF_RDCH 0x118
#define DMA_LLP_LOW_OFF_RDCH 0x11c
#define DMA_LLP_HIGH_OFF_RDCH 0x120
#define DMA_WRITE_INT_STATUS_OFF 0x4C
#define DMA_WRITE_INT_MASK_OFF 0x54
#define DMA_WRITE_INT_CLEAR_OFF 0x58
#define DMA_READ_INT_STATUS_OFF 0xA0
#define DMA_READ_INT_MASK_OFF 0xA8
#define DMA_READ_INT_CLEAR_OFF 0xAC
struct edma_ctrl {
uint32_t cb:1;
uint32_t tcb:1;
uint32_t llp:1;
uint32_t lie:1;
uint32_t rie:1;
};
struct edma_hw_desc {
volatile union {
struct edma_ctrl ctrl_e;
uint32_t ctrl_d;
} ctrl_reg;
uint32_t size;
uint32_t sar_low;
uint32_t sar_high;
uint32_t dar_low;
uint32_t dar_high;
};
struct edma_hw_desc_llp {
volatile union {
struct edma_ctrl ctrl_e;
uint32_t ctrl_d;
} ctrl_reg;
uint32_t size;
uint32_t sar_low;
uint32_t sar_high;
};
struct edma_dblock {
struct edma_hw_desc desc[2];
struct edma_hw_desc_llp llp;
};
static inline unsigned int dma_common_rd(void __iomem *p, unsigned int offset)
{
return readl(p + offset);
}
static inline void dma_common_wr(void __iomem *p, unsigned int val, unsigned int offset)
{
writel(val, p + offset);
}
static inline void dma_channel_wr(void __iomem *p, unsigned char c, unsigned int val,
u32 offset)
{
writel(val, (0x200 * (c + 1)) + p + offset);
}
static inline unsigned int dma_channel_rd(void __iomem *p, unsigned char c, u32 offset)
{
return readl((0x200 * (c + 1)) + p + offset);
}
#endif // TEGRA_PCIE_DMA_OSI_H

View File

@@ -0,0 +1,785 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* PCIe EDMA Library Framework
*
* Copyright (C) 2021-2022 NVIDIA Corporation. All rights reserved.
*/
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/tegra-pcie-edma.h>
#include <linux/limits.h>
#include "tegra-pcie-dma-osi.h"
/** Default number of descriptors used */
#define NUM_EDMA_DESC 4096
/* DMA base offset starts at 0x20000 from ATU_DMA base */
#define DMA_OFFSET 0x20000
/** Calculates timeout based on size.
* Time in nano sec = size in bytes / (1000000 * 2).
* 2Gbps is max speed for Gen 1 with 2.5GT/s at 8/10 encoding.
* Convert to milli seconds and add 1sec timeout
*/
#define GET_SYNC_TIMEOUT(s) ((((s) * 8UL) / 2000000) + 1000)
#define INCR_DESC(idx, i) ((idx) = ((idx) + (i)) % (ch->desc_sz))
struct edma_chan {
void *desc;
void __iomem *remap_desc;
struct tegra_pcie_edma_xfer_info *ring;
dma_addr_t dma_iova;
uint32_t desc_sz;
/* descriptor size that is allocated for a channel */
u64 edma_desc_size;
/** Index from where cleanup needs to be done */
volatile uint32_t r_idx;
/** Index from where descriptor update is needed */
volatile uint32_t w_idx;
struct mutex lock;
wait_queue_head_t wq;
edma_chan_type_t type;
u64 wcount;
u64 rcount;
bool busy;
bool pcs;
bool db_pos;
/** This field is updated to abort or de-init to stop further xfer submits */
edma_xfer_status_t st;
};
struct edma_prv {
int irq;
char *irq_name;
bool is_remote_dma;
uint16_t msi_data;
uint64_t msi_addr;
/** EDMA base address */
void __iomem *edma_base;
/** EDMA base address size */
uint32_t edma_base_size;
struct device *dev;
struct edma_chan tx[DMA_WR_CHNL_NUM];
struct edma_chan rx[DMA_RD_CHNL_NUM];
/* BIT(0) - Write initialized, BIT(1) - Read initialized */
uint32_t ch_init;
};
/** TODO: Define osi_ll_init strcuture and make this as OSI */
static inline void edma_ll_ch_init(void __iomem *edma_base, uint32_t ch,
dma_addr_t ll_phy_addr, bool rw_type,
bool is_remote_dma)
{
uint32_t int_mask_val = OSI_BIT(ch);
uint32_t val;
/** configure write by default and overwrite if read */
uint32_t int_mask = DMA_WRITE_INT_MASK_OFF;
uint32_t ctrl1_offset = DMA_CH_CONTROL1_OFF_WRCH;
uint32_t low_offset = DMA_LLP_LOW_OFF_WRCH;
uint32_t high_offset = DMA_LLP_HIGH_OFF_WRCH;
uint32_t lle_ccs = DMA_CH_CONTROL1_OFF_WRCH_LIE | DMA_CH_CONTROL1_OFF_WRCH_LLE |
DMA_CH_CONTROL1_OFF_WRCH_CCS;
uint32_t rie = DMA_CH_CONTROL1_OFF_WRCH_RIE;
uint32_t err_off = DMA_WRITE_LINKED_LIST_ERR_EN_OFF;
uint32_t err_val = 0;
if (!rw_type) {
int_mask = DMA_READ_INT_MASK_OFF;
low_offset = DMA_LLP_LOW_OFF_RDCH;
high_offset = DMA_LLP_HIGH_OFF_RDCH;
ctrl1_offset = DMA_CH_CONTROL1_OFF_RDCH;
lle_ccs = DMA_CH_CONTROL1_OFF_RDCH_LIE | DMA_CH_CONTROL1_OFF_RDCH_LLE |
DMA_CH_CONTROL1_OFF_RDCH_CCS;
rie = DMA_CH_CONTROL1_OFF_RDCH_RIE;
err_off = DMA_READ_LINKED_LIST_ERR_EN_OFF;
}
/* Enable LIE or RIE for all write channels */
val = dma_common_rd(edma_base, int_mask);
err_val = dma_common_rd(edma_base, err_off);
if (!is_remote_dma) {
val &= ~int_mask_val;
val &= ~(int_mask_val << 16);
err_val |= OSI_BIT((16 + ch));
} else {
val |= int_mask_val;
val |= (int_mask_val << 16);
err_val |= OSI_BIT(ch);
}
dma_common_wr(edma_base, val, int_mask);
dma_common_wr(edma_base, err_val, err_off);
val = lle_ccs;
/* Enable RIE for remote DMA */
val |= (is_remote_dma ? rie : 0);
dma_channel_wr(edma_base, ch, val, ctrl1_offset);
dma_channel_wr(edma_base, ch, lower_32_bits(ll_phy_addr), low_offset);
dma_channel_wr(edma_base, ch, upper_32_bits(ll_phy_addr), high_offset);
}
static inline void edma_hw_init(void *cookie, bool rw_type)
{
struct edma_prv *prv = (struct edma_prv *)cookie;
uint32_t msi_data;
u32 eng_off[2] = {DMA_WRITE_ENGINE_EN_OFF, DMA_READ_ENGINE_EN_OFF};
if (prv->ch_init & OSI_BIT(rw_type))
dma_common_wr(prv->edma_base, WRITE_ENABLE, eng_off[rw_type]);
/* Program MSI addr & data for remote DMA use case */
if (prv->is_remote_dma) {
msi_data = prv->msi_data;
msi_data |= (msi_data << 16);
dma_common_wr(prv->edma_base, lower_32_bits(prv->msi_addr),
DMA_WRITE_DONE_IMWR_LOW_OFF);
dma_common_wr(prv->edma_base, upper_32_bits(prv->msi_addr),
DMA_WRITE_DONE_IMWR_HIGH_OFF);
dma_common_wr(prv->edma_base, lower_32_bits(prv->msi_addr),
DMA_WRITE_ABORT_IMWR_LOW_OFF);
dma_common_wr(prv->edma_base, upper_32_bits(prv->msi_addr),
DMA_WRITE_ABORT_IMWR_HIGH_OFF);
dma_common_wr(prv->edma_base, msi_data,
DMA_WRITE_CH01_IMWR_DATA_OFF);
dma_common_wr(prv->edma_base, msi_data,
DMA_WRITE_CH23_IMWR_DATA_OFF);
dma_common_wr(prv->edma_base, lower_32_bits(prv->msi_addr),
DMA_READ_DONE_IMWR_LOW_OFF);
dma_common_wr(prv->edma_base, upper_32_bits(prv->msi_addr),
DMA_READ_DONE_IMWR_HIGH_OFF);
dma_common_wr(prv->edma_base, lower_32_bits(prv->msi_addr),
DMA_READ_ABORT_IMWR_LOW_OFF);
dma_common_wr(prv->edma_base, upper_32_bits(prv->msi_addr),
DMA_READ_ABORT_IMWR_HIGH_OFF);
dma_common_wr(prv->edma_base, msi_data,
DMA_READ_CH01_IMWR_DATA_OFF);
}
}
static inline int edma_ch_init(struct edma_prv *prv, struct edma_chan *ch)
{
struct edma_dblock *db;
dma_addr_t addr;
uint32_t j;
if ((ch->desc_sz <= 1) || (ch->desc_sz & (ch->desc_sz - 1)))
return -EINVAL;
if (prv->is_remote_dma)
memset_io(ch->remap_desc, 0,
(sizeof(struct edma_dblock)) * ((ch->desc_sz / 2) + 1));
else
memset(ch->desc, 0, (sizeof(struct edma_dblock)) * ((ch->desc_sz / 2) + 1));
db = (struct edma_dblock *)ch->desc + ((ch->desc_sz / 2) - 1);
db->llp.sar_low = lower_32_bits(ch->dma_iova);
db->llp.sar_high = upper_32_bits(ch->dma_iova);
db->llp.ctrl_reg.ctrl_e.llp = 1;
db->llp.ctrl_reg.ctrl_e.tcb = 1;
for (j = 0; j < (ch->desc_sz / 2 - 1); j++) {
db = (struct edma_dblock *)ch->desc + j;
addr = ch->dma_iova + (sizeof(struct edma_dblock) * (j + 1));
db->llp.sar_low = lower_32_bits(addr);
db->llp.sar_high = upper_32_bits(addr);
db->llp.ctrl_reg.ctrl_e.llp = 1;
}
ch->wcount = 0;
ch->rcount = 0;
ch->w_idx = 0;
ch->r_idx = 0;
ch->pcs = 1;
ch->st = EDMA_XFER_SUCCESS;
if (!ch->ring) {
ch->ring = kcalloc(ch->desc_sz, sizeof(*ch->ring), GFP_KERNEL);
if (!ch->ring)
return -ENOMEM;
}
return 0;
}
static inline void edma_hw_deinit(void *cookie, bool rw_type)
{
struct edma_prv *prv = (struct edma_prv *)cookie;
int i;
u32 eng_off[2] = {DMA_WRITE_ENGINE_EN_OFF, DMA_READ_ENGINE_EN_OFF};
u32 ctrl_off[2] = {DMA_CH_CONTROL1_OFF_WRCH, DMA_CH_CONTROL1_OFF_RDCH};
u32 mode_cnt[2] = {DMA_WR_CHNL_NUM, DMA_RD_CHNL_NUM};
if (prv->ch_init & OSI_BIT(rw_type)) {
dma_common_wr(prv->edma_base, 0, eng_off[rw_type]);
for (i = 0; i < mode_cnt[rw_type]; i++)
dma_channel_wr(prv->edma_base, i, 0, ctrl_off[rw_type]);
}
}
/** From OSI */
static inline u32 get_dma_idx_from_llp(struct edma_prv *prv, u32 chan, struct edma_chan *ch,
u32 type)
{
u64 cur_iova;
u64 high_iova, tmp_iova;
u32 cur_idx;
u32 llp_low_off[2] = {DMA_LLP_LOW_OFF_WRCH, DMA_LLP_LOW_OFF_RDCH};
u32 llp_high_off[2] = {DMA_LLP_HIGH_OFF_WRCH, DMA_LLP_HIGH_OFF_RDCH};
u64 block_idx, idx_in_block;
/*
* Read current element address in DMA_LLP register which pending
* DMA request and validate for spill over.
*/
high_iova = dma_channel_rd(prv->edma_base, chan, llp_high_off[type]);
cur_iova = (high_iova << 32);
cur_iova |= dma_channel_rd(prv->edma_base, chan, llp_low_off[type]);
tmp_iova = dma_channel_rd(prv->edma_base, chan, llp_high_off[type]);
if (tmp_iova > high_iova) {
/* Take latest reading of low offset and use it with new high offset */
cur_iova = dma_channel_rd(prv->edma_base, chan, llp_low_off[type]);
cur_iova |= (tmp_iova << 32);
}
/* Compute DMA desc index */
block_idx = ((cur_iova - ch->dma_iova) / sizeof(struct edma_dblock));
idx_in_block = (cur_iova & (sizeof(struct edma_dblock) - 1)) /
sizeof(struct edma_hw_desc);
cur_idx = (block_idx * 2) + idx_in_block;
return cur_idx % (ch->desc_sz);
}
static inline void process_r_idx(struct edma_chan *ch, edma_xfer_status_t st, u32 idx)
{
u32 count = 0;
struct edma_hw_desc *dma_ll_virt;
struct edma_dblock *db;
struct tegra_pcie_edma_xfer_info *ring;
while ((ch->r_idx != idx) && (count < ch->desc_sz)) {
count++;
ring = &ch->ring[ch->r_idx];
db = (struct edma_dblock *)ch->desc + ch->r_idx / 2;
dma_ll_virt = &db->desc[ch->r_idx % 2];
INCR_DESC(ch->r_idx, 1);
ch->rcount++;
/* clear lie and rie if any set */
dma_ll_virt->ctrl_reg.ctrl_e.lie = 0;
dma_ll_virt->ctrl_reg.ctrl_e.rie = 0;
if (ch->type == EDMA_CHAN_XFER_ASYNC && ring->complete) {
ring->complete(ring->priv, st, NULL);
/* Clear ring callback */
ring->complete = NULL;
}
}
}
static inline void process_ch_irq(struct edma_prv *prv, u32 chan, struct edma_chan *ch,
u32 type)
{
u32 idx;
idx = get_dma_idx_from_llp(prv, chan, ch, type);
if (ch->type == EDMA_CHAN_XFER_SYNC) {
if (ch->busy) {
ch->busy = false;
wake_up(&ch->wq);
} else
dev_info(prv->dev, "SYNC mode with chan %d busy not set r_idx %d, cur_idx %d, w_idx is %d\n",
chan, ch->r_idx, idx, ch->w_idx);
}
if (ch->st == EDMA_XFER_ABORT) {
dev_info(prv->dev, "Abort: ch %d at r_idx %d->idx %d, w_idx is %d\n", chan,
ch->r_idx, idx, ch->w_idx);
if (ch->r_idx == idx)
goto process_abort;
}
process_r_idx(ch, EDMA_XFER_SUCCESS, idx);
process_abort:
if (ch->st == EDMA_XFER_ABORT)
process_r_idx(ch, EDMA_XFER_ABORT, ch->w_idx);
}
static irqreturn_t edma_irq(int irq, void *cookie)
{
/* Disable irq before wake thread handler */
disable_irq_nosync(irq);
return IRQ_WAKE_THREAD;
}
static irqreturn_t edma_irq_handler(int irq, void *cookie)
{
struct edma_prv *prv = (struct edma_prv *)cookie;
int bit = 0;
u32 val, i = 0;
struct edma_chan *chan[2] = {&prv->tx[0], &prv->rx[0]};
struct edma_chan *ch;
u32 int_status_off[2] = {DMA_WRITE_INT_STATUS_OFF, DMA_READ_INT_STATUS_OFF};
u32 int_clear_off[2] = {DMA_WRITE_INT_CLEAR_OFF, DMA_READ_INT_CLEAR_OFF};
u32 mode_cnt[2] = {DMA_WR_CHNL_NUM, DMA_RD_CHNL_NUM};
for (i = 0; i < 2; i++) {
if (!(prv->ch_init & OSI_BIT(i)))
continue;
val = dma_common_rd(prv->edma_base, int_status_off[i]);
if ((val & OSI_GENMASK(31, 16)) != 0U) {
/**
* If ABORT, immediately update state for all channels as aborted.
* This setting stop further SW queuing
*/
dev_info(prv->dev, "Abort int status 0x%x", val);
for (bit = 0; bit < mode_cnt[i]; bit++) {
ch = chan[i] + bit;
ch->st = EDMA_XFER_ABORT;
}
edma_hw_deinit(prv, !!i);
/** Perform abort handling */
for (bit = 0; bit < mode_cnt[i]; bit++) {
ch = chan[i] + bit;
if (!ch->ring)
continue;
/* Clear ABORT and DONE interrupt, as abort handles both */
dma_common_wr(prv->edma_base, OSI_BIT(16 + bit) | OSI_BIT(bit),
int_clear_off[i]);
/** wait until exisitng xfer submit completed */
mutex_lock(&ch->lock);
mutex_unlock(&ch->lock);
process_ch_irq(prv, bit, ch, i);
edma_ch_init(prv, ch);
edma_ll_ch_init(prv->edma_base, bit, ch->dma_iova, (i == 0),
prv->is_remote_dma);
}
edma_hw_init(prv, !!i);
} else {
for (bit = 0; bit < mode_cnt[i]; bit++) {
ch = chan[i] + bit;
if (OSI_BIT(bit) & val) {
dma_common_wr(prv->edma_base, OSI_BIT(bit),
int_clear_off[i]);
process_ch_irq(prv, bit, ch, i);
}
}
}
}
/* Must enable before exit */
enable_irq(irq);
return IRQ_HANDLED;
}
void *tegra_pcie_edma_initialize(struct tegra_pcie_edma_init_info *info)
{
struct edma_prv *prv;
struct resource *dma_res;
int32_t ret, i, j;
struct edma_chan *ch = NULL;
struct edma_chan *chan[2];
u32 mode_cnt[2] = {DMA_WR_CHNL_NUM, DMA_RD_CHNL_NUM};
struct tegra_pcie_edma_chans_info *chan_info[2];
struct tegra_pcie_edma_chans_info *ch_info;
struct platform_device *pdev;
prv = kzalloc(sizeof(*prv), GFP_KERNEL);
if (!prv) {
pr_err("Failed to allocate memory for edma_prv\n");
return NULL;
}
chan[0] = &prv->tx[0];
chan[1] = &prv->rx[0];
chan_info[0] = &info->tx[0];
chan_info[1] = &info->rx[0];
if (info->edma_remote != NULL) {
if (!info->edma_remote->dev) {
pr_err("%s: dev pointer is NULL\n", __func__);
goto free_priv;
}
prv->dev = info->edma_remote->dev;
if (info->edma_remote->msi_irq > INT_MAX) {
pr_err("%s: msi_irq is out of range\n", __func__);
goto free_priv;
}
prv->irq = (int)info->edma_remote->msi_irq;
prv->msi_data = info->edma_remote->msi_data;
prv->msi_addr = info->edma_remote->msi_addr;
prv->is_remote_dma = true;
prv->edma_base = devm_ioremap(prv->dev, info->edma_remote->dma_phy_base,
info->edma_remote->dma_size);
if (IS_ERR(prv->edma_base)) {
dev_err(prv->dev, "dma region map failed.\n");
goto free_priv;
}
} else if (info->np != NULL) {
prv->is_remote_dma = false;
pdev = of_find_device_by_node(info->np);
if (!pdev) {
pr_err("Unable to retrieve pdev node\n");
goto free_priv;
}
prv->dev = &pdev->dev;
dma_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
"atu_dma");
if (!dma_res) {
dev_err(prv->dev, "missing atu_dma resource in DT\n");
goto put_dev;
}
prv->edma_base = devm_ioremap(prv->dev, dma_res->start + DMA_OFFSET,
resource_size(dma_res) - DMA_OFFSET);
if (IS_ERR(prv->edma_base)) {
dev_err(prv->dev, "dma region map failed.\n");
goto put_dev;
}
prv->irq = platform_get_irq_byname(pdev, "intr");
if (prv->irq <= 0) {
dev_err(prv->dev, "failed to get intr interrupt\n");
goto put_dev;
};
} else {
pr_err("Neither device node nor edma remote available");
goto free_priv;
}
for (j = 0; j < 2; j++) {
for (i = 0; i < mode_cnt[j]; i++) {
ch_info = chan_info[j] + i;
ch = chan[j] + i;
if (ch_info->num_descriptors == 0)
continue;
ch->type = ch_info->ch_type;
ch->desc_sz = ch_info->num_descriptors;
if (prv->is_remote_dma) {
ch->dma_iova = ch_info->desc_iova;
ch->remap_desc = devm_ioremap(prv->dev, ch_info->desc_phy_base,
(sizeof(struct edma_dblock)) *
((ch->desc_sz / 2) + 1));
ch->desc = (__force void *)ch->remap_desc;
if (!ch->desc) {
dev_err(prv->dev, "desc region map failed, phy: 0x%llx\n",
ch_info->desc_phy_base);
goto dma_iounmap;
}
} else {
ch->edma_desc_size = (sizeof(struct edma_dblock)) *
((ch->desc_sz / 2) + 1);
ch->desc = dma_alloc_coherent(prv->dev, ch->edma_desc_size,
&ch->dma_iova, GFP_KERNEL);
if (!ch->desc) {
dev_err(prv->dev, "Cannot allocate required descriptos(%d) of size (%lu) for channel:%d type: %d\n",
ch->desc_sz,
(sizeof(struct edma_hw_desc) * ch->desc_sz), i, j);
goto dma_iounmap;
}
}
prv->ch_init |= OSI_BIT(j);
if (edma_ch_init(prv, ch) < 0)
goto free_dma_desc;
edma_ll_ch_init(prv->edma_base, i, ch->dma_iova, (j == 0),
prv->is_remote_dma);
}
}
prv->irq_name = kasprintf(GFP_KERNEL, "%s_edma_lib", dev_name(prv->dev));
if (!prv->irq_name)
goto free_ring;
ret = request_threaded_irq(prv->irq, edma_irq, edma_irq_handler,
IRQF_SHARED,
prv->irq_name, prv);
if (ret < 0) {
dev_err(prv->dev, "failed to request \"intr\" irq\n");
goto free_irq_name;
}
for (i = 0; i < DMA_WR_CHNL_NUM; i++) {
mutex_init(&prv->tx[i].lock);
init_waitqueue_head(&prv->tx[i].wq);
}
for (i = 0; i < DMA_RD_CHNL_NUM; i++) {
mutex_init(&prv->rx[i].lock);
init_waitqueue_head(&prv->rx[i].wq);
}
edma_hw_init(prv, false);
edma_hw_init(prv, true);
dev_info(prv->dev, "%s: success", __func__);
return prv;
free_irq_name:
kfree(prv->irq_name);
free_ring:
for (j = 0; j < 2; j++) {
for (i = 0; i < mode_cnt[j]; i++) {
ch = chan[j] + i;
kfree(ch->ring);
}
}
free_dma_desc:
for (j = 0; j < 2; j++) {
for (i = 0; i < mode_cnt[j]; i++) {
ch = chan[j] + i;
if (prv->is_remote_dma && ch->desc)
devm_iounmap(prv->dev, ch->remap_desc);
else if (ch->desc)
dma_free_coherent(prv->dev, ch->edma_desc_size,
ch->desc, ch->dma_iova);
}
}
dma_iounmap:
devm_iounmap(prv->dev, prv->edma_base);
put_dev:
if (!prv->is_remote_dma)
put_device(prv->dev);
free_priv:
kfree(prv);
return NULL;
}
EXPORT_SYMBOL_GPL(tegra_pcie_edma_initialize);
edma_xfer_status_t tegra_pcie_edma_submit_xfer(void *cookie,
struct tegra_pcie_edma_xfer_info *tx_info)
{
struct edma_prv *prv = (struct edma_prv *)cookie;
struct edma_chan *ch;
struct edma_hw_desc *dma_ll_virt;
struct edma_dblock *db;
int i, ret;
u64 total_sz = 0;
edma_xfer_status_t st = EDMA_XFER_SUCCESS;
u32 avail;
struct tegra_pcie_edma_xfer_info *ring;
u32 int_status_off[2] = {DMA_WRITE_INT_STATUS_OFF, DMA_READ_INT_STATUS_OFF};
u32 doorbell_off[2] = {DMA_WRITE_DOORBELL_OFF, DMA_READ_DOORBELL_OFF};
u32 mode_cnt[2] = {DMA_WR_CHNL_NUM, DMA_RD_CHNL_NUM};
bool pcs;
if (!prv || !tx_info || tx_info->nents == 0 || !tx_info->desc ||
tx_info->channel_num >= mode_cnt[tx_info->type])
return EDMA_XFER_FAIL_INVAL_INPUTS;
ch = (tx_info->type == EDMA_XFER_WRITE) ? &prv->tx[tx_info->channel_num] :
&prv->rx[tx_info->channel_num];
if (!ch->desc_sz)
return EDMA_XFER_FAIL_INVAL_INPUTS;
if ((tx_info->complete == NULL) && (ch->type == EDMA_CHAN_XFER_ASYNC))
return EDMA_XFER_FAIL_INVAL_INPUTS;
/* Get hold of the hardware - locking */
mutex_lock(&ch->lock);
/* Channel busy flag should be updated before channel status check */
ch->busy = true;
if (ch->st != EDMA_XFER_SUCCESS) {
st = ch->st;
goto unlock;
}
avail = (ch->r_idx - ch->w_idx - 1U) & (ch->desc_sz - 1U);
if (tx_info->nents > avail) {
dev_dbg(prv->dev, "Descriptors full. w_idx %d. r_idx %d, avail %d, req %d\n",
ch->w_idx, ch->r_idx, avail, tx_info->nents);
st = EDMA_XFER_FAIL_NOMEM;
goto unlock;
}
dev_dbg(prv->dev, "xmit for %d nents at %d widx and %d ridx\n",
tx_info->nents, ch->w_idx, ch->r_idx);
db = (struct edma_dblock *)ch->desc + (ch->w_idx/2);
for (i = 0; i < tx_info->nents; i++) {
dma_ll_virt = &db->desc[ch->db_pos];
dma_ll_virt->size = tx_info->desc[i].sz;
/* calculate number of packets and add those many headers */
total_sz += ((tx_info->desc[i].sz / ch->desc_sz) + 1) * 30;
total_sz += tx_info->desc[i].sz;
dma_ll_virt->sar_low = lower_32_bits(tx_info->desc[i].src);
dma_ll_virt->sar_high = upper_32_bits(tx_info->desc[i].src);
dma_ll_virt->dar_low = lower_32_bits(tx_info->desc[i].dst);
dma_ll_virt->dar_high = upper_32_bits(tx_info->desc[i].dst);
/* Set LIE or RIE in last element */
if (i == tx_info->nents - 1) {
dma_ll_virt->ctrl_reg.ctrl_e.lie = 1;
dma_ll_virt->ctrl_reg.ctrl_e.rie = !!prv->is_remote_dma;
}
/* CB should be updated last in the descriptor */
dma_ll_virt->ctrl_reg.ctrl_e.cb = ch->pcs;
ch->db_pos = !ch->db_pos;
avail = ch->w_idx;
ch->w_idx++;
if (!ch->db_pos) {
ch->wcount = 0;
db->llp.ctrl_reg.ctrl_e.cb = ch->pcs;
if (ch->w_idx == ch->desc_sz) {
ch->pcs = !ch->pcs;
db->llp.ctrl_reg.ctrl_e.cb = ch->pcs;
dev_dbg(prv->dev, "Toggled pcs at w_idx %d\n", ch->w_idx);
ch->w_idx = 0;
}
db = (struct edma_dblock *)ch->desc + (ch->w_idx/2);
}
}
ring = &ch->ring[avail];
ring->complete = tx_info->complete;
ring->priv = tx_info->priv;
ring->nents = tx_info->nents;
ring->desc = tx_info->desc;
/* Read back CB to avoid OOO in case of remote dma. */
pcs = dma_ll_virt->ctrl_reg.ctrl_e.cb;
/* desc write should not go OOO wrt DMA DB ring */
wmb();
dma_common_wr(prv->edma_base, tx_info->channel_num, doorbell_off[tx_info->type]);
if (ch->type == EDMA_CHAN_XFER_SYNC) {
ret = wait_event_timeout(ch->wq, !ch->busy,
msecs_to_jiffies((uint32_t)(GET_SYNC_TIMEOUT(total_sz))));
if (ret == 0) {
/* dummy print to avoid misra-c voilations */
dev_dbg(prv->dev, "read back pcs: %d\n", pcs);
dev_err(prv->dev, "%s: timeout at %d ch, w_idx(%d), r_idx(%d)\n",
__func__, tx_info->channel_num, ch->w_idx,
ch->r_idx);
dev_err(prv->dev, "%s: int status 0x%x", __func__,
dma_common_rd(prv->edma_base, int_status_off[tx_info->type]));
st = EDMA_XFER_FAIL_TIMEOUT;
goto unlock;
} else {
st = ch->st;
}
dev_dbg(prv->dev, "xmit done for %d nents at %d widx and %d ridx\n",
tx_info->nents, ch->w_idx, ch->r_idx);
}
unlock:
/* release hardware - unlocking */
mutex_unlock(&ch->lock);
return st;
}
EXPORT_SYMBOL_GPL(tegra_pcie_edma_submit_xfer);
static void edma_stop(struct edma_prv *prv, edma_xfer_status_t st)
{
struct edma_chan *chan[2], *ch;
int i, j;
u32 mode_cnt[2] = {DMA_WR_CHNL_NUM, DMA_RD_CHNL_NUM};
chan[0] = &prv->tx[0];
chan[1] = &prv->rx[0];
/* wake up xfer function waiting on dma completion in sync mode */
for (j = 0; j < 2; j++) {
for (i = 0; i < mode_cnt[j]; i++) {
ch = chan[j] + i;
ch->st = st;
if ((ch->type == EDMA_CHAN_XFER_SYNC) && ch->busy) {
ch->busy = false;
wake_up(&ch->wq);
}
/** wait until exisitng xfer submit completed */
mutex_lock(&ch->lock);
mutex_unlock(&ch->lock);
}
}
edma_hw_deinit(prv, false);
edma_hw_deinit(prv, true);
synchronize_irq(prv->irq);
for (j = 0; j < 2; j++) {
for (i = 0; i < mode_cnt[j]; i++) {
ch = chan[j] + i;
if (prv->ch_init & OSI_BIT(i))
process_r_idx(ch, st, ch->w_idx);
}
}
}
bool tegra_pcie_edma_stop(void *cookie)
{
struct edma_prv *prv = (struct edma_prv *)cookie;
if (cookie == NULL)
return false;
edma_stop(prv, EDMA_XFER_ABORT);
return true;
}
EXPORT_SYMBOL_GPL(tegra_pcie_edma_stop);
void tegra_pcie_edma_deinit(void *cookie)
{
struct edma_prv *prv = (struct edma_prv *)cookie;
struct edma_chan *chan[2], *ch;
int i, j;
u32 mode_cnt[2] = {DMA_WR_CHNL_NUM, DMA_RD_CHNL_NUM};
if (cookie == NULL)
return;
edma_stop(prv, EDMA_XFER_DEINIT);
free_irq(prv->irq, prv);
kfree(prv->irq_name);
chan[0] = &prv->tx[0];
chan[1] = &prv->rx[0];
for (j = 0; j < 2; j++) {
for (i = 0; i < mode_cnt[j]; i++) {
ch = chan[j] + i;
if (prv->is_remote_dma && ch->desc)
devm_iounmap(prv->dev, ch->remap_desc);
else if (ch->desc)
dma_free_coherent(prv->dev, ch->edma_desc_size,
ch->desc, ch->dma_iova);
kfree(ch->ring);
}
}
devm_iounmap(prv->dev, prv->edma_base);
if (!prv->is_remote_dma)
put_device(prv->dev);
kfree(prv);
}
EXPORT_SYMBOL_GPL(tegra_pcie_edma_deinit);
MODULE_LICENSE("GPL v2");

View File

@@ -0,0 +1,160 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* PCIe DMA EPF Library for Tegra PCIe
*
* Copyright (C) 2021-2022 NVIDIA Corporation. All rights reserved.
*/
#ifndef TEGRA_PCIE_EDMA_H
#define TEGRA_PCIE_EDMA_H
#define DMA_RD_CHNL_NUM 2
#define DMA_WR_CHNL_NUM 4
#define EDMA_DESC_SZ 32
/**
* Application can use this macro as default number of descriptors.
* Number of descriptors should always be power of 2.
*/
#define NUM_EDMA_DESC 4096
/**
* @brief typedef to define various values for xfer status passed for edma_complete_t or
* tegra_pcie_edma_submit_xfer()
*/
typedef enum {
EDMA_XFER_SUCCESS = 0,
EDMA_XFER_FAIL_INVAL_INPUTS,
EDMA_XFER_FAIL_NOMEM,
EDMA_XFER_FAIL_TIMEOUT,
EDMA_XFER_ABORT,
EDMA_XFER_DEINIT,
} edma_xfer_status_t;
/** @brief typedef to define various values for xfer type passed tegra_pcie_edma_submit_xfer() */
typedef enum {
EDMA_XFER_WRITE = 0,
EDMA_XFER_READ,
} edma_xfer_type_t;
/**
* @brief typedef to define various values for channel xfer configuration done
* during tegra_pcie_edma_submit_init()
*/
typedef enum {
EDMA_CHAN_XFER_SYNC = 0,
EDMA_CHAN_XFER_ASYNC,
} edma_chan_type_t;
/** Forward declaration */
struct tegra_pcie_edma_desc;
/** @brief Tx Async callback function pointer */
typedef void (edma_complete_t)(void *priv, edma_xfer_status_t status,
struct tegra_pcie_edma_desc *desc);
/** @brief Remote EDMA controller details.
* @note: this is initial revision and expected to be modified.
*/
struct pcie_tegra_edma_remote_info {
/** MSI IRQ number */
uint32_t msi_irq;
/** MSI data that needs to be configured on EP DMA registers */
uint16_t msi_data;
/** MSI address that needs to be configured on EP DMA registers */
uint64_t msi_addr;
/** EP's DMA PHY base address, which same as BAR4 base address */
phys_addr_t dma_phy_base;
/** EP's DMA register spec size, which same as BAR4 size */
uint32_t dma_size;
/** &pci_dev.dev poniter used for devm_* and logging */
struct device *dev;
};
/** @brief details of EDMA Tx channel configuration */
struct tegra_pcie_edma_chans_info {
/** Variable to specify if corresponding channel should run in Sync/Async mode. */
edma_chan_type_t ch_type;
/** Number of descriptors that needs to be configured for this channel.
* @note
* - If 0 is passed, this channel will be treated un-used.
* - else it must be power of 2.
*/
uint32_t num_descriptors;
/* Below parameter are used, only if edma_remote is present in #tegra_pcie_edma_init_info */
/** Descriptor PHY base allocated by client which is part of BAR0. Memory allocated for this
* should be 1 more than number of descriptors.
*/
phys_addr_t desc_phy_base;
/** Abosolute IOVA address of desc of desc_phy_base. */
dma_addr_t desc_iova;
};
/** @brief init data structure to be used for tegra_pcie_edma_init() API */
struct tegra_pcie_edma_init_info {
/** configuration details for edma Tx channels */
struct tegra_pcie_edma_chans_info tx[DMA_WR_CHNL_NUM];
/** configuration details for edma Tx channels */
struct tegra_pcie_edma_chans_info rx[DMA_RD_CHNL_NUM];
/** device node for corresponding edma to read information from DT */
struct device_node *np;
/**
* Remote edma pointer: if not NULL, library uses remote EDMA engine for transfers
* else uses local controller EDMA engine.
*/
struct pcie_tegra_edma_remote_info *edma_remote;
};
/** @brief edma descriptor for data transfer operations */
struct tegra_pcie_edma_desc {
/** source address of data buffer */
dma_addr_t src;
/** destination address where data buffer needs to be transferred */
dma_addr_t dst;
/** Size of data buffer */
uint32_t sz;
};
/** @brief data strcuture needs to be passed for Tx operations */
struct tegra_pcie_edma_xfer_info {
/** Read or write operation. 0 -> write, 1->read */
edma_xfer_type_t type;
/** Channel on which operation needs to be performed.
* Range 0 to (DMA_RD_CHNL_NUM-1)/(DMA_WR_CHNL_NUM-1)
*/
uint32_t channel_num;
/** EDMA descriptor structure with source, destination DMA addr along with its size. */
struct tegra_pcie_edma_desc *desc;
/** Number of desc entries. */
uint32_t nents;
/** complete callback to be called */
edma_complete_t *complete;
/** Caller'z private data pointer which will be passed as part of edma_complete_t */
void *priv;
};
/**
* @brief: API to perform EDMA library initialization.
* @param[in] info: EDMA init data structure. Refer struct tegra_pcie_edma_init_info for details.
* @retVal: NULL on failure and valid pointer if success and this should
* be passed for all subsequent calls of this library.
*/
void *tegra_pcie_edma_initialize(struct tegra_pcie_edma_init_info *info);
/**
* @brief: API to perform transfer operation.
* @param[in] tx_info: EDMA Tx data structure. Refer struct tegra_pcie_edma_xfer_info for details.
* @param[in] coockie : cookie data returned in tegra_pcie_edma_initialize() call.
* @retVal: Refer edma_xfer_status_t.
*/
edma_xfer_status_t tegra_pcie_edma_submit_xfer(void *cookie,
struct tegra_pcie_edma_xfer_info *tx_info);
/**
* @brief: API to perform de-init of EDMA library.
* @param[in] cookie : cookie data returned in tegra_pcie_edma_initialize() call.
*/
void tegra_pcie_edma_deinit(void *cookie);
#endif //TEGRA_PCIE_EDMA_H