diff --git a/drivers/net/ethernet/nvidia/nvethernet/Makefile b/drivers/net/ethernet/nvidia/nvethernet/Makefile
index a1182f06..7254a491 100644
--- a/drivers/net/ethernet/nvidia/nvethernet/Makefile
+++ b/drivers/net/ethernet/nvidia/nvethernet/Makefile
@@ -31,6 +31,7 @@ endif
 
 #ccflags-y += -DOSI_DEBUG -DMACSEC_SUPPORT -DDEBUG_MACSEC -DMACSEC_KEY_PROGRAM
 ccflags-y += -DMACSEC_SUPPORT
+ccflags-y += -DBW_TEST
 
 nvethernet-objs:= ether_linux.o \
 	osd.o \
@@ -41,6 +42,7 @@ nvethernet-objs:= ether_linux.o \
 	ptp.o \
 	macsec.o \
 	selftests.o \
+	ether_test_tx.o \
 	$(OSI_CORE)/osi_core.o \
 	$(OSI_CORE)/osi_hal.o \
 	$(OSI_CORE)/common_macsec.o \
diff --git a/drivers/net/ethernet/nvidia/nvethernet/ether_linux.c b/drivers/net/ethernet/nvidia/nvethernet/ether_linux.c
index e9ac63b2..06ff4c63 100644
--- a/drivers/net/ethernet/nvidia/nvethernet/ether_linux.c
+++ b/drivers/net/ethernet/nvidia/nvethernet/ether_linux.c
@@ -1728,11 +1728,16 @@ static irqreturn_t ether_vm_isr(int irq, void *data)
 			osi_handle_dma_intr(osi_dma, chan,
 					    OSI_DMA_CH_TX_INTR,
 					    OSI_DMA_INTR_DISABLE);
-
+#ifdef BW_TEST
+			if (pdata->test_tx_bandwidth == OSI_DISABLE) {
+#endif
 			if (likely(napi_schedule_prep(&tx_napi->napi))) {
 				/* TODO: Schedule NAPI on different CPU core */
 				__napi_schedule_irqoff(&tx_napi->napi);
 			}
+#ifdef BW_TEST
+			}
+#endif
 		}
 
 		dma_status[i] &= ~BIT(temp);
@@ -3424,6 +3429,11 @@ static int ether_close(struct net_device *ndev)
 	tegra_unregister_hwtime_source(ndev);
 #endif
 
+#ifdef BW_TEST
+	pdata->test_tx_bandwidth = OSI_DISABLE;
+	cancel_delayed_work_sync(&pdata->tx_bandwidth_work);
+#endif
+
 	/* Stop workqueue to get further scheduled */
 	ether_stats_work_queue_stop(pdata);
 
@@ -3885,6 +3895,13 @@ static int ether_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	int count = 0;
 	int ret;
 
+#ifdef BW_TEST
+	if (pdata->test_tx_bandwidth == OSI_ENABLE) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+#endif
+
 	count = ether_tx_swcx_alloc(pdata, tx_ring, skb);
 	if (count <= 0) {
 		if (count == 0) {
@@ -7639,6 +7656,10 @@ static int ether_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&pdata->tx_ts_skb_head);
 	INIT_LIST_HEAD(&pdata->timestamp_skb_head);
 	INIT_DELAYED_WORK(&pdata->tx_ts_work, ether_get_tx_ts_work);
+#ifdef BW_TEST
+	pdata->tx_bw_wq = create_singlethread_workqueue("tx_bw_wq");
+	INIT_DELAYED_WORK(&pdata->tx_bandwidth_work, ether_tx_bandwidth_work);
+#endif
 	pdata->rx_m_enabled = false;
 	pdata->rx_pcs_m_enabled = false;
 	atomic_set(&pdata->tx_ts_ref_cnt, -1);
diff --git a/drivers/net/ethernet/nvidia/nvethernet/ether_linux.h b/drivers/net/ethernet/nvidia/nvethernet/ether_linux.h
index 4a98d15e..d0d545cb 100644
--- a/drivers/net/ethernet/nvidia/nvethernet/ether_linux.h
+++ b/drivers/net/ethernet/nvidia/nvethernet/ether_linux.h
@@ -706,6 +706,16 @@ struct ether_priv_data {
 	uint32_t disable_rx_csum;
 	/** select Tx queue/dma channel for testing */
 	unsigned int tx_queue_select;
+#ifdef BW_TEST
+	/** enable tx bandwidth for testing */
+	unsigned int test_tx_bandwidth;
+	/** tx bandwidth test pkt size */
+	unsigned int tx_bandwidth_pkt_size;
+	/** delayed work - tx bandwidth for testing */
+	struct delayed_work tx_bandwidth_work;
+	/** tx bandwidth pkt work queue */
+	struct workqueue_struct *tx_bw_wq;
+#endif
 };
 
 /**
@@ -907,4 +917,7 @@ static inline nveu64_t update_stats_counter(nveu64_t last_value, nveu64_t incr)
 #ifdef ETHER_NVGRO
 void ether_nvgro_purge_timer(struct timer_list *t);
 #endif /* ETHER_NVGRO */
+#ifdef BW_TEST
+void ether_tx_bandwidth_work(struct work_struct *work);
+#endif
 #endif /* ETHER_LINUX_H */
diff --git a/drivers/net/ethernet/nvidia/nvethernet/ether_test_tx.c b/drivers/net/ethernet/nvidia/nvethernet/ether_test_tx.c
new file mode 100644
index 00000000..0e95a8e0
--- /dev/null
+++ b/drivers/net/ethernet/nvidia/nvethernet/ether_test_tx.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#ifdef HSI_SUPPORT
+#include <linux/tegra-epl.h>
+#endif
+#include "ether_linux.h"
+#include <linux/delay.h>
+
+#ifdef BW_TEST
+static unsigned long long iterations;
+static const uint32_t g_num_frags = 1U;
+static bool alloc;
+/* Template frame: broadcast ARP request from 00:01:02:03:04:05/192.168.1.1
+ * for 192.168.1.200, zero-padded up to the configured packet size.
+ */
+static char xmitbuffer[9000] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01,
+				0x02, 0x03, 0x04, 0x05,
+				0x08, 0x06, 0x00, 0x01,
+				0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0x00, 0x01,
+				0x02, 0x03, 0x04, 0x05, 0xc0, 0xa8, 0x01, 0x01,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xa8,
+				0x01, 0xc8};
+
+#define TX_BW_RING_SIZE 16384
+/* Pre-mapped DMA buffers reused for every transmitted packet */
+static nveu64_t buf_phy_addr[TX_BW_RING_SIZE];
+static void *buf_virt_addr[TX_BW_RING_SIZE];
+
+static int alloc_dma(struct device *dev, struct ether_priv_data *pdata)
+{
+	void *cpu_addr;
+	dma_addr_t dma_handle;
+	size_t size = pdata->tx_bandwidth_pkt_size;
+	enum dma_data_direction direction = DMA_TO_DEVICE;
+	int i = 0;
+
+	for (i = 0; i < TX_BW_RING_SIZE; i++) {
+		/* Allocate a buffer in kernel space */
+		cpu_addr = kmalloc(size, GFP_KERNEL);
+		if (!cpu_addr) {
+			dev_err(dev, "Failed to allocate memory\n");
+			return -ENOMEM;
+		}
+
+		/* Fill the buffer with the template frame */
+		memset(cpu_addr, 0, size);
+		memcpy(cpu_addr, xmitbuffer, size);
+
+		/* Map the buffer to a DMA address */
+		dma_handle = dma_map_single(dev, cpu_addr, size, direction);
+		if (dma_mapping_error(dev, dma_handle)) {
+			dev_err(dev, "DMA mapping failed\n");
+			kfree(cpu_addr);
+			return -ENOMEM;
+		}
+		buf_phy_addr[i] = dma_handle;
+		buf_virt_addr[i] = cpu_addr;
+	}
+	return 0;
+}
+
+static int32_t ether_tx_swcx_populate(struct osi_tx_ring *tx_ring,
+				      struct ether_priv_data *pdata)
+{
+	struct osi_tx_pkt_cx *tx_pkt_cx = &tx_ring->tx_pkt_cx;
+	struct osi_tx_swcx *tx_swcx = NULL;
+	uint32_t cnt = 0;
+	nveu32_t cur_tx_idx = tx_ring->cur_tx_idx;
+
+	(void)memset((void *)tx_pkt_cx, 0, sizeof(*tx_pkt_cx));
+
+	tx_pkt_cx->payload_len = pdata->tx_bandwidth_pkt_size;
+	/* tx_pkt_cx->flags |= OSI_PKT_CX_LEN; */
+
+	while (cnt < g_num_frags) {
+		tx_swcx = tx_ring->tx_swcx + cur_tx_idx;
+
+		if (tx_swcx->len != 0U) {
+			/* should not hit this case */
+			return -1;
+		}
+
+		if (cur_tx_idx >= TX_BW_RING_SIZE) {
+			dev_err(pdata->dev, "invalid Tx descriptor index\n");
+			return -1;
+		}
+
+		tx_swcx->len = pdata->tx_bandwidth_pkt_size;
+		/* Point the software context at the pre-mapped static buffer */
+		tx_swcx->buf_virt_addr = buf_virt_addr[cur_tx_idx];
+		tx_swcx->buf_phy_addr = buf_phy_addr[cur_tx_idx];
+
+		INCR_TX_DESC_INDEX(cur_tx_idx, pdata->osi_dma->tx_ring_sz);
+		cnt++;
+	}
+	tx_pkt_cx->desc_cnt = g_num_frags;
+
+	return 0;
+}
+
+static void free_tx_dma_resources(struct osi_dma_priv_data *osi_dma,
+				  unsigned int qinx)
+{
+	unsigned int i;
+	struct osi_tx_ring *tx_ring = NULL;
+	unsigned int chan = osi_dma->dma_chans[qinx];
+	nveu32_t cur_tx_idx = 0U;
+	struct osi_tx_swcx *tx_swcx = NULL;
+
+	tx_ring = osi_dma->tx_ring[chan];
+	osi_process_tx_completions(osi_dma, chan, osi_dma->tx_ring_sz);
+
+	/* Clear the software context entries so the ring can be handed back
+	 * to normal skb-based transmission. The static DMA buffers stay
+	 * mapped and are reused on the next test run.
+	 */
+	for (i = 0; i < osi_dma->tx_ring_sz; i++) {
+		tx_swcx = tx_ring->tx_swcx + cur_tx_idx;
+		tx_swcx->buf_virt_addr = NULL;
+		tx_swcx->buf_phy_addr = 0;
+		tx_swcx->len = 0;
+		INCR_TX_DESC_INDEX(cur_tx_idx, osi_dma->tx_ring_sz);
+	}
+}
+
+static uint32_t ether_avail_txdesc_cnt_test(const struct osi_tx_ring *tx_ring,
+					    struct osi_dma_priv_data *osi_dma)
+{
+	if ((tx_ring->clean_idx >= osi_dma->tx_ring_sz) ||
+	    (tx_ring->cur_tx_idx >= osi_dma->tx_ring_sz)) {
+		return 0;
+	}
+
+	return ((tx_ring->clean_idx - tx_ring->cur_tx_idx - 1U) &
+		(osi_dma->tx_ring_sz - 1U));
+}
+
+static inline int32_t ether_transmit(struct ether_priv_data *pdata,
+				     unsigned int qinx)
+{
+	struct osi_dma_priv_data *osi_dma = pdata->osi_dma;
+	struct osi_tx_ring *tx_ring = NULL;
+	int32_t count = 0;
+	unsigned int chan = osi_dma->dma_chans[qinx];
+
+	tx_ring = osi_dma->tx_ring[chan];
+	if (tx_ring == NULL) {
+		dev_err(pdata->dev, "Invalid Tx ring in %s\n", __func__);
+		return -2;
+	}
+
+	if (ether_avail_txdesc_cnt_test(tx_ring, osi_dma) < g_num_frags) {
+		if ((iterations % 100000000) == 0) {
+			pr_info("%s: waiting for Tx descriptors, avail %u, iteration %llu\n",
+				__func__,
+				ether_avail_txdesc_cnt_test(tx_ring, osi_dma),
+				iterations);
+		}
+		osi_process_tx_completions(osi_dma, chan, 512);
+		return -3;
+	}
+
+	count = ether_tx_swcx_populate(tx_ring, pdata);
+	if (count < 0) {
+		return -4;
+	}
+
+	/* Issue the memory barrier only on every 50th packet to reduce
+	 * per-packet overhead while measuring bandwidth.
+	 */
+	if ((iterations % 50) == 0) {
+		tx_ring->skip_dmb = 0;
+	} else {
+		tx_ring->skip_dmb = 1;
+	}
+
+	if (osi_hw_transmit(osi_dma, chan) < 0) {
+		return -5;
+	}
+
+	if ((iterations % 50) == 0) {
+		osi_process_tx_completions(osi_dma, chan, 50);
+	}
+	return 0;
+}
+
+static inline void test_transmit(bool test, bool loop,
+				 struct ether_priv_data *pdata)
+{
+	struct osi_dma_priv_data *osi_dma = pdata->osi_dma;
+	unsigned int qinx = 0;
+
+	/* Keep queueing packets until the test is disabled via sysfs */
+	while (test && pdata->test_tx_bandwidth) {
+		if (ether_transmit(pdata, qinx) < 0) {
+			/* No descriptor available; retry on the next pass,
+			 * optionally after msleep(1).
+			 */
+		}
+
+		if (!loop) {
+			break;
+		}
+		iterations++;
+	}
+
+	/* Reclaim all descriptors before handing the ring back to the stack */
+	free_tx_dma_resources(osi_dma, qinx);
+}
+
+/**
+ * @brief Delayed work handler which runs the Tx bandwidth test.
+ *
+ * @param[in] work: Work structure embedded in struct ether_priv_data.
+ */
+void ether_tx_bandwidth_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct ether_priv_data *pdata = container_of(dwork,
+						     struct ether_priv_data,
+						     tx_bandwidth_work);
+	struct device *dev = pdata->dev;
+
+	/* Allocate and map the Tx buffers once; they are reused across runs */
+	if (!alloc) {
+		if (alloc_dma(dev, pdata) < 0) {
+			dev_err(pdata->dev, "buffer allocation failed in %s\n",
+				__func__);
+			return;
+		}
+		alloc = true;
+	}
+	test_transmit(true, true, pdata);
+}
+#endif
\ No newline at end of file
diff --git a/drivers/net/ethernet/nvidia/nvethernet/osd.c b/drivers/net/ethernet/nvidia/nvethernet/osd.c
index 7832bd3c..aed2eb35 100644
--- a/drivers/net/ethernet/nvidia/nvethernet/osd.c
+++ b/drivers/net/ethernet/nvidia/nvethernet/osd.c
@@ -630,9 +630,15 @@ static void osd_receive_packet(void *priv, struct osi_rx_ring *rx_ring,
 		return;
 	}
 
+#ifdef BW_TEST
+	/* Process only valid packets, and only while the Tx bandwidth test is off */
+	if (likely((rx_pkt_cx->flags & OSI_PKT_CX_VALID) ==
+		   OSI_PKT_CX_VALID) && (pdata->test_tx_bandwidth == OSI_DISABLE)) {
+#else
 	/* Process only the Valid packets */
 	if (likely((rx_pkt_cx->flags & OSI_PKT_CX_VALID) ==
 		   OSI_PKT_CX_VALID)) {
+#endif
 #ifdef ETHER_PAGE_POOL
 		skb = netdev_alloc_skb_ip_align(pdata->ndev,
 						rx_pkt_cx->pkt_len);
@@ -754,6 +760,12 @@ static void osd_transmit_complete(void *priv, const struct osi_tx_swcx *swcx,
 	ndev->stats.tx_bytes += len;
 
+#ifdef BW_TEST
+	if (pdata->test_tx_bandwidth == OSI_ENABLE) {
+		return;
+	}
+#endif
+
 	if ((txdone_pkt_cx->flags & OSI_TXDONE_CX_TS) == OSI_TXDONE_CX_TS) {
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(txdone_pkt_cx->ns);
diff --git a/drivers/net/ethernet/nvidia/nvethernet/sysfs.c b/drivers/net/ethernet/nvidia/nvethernet/sysfs.c
index fefa9ea5..cef074c9 100644
--- a/drivers/net/ethernet/nvidia/nvethernet/sysfs.c
+++ b/drivers/net/ethernet/nvidia/nvethernet/sysfs.c
@@ -77,6 +77,102 @@ static DEVICE_ATTR(desc_dump_enable, (S_IRUGO | S_IWUSR),
 		   ether_desc_dump_show,
 		   ether_desc_dump_store);
 
+#ifdef BW_TEST
+/**
+ * @brief Shows the current state of the Tx bandwidth test
+ *
+ * @param[in] dev: Device data.
+ * @param[in] attr: Device attribute
+ * @param[in] buf: Buffer to store the current Tx bandwidth test setting
+ *
+ * @return Number of characters written to buf.
+ */
+static ssize_t ether_test_tx_bandwidth_dump_show(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct net_device *ndev = (struct net_device *)dev_get_drvdata(dev);
+	struct ether_priv_data *pdata = netdev_priv(ndev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+			 (pdata->test_tx_bandwidth == OSI_ENABLE) ?
+			 "enabled" : "disabled");
+}
+
+/**
+ * @brief Extract the packet size from a "pkt<size>" string.
+ *
+ * @param[in] str: Input buffer.
+ *
+ * @return Packet size in bytes, or 0 if it exceeds OSI_MAX_MTU_SIZE.
+ */
+static inline int extract_mtu(const char *str)
+{
+	int num = 0;
+
+	/* Accumulate every decimal digit found in the string */
+	while (*str) {
+		if (((*str) >= '0') && ((*str) <= '9')) {
+			num = num * 10 + (*str - '0');
+		}
+		str++;
+	}
+
+	if (num > OSI_MAX_MTU_SIZE) {
+		num = 0;
+	}
+	return num;
+}
+
+/**
+ * @brief Set the user setting for the Tx bandwidth test
+ *
+ * Algorithm: Parses "enable", "disable", "rx_disable" or "pkt<size>" and
+ * updates pdata->test_tx_bandwidth and pdata->tx_bandwidth_pkt_size.
+ *
+ * @param[in] dev: Device data.
+ * @param[in] attr: Device attribute
+ * @param[in] buf: Buffer which contains the user setting
+ * @param[in] size: size of buffer
+ *
+ * @return size of buffer.
+ */
+static ssize_t ether_test_tx_bandwidth_dump_store(struct device *dev,
+						  struct device_attribute *attr,
+						  const char *buf, size_t size)
+{
+	struct net_device *ndev = (struct net_device *)dev_get_drvdata(dev);
+	struct ether_priv_data *pdata = netdev_priv(ndev);
+
+	if (strncmp(buf, "enable", 6) == 0) {
+		pdata->test_tx_bandwidth = OSI_ENABLE;
+		if (pdata->tx_bandwidth_pkt_size == 0U) {
+			pdata->tx_bandwidth_pkt_size = OSI_DFLT_MTU_SIZE;
+		}
+		/* Run the packet generator on a fixed CPU (2) */
+		queue_delayed_work_on(2, pdata->tx_bw_wq,
+				      &pdata->tx_bandwidth_work,
+				      msecs_to_jiffies(0));
+	} else if (strncmp(buf, "disable", 7) == 0) {
+		pdata->test_tx_bandwidth = OSI_DISABLE;
+		cancel_delayed_work_sync(&pdata->tx_bandwidth_work);
+	} else if (strncmp(buf, "rx_disable", 10) == 0) {
+		/* Disable Rx processing only; do not start the Tx generator */
+		pdata->test_tx_bandwidth = OSI_ENABLE;
+	} else if (strncmp(buf, "pkt", 3) == 0) {
+		pdata->tx_bandwidth_pkt_size = extract_mtu(buf);
+	} else {
+		dev_err(pdata->dev,
			"Invalid entry. Valid entries are enable, disable, rx_disable or pkt<size> (e.g. pkt64, pkt256, pkt1500, pkt8192)\n");
+	}
+
+	return size;
+}
+
+/**
+ * @brief Sysfs attribute to enable/disable the Tx bandwidth test
+ *
+ */
+static DEVICE_ATTR(test_tx_bandwidth_dump_enable, (S_IRUGO | S_IWUSR),
+		   ether_test_tx_bandwidth_dump_show,
+		   ether_test_tx_bandwidth_dump_store);
+#endif
+
 /**
  * @brief Shows current configured tx queue
  *
@@ -3365,6 +3461,9 @@ static struct attribute *ether_sysfs_attrs[] = {
 #ifdef OSI_DEBUG
 	&dev_attr_desc_dump_enable.attr,
 #endif /* OSI_DEBUG */
+#ifdef BW_TEST
+	&dev_attr_test_tx_bandwidth_dump_enable.attr,
+#endif
 	&dev_attr_mac_loopback.attr,
 	&dev_attr_pcs_baser_fec.attr,
 	&dev_attr_ptp_mode.attr,
@@ -3426,6 +3525,9 @@ static struct attribute *ether_sysfs_attrs_without_macsec[] = {
 #ifdef OSI_DEBUG
 	&dev_attr_desc_dump_enable.attr,
 #endif /* OSI_DEBUG */
+#ifdef BW_TEST
+	&dev_attr_test_tx_bandwidth_dump_enable.attr,
+#endif
 	&dev_attr_mac_loopback.attr,
 	&dev_attr_ptp_mode.attr,
 	&dev_attr_ptp_sync.attr,
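
For reference, a minimal user-space sketch (not part of the patch) of how the new test_tx_bandwidth_dump_enable attribute could be exercised. The sysfs path and interface name are assumptions and depend on how the platform device is exposed on the target; the command strings map to those parsed in ether_test_tx_bandwidth_dump_store().

/* Hypothetical user-space helper for driving the Tx bandwidth test knob. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed attribute path; adjust "eth0" for the target interface. */
#define BW_ATTR "/sys/class/net/eth0/device/test_tx_bandwidth_dump_enable"

static int bw_write(const char *cmd)
{
	int fd = open(BW_ATTR, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return -1;
	}
	/* Each write() lands as one store() call in the driver. */
	if (write(fd, cmd, strlen(cmd)) < 0) {
		perror("write");
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	bw_write("pkt1500");	/* select packet size before starting */
	bw_write("enable");	/* start the Tx bandwidth worker */
	sleep(10);		/* let the generator run for a while */
	bw_write("disable");	/* stop and reclaim descriptors */
	return 0;
}

Writing the packet size before "enable" matters because the store handler only falls back to OSI_DFLT_MTU_SIZE when no size has been set yet.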