From 2c680151e4a5ed2114bbd6a82a8d0638f507decd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 29 Aug 2014 19:42:08 +0000 Subject: [PATCH] ar71xx: split packets into multiple descriptors on ar716x This improves performance when doing concurrent rx/tx on a single ethernet MAC, e.g. when routing between VLANs. Fixes #13072 Signed-off-by: Felix Fietkau SVN-Revision: 42328 --- .../net/ethernet/atheros/ag71xx/ag71xx.h | 10 +- .../ethernet/atheros/ag71xx/ag71xx_ethtool.c | 6 + .../net/ethernet/atheros/ag71xx/ag71xx_main.c | 117 ++++++++++++++---- 3 files changed, 105 insertions(+), 28 deletions(-) diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h index f6d85b9084..9fb10fd898 100644 --- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h +++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h @@ -52,10 +52,13 @@ #define AG71XX_TX_MTU_LEN 1540 -#define AG71XX_TX_RING_SIZE_DEFAULT 32 +#define AG71XX_TX_RING_SPLIT 256 +#define AG71XX_TX_RING_DS_PER_PKT DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \ + AG71XX_TX_RING_SPLIT) +#define AG71XX_TX_RING_SIZE_DEFAULT 48 #define AG71XX_RX_RING_SIZE_DEFAULT 128 -#define AG71XX_TX_RING_SIZE_MAX 32 +#define AG71XX_TX_RING_SIZE_MAX 48 #define AG71XX_RX_RING_SIZE_MAX 128 #ifdef CONFIG_AG71XX_DEBUG @@ -99,7 +102,8 @@ struct ag71xx_ring { struct ag71xx_buf *buf; u8 *descs_cpu; dma_addr_t descs_dma; - unsigned int desc_size; + u16 desc_split; + u16 desc_size; unsigned int curr; unsigned int dirty; unsigned int size; diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_ethtool.c b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_ethtool.c index 498fbed1ff..cad9f2ff06 100644 --- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_ethtool.c +++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_ethtool.c @@ -75,6 +75,9 @@ static void ag71xx_ethtool_get_ringparam(struct net_device *dev, er->rx_pending = ag->rx_ring.size; er->rx_mini_pending = 0; er->rx_jumbo_pending = 0; + + if (ag->tx_ring.desc_split) + er->tx_pending /= AG71XX_TX_RING_DS_PER_PKT; } static int ag71xx_ethtool_set_ringparam(struct net_device *dev, @@ -103,6 +106,9 @@ static int ag71xx_ethtool_set_ringparam(struct net_device *dev, return err; } + if (ag->tx_ring.desc_split) + tx_size *= AG71XX_TX_RING_DS_PER_PKT; + ag->tx_ring.size = tx_size; ag->rx_ring.size = rx_size; diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c index d010373155..b0f39159c1 100644 --- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c +++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c @@ -536,6 +536,7 @@ void ag71xx_link_adjust(struct ag71xx *ag) u32 cfg2; u32 ifctl; u32 fifo5; + u32 fifo3; if (!ag->link) { ag71xx_hw_stop(ag); @@ -576,11 +577,18 @@ void ag71xx_link_adjust(struct ag71xx *ag) } if (pdata->is_ar91xx) - ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x00780fff); + fifo3 = 0x00780fff; else if (pdata->is_ar724x) - ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, pdata->fifo_cfg3); + fifo3 = pdata->fifo_cfg3; else - ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x008001ff); + fifo3 = 0x008001ff; + + if (ag->tx_ring.desc_split) { + fifo3 &= 0xffff; + fifo3 |= ((2048 - ag->tx_ring.desc_split) / 4) << 16; + } + + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, fifo3); if (pdata->set_speed) pdata->set_speed(ag->speed); @@ -675,6 +683,49 @@ static int ag71xx_stop(struct net_device *dev) return 0; } +static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len) +{ + int i; + struct ag71xx_desc *desc; + int ndesc = 0; + int split = ring->desc_split; + + if (!split) + split = len; + + while (len > 0) { + unsigned int cur_len = len; + + i = (ring->curr + ndesc) % ring->size; + desc = ring->buf[i].desc; + + if (!ag71xx_desc_empty(desc)) + return -1; + + if (cur_len > split) { + cur_len = split; + if (len < split + 4) + cur_len -= 4; + } + + desc->data = addr; + addr += cur_len; + len -= cur_len; + + if (len > 0) + cur_len |= DESC_MORE; + + /* prevent early tx attempt of this descriptor */ + if (!ndesc) + cur_len |= DESC_EMPTY; + + desc->ctrl = cur_len; + ndesc++; + } + + return ndesc; +} + static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -682,18 +733,12 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb, struct ag71xx_ring *ring = &ag->tx_ring; struct ag71xx_desc *desc; dma_addr_t dma_addr; - int i; - - i = ring->curr % ring->size; - desc = ring->buf[i].desc; - - if (!ag71xx_desc_empty(desc)) - goto err_drop; + int i, n, ring_min; if (ag71xx_has_ar8216(ag)) ag71xx_add_ar8216_header(ag, skb); - if (skb->len <= 0) { + if (skb->len <= 4) { DBG("%s: packet len is too small\n", ag->dev->name); goto err_drop; } @@ -701,21 +746,33 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb, dma_addr = dma_map_single(&dev->dev, skb->data, skb->len, DMA_TO_DEVICE); - netdev_sent_queue(dev, skb->len); + i = ring->curr % ring->size; + desc = ring->buf[i].desc; + + /* setup descriptor fields */ + n = ag71xx_fill_dma_desc(ring, (u32) dma_addr, skb->len & ag->desc_pktlen_mask); + if (n < 0) + goto err_drop_unmap; + + i = (ring->curr + n - 1) % ring->size; ring->buf[i].len = skb->len; ring->buf[i].skb = skb; ring->buf[i].timestamp = jiffies; - /* setup descriptor fields */ - desc->data = (u32) dma_addr; - desc->ctrl = skb->len & ag->desc_pktlen_mask; + netdev_sent_queue(dev, skb->len); + + desc->ctrl &= ~DESC_EMPTY; + ring->curr += n; /* flush descriptor */ wmb(); - ring->curr++; - if (ring->curr == (ring->dirty + ring->size)) { - DBG("%s: tx queue full\n", ag->dev->name); + ring_min = 2; + if (ring->desc_split) + ring_min *= AG71XX_TX_RING_DS_PER_PKT; + + if (ring->curr - ring->dirty >= ring->size - ring_min) { + DBG("%s: tx queue full\n", dev->name); netif_stop_queue(dev); } @@ -726,6 +783,9 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; +err_drop_unmap: + dma_unmap_single(&dev->dev, dma_addr, skb->len, DMA_TO_DEVICE); + err_drop: dev->stats.tx_dropped++; @@ -843,7 +903,6 @@ static int ag71xx_tx_packets(struct ag71xx *ag) unsigned int i = ring->dirty % ring->size; struct ag71xx_desc *desc = ring->buf[i].desc; struct sk_buff *skb = ring->buf[i].skb; - int len = ring->buf[i].len; if (!ag71xx_desc_empty(desc)) { if (pdata->is_ar7240 && @@ -854,19 +913,22 @@ static int ag71xx_tx_packets(struct ag71xx *ag) ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS); - bytes_compl += len; - ag->dev->stats.tx_bytes += len; - ag->dev->stats.tx_packets++; + if (skb) { + dev_kfree_skb_any(skb); + ring->buf[i].skb = NULL; - dev_kfree_skb_any(skb); - ring->buf[i].skb = NULL; + bytes_compl += ring->buf[i].len; + sent++; + } ring->dirty++; - sent++; } DBG("%s: %d packets sent out\n", ag->dev->name, sent); + ag->dev->stats.tx_bytes += bytes_compl; + ag->dev->stats.tx_packets += sent; + if (!sent) return 0; @@ -1195,6 +1257,11 @@ static int ag71xx_probe(struct platform_device *pdev) ag->max_frame_len = pdata->max_frame_len; ag->desc_pktlen_mask = pdata->desc_pktlen_mask; + if (!pdata->is_ar724x && !pdata->is_ar91xx) { + ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT; + ag->tx_ring.size *= AG71XX_TX_RING_DS_PER_PKT; + } + ag->stop_desc = dma_alloc_coherent(NULL, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); -- 2.34.1