From: Felix Fietkau Date: Sun, 12 Apr 2015 10:35:21 +0000 (+0000) Subject: kernel: add bgmac fixes for various issues X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8904cd1dcca81e0e4cf5e36cea6a180bfe5aaa0e;p=lede.git kernel: add bgmac fixes for various issues Signed-off-by: Felix Fietkau SVN-Revision: 45388 --- diff --git a/target/linux/generic/patches-3.18/077-04-bgmac-simplify-tx-ring-index-handling.patch b/target/linux/generic/patches-3.18/077-04-bgmac-simplify-tx-ring-index-handling.patch new file mode 100644 index 0000000000..cf62a50a2f --- /dev/null +++ b/target/linux/generic/patches-3.18/077-04-bgmac-simplify-tx-ring-index-handling.patch @@ -0,0 +1,123 @@ +From: Felix Fietkau +Date: Sun, 12 Apr 2015 09:58:56 +0200 +Subject: [PATCH] bgmac: simplify tx ring index handling + +Keep incrementing ring->start and ring->end instead of pointing it to +the actual ring slot entry. This simplifies the calculation of the +number of free slots. + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -142,11 +142,10 @@ static netdev_tx_t bgmac_dma_tx_add(stru + { + struct device *dma_dev = bgmac->core->dma_dev; + struct net_device *net_dev = bgmac->net_dev; +- struct bgmac_slot_info *slot = &ring->slots[ring->end]; +- int free_slots; ++ int index = ring->end % BGMAC_TX_RING_SLOTS; ++ struct bgmac_slot_info *slot = &ring->slots[index]; + int nr_frags; + u32 flags; +- int index = ring->end; + int i; + + if (skb->len > BGMAC_DESC_CTL1_LEN) { +@@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru + skb_checksum_help(skb); + + nr_frags = skb_shinfo(skb)->nr_frags; +- +- if (ring->start <= ring->end) +- free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS; +- else +- free_slots = ring->start - ring->end; +- +- if (free_slots <= nr_frags + 1) { ++ if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) { + bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n"); + netif_stop_queue(net_dev); + return NETDEV_TX_BUSY; +@@ -200,7 +193,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru + } + + slot->skb = skb; +- ++ ring->end += nr_frags + 1; + netdev_sent_queue(net_dev, skb->len); + + wmb(); +@@ -208,13 +201,12 @@ static netdev_tx_t bgmac_dma_tx_add(stru + /* Increase ring->end to point empty slot. We tell hardware the first + * slot it should *not* read. + */ +- ring->end = (index + 1) % BGMAC_TX_RING_SLOTS; + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX, + ring->index_base + +- ring->end * sizeof(struct bgmac_dma_desc)); ++ (ring->end % BGMAC_TX_RING_SLOTS) * ++ sizeof(struct bgmac_dma_desc)); + +- free_slots -= nr_frags + 1; +- if (free_slots < 8) ++ if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8) + netif_stop_queue(net_dev); + + return NETDEV_TX_OK; +@@ -256,17 +248,17 @@ static void bgmac_dma_tx_free(struct bgm + empty_slot &= BGMAC_DMA_TX_STATDPTR; + empty_slot /= sizeof(struct bgmac_dma_desc); + +- while (ring->start != empty_slot) { +- struct bgmac_slot_info *slot = &ring->slots[ring->start]; +- u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1); +- int len = ctl1 & BGMAC_DESC_CTL1_LEN; ++ while (ring->start != ring->end) { ++ int slot_idx = ring->start % BGMAC_TX_RING_SLOTS; ++ struct bgmac_slot_info *slot = &ring->slots[slot_idx]; ++ u32 ctl1; ++ int len; + +- if (!slot->dma_addr) { +- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n", +- ring->start, ring->end); +- goto next; +- } ++ if (slot_idx == empty_slot) ++ break; + ++ ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1); ++ len = ctl1 & BGMAC_DESC_CTL1_LEN; + if (ctl1 & BGMAC_DESC_CTL0_SOF) + /* Unmap no longer used buffer */ + dma_unmap_single(dma_dev, slot->dma_addr, len, +@@ -284,10 +276,8 @@ static void bgmac_dma_tx_free(struct bgm + slot->skb = NULL; + } + +-next: + slot->dma_addr = 0; +- if (++ring->start >= BGMAC_TX_RING_SLOTS) +- ring->start = 0; ++ ring->start++; + freed = true; + } + +--- a/drivers/net/ethernet/broadcom/bgmac.h ++++ b/drivers/net/ethernet/broadcom/bgmac.h +@@ -414,10 +414,10 @@ enum bgmac_dma_ring_type { + * empty. + */ + struct bgmac_dma_ring { +- u16 num_slots; +- u16 start; +- u16 end; ++ u32 start; ++ u32 end; + ++ u16 num_slots; + u16 mmio_base; + struct bgmac_dma_desc *cpu_base; + dma_addr_t dma_base; diff --git a/target/linux/generic/patches-3.18/077-05-bgmac-leave-interrupts-disabled-as-long-as-there-is-.patch b/target/linux/generic/patches-3.18/077-05-bgmac-leave-interrupts-disabled-as-long-as-there-is-.patch new file mode 100644 index 0000000000..79746540c9 --- /dev/null +++ b/target/linux/generic/patches-3.18/077-05-bgmac-leave-interrupts-disabled-as-long-as-there-is-.patch @@ -0,0 +1,87 @@ +From: Felix Fietkau +Date: Sun, 12 Apr 2015 10:08:04 +0200 +Subject: [PATCH] bgmac: leave interrupts disabled as long as there is work + to do + +Always poll rx and tx during NAPI poll instead of relying on the status +of the first interrupt. This prevents bgmac_poll from leaving unfinished +work around until the next IRQ. +In my tests this makes bridging/routing throughput under heavy load more +stable and ensures that no new IRQs arrive as long as bgmac_poll uses up +the entire budget. + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -1105,8 +1105,6 @@ static void bgmac_chip_reset(struct bgma + bgmac_phy_init(bgmac); + + netdev_reset_queue(bgmac->net_dev); +- +- bgmac->int_status = 0; + } + + static void bgmac_chip_intrs_on(struct bgmac *bgmac) +@@ -1221,14 +1219,13 @@ static irqreturn_t bgmac_interrupt(int i + if (!int_status) + return IRQ_NONE; + +- /* Ack */ +- bgmac_write(bgmac, BGMAC_INT_STATUS, int_status); ++ int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX); ++ if (int_status) ++ bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status); + + /* Disable new interrupts until handling existing ones */ + bgmac_chip_intrs_off(bgmac); + +- bgmac->int_status = int_status; +- + napi_schedule(&bgmac->napi); + + return IRQ_HANDLED; +@@ -1237,25 +1234,17 @@ static irqreturn_t bgmac_interrupt(int i + static int bgmac_poll(struct napi_struct *napi, int weight) + { + struct bgmac *bgmac = container_of(napi, struct bgmac, napi); +- struct bgmac_dma_ring *ring; + int handled = 0; + +- if (bgmac->int_status & BGMAC_IS_TX0) { +- ring = &bgmac->tx_ring[0]; +- bgmac_dma_tx_free(bgmac, ring); +- bgmac->int_status &= ~BGMAC_IS_TX0; +- } ++ /* Ack */ ++ bgmac_write(bgmac, BGMAC_INT_STATUS, ~0); + +- if (bgmac->int_status & BGMAC_IS_RX) { +- ring = &bgmac->rx_ring[0]; +- handled += bgmac_dma_rx_read(bgmac, ring, weight); +- bgmac->int_status &= ~BGMAC_IS_RX; +- } ++ bgmac_dma_tx_free(bgmac, &bgmac->tx_ring[0]); ++ handled += bgmac_dma_rx_read(bgmac, &bgmac->rx_ring[0], weight); + +- if (bgmac->int_status) { +- bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status); +- bgmac->int_status = 0; +- } ++ /* poll again if more events arrived in the mean time */ ++ if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX)) ++ return handled; + + if (handled < weight) { + napi_complete(napi); +--- a/drivers/net/ethernet/broadcom/bgmac.h ++++ b/drivers/net/ethernet/broadcom/bgmac.h +@@ -452,7 +452,6 @@ struct bgmac { + + /* Int */ + u32 int_mask; +- u32 int_status; + + /* Current MAC state */ + int mac_speed; diff --git a/target/linux/generic/patches-3.18/077-06-bgmac-set-received-skb-headroom-to-NET_SKB_PAD.patch b/target/linux/generic/patches-3.18/077-06-bgmac-set-received-skb-headroom-to-NET_SKB_PAD.patch new file mode 100644 index 0000000000..e7fde163fd --- /dev/null +++ b/target/linux/generic/patches-3.18/077-06-bgmac-set-received-skb-headroom-to-NET_SKB_PAD.patch @@ -0,0 +1,66 @@ +From: Felix Fietkau +Date: Sun, 12 Apr 2015 10:13:28 +0200 +Subject: [PATCH] bgmac: set received skb headroom to NET_SKB_PAD + +A packet buffer offset of 30 bytes is inefficient, because the first 2 +bytes end up in a different cacheline. + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -342,13 +342,13 @@ static int bgmac_dma_rx_skb_for_slot(str + return -ENOMEM; + + /* Poison - if everything goes fine, hardware will overwrite it */ +- rx = buf; ++ rx = buf + BGMAC_RX_BUF_OFFSET; + rx->len = cpu_to_le16(0xdead); + rx->flags = cpu_to_le16(0xbeef); + + /* Map skb for the DMA */ +- dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE, +- DMA_FROM_DEVICE); ++ dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET, ++ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { + bgmac_err(bgmac, "DMA mapping error\n"); + put_page(virt_to_head_page(buf)); +@@ -399,7 +399,7 @@ static int bgmac_dma_rx_read(struct bgma + while (ring->start != ring->end) { + struct device *dma_dev = bgmac->core->dma_dev; + struct bgmac_slot_info *slot = &ring->slots[ring->start]; +- struct bgmac_rx_header *rx = slot->buf; ++ struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET; + struct sk_buff *skb; + void *buf = slot->buf; + u16 len, flags; +@@ -450,8 +450,10 @@ static int bgmac_dma_rx_read(struct bgma + BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + + skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE); +- skb_put(skb, BGMAC_RX_FRAME_OFFSET + len); +- skb_pull(skb, BGMAC_RX_FRAME_OFFSET); ++ skb_put(skb, BGMAC_RX_FRAME_OFFSET + ++ BGMAC_RX_BUF_OFFSET + len); ++ skb_pull(skb, BGMAC_RX_FRAME_OFFSET + ++ BGMAC_RX_BUF_OFFSET); + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, bgmac->net_dev); +--- a/drivers/net/ethernet/broadcom/bgmac.h ++++ b/drivers/net/ethernet/broadcom/bgmac.h +@@ -360,9 +360,11 @@ + + #define BGMAC_RX_HEADER_LEN 28 /* Last 24 bytes are unused. Well... */ + #define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */ ++#define BGMAC_RX_BUF_OFFSET (NET_SKB_PAD + NET_IP_ALIGN - \ ++ BGMAC_RX_FRAME_OFFSET) + #define BGMAC_RX_MAX_FRAME_SIZE 1536 /* Copied from b44/tg3 */ + #define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE) +-#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \ ++#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + + #define BGMAC_BFL_ENETROBO 0x0010 /* has ephy roboswitch spi */ diff --git a/target/linux/generic/patches-3.18/077-07-bgmac-fix-DMA-rx-corruption.patch b/target/linux/generic/patches-3.18/077-07-bgmac-fix-DMA-rx-corruption.patch new file mode 100644 index 0000000000..55e80332bd --- /dev/null +++ b/target/linux/generic/patches-3.18/077-07-bgmac-fix-DMA-rx-corruption.patch @@ -0,0 +1,54 @@ +From: Felix Fietkau +Date: Sun, 12 Apr 2015 11:59:47 +0200 +Subject: [PATCH] bgmac: fix DMA rx corruption + +The driver needs to inform the hardware about the first invalid (not yet +filled) rx slot, by writing its DMA descriptor pointer offset to the +BGMAC_DMA_RX_INDEX register. + +This register was set to a value exceeding the rx ring size, effectively +allowing the hardware constant access to the full ring, regardless of +which slots are initialized. + +Fix this by updating the register in bgmac_dma_rx_setup_desc. + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(stru + dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr)); + dma_desc->ctl0 = cpu_to_le32(ctl0); + dma_desc->ctl1 = cpu_to_le32(ctl1); ++ ++ desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS; ++ ++ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX, ++ ring->index_base + ++ desc_idx * sizeof(struct bgmac_dma_desc)); + } + + static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, +@@ -394,9 +400,7 @@ static int bgmac_dma_rx_read(struct bgma + end_slot &= BGMAC_DMA_RX_STATDPTR; + end_slot /= sizeof(struct bgmac_dma_desc); + +- ring->end = end_slot; +- +- while (ring->start != ring->end) { ++ while (ring->start != end_slot) { + struct device *dma_dev = bgmac->core->dma_dev; + struct bgmac_slot_info *slot = &ring->slots[ring->start]; + struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET; +@@ -693,10 +697,6 @@ static void bgmac_dma_init(struct bgmac + for (j = 0; j < ring->num_slots; j++) + bgmac_dma_rx_setup_desc(bgmac, ring, j); + +- bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX, +- ring->index_base + +- ring->num_slots * sizeof(struct bgmac_dma_desc)); +- + ring->start = 0; + ring->end = 0; + }