From: Stephen Hemminger Date: Mon, 8 Jan 2007 19:26:12 +0000 (-0800) Subject: chelsio: more rx speedup X-Git-Tag: firefly_0821_release~30992^2~11^2~65 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=24a427cf76984726641ea0d8163e61e99119069d;p=firefly-linux-kernel-4.4.55.git chelsio: more rx speedup Cleanup receive processing some more: * do the reserve padding of skb during setup * don't pass constants to get_packet * do smart prefetch of skb * make copybreak a module parameter Signed-off-by: Stephen Hemminger Signed-off-by: Jeff Garzik --- diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 8e287e79e4e3..a15611925338 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -71,12 +71,9 @@ #define SGE_FREEL_REFILL_THRESH 16 #define SGE_RESPQ_E_N 1024 #define SGE_INTRTIMER_NRES 1000 -#define SGE_RX_COPY_THRES 256 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_TX_DESC_MAX_PLEN 16384 -# define SGE_RX_DROP_THRES 2 - #define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) /* @@ -846,6 +843,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q) skb_reserve(skb, q->dma_offset); mapping = pci_map_single(pdev, skb->data, dma_len, PCI_DMA_FROMDEVICE); + skb_reserve(skb, sge->rx_pkt_pad); + ce->skb = skb; pci_unmap_addr_set(ce, dma_addr, mapping); pci_unmap_len_set(ce, dma_len, dma_len); @@ -1024,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ *fl, int idx) } } +static int copybreak __read_mostly = 256; +module_param(copybreak, int, 0); +MODULE_PARM_DESC(copybreak, "Receive copy threshold"); + /** * get_packet - return the next ingress packet buffer * @pdev: the PCI device that received the packet @@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ *fl, int idx) * be copied but there is no memory for the copy. */ static inline struct sk_buff *get_packet(struct pci_dev *pdev, - struct freelQ *fl, unsigned int len, - int dma_pad, int skb_pad, - unsigned int copy_thres, - unsigned int drop_thres) + struct freelQ *fl, unsigned int len) { struct sk_buff *skb; - struct freelQ_ce *ce = &fl->centries[fl->cidx]; + const struct freelQ_ce *ce = &fl->centries[fl->cidx]; - if (len < copy_thres) { - skb = alloc_skb(len + skb_pad, GFP_ATOMIC); - if (likely(skb != NULL)) { - skb_reserve(skb, skb_pad); - skb_put(skb, len); - pci_dma_sync_single_for_cpu(pdev, - pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - memcpy(skb->data, ce->skb->data + dma_pad, len); - pci_dma_sync_single_for_device(pdev, + if (len < copybreak) { + skb = alloc_skb(len + 2, GFP_ATOMIC); + if (!skb) + goto use_orig_buf; + + skb_reserve(skb, 2); /* align IP header */ + skb_put(skb, len); + pci_dma_sync_single_for_cpu(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); - } else if (!drop_thres) - goto use_orig_buf; - + memcpy(skb->data, ce->skb->data, len); + pci_dma_sync_single_for_device(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); recycle_fl_buf(fl, fl->cidx); return skb; } - if (fl->credits < drop_thres) { +use_orig_buf: + if (fl->credits < 2) { recycle_fl_buf(fl, fl->cidx); return NULL; } -use_orig_buf: pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); skb = ce->skb; - skb_reserve(skb, dma_pad); + prefetch(skb->data); + skb_put(skb, len); return skb; } @@ -1359,27 +1359,25 @@ static void restart_sched(unsigned long arg) * * Process an ingress ethernet pakcet and deliver it to the stack. */ -static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) +static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) { struct sk_buff *skb; - struct cpl_rx_pkt *p; + const struct cpl_rx_pkt *p; struct adapter *adapter = sge->adapter; struct sge_port_stats *st; - skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad, - sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES, - SGE_RX_DROP_THRES); + skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad); if (unlikely(!skb)) { sge->stats.rx_drops++; - return 0; + return; } - p = (struct cpl_rx_pkt *)skb->data; - skb_pull(skb, sizeof(*p)); + p = (const struct cpl_rx_pkt *) skb->data; if (p->iff >= adapter->params.nports) { kfree_skb(skb); - return 0; + return; } + __skb_pull(skb, sizeof(*p)); skb->dev = adapter->port[p->iff].dev; skb->dev->last_rx = jiffies; @@ -1411,7 +1409,6 @@ static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) netif_rx(skb); #endif } - return 0; } /* @@ -1493,12 +1490,11 @@ static int process_responses(struct adapter *adapter, int budget) struct sge *sge = adapter->sge; struct respQ *q = &sge->respQ; struct respQ_e *e = &q->entries[q->cidx]; - int budget_left = budget; + int done = 0; unsigned int flags = 0; unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; - - while (likely(budget_left && e->GenerationBit == q->genbit)) { + while (done < budget && e->GenerationBit == q->genbit) { flags |= e->Qsleeping; cmdq_processed[0] += e->Cmdq0CreditReturn; @@ -1508,14 +1504,16 @@ static int process_responses(struct adapter *adapter, int budget) * ping-pong of TX state information on MP where the sender * might run on a different CPU than this function... */ - if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) { + if (unlikely((flags & F_CMDQ0_ENABLE) || cmdq_processed[0] > 64)) { flags = update_tx_info(adapter, flags, cmdq_processed[0]); cmdq_processed[0] = 0; } + if (unlikely(cmdq_processed[1] > 16)) { sge->cmdQ[1].processed += cmdq_processed[1]; cmdq_processed[1] = 0; } + if (likely(e->DataValid)) { struct freelQ *fl = &sge->freelQ[e->FreelistQid]; @@ -1525,12 +1523,16 @@ static int process_responses(struct adapter *adapter, int budget) else sge_rx(sge, fl, e->BufferLength); + ++done; + /* * Note: this depends on each packet consuming a * single free-list buffer; cf. the BUG above. */ if (++fl->cidx == fl->size) fl->cidx = 0; + prefetch(fl->centries[fl->cidx].skb); + if (unlikely(--fl->credits < fl->size - SGE_FREEL_REFILL_THRESH)) refill_free_list(sge, fl); @@ -1549,14 +1551,12 @@ static int process_responses(struct adapter *adapter, int budget) writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); q->credits = 0; } - --budget_left; } flags = update_tx_info(adapter, flags, cmdq_processed[0]); sge->cmdQ[1].processed += cmdq_processed[1]; - budget -= budget_left; - return budget; + return done; } static inline int responses_pending(const struct adapter *adapter) @@ -1581,11 +1581,14 @@ static int process_pure_responses(struct adapter *adapter) struct sge *sge = adapter->sge; struct respQ *q = &sge->respQ; struct respQ_e *e = &q->entries[q->cidx]; + const struct freelQ *fl = &sge->freelQ[e->FreelistQid]; unsigned int flags = 0; unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; + prefetch(fl->centries[fl->cidx].skb); if (e->DataValid) return 1; + do { flags |= e->Qsleeping;