target/linux/generic/patches-4.4/038-net_sched-generalize-bulk-dequeue.patch
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Jun 2016 23:16:52 -0700
Subject: [PATCH] net_sched: generalize bulk dequeue

When qdisc bulk dequeue was added in linux-3.18 (commit
5772e9a3463b "qdisc: bulk dequeue support for qdiscs
with TCQ_F_ONETXQUEUE"), it was constrained to some
specific qdiscs.

With some extra care, we can extend this to all qdiscs,
so that typical traffic shaping solutions can benefit from
small batches (8 packets in this patch).

For example, HTB is often used on a multi-queue device,
and bonding/team are multi-queue devices as well.

The idea is to bulk-dequeue packets that map to the same transmit queue.

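To make the rule concrete, here is a minimal, self-contained userspace C
sketch of the dequeue logic described above: chain up to a small batch
(8 here, per the commit message) of packets that map to the same transmit
queue, and park the first mismatching packet so it is handed out first on
the next round, as q->skb_bad_txq does in the diff below. The struct pkt /
struct fifo types and the fifo_dequeue() / bulk_dequeue_same_txq() helpers
are illustrative stand-ins, not kernel API.

/* Userspace model of the dequeue rule this patch adds.  Illustrative only. */
#include <stdio.h>

struct pkt {
        int queue_mapping;      /* stand-in for skb_get_queue_mapping() */
        struct pkt *next;       /* singly linked, like skb->next */
};

struct fifo {
        struct pkt *head;       /* pending packets, dequeued from the head */
        struct pkt *bad_txq;    /* parked packet, like q->skb_bad_txq */
};

static struct pkt *fifo_dequeue(struct fifo *q)
{
        struct pkt *p = q->head;

        if (p)
                q->head = p->next;
        return p;
}

/* Return a chain of up to 8 packets that all share one queue mapping. */
static struct pkt *bulk_dequeue_same_txq(struct fifo *q, int *packets)
{
        struct pkt *skb, *nskb, *tail;
        int cnt = 1;

        /* a previously parked packet goes out first */
        skb = q->bad_txq ? q->bad_txq : fifo_dequeue(q);
        q->bad_txq = NULL;
        if (!skb)
                return NULL;

        tail = skb;
        while (cnt < 8) {
                nskb = fifo_dequeue(q);
                if (!nskb)
                        break;
                if (nskb->queue_mapping != skb->queue_mapping) {
                        q->bad_txq = nskb;      /* retry it on the next call */
                        break;
                }
                tail->next = nskb;
                tail = nskb;
                cnt++;
        }
        tail->next = NULL;
        *packets = cnt;
        return skb;             /* caller sends the whole chain on one txq */
}

int main(void)
{
        struct pkt p[5] = {
                { .queue_mapping = 0 }, { .queue_mapping = 0 },
                { .queue_mapping = 1 }, { .queue_mapping = 0 },
                { .queue_mapping = 1 },
        };
        struct fifo q = { .head = &p[0] };
        struct pkt *chain;
        int i, n;

        for (i = 0; i < 4; i++)
                p[i].next = &p[i + 1];

        while ((chain = bulk_dequeue_same_txq(&q, &n)) != NULL)
                printf("xmit %d packet(s) on txq %d\n", n, chain->queue_mapping);
        return 0;
}

Parking the mismatching packet instead of dropping it keeps accounting
intact; the patch does the same by bumping the backlog and qlen when
stashing into skb_bad_txq and decrementing them when the stashed skb is
picked up again.
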
This brings a 35 to 80 % performance increase in an HTB setup
under pressure on a bonding setup:

1) NUMA node contention :   610,000 pps -> 1,110,000 pps
2) No node contention   : 1,380,000 pps -> 1,930,000 pps

Now we should work to add batches on the enqueue() side ;)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Florian Westphal <fw@strlen.de>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---

--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -88,13 +88,14 @@ struct Qdisc {
        /*
         * For performance sake on SMP, we put highly modified fields at the end
         */
-       struct Qdisc            *next_sched ____cacheline_aligned_in_smp;
-       struct sk_buff          *gso_skb;
-       unsigned long           state;
+       struct sk_buff          *gso_skb ____cacheline_aligned_in_smp;
        struct sk_buff_head     q;
        struct gnet_stats_basic_packed bstats;
        unsigned int            __state;
        struct gnet_stats_queue qstats;
+       unsigned long           state;
+       struct Qdisc            *next_sched;
+       struct sk_buff          *skb_bad_txq;
        struct rcu_head         rcu_head;
        int                     padded;
        atomic_t                refcnt;
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -76,6 +76,34 @@ static void try_bulk_dequeue_skb(struct
        skb->next = NULL;
 }
 
+/* This variant of try_bulk_dequeue_skb() makes sure
+ * all skbs in the chain are for the same txq
+ */
+static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
+                                     struct sk_buff *skb,
+                                     int *packets)
+{
+       int mapping = skb_get_queue_mapping(skb);
+       struct sk_buff *nskb;
+       int cnt = 0;
+
+       do {
+               nskb = q->dequeue(q);
+               if (!nskb)
+                       break;
+               if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
+                       q->skb_bad_txq = nskb;
+                       qdisc_qstats_backlog_inc(q, nskb);
+                       q->q.qlen++;
+                       break;
+               }
+               skb->next = nskb;
+               skb = nskb;
+       } while (++cnt < 8);
+       (*packets) += cnt;
+       skb->next = NULL;
+}
+
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
@@ -86,8 +114,9 @@ static struct sk_buff *dequeue_skb(struc
        const struct netdev_queue *txq = q->dev_queue;
 
        *packets = 1;
-       *validate = true;
        if (unlikely(skb)) {
+               /* skb in gso_skb were already validated */
+               *validate = false;
                /* check the reason of requeuing without tx lock first */
                txq = skb_get_tx_queue(txq->dev, skb);
                if (!netif_xmit_frozen_or_stopped(txq)) {
@@ -95,15 +124,30 @@ static struct sk_buff *dequeue_skb(struc
                        q->q.qlen--;
                } else
                        skb = NULL;
-               /* skb in gso_skb were already validated */
-               *validate = false;
-       } else {
-               if (!(q->flags & TCQ_F_ONETXQUEUE) ||
-                   !netif_xmit_frozen_or_stopped(txq)) {
-                       skb = q->dequeue(q);
-                       if (skb && qdisc_may_bulk(q))
-                               try_bulk_dequeue_skb(q, skb, txq, packets);
+               return skb;
+       }
+       *validate = true;
+       skb = q->skb_bad_txq;
+       if (unlikely(skb)) {
+               /* check the reason of requeuing without tx lock first */
+               txq = skb_get_tx_queue(txq->dev, skb);
+               if (!netif_xmit_frozen_or_stopped(txq)) {
+                       q->skb_bad_txq = NULL;
+                       qdisc_qstats_backlog_dec(q, skb);
+                       q->q.qlen--;
+                       goto bulk;
                }
+               return NULL;
+       }
+       if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+           !netif_xmit_frozen_or_stopped(txq))
+               skb = q->dequeue(q);
+       if (skb) {
+bulk:
+               if (qdisc_may_bulk(q))
+                       try_bulk_dequeue_skb(q, skb, txq, packets);
+               else
+                       try_bulk_dequeue_skb_slow(q, skb, packets);
        }
        return skb;
 }
@@ -649,11 +693,14 @@ void qdisc_reset(struct Qdisc *qdisc)
        if (ops->reset)
                ops->reset(qdisc);
 
+       kfree_skb(qdisc->skb_bad_txq);
+       qdisc->skb_bad_txq = NULL;
+
        if (qdisc->gso_skb) {
                kfree_skb_list(qdisc->gso_skb);
                qdisc->gso_skb = NULL;
-               qdisc->q.qlen = 0;
        }
+       qdisc->q.qlen = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -692,6 +739,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
        dev_put(qdisc_dev(qdisc));
 
        kfree_skb_list(qdisc->gso_skb);
+       kfree_skb(qdisc->skb_bad_txq);
        /*
         * gen_estimator est_timer() might access qdisc->q.lock,
         * wait a RCU grace period before freeing qdisc.