net: sched: make cls_u32 lockless
[firefly-linux-kernel-4.4.55.git] / net / sched / cls_u32.c
1 /*
2  * net/sched/cls_u32.c  Ugly (or Universal) 32bit key Packet Classifier.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  *      The filters are packed to hash tables of key nodes
12  *      with a set of 32bit key/mask pairs at every node.
13  *      Nodes reference next level hash tables etc.
14  *
15  *      This scheme is the best universal classifier I managed to
16  *      invent; it is not super-fast, but it is not slow (provided you
17  *      program it correctly), and general enough.  And its relative
18  *      speed grows as the number of rules becomes larger.
19  *
20  *      It seems that it represents the best middle point between
21  *      speed and manageability both by human and by machine.
22  *
23  *      It is especially useful for link sharing combined with QoS;
24  *      pure RSVP doesn't need such a general approach and can use
25  *      much simpler (and faster) schemes, sort of cls_rsvp.c.
26  *
27  *      JHS: We should remove the CONFIG_NET_CLS_IND from here
28  *      eventually when the meta match extension is made available
29  *
30  *      nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
31  */
32
33 #include <linux/module.h>
34 #include <linux/slab.h>
35 #include <linux/types.h>
36 #include <linux/kernel.h>
37 #include <linux/string.h>
38 #include <linux/errno.h>
39 #include <linux/percpu.h>
40 #include <linux/rtnetlink.h>
41 #include <linux/skbuff.h>
42 #include <linux/bitmap.h>
43 #include <net/netlink.h>
44 #include <net/act_api.h>
45 #include <net/pkt_cls.h>
46
47 struct tc_u_knode {
48         struct tc_u_knode __rcu *next;
49         u32                     handle;
50         struct tc_u_hnode __rcu *ht_up;
51         struct tcf_exts         exts;
52 #ifdef CONFIG_NET_CLS_IND
53         int                     ifindex;
54 #endif
55         u8                      fshift;
56         struct tcf_result       res;
57         struct tc_u_hnode __rcu *ht_down;
58 #ifdef CONFIG_CLS_U32_PERF
59         struct tc_u32_pcnt __percpu *pf;
60 #endif
61 #ifdef CONFIG_CLS_U32_MARK
62         u32                     val;
63         u32                     mask;
64         u32 __percpu            *pcpu_success;
65 #endif
66         struct tcf_proto        *tp;
67         struct tc_u32_sel       sel;
68         struct rcu_head         rcu;
69 };
70
71 struct tc_u_hnode {
72         struct tc_u_hnode __rcu *next;
73         u32                     handle;
74         u32                     prio;
75         struct tc_u_common      *tp_c;
76         int                     refcnt;
77         unsigned int            divisor;
78         struct tc_u_knode __rcu *ht[1];
79         struct rcu_head         rcu;
80 };
81
/* Per-qdisc shared state: the list of all hash tables plus the handle
 * generator. Shared by every u32 tcf_proto instance attached to the
 * same Qdisc (via tp->q->u32_node); modified only under RTNL.
 */
struct tc_u_common {
	struct tc_u_hnode __rcu	*hlist;		/* all hash tables of this qdisc */
	struct Qdisc		*q;		/* owning qdisc */
	int			refcnt;		/* number of tcf_proto users */
	u32			hgenerator;	/* last hash-table id handed out */
	struct rcu_head		rcu;
};
89
90 static inline unsigned int u32_hash_fold(__be32 key,
91                                          const struct tc_u32_sel *sel,
92                                          u8 fshift)
93 {
94         unsigned int h = ntohl(key & sel->hmask) >> fshift;
95
96         return h;
97 }
98
/* Main classification loop. Walks the hash-table hierarchy starting at
 * tp->root, comparing each knode's key/mask set against packet header
 * words; TC_U32_TERMINAL nodes deliver a result, linked ht_down tables
 * are descended into via an explicit stack. Runs in the softirq path
 * under rcu_read_lock_bh(), hence all rcu_dereference_bh() loads.
 * Returns the action verdict, or -1 if no terminal filter matched.
 */
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res)
{
	struct {
		struct tc_u_knode *knode;
		unsigned int      off;
	} stack[TC_U32_MAXDEPTH];	/* saved position per nested table */

	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
	unsigned int off = skb_network_offset(skb);
	struct tc_u_knode *n;
	int sdepth = 0;		/* current stack depth */
	int off2 = 0;		/* pending relative offset (OFFSET/VAROFFSET) */
	int sel = 0;		/* bucket index in the current table */
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
	n = rcu_dereference_bh(ht->ht[sel]);

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
		__this_cpu_inc(n->pf->rcnt);
		j = 0;
#endif

#ifdef CONFIG_CLS_U32_MARK
		/* skb->mark prefilter: mismatch skips this node entirely */
		if ((skb->mark & n->mask) != n->val) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		} else {
			__this_cpu_inc(*n->pcpu_success);
		}
#endif

		/* Compare every key/mask pair against the packet data. */
		for (i = n->sel.nkeys; i > 0; i--, key++) {
			int toff = off + key->off + (off2 & key->offmask);
			__be32 *data, hdata;

			/* guard: toff must not have wrapped negative */
			if (skb_headroom(skb) + toff > INT_MAX)
				goto out;

			data = skb_header_pointer(skb, toff, 4, &hdata);
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
				n = rcu_dereference_bh(n->next);
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
			__this_cpu_inc(n->pf->kcnts[j]);
			j++;
#endif
		}

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
check_terminal:
			if (n->sel.flags & TC_U32_TERMINAL) {

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
				if (!tcf_match_indev(skb, n->ifindex)) {
					n = rcu_dereference_bh(n->next);
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
				__this_cpu_inc(n->pf->rhit);
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
					n = rcu_dereference_bh(n->next);
					goto next_knode;
				}

				return r;
			}
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

		/* PUSH: descend into the linked hash table */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
		stack[sdepth].off = off;
		sdepth++;

		ht = rcu_dereference_bh(n->ht_down);
		sel = 0;
		if (ht->divisor) {
			__be32 *data, hdata;

			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
						  &hdata);
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
			goto next_ht;

		/* compute the next-header offset for the child table */
		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
			off2 = n->sel.off + 3;
			if (n->sel.flags & TC_U32_VAROFFSET) {
				__be16 *data, hdata;

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
							  2, &hdata);
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
			off2 &= ~3;	/* keep 4-byte alignment */
		}
		if (n->sel.flags & TC_U32_EAT) {
			off += off2;
			off2 = 0;
		}

		if (off < skb->len)
			goto next_ht;
	}

	/* POP: bucket exhausted, resume matching in the parent table */
	if (sdepth--) {
		n = stack[sdepth].knode;
		ht = rcu_dereference_bh(n->ht_up);
		off = stack[sdepth].off;
		goto check_terminal;
	}
out:
	return -1;

deadloop:
	net_warn_ratelimited("cls_u32: dead loop\n");
	return -1;
}
245
246 static struct tc_u_hnode *
247 u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
248 {
249         struct tc_u_hnode *ht;
250
251         for (ht = rtnl_dereference(tp_c->hlist);
252              ht;
253              ht = rtnl_dereference(ht->next))
254                 if (ht->handle == handle)
255                         break;
256
257         return ht;
258 }
259
260 static struct tc_u_knode *
261 u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
262 {
263         unsigned int sel;
264         struct tc_u_knode *n = NULL;
265
266         sel = TC_U32_HASH(handle);
267         if (sel > ht->divisor)
268                 goto out;
269
270         for (n = rtnl_dereference(ht->ht[sel]);
271              n;
272              n = rtnl_dereference(n->next))
273                 if (n->handle == handle)
274                         break;
275 out:
276         return n;
277 }
278
279
280 static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
281 {
282         struct tc_u_hnode *ht;
283         struct tc_u_common *tp_c = tp->data;
284
285         if (TC_U32_HTID(handle) == TC_U32_ROOT)
286                 ht = rtnl_dereference(tp->root);
287         else
288                 ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
289
290         if (!ht)
291                 return 0;
292
293         if (TC_U32_KEY(handle) == 0)
294                 return (unsigned long)ht;
295
296         return (unsigned long)u32_lookup_key(ht, handle);
297 }
298
/* ->put() hook: u32 keeps no per-reference state for ->get() results,
 * so there is nothing to release here.
 */
static void u32_put(struct tcf_proto *tp, unsigned long f)
{
}
302
/* Allocate an unused hash-table id (returned shifted into the htid
 * position of a handle, with bit 0x800 set). Returns 0 when no free
 * id was found within 0x800 probes. Called under RTNL.
 * NOTE(review): the wrap at 0x7FF while probing ids with 0x800 OR-ed
 * in looks inconsistent (0x7FF vs 0x800) — preserved as-is; confirm
 * against upstream history before changing.
 */
static u32 gen_new_htid(struct tc_u_common *tp_c)
{
	int i = 0x800;

	/* hgenerator only used inside rtnl lock it is safe to increment
	 * without read _copy_ update semantics
	 */
	do {
		if (++tp_c->hgenerator == 0x7FF)
			tp_c->hgenerator = 1;
	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));

	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
}
317
/* ->init() hook: attach a new tcf_proto instance. Allocates its root
 * hash table and creates (or reuses) the per-qdisc tc_u_common shared
 * state. Returns 0 or -ENOBUFS. Called under RTNL.
 */
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_hnode *root_ht;
	struct tc_u_common *tp_c;

	/* all u32 instances on one qdisc share a single tc_u_common */
	tp_c = tp->q->u32_node;

	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
	if (root_ht == NULL)
		return -ENOBUFS;

	root_ht->divisor = 0;	/* root table has a single bucket */
	root_ht->refcnt++;
	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
	root_ht->prio = tp->prio;

	if (tp_c == NULL) {
		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
		if (tp_c == NULL) {
			kfree(root_ht);
			return -ENOBUFS;
		}
		tp_c->q = tp->q;
		tp->q->u32_node = tp_c;
	}

	tp_c->refcnt++;
	/* fully initialize the table before publishing it to readers */
	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
	rcu_assign_pointer(tp_c->hlist, root_ht);
	root_ht->tp_c = tp_c;

	rcu_assign_pointer(tp->root, root_ht);
	tp->data = tp_c;
	return 0;
}
353
354 static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
355 {
356         tcf_unbind_filter(tp, &n->res);
357         tcf_exts_destroy(tp, &n->exts);
358         if (n->ht_down)
359                 n->ht_down->refcnt--;
360 #ifdef CONFIG_CLS_U32_PERF
361         free_percpu(n->pf);
362 #endif
363         kfree(n);
364         return 0;
365 }
366
/* RCU callback: free a key node once the grace period has elapsed and
 * no classify path can still be referencing it.
 */
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);

	u32_destroy_key(key->tp, key);
}
373
/* Unlink a key node from its hash bucket (under RTNL) and defer the
 * actual free to an RCU grace period. WARNs if the node cannot be
 * found on its bucket list; always returns 0.
 */
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
	struct tc_u_knode __rcu **kp;
	struct tc_u_knode *pkp;
	struct tc_u_hnode *ht = key->ht_up;

	if (ht) {
		kp = &ht->ht[TC_U32_HASH(key->handle)];
		for (pkp = rtnl_dereference(*kp); pkp;
		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
			if (pkp == key) {
				/* concurrent readers see either the old
				 * or the new next pointer; both are safe
				 */
				RCU_INIT_POINTER(*kp, key->next);

				call_rcu(&key->rcu, u32_delete_key_rcu);
				return 0;
			}
		}
	}
	WARN_ON(1);
	return 0;
}
395
/* Empty every bucket of a hash table, deferring each key node's free
 * to an RCU grace period. Called under RTNL; note <= divisor because
 * a table has divisor + 1 buckets.
 */
static void u32_clear_hnode(struct tc_u_hnode *ht)
{
	struct tc_u_knode *n;
	unsigned int h;

	for (h = 0; h <= ht->divisor; h++) {
		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
			RCU_INIT_POINTER(ht->ht[h],
					 rtnl_dereference(n->next));
			call_rcu(&n->rcu, u32_delete_key_rcu);
		}
	}
}
409
/* Clear a no-longer-referenced hash table, unlink it from the shared
 * list and free it after an RCU grace period. Returns -ENOENT if the
 * table is not on the list. Called under RTNL.
 */
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode __rcu **hn;
	struct tc_u_hnode *phn;

	WARN_ON(ht->refcnt);	/* caller must have dropped all refs */

	u32_clear_hnode(ht);

	hn = &tp_c->hlist;
	for (phn = rtnl_dereference(*hn);
	     phn;
	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
		if (phn == ht) {
			RCU_INIT_POINTER(*hn, ht->next);
			kfree_rcu(ht, rcu);
			return 0;
		}
	}

	return -ENOENT;
}
433
/* ->destroy() hook: drop this tcf_proto instance's references. The
 * root table goes away with the instance; once the last instance on
 * the qdisc is gone (tp_c->refcnt reaches zero) every remaining table
 * is cleared and freed via RCU. Called under RTNL.
 */
static void u32_destroy(struct tcf_proto *tp)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);

	WARN_ON(root_ht == NULL);

	if (root_ht && --root_ht->refcnt == 0)
		u32_destroy_hnode(tp, root_ht);

	if (--tp_c->refcnt == 0) {
		struct tc_u_hnode *ht;

		tp->q->u32_node = NULL;

		/* drop refs and empty all tables first ... */
		for (ht = rtnl_dereference(tp_c->hlist);
		     ht;
		     ht = rtnl_dereference(ht->next)) {
			ht->refcnt--;
			u32_clear_hnode(ht);
		}

		/* ... then unlink and RCU-free the table shells */
		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
			RCU_INIT_POINTER(tp_c->hlist, ht->next);
			kfree_rcu(ht, rcu);
		}

		kfree(tp_c);
	}

	tp->data = NULL;
}
466
467 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
468 {
469         struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
470         struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
471
472         if (ht == NULL)
473                 return 0;
474
475         if (TC_U32_KEY(ht->handle))
476                 return u32_delete_key(tp, (struct tc_u_knode *)ht);
477
478         if (root_ht == ht)
479                 return -EINVAL;
480
481         if (ht->refcnt == 1) {
482                 ht->refcnt--;
483                 u32_destroy_hnode(tp, ht);
484         } else {
485                 return -EBUSY;
486         }
487
488         return 0;
489 }
490
491 #define NR_U32_NODE (1<<12)
492 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
493 {
494         struct tc_u_knode *n;
495         unsigned long i;
496         unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
497                                         GFP_KERNEL);
498         if (!bitmap)
499                 return handle | 0xFFF;
500
501         for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
502              n;
503              n = rtnl_dereference(n->next))
504                 set_bit(TC_U32_NODE(n->handle), bitmap);
505
506         i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
507         if (i >= NR_U32_NODE)
508                 i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
509
510         kfree(bitmap);
511         return handle | (i >= NR_U32_NODE ? 0xFFF : i);
512 }
513
514 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
515         [TCA_U32_CLASSID]       = { .type = NLA_U32 },
516         [TCA_U32_HASH]          = { .type = NLA_U32 },
517         [TCA_U32_LINK]          = { .type = NLA_U32 },
518         [TCA_U32_DIVISOR]       = { .type = NLA_U32 },
519         [TCA_U32_SEL]           = { .len = sizeof(struct tc_u32_sel) },
520         [TCA_U32_INDEV]         = { .type = NLA_STRING, .len = IFNAMSIZ },
521         [TCA_U32_MARK]          = { .len = sizeof(struct tc_u32_mark) },
522 };
523
/* Apply the parsed netlink attributes (actions, downstream link,
 * classid, input device) to a key node. Validated extensions are moved
 * into 'n' on success and destroyed on failure. Returns 0 or a
 * negative errno. Called under RTNL.
 */
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
			 unsigned long base, struct tc_u_hnode *ht,
			 struct tc_u_knode *n, struct nlattr **tb,
			 struct nlattr *est, bool ovr)
{
	int err;
	struct tcf_exts e;

	tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_U32_LINK]) {
		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
		struct tc_u_hnode *ht_down = NULL, *ht_old;

		/* a link must name a hash table, not a key node */
		if (TC_U32_KEY(handle))
			goto errout;

		if (handle) {
			ht_down = u32_lookup_ht(ht->tp_c, handle);

			if (ht_down == NULL)
				goto errout;
			ht_down->refcnt++;
		}

		/* take the new ref before dropping the old one */
		ht_old = rtnl_dereference(n->ht_down);
		rcu_assign_pointer(n->ht_down, ht_down);

		if (ht_old)
			ht_old->refcnt--;
	}
	if (tb[TCA_U32_CLASSID]) {
		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
		tcf_bind_filter(tp, &n->res, base);
	}

#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_U32_INDEV]) {
		int ret;
		ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
		if (ret < 0)
			goto errout;
		n->ifindex = ret;
	}
#endif
	tcf_exts_change(tp, &n->exts, &e);

	return 0;
errout:
	tcf_exts_destroy(tp, &e);
	return err;
}
580
581 static int u32_change(struct net *net, struct sk_buff *in_skb,
582                       struct tcf_proto *tp, unsigned long base, u32 handle,
583                       struct nlattr **tca,
584                       unsigned long *arg, bool ovr)
585 {
586         struct tc_u_common *tp_c = tp->data;
587         struct tc_u_hnode *ht;
588         struct tc_u_knode *n;
589         struct tc_u32_sel *s;
590         struct nlattr *opt = tca[TCA_OPTIONS];
591         struct nlattr *tb[TCA_U32_MAX + 1];
592         u32 htid;
593         int err;
594 #ifdef CONFIG_CLS_U32_PERF
595         size_t size;
596 #endif
597
598         if (opt == NULL)
599                 return handle ? -EINVAL : 0;
600
601         err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
602         if (err < 0)
603                 return err;
604
605         n = (struct tc_u_knode *)*arg;
606         if (n) {
607                 if (TC_U32_KEY(n->handle) == 0)
608                         return -EINVAL;
609
610                 return u32_set_parms(net, tp, base, n->ht_up, n, tb,
611                                      tca[TCA_RATE], ovr);
612         }
613
614         if (tb[TCA_U32_DIVISOR]) {
615                 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
616
617                 if (--divisor > 0x100)
618                         return -EINVAL;
619                 if (TC_U32_KEY(handle))
620                         return -EINVAL;
621                 if (handle == 0) {
622                         handle = gen_new_htid(tp->data);
623                         if (handle == 0)
624                                 return -ENOMEM;
625                 }
626                 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
627                 if (ht == NULL)
628                         return -ENOBUFS;
629                 ht->tp_c = tp_c;
630                 ht->refcnt = 1;
631                 ht->divisor = divisor;
632                 ht->handle = handle;
633                 ht->prio = tp->prio;
634                 RCU_INIT_POINTER(ht->next, tp_c->hlist);
635                 rcu_assign_pointer(tp_c->hlist, ht);
636                 *arg = (unsigned long)ht;
637                 return 0;
638         }
639
640         if (tb[TCA_U32_HASH]) {
641                 htid = nla_get_u32(tb[TCA_U32_HASH]);
642                 if (TC_U32_HTID(htid) == TC_U32_ROOT) {
643                         ht = rtnl_dereference(tp->root);
644                         htid = ht->handle;
645                 } else {
646                         ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
647                         if (ht == NULL)
648                                 return -EINVAL;
649                 }
650         } else {
651                 ht = rtnl_dereference(tp->root);
652                 htid = ht->handle;
653         }
654
655         if (ht->divisor < TC_U32_HASH(htid))
656                 return -EINVAL;
657
658         if (handle) {
659                 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
660                         return -EINVAL;
661                 handle = htid | TC_U32_NODE(handle);
662         } else
663                 handle = gen_new_kid(ht, htid);
664
665         if (tb[TCA_U32_SEL] == NULL)
666                 return -EINVAL;
667
668         s = nla_data(tb[TCA_U32_SEL]);
669
670         n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
671         if (n == NULL)
672                 return -ENOBUFS;
673
674 #ifdef CONFIG_CLS_U32_PERF
675         size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
676         n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
677         if (!n->pf) {
678                 kfree(n);
679                 return -ENOBUFS;
680         }
681 #endif
682
683         memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
684         n->ht_up = ht;
685         n->handle = handle;
686         n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
687         tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
688         n->tp = tp;
689
690 #ifdef CONFIG_CLS_U32_MARK
691         n->pcpu_success = alloc_percpu(u32);
692
693         if (tb[TCA_U32_MARK]) {
694                 struct tc_u32_mark *mark;
695
696                 mark = nla_data(tb[TCA_U32_MARK]);
697                 n->val = mark->val;
698                 n->mask = mark->mask;
699         }
700 #endif
701
702         err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
703         if (err == 0) {
704                 struct tc_u_knode __rcu **ins;
705                 struct tc_u_knode *pins;
706
707                 ins = &ht->ht[TC_U32_HASH(handle)];
708                 for (pins = rtnl_dereference(*ins); pins;
709                      ins = &pins->next, pins = rtnl_dereference(*ins))
710                         if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
711                                 break;
712
713                 RCU_INIT_POINTER(n->next, pins);
714                 rcu_assign_pointer(*ins, n);
715
716                 *arg = (unsigned long)n;
717                 return 0;
718         }
719 #ifdef CONFIG_CLS_U32_PERF
720         free_percpu(n->pf);
721 #endif
722         kfree(n);
723         return err;
724 }
725
726 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
727 {
728         struct tc_u_common *tp_c = tp->data;
729         struct tc_u_hnode *ht;
730         struct tc_u_knode *n;
731         unsigned int h;
732
733         if (arg->stop)
734                 return;
735
736         for (ht = rtnl_dereference(tp_c->hlist);
737              ht;
738              ht = rtnl_dereference(ht->next)) {
739                 if (ht->prio != tp->prio)
740                         continue;
741                 if (arg->count >= arg->skip) {
742                         if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
743                                 arg->stop = 1;
744                                 return;
745                         }
746                 }
747                 arg->count++;
748                 for (h = 0; h <= ht->divisor; h++) {
749                         for (n = rtnl_dereference(ht->ht[h]);
750                              n;
751                              n = rtnl_dereference(n->next)) {
752                                 if (arg->count < arg->skip) {
753                                         arg->count++;
754                                         continue;
755                                 }
756                                 if (arg->fn(tp, (unsigned long)n, arg) < 0) {
757                                         arg->stop = 1;
758                                         return;
759                                 }
760                                 arg->count++;
761                         }
762                 }
763         }
764 }
765
/* ->dump() hook: emit one filter (knode) or hash table (hnode) as a
 * nested TCA_OPTIONS attribute. Hash tables carry only their divisor;
 * key nodes carry selector, htid, classid, link, mark and aggregated
 * per-cpu counters. Returns skb->len on success, -1 on overflow.
 */
static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
		     struct sk_buff *skb, struct tcmsg *t)
{
	struct tc_u_knode *n = (struct tc_u_knode *)fh;
	struct tc_u_hnode *ht_up, *ht_down;
	struct nlattr *nest;

	if (n == NULL)
		return skb->len;

	t->tcm_handle = n->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	if (TC_U32_KEY(n->handle) == 0) {
		/* a zero key part means fh is really a hash table */
		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
		u32 divisor = ht->divisor + 1;

		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
			goto nla_put_failure;
	} else {
#ifdef CONFIG_CLS_U32_PERF
		struct tc_u32_pcnt *gpf;
#endif
		int cpu;

		if (nla_put(skb, TCA_U32_SEL,
			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
			    &n->sel))
			goto nla_put_failure;

		ht_up = rtnl_dereference(n->ht_up);
		if (ht_up) {
			u32 htid = n->handle & 0xFFFFF000;
			if (nla_put_u32(skb, TCA_U32_HASH, htid))
				goto nla_put_failure;
		}
		if (n->res.classid &&
		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
			goto nla_put_failure;

		ht_down = rtnl_dereference(n->ht_down);
		if (ht_down &&
		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
			goto nla_put_failure;

#ifdef CONFIG_CLS_U32_MARK
		if ((n->val || n->mask)) {
			struct tc_u32_mark mark = {.val = n->val,
						   .mask = n->mask,
						   .success = 0};

			/* sum the per-cpu mark-hit counters for user space */
			for_each_possible_cpu(cpu) {
				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpu);

				mark.success += cnt;
			}

			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
				goto nla_put_failure;
		}
#endif

		if (tcf_exts_dump(skb, &n->exts) < 0)
			goto nla_put_failure;

#ifdef CONFIG_NET_CLS_IND
		if (n->ifindex) {
			struct net_device *dev;
			dev = __dev_get_by_index(net, n->ifindex);
			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
				goto nla_put_failure;
		}
#endif
#ifdef CONFIG_CLS_U32_PERF
		/* aggregate the per-cpu hit stats into a temporary buffer */
		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
			      n->sel.nkeys * sizeof(u64),
			      GFP_KERNEL);
		if (!gpf)
			goto nla_put_failure;

		for_each_possible_cpu(cpu) {
			int i;
			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

			gpf->rcnt += pf->rcnt;
			gpf->rhit += pf->rhit;
			for (i = 0; i < n->sel.nkeys; i++)
				gpf->kcnts[i] += pf->kcnts[i];
		}

		if (nla_put(skb, TCA_U32_PCNT,
			    sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
			    gpf)) {
			kfree(gpf);
			goto nla_put_failure;
		}
		kfree(gpf);
#endif
	}

	nla_nest_end(skb, nest);

	if (TC_U32_KEY(n->handle))
		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
			goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
880
/* Classifier callbacks registered with the traffic-control core. */
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind		=	"u32",
	.classify	=	u32_classify,
	.init		=	u32_init,
	.destroy	=	u32_destroy,
	.get		=	u32_get,
	.put		=	u32_put,
	.change		=	u32_change,
	.delete		=	u32_delete,
	.walk		=	u32_walk,
	.dump		=	u32_dump,
	.owner		=	THIS_MODULE,
};
894
/* Module init: report compile-time options and register the ops. */
static int __init init_u32(void)
{
	pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
	pr_info("    Performance counters on\n");
#endif
#ifdef CONFIG_NET_CLS_IND
	pr_info("    input device check on\n");
#endif
#ifdef CONFIG_NET_CLS_ACT
	pr_info("    Actions configured\n");
#endif
	return register_tcf_proto_ops(&cls_u32_ops);
}
909
/* Module exit: unregister the classifier from the TC core. */
static void __exit exit_u32(void)
{
	unregister_tcf_proto_ops(&cls_u32_ops);
}
914
/* Module registration boilerplate. */
module_init(init_u32)
module_exit(exit_u32)
MODULE_LICENSE("GPL");