ipv6: Do not depend on rt->n in rt6_probe().
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* .redirect hook for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= ipv6_addr_hash(&fl6->daddr);
392         val ^= ipv6_addr_hash(&fl6->saddr);
393
394         /* Work only if this not encapsulated */
395         switch (fl6->flowi6_proto) {
396         case IPPROTO_UDP:
397         case IPPROTO_TCP:
398         case IPPROTO_SCTP:
399                 val ^= (__force u16)fl6->fl6_sport;
400                 val ^= (__force u16)fl6->fl6_dport;
401                 break;
402
403         case IPPROTO_ICMPV6:
404                 val ^= (__force u16)fl6->fl6_icmp_type;
405                 val ^= (__force u16)fl6->fl6_icmp_code;
406                 break;
407         }
408         /* RFC6438 recommands to use flowlabel */
409         val ^= (__force u32)fl6->flowlabel;
410
411         /* Perhaps, we need to tune, this function? */
412         val = val ^ (val >> 7) ^ (val >> 12);
413         return val % candidate_count;
414 }
415
416 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
417                                              struct flowi6 *fl6)
418 {
419         struct rt6_info *sibling, *next_sibling;
420         int route_choosen;
421
422         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
423         /* Don't change the route, if route_choosen == 0
424          * (siblings does not include ourself)
425          */
426         if (route_choosen)
427                 list_for_each_entry_safe(sibling, next_sibling,
428                                 &match->rt6i_siblings, rt6i_siblings) {
429                         route_choosen--;
430                         if (route_choosen == 0) {
431                                 match = sibling;
432                                 break;
433                         }
434                 }
435         return match;
436 }
437
438 /*
439  *      Route lookup. Any table->tb6_lock is implied.
440  */
441
442 static inline struct rt6_info *rt6_device_match(struct net *net,
443                                                     struct rt6_info *rt,
444                                                     const struct in6_addr *saddr,
445                                                     int oif,
446                                                     int flags)
447 {
448         struct rt6_info *local = NULL;
449         struct rt6_info *sprt;
450
451         if (!oif && ipv6_addr_any(saddr))
452                 goto out;
453
454         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
455                 struct net_device *dev = sprt->dst.dev;
456
457                 if (oif) {
458                         if (dev->ifindex == oif)
459                                 return sprt;
460                         if (dev->flags & IFF_LOOPBACK) {
461                                 if (!sprt->rt6i_idev ||
462                                     sprt->rt6i_idev->dev->ifindex != oif) {
463                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
464                                                 continue;
465                                         if (local && (!oif ||
466                                                       local->rt6i_idev->dev->ifindex == oif))
467                                                 continue;
468                                 }
469                                 local = sprt;
470                         }
471                 } else {
472                         if (ipv6_chk_addr(net, saddr, dev,
473                                           flags & RT6_LOOKUP_F_IFACE))
474                                 return sprt;
475                 }
476         }
477
478         if (oif) {
479                 if (local)
480                         return local;
481
482                 if (flags & RT6_LOOKUP_F_IFACE)
483                         return net->ipv6.ip6_null_entry;
484         }
485 out:
486         return rt;
487 }
488
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * For a gateway route, look up the gateway's neighbour entry (without
 * taking a reference, under rcu_read_lock_bh()) and send a Neighbour
 * Solicitation to the gateway's solicited-node multicast address when
 *   - no neighbour entry exists, or
 *   - the entry is not NUD_VALID and more than rtr_probe_interval has
 *     passed since it was last updated.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; when an entry exists this is enforced by stamping
 * neigh->updated before the probe is sent.
 *
 * @rt may be NULL; non-gateway routes are ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			goto out;
	}

	if (!neigh ||
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/*
		 * neigh is NULL when the gateway has no neighbour entry yet;
		 * only touch (and unlock) the entry when there is one.
		 */
		if (neigh) {
			neigh->updated = jiffies;
			write_unlock(&neigh->lock);
		}

		target = (struct in6_addr *)&rt->rt6i_gateway;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
out:
		/* Reached only with a non-NULL, write-locked neigh. */
		write_unlock(&neigh->lock);
	}
	rcu_read_unlock_bh();
}
#else
/* Router reachability probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
535
536 /*
537  * Default Router Selection (RFC 2461 6.3.6)
538  */
539 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
540 {
541         struct net_device *dev = rt->dst.dev;
542         if (!oif || dev->ifindex == oif)
543                 return 2;
544         if ((dev->flags & IFF_LOOPBACK) &&
545             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
546                 return 1;
547         return 0;
548 }
549
550 static inline bool rt6_check_neigh(struct rt6_info *rt)
551 {
552         struct neighbour *neigh;
553         bool ret = false;
554
555         if (rt->rt6i_flags & RTF_NONEXTHOP ||
556             !(rt->rt6i_flags & RTF_GATEWAY))
557                 return true;
558
559         rcu_read_lock_bh();
560         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
561         if (neigh) {
562                 read_lock(&neigh->lock);
563                 if (neigh->nud_state & NUD_VALID)
564                         ret = true;
565 #ifdef CONFIG_IPV6_ROUTER_PREF
566                 else if (!(neigh->nud_state & NUD_FAILED))
567                         ret = true;
568 #endif
569                 read_unlock(&neigh->lock);
570         }
571         rcu_read_unlock_bh();
572
573         return ret;
574 }
575
576 static int rt6_score_route(struct rt6_info *rt, int oif,
577                            int strict)
578 {
579         int m;
580
581         m = rt6_check_dev(rt, oif);
582         if (!m && (strict & RT6_LOOKUP_F_IFACE))
583                 return -1;
584 #ifdef CONFIG_IPV6_ROUTER_PREF
585         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
586 #endif
587         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
588                 return -1;
589         return m;
590 }
591
592 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
593                                    int *mpri, struct rt6_info *match)
594 {
595         int m;
596
597         if (rt6_check_expired(rt))
598                 goto out;
599
600         m = rt6_score_route(rt, oif, strict);
601         if (m < 0)
602                 goto out;
603
604         if (m > *mpri) {
605                 if (strict & RT6_LOOKUP_F_REACHABLE)
606                         rt6_probe(match);
607                 *mpri = m;
608                 match = rt;
609         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
610                 rt6_probe(rt);
611         }
612
613 out:
614         return match;
615 }
616
617 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
618                                      struct rt6_info *rr_head,
619                                      u32 metric, int oif, int strict)
620 {
621         struct rt6_info *rt, *match;
622         int mpri = -1;
623
624         match = NULL;
625         for (rt = rr_head; rt && rt->rt6i_metric == metric;
626              rt = rt->dst.rt6_next)
627                 match = find_match(rt, oif, strict, &mpri, match);
628         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
629              rt = rt->dst.rt6_next)
630                 match = find_match(rt, oif, strict, &mpri, match);
631
632         return match;
633 }
634
635 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
636 {
637         struct rt6_info *match, *rt0;
638         struct net *net;
639
640         rt0 = fn->rr_ptr;
641         if (!rt0)
642                 fn->rr_ptr = rt0 = fn->leaf;
643
644         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
645
646         if (!match &&
647             (strict & RT6_LOOKUP_F_REACHABLE)) {
648                 struct rt6_info *next = rt0->dst.rt6_next;
649
650                 /* no entries matched; do round-robin */
651                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
652                         next = fn->leaf;
653
654                 if (next != rt0)
655                         fn->rr_ptr = next;
656         }
657
658         net = dev_net(rt0->dst.dev);
659         return match ? match : net->ipv6.ip6_null_entry;
660 }
661
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) received on
 * @dev from the router @gwaddr.
 *
 * After validating the option, the matching route-info route is looked
 * up.  A zero lifetime deletes an existing route; a non-zero lifetime
 * adds a missing route or refreshes the preference bits of an existing
 * one.  The route's expiry is then set from the lifetime (cleared when
 * the lifetime is not finite).
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Work on a masked copy of the prefix. */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
734
/*
 * BACKTRACK - climb the fib6 tree when the lookup produced the null entry.
 *
 * Expects the caller to have variables `rt` and `fn` and labels `out`
 * and `restart` in scope.  If `rt` is the namespace's null entry, walk
 * towards the root (descending into a parent's source subtree when it
 * has one that we did not come from) and jump to `restart` at the first
 * node carrying route info; jump to `out` on reaching the top-level root.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *up;					\
									\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			up = fn->parent;				\
			if (FIB6_SUBTREE(up) && FIB6_SUBTREE(up) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(up),	\
						 NULL, saddr);		\
			else						\
				fn = up;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
752
753 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
754                                              struct fib6_table *table,
755                                              struct flowi6 *fl6, int flags)
756 {
757         struct fib6_node *fn;
758         struct rt6_info *rt;
759
760         read_lock_bh(&table->tb6_lock);
761         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
762 restart:
763         rt = fn->leaf;
764         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
765         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
766                 rt = rt6_multipath_select(rt, fl6);
767         BACKTRACK(net, &fl6->saddr);
768 out:
769         dst_use(&rt->dst, jiffies);
770         read_unlock_bh(&table->tb6_lock);
771         return rt;
772
773 }
774
775 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
776                                     int flags)
777 {
778         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
779 }
780 EXPORT_SYMBOL_GPL(ip6_route_lookup);
781
782 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
783                             const struct in6_addr *saddr, int oif, int strict)
784 {
785         struct flowi6 fl6 = {
786                 .flowi6_oif = oif,
787                 .daddr = *daddr,
788         };
789         struct dst_entry *dst;
790         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
791
792         if (saddr) {
793                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
794                 flags |= RT6_LOOKUP_F_HAS_SADDR;
795         }
796
797         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
798         if (dst->error == 0)
799                 return (struct rt6_info *) dst;
800
801         dst_release(dst);
802
803         return NULL;
804 }
805
806 EXPORT_SYMBOL(rt6_lookup);
807
808 /* ip6_ins_rt is called with FREE table->tb6_lock.
809    It takes new route entry, the addition fails by any reason the
810    route is freed. In any case, if caller does not hold it, it may
811    be destroyed.
812  */
813
814 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
815 {
816         int err;
817         struct fib6_table *table;
818
819         table = rt->rt6i_table;
820         write_lock_bh(&table->tb6_lock);
821         err = fib6_add(&table->tb6_root, rt, info);
822         write_unlock_bh(&table->tb6_lock);
823
824         return err;
825 }
826
827 int ip6_ins_rt(struct rt6_info *rt)
828 {
829         struct nl_info info = {
830                 .nl_net = dev_net(rt->dst.dev),
831         };
832         return __ip6_ins_rt(rt, &info);
833 }
834
835 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
836                                       const struct in6_addr *daddr,
837                                       const struct in6_addr *saddr)
838 {
839         struct rt6_info *rt;
840
841         /*
842          *      Clone the route.
843          */
844
845         rt = ip6_rt_copy(ort, daddr);
846
847         if (rt) {
848                 int attempts = !in_softirq();
849
850                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
851                         if (ort->rt6i_dst.plen != 128 &&
852                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
853                                 rt->rt6i_flags |= RTF_ANYCAST;
854                         rt->rt6i_gateway = *daddr;
855                 }
856
857                 rt->rt6i_flags |= RTF_CACHE;
858
859 #ifdef CONFIG_IPV6_SUBTREES
860                 if (rt->rt6i_src.plen && saddr) {
861                         rt->rt6i_src.addr = *saddr;
862                         rt->rt6i_src.plen = 128;
863                 }
864 #endif
865
866         retry:
867                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
868                         struct net *net = dev_net(rt->dst.dev);
869                         int saved_rt_min_interval =
870                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
871                         int saved_rt_elasticity =
872                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
873
874                         if (attempts-- > 0) {
875                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
876                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
877
878                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
879
880                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
881                                         saved_rt_elasticity;
882                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
883                                         saved_rt_min_interval;
884                                 goto retry;
885                         }
886
887                         net_warn_ratelimited("Neighbour table overflow\n");
888                         dst_free(&rt->dst);
889                         return NULL;
890                 }
891         }
892
893         return rt;
894 }
895
896 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
897                                         const struct in6_addr *daddr)
898 {
899         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
900
901         if (rt) {
902                 rt->rt6i_flags |= RTF_CACHE;
903                 rt->n = neigh_clone(ort->n);
904         }
905         return rt;
906 }
907
/*
 * Policy-routing lookup in @table for the flow @fl6 on interface @oif.
 *
 * The FIB is searched under table->tb6_lock (read side).  If the selected
 * route is the namespace's null entry or already an RTF_CACHE entry it is
 * returned as is; otherwise a per-destination copy is created with
 * rt6_alloc_cow() or rt6_alloc_clone() and inserted with ip6_ins_rt().
 * Host routes that have a nexthop or gateway flag are returned without
 * copying.
 *
 * The returned route has had dst_hold() called on it, and its lastuse
 * and __use counters are updated before returning.
 */
908 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
909                                       struct flowi6 *fl6, int flags)
910 {
911         struct fib6_node *fn;
912         struct rt6_info *rt, *nrt;
913         int strict = 0;
            /* Upper bound on copy/insert rounds before giving up. */
914         int attempts = 3;
915         int err;
            /* Reachability is only requested when forwarding is off for the namespace. */
916         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
917
918         strict |= flags & RT6_LOOKUP_F_IFACE;
919
920 relookup:
921         read_lock_bh(&table->tb6_lock);
922
923 restart_2:
924         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
925
926 restart:
927         rt = rt6_select(fn, oif, strict | reachable);
928         if (rt->rt6i_nsiblings && oif == 0)
929                 rt = rt6_multipath_select(rt, fl6);
            /*
             * NOTE(review): BACKTRACK is a macro defined outside this block;
             * it presumably uses the fn/rt locals and may jump to the
             * "restart" and "out" labels - confirm against its definition.
             */
930         BACKTRACK(net, &fl6->saddr);
931         if (rt == net->ipv6.ip6_null_entry ||
932             rt->rt6i_flags & RTF_CACHE)
933                 goto out;
934
            /* Pin the route before the table lock is dropped. */
935         dst_hold(&rt->dst);
936         read_unlock_bh(&table->tb6_lock);
937
938         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
939                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
940         else if (!(rt->dst.flags & DST_HOST))
941                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
942         else
943                 goto out2;
944
            /* Swap our reference from the original route to the copy (or the null entry). */
945         ip6_rt_put(rt);
946         rt = nrt ? : net->ipv6.ip6_null_entry;
947
948         dst_hold(&rt->dst);
949         if (nrt) {
950                 err = ip6_ins_rt(nrt);
951                 if (!err)
952                         goto out2;
953         }
954
955         if (--attempts <= 0)
956                 goto out2;
957
958         /*
959          * Race condition! In the gap, when table->tb6_lock was
960          * released someone could insert this route.  Relookup.
961          */
962         ip6_rt_put(rt);
963         goto relookup;
964
965 out:
            /*
             * Reached with the table lock still held.  If the lookup was done
             * with the reachable flag set, clear it and repeat the lookup once
             * before accepting the result.
             */
966         if (reachable) {
967                 reachable = 0;
968                 goto restart_2;
969         }
970         dst_hold(&rt->dst);
971         read_unlock_bh(&table->tb6_lock);
972 out2:
973         rt->dst.lastuse = jiffies;
974         rt->dst.__use++;
975
976         return rt;
977 }
978
979 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
980                                             struct flowi6 *fl6, int flags)
981 {
982         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
983 }
984
985 static struct dst_entry *ip6_route_input_lookup(struct net *net,
986                                                 struct net_device *dev,
987                                                 struct flowi6 *fl6, int flags)
988 {
989         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
990                 flags |= RT6_LOOKUP_F_IFACE;
991
992         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
993 }
994
995 void ip6_route_input(struct sk_buff *skb)
996 {
997         const struct ipv6hdr *iph = ipv6_hdr(skb);
998         struct net *net = dev_net(skb->dev);
999         int flags = RT6_LOOKUP_F_HAS_SADDR;
1000         struct flowi6 fl6 = {
1001                 .flowi6_iif = skb->dev->ifindex,
1002                 .daddr = iph->daddr,
1003                 .saddr = iph->saddr,
1004                 .flowlabel = ip6_flowinfo(iph),
1005                 .flowi6_mark = skb->mark,
1006                 .flowi6_proto = iph->nexthdr,
1007         };
1008
1009         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1010 }
1011
1012 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1013                                              struct flowi6 *fl6, int flags)
1014 {
1015         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1016 }
1017
1018 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1019                                     struct flowi6 *fl6)
1020 {
1021         int flags = 0;
1022
1023         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1024
1025         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1026                 flags |= RT6_LOOKUP_F_IFACE;
1027
1028         if (!ipv6_addr_any(&fl6->saddr))
1029                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1030         else if (sk)
1031                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1032
1033         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1034 }
1035
1036 EXPORT_SYMBOL(ip6_route_output);
1037
1038 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1039 {
1040         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1041         struct dst_entry *new = NULL;
1042
1043         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1044         if (rt) {
1045                 new = &rt->dst;
1046
1047                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1048                 rt6_init_peer(rt, net->ipv6.peers);
1049
1050                 new->__use = 1;
1051                 new->input = dst_discard;
1052                 new->output = dst_discard;
1053
1054                 if (dst_metrics_read_only(&ort->dst))
1055                         new->_metrics = ort->dst._metrics;
1056                 else
1057                         dst_copy_metrics(new, &ort->dst);
1058                 rt->rt6i_idev = ort->rt6i_idev;
1059                 if (rt->rt6i_idev)
1060                         in6_dev_hold(rt->rt6i_idev);
1061
1062                 rt->rt6i_gateway = ort->rt6i_gateway;
1063                 rt->rt6i_flags = ort->rt6i_flags;
1064                 rt6_clean_expires(rt);
1065                 rt->rt6i_metric = 0;
1066
1067                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1068 #ifdef CONFIG_IPV6_SUBTREES
1069                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1070 #endif
1071
1072                 dst_free(new);
1073         }
1074
1075         dst_release(dst_orig);
1076         return new ? new : ERR_PTR(-ENOMEM);
1077 }
1078
1079 /*
1080  *      Destination cache support functions
1081  */
1082
1083 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1084 {
1085         struct rt6_info *rt;
1086
1087         rt = (struct rt6_info *) dst;
1088
1089         /* All IPV6 dsts are created with ->obsolete set to the value
1090          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1091          * into this function always.
1092          */
1093         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1094                 return NULL;
1095
1096         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1097                 return dst;
1098
1099         return NULL;
1100 }
1101
1102 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1103 {
1104         struct rt6_info *rt = (struct rt6_info *) dst;
1105
1106         if (rt) {
1107                 if (rt->rt6i_flags & RTF_CACHE) {
1108                         if (rt6_check_expired(rt)) {
1109                                 ip6_del_rt(rt);
1110                                 dst = NULL;
1111                         }
1112                 } else {
1113                         dst_release(dst);
1114                         dst = NULL;
1115                 }
1116         }
1117         return dst;
1118 }
1119
1120 static void ip6_link_failure(struct sk_buff *skb)
1121 {
1122         struct rt6_info *rt;
1123
1124         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1125
1126         rt = (struct rt6_info *) skb_dst(skb);
1127         if (rt) {
1128                 if (rt->rt6i_flags & RTF_CACHE)
1129                         rt6_update_expires(rt, 0);
1130                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1131                         rt->rt6i_node->fn_sernum = -1;
1132         }
1133 }
1134
1135 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1136                                struct sk_buff *skb, u32 mtu)
1137 {
1138         struct rt6_info *rt6 = (struct rt6_info*)dst;
1139
1140         dst_confirm(dst);
1141         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1142                 struct net *net = dev_net(dst->dev);
1143
1144                 rt6->rt6i_flags |= RTF_MODIFIED;
1145                 if (mtu < IPV6_MIN_MTU) {
1146                         u32 features = dst_metric(dst, RTAX_FEATURES);
1147                         mtu = IPV6_MIN_MTU;
1148                         features |= RTAX_FEATURE_ALLFRAG;
1149                         dst_metric_set(dst, RTAX_FEATURES, features);
1150                 }
1151                 dst_metric_set(dst, RTAX_MTU, mtu);
1152                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1153         }
1154 }
1155
1156 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1157                      int oif, u32 mark)
1158 {
1159         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1160         struct dst_entry *dst;
1161         struct flowi6 fl6;
1162
1163         memset(&fl6, 0, sizeof(fl6));
1164         fl6.flowi6_oif = oif;
1165         fl6.flowi6_mark = mark;
1166         fl6.flowi6_flags = 0;
1167         fl6.daddr = iph->daddr;
1168         fl6.saddr = iph->saddr;
1169         fl6.flowlabel = ip6_flowinfo(iph);
1170
1171         dst = ip6_route_output(net, NULL, &fl6);
1172         if (!dst->error)
1173                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1174         dst_release(dst);
1175 }
1176 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1177
1178 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1179 {
1180         ip6_update_pmtu(skb, sock_net(sk), mtu,
1181                         sk->sk_bound_dev_if, sk->sk_mark);
1182 }
1183 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1184
1185 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1186 {
1187         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1188         struct dst_entry *dst;
1189         struct flowi6 fl6;
1190
1191         memset(&fl6, 0, sizeof(fl6));
1192         fl6.flowi6_oif = oif;
1193         fl6.flowi6_mark = mark;
1194         fl6.flowi6_flags = 0;
1195         fl6.daddr = iph->daddr;
1196         fl6.saddr = iph->saddr;
1197         fl6.flowlabel = ip6_flowinfo(iph);
1198
1199         dst = ip6_route_output(net, NULL, &fl6);
1200         if (!dst->error)
1201                 rt6_do_redirect(dst, NULL, skb);
1202         dst_release(dst);
1203 }
1204 EXPORT_SYMBOL_GPL(ip6_redirect);
1205
1206 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1207 {
1208         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1209 }
1210 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1211
1212 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1213 {
1214         struct net_device *dev = dst->dev;
1215         unsigned int mtu = dst_mtu(dst);
1216         struct net *net = dev_net(dev);
1217
1218         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1219
1220         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1221                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1222
1223         /*
1224          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1225          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1226          * IPV6_MAXPLEN is also valid and means: "any MSS,
1227          * rely only on pmtu discovery"
1228          */
1229         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1230                 mtu = IPV6_MAXPLEN;
1231         return mtu;
1232 }
1233
1234 static unsigned int ip6_mtu(const struct dst_entry *dst)
1235 {
1236         struct inet6_dev *idev;
1237         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1238
1239         if (mtu)
1240                 return mtu;
1241
1242         mtu = IPV6_MIN_MTU;
1243
1244         rcu_read_lock();
1245         idev = __in6_dev_get(dst->dev);
1246         if (idev)
1247                 mtu = idev->cnf.mtu6;
1248         rcu_read_unlock();
1249
1250         return mtu;
1251 }
1252
/*
 * Head of a singly linked list, chained through dst->next, of the dst
 * entries created by icmp6_dst_alloc().  Entries are unlinked and freed
 * by icmp6_dst_gc() and icmp6_clean_all().
 */
1253 static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with spin_lock_bh(). */
1254 static DEFINE_SPINLOCK(icmp6_dst_lock);
1255
1256 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1257                                   struct neighbour *neigh,
1258                                   struct flowi6 *fl6)
1259 {
1260         struct dst_entry *dst;
1261         struct rt6_info *rt;
1262         struct inet6_dev *idev = in6_dev_get(dev);
1263         struct net *net = dev_net(dev);
1264
1265         if (unlikely(!idev))
1266                 return ERR_PTR(-ENODEV);
1267
1268         rt = ip6_dst_alloc(net, dev, 0, NULL);
1269         if (unlikely(!rt)) {
1270                 in6_dev_put(idev);
1271                 dst = ERR_PTR(-ENOMEM);
1272                 goto out;
1273         }
1274
1275         if (neigh)
1276                 neigh_hold(neigh);
1277         else {
1278                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1279                 if (IS_ERR(neigh)) {
1280                         in6_dev_put(idev);
1281                         dst_free(&rt->dst);
1282                         return ERR_CAST(neigh);
1283                 }
1284         }
1285
1286         rt->dst.flags |= DST_HOST;
1287         rt->dst.output  = ip6_output;
1288         rt->n = neigh;
1289         atomic_set(&rt->dst.__refcnt, 1);
1290         rt->rt6i_dst.addr = fl6->daddr;
1291         rt->rt6i_dst.plen = 128;
1292         rt->rt6i_idev     = idev;
1293         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1294
1295         spin_lock_bh(&icmp6_dst_lock);
1296         rt->dst.next = icmp6_dst_gc_list;
1297         icmp6_dst_gc_list = &rt->dst;
1298         spin_unlock_bh(&icmp6_dst_lock);
1299
1300         fib6_force_start_gc(net);
1301
1302         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1303
1304 out:
1305         return dst;
1306 }
1307
1308 int icmp6_dst_gc(void)
1309 {
1310         struct dst_entry *dst, **pprev;
1311         int more = 0;
1312
1313         spin_lock_bh(&icmp6_dst_lock);
1314         pprev = &icmp6_dst_gc_list;
1315
1316         while ((dst = *pprev) != NULL) {
1317                 if (!atomic_read(&dst->__refcnt)) {
1318                         *pprev = dst->next;
1319                         dst_free(dst);
1320                 } else {
1321                         pprev = &dst->next;
1322                         ++more;
1323                 }
1324         }
1325
1326         spin_unlock_bh(&icmp6_dst_lock);
1327
1328         return more;
1329 }
1330
1331 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1332                             void *arg)
1333 {
1334         struct dst_entry *dst, **pprev;
1335
1336         spin_lock_bh(&icmp6_dst_lock);
1337         pprev = &icmp6_dst_gc_list;
1338         while ((dst = *pprev) != NULL) {
1339                 struct rt6_info *rt = (struct rt6_info *) dst;
1340                 if (func(rt, arg)) {
1341                         *pprev = dst->next;
1342                         dst_free(dst);
1343                 } else {
1344                         pprev = &dst->next;
1345                 }
1346         }
1347         spin_unlock_bh(&icmp6_dst_lock);
1348 }
1349
1350 static int ip6_dst_gc(struct dst_ops *ops)
1351 {
1352         unsigned long now = jiffies;
1353         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1354         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1355         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1356         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1357         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1358         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1359         int entries;
1360
1361         entries = dst_entries_get_fast(ops);
1362         if (time_after(rt_last_gc + rt_min_interval, now) &&
1363             entries <= rt_max_size)
1364                 goto out;
1365
1366         net->ipv6.ip6_rt_gc_expire++;
1367         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1368         net->ipv6.ip6_rt_last_gc = now;
1369         entries = dst_entries_get_slow(ops);
1370         if (entries < ops->gc_thresh)
1371                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1372 out:
1373         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1374         return entries > rt_max_size;
1375 }
1376
1377 int ip6_dst_hoplimit(struct dst_entry *dst)
1378 {
1379         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1380         if (hoplimit == 0) {
1381                 struct net_device *dev = dst->dev;
1382                 struct inet6_dev *idev;
1383
1384                 rcu_read_lock();
1385                 idev = __in6_dev_get(dev);
1386                 if (idev)
1387                         hoplimit = idev->cnf.hop_limit;
1388                 else
1389                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1390                 rcu_read_unlock();
1391         }
1392         return hoplimit;
1393 }
1394 EXPORT_SYMBOL(ip6_dst_hoplimit);
1395
1396 /*
1397  *
1398  */
1399
/*
 * Create a route from the configuration in @cfg and insert it into the
 * FIB table named by cfg->fc_table.
 *
 * Note that @cfg is modified: fc_metric and fc_protocol get defaults
 * when unset, and fc_nlinfo.nl_net is overwritten before insertion.
 *
 * Returns the result of __ip6_ins_rt() once the route has been built, or
 * a negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH,
 * or the rt6_bind_neighbour() error) if building it failed.  On those
 * failure paths the dev/idev references taken here are dropped and the
 * partially built route is freed.
 */
1400 int ip6_route_add(struct fib6_config *cfg)
1401 {
1402         int err;
1403         struct net *net = cfg->fc_nlinfo.nl_net;
1404         struct rt6_info *rt = NULL;
1405         struct net_device *dev = NULL;
1406         struct inet6_dev *idev = NULL;
1407         struct fib6_table *table;
1408         int addr_type;
1409
1410         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1411                 return -EINVAL;
1412 #ifndef CONFIG_IPV6_SUBTREES
1413         if (cfg->fc_src_len)
1414                 return -EINVAL;
1415 #endif
             /* An explicit interface must exist and have an inet6_dev. */
1416         if (cfg->fc_ifindex) {
1417                 err = -ENODEV;
1418                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1419                 if (!dev)
1420                         goto out;
1421                 idev = in6_dev_get(dev);
1422                 if (!idev)
1423                         goto out;
1424         }
1425
1426         if (cfg->fc_metric == 0)
1427                 cfg->fc_metric = IP6_RT_PRIO_USER;
1428
             /*
              * A netlink request without NLM_F_CREATE first tries an existing
              * table; if there is none, a warning is logged and the table is
              * created anyway.
              */
1429         err = -ENOBUFS;
1430         if (cfg->fc_nlinfo.nlh &&
1431             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1432                 table = fib6_get_table(net, cfg->fc_table);
1433                 if (!table) {
1434                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1435                         table = fib6_new_table(net, cfg->fc_table);
1436                 }
1437         } else {
1438                 table = fib6_new_table(net, cfg->fc_table);
1439         }
1440
1441         if (!table)
1442                 goto out;
1443
1444         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1445
1446         if (!rt) {
1447                 err = -ENOMEM;
1448                 goto out;
1449         }
1450
1451         if (cfg->fc_flags & RTF_EXPIRES)
1452                 rt6_set_expires(rt, jiffies +
1453                                 clock_t_to_jiffies(cfg->fc_expires));
1454         else
1455                 rt6_clean_expires(rt);
1456
1457         if (cfg->fc_protocol == RTPROT_UNSPEC)
1458                 cfg->fc_protocol = RTPROT_BOOT;
1459         rt->rt6i_protocol = cfg->fc_protocol;
1460
1461         addr_type = ipv6_addr_type(&cfg->fc_dst);
1462
             /* Pick the input handler from the destination type and flags. */
1463         if (addr_type & IPV6_ADDR_MULTICAST)
1464                 rt->dst.input = ip6_mc_input;
1465         else if (cfg->fc_flags & RTF_LOCAL)
1466                 rt->dst.input = ip6_input;
1467         else
1468                 rt->dst.input = ip6_forward;
1469
1470         rt->dst.output = ip6_output;
1471
1472         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1473         rt->rt6i_dst.plen = cfg->fc_dst_len;
1474         if (rt->rt6i_dst.plen == 128)
1475                rt->dst.flags |= DST_HOST;
1476
             /* Non-host routes that carry metrics get their own zeroed metrics array. */
1477         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1478                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1479                 if (!metrics) {
1480                         err = -ENOMEM;
1481                         goto out;
1482                 }
1483                 dst_init_metrics(&rt->dst, metrics, 0);
1484         }
1485 #ifdef CONFIG_IPV6_SUBTREES
1486         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1487         rt->rt6i_src.plen = cfg->fc_src_len;
1488 #endif
1489
1490         rt->rt6i_metric = cfg->fc_metric;
1491
1492         /* We cannot add true routes via loopback here,
1493            they would result in kernel looping; promote them to reject routes
1494          */
1495         if ((cfg->fc_flags & RTF_REJECT) ||
1496             (dev && (dev->flags & IFF_LOOPBACK) &&
1497              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1498              !(cfg->fc_flags & RTF_LOCAL))) {
1499                 /* hold loopback dev/idev if we haven't done so. */
1500                 if (dev != net->loopback_dev) {
1501                         if (dev) {
1502                                 dev_put(dev);
1503                                 in6_dev_put(idev);
1504                         }
1505                         dev = net->loopback_dev;
1506                         dev_hold(dev);
1507                         idev = in6_dev_get(dev);
1508                         if (!idev) {
1509                                 err = -ENODEV;
1510                                 goto out;
1511                         }
1512                 }
1513                 rt->dst.output = ip6_pkt_discard_out;
1514                 rt->dst.input = ip6_pkt_discard;
1515                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The error stored on the dst depends on the requested route type. */
1516                 switch (cfg->fc_type) {
1517                 case RTN_BLACKHOLE:
1518                         rt->dst.error = -EINVAL;
1519                         break;
1520                 case RTN_PROHIBIT:
1521                         rt->dst.error = -EACCES;
1522                         break;
1523                 case RTN_THROW:
1524                         rt->dst.error = -EAGAIN;
1525                         break;
1526                 default:
1527                         rt->dst.error = -ENETUNREACH;
1528                         break;
1529                 }
1530                 goto install_route;
1531         }
1532
1533         if (cfg->fc_flags & RTF_GATEWAY) {
1534                 const struct in6_addr *gw_addr;
1535                 int gwa_type;
1536
1537                 gw_addr = &cfg->fc_gateway;
1538                 rt->rt6i_gateway = *gw_addr;
1539                 gwa_type = ipv6_addr_type(gw_addr);
1540
1541                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1542                         struct rt6_info *grt;
1543
1544                         /* IPv6 strictly inhibits using not link-local
1545                            addresses as nexthop address.
1546                            Otherwise, router will not able to send redirects.
1547                            It is very good, but in some (rare!) circumstances
1548                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1549                            some exceptions. --ANK
1550                          */
1551                         err = -EINVAL;
1552                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1553                                 goto out;
1554
1555                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1556
1557                         err = -EHOSTUNREACH;
1558                         if (!grt)
1559                                 goto out;
                             /*
                              * The route to the gateway must use the requested
                              * device; if no device was given, adopt the gateway
                              * route's dev/idev and take references on them.
                              */
1560                         if (dev) {
1561                                 if (dev != grt->dst.dev) {
1562                                         ip6_rt_put(grt);
1563                                         goto out;
1564                                 }
1565                         } else {
1566                                 dev = grt->dst.dev;
1567                                 idev = grt->rt6i_idev;
1568                                 dev_hold(dev);
1569                                 in6_dev_hold(grt->rt6i_idev);
1570                         }
                             /* err stays -EHOSTUNREACH if the gateway route is itself via a gateway. */
1571                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1572                                 err = 0;
1573                         ip6_rt_put(grt);
1574
1575                         if (err)
1576                                 goto out;
1577                 }
1578                 err = -EINVAL;
1579                 if (!dev || (dev->flags & IFF_LOOPBACK))
1580                         goto out;
1581         }
1582
1583         err = -ENODEV;
1584         if (!dev)
1585                 goto out;
1586
             /* A preferred source address is accepted only if ipv6_chk_addr() passes for dev. */
1587         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1588                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1589                         err = -EINVAL;
1590                         goto out;
1591                 }
1592                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1593                 rt->rt6i_prefsrc.plen = 128;
1594         } else
1595                 rt->rt6i_prefsrc.plen = 0;
1596
1597         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1598                 err = rt6_bind_neighbour(rt, dev);
1599                 if (err)
1600                         goto out;
1601         }
1602
1603         rt->rt6i_flags = cfg->fc_flags;
1604
1605 install_route:
             /* Copy metrics from the netlink attributes; type 0 is skipped, types above RTAX_MAX are rejected. */
1606         if (cfg->fc_mx) {
1607                 struct nlattr *nla;
1608                 int remaining;
1609
1610                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1611                         int type = nla_type(nla);
1612
1613                         if (type) {
1614                                 if (type > RTAX_MAX) {
1615                                         err = -EINVAL;
1616                                         goto out;
1617                                 }
1618
1619                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1620                         }
1621                 }
1622         }
1623
             /* The dev/idev references held so far are stored in the route here. */
1624         rt->dst.dev = dev;
1625         rt->rt6i_idev = idev;
1626         rt->rt6i_table = table;
1627
1628         cfg->fc_nlinfo.nl_net = dev_net(dev);
1629
             /* The insert result is returned directly; the out: cleanup is not run on this path. */
1630         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1631
1632 out:
1633         if (dev)
1634                 dev_put(dev);
1635         if (idev)
1636                 in6_dev_put(idev);
1637         if (rt)
1638                 dst_free(&rt->dst);
1639         return err;
1640 }
1641
1642 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1643 {
1644         int err;
1645         struct fib6_table *table;
1646         struct net *net = dev_net(rt->dst.dev);
1647
1648         if (rt == net->ipv6.ip6_null_entry) {
1649                 err = -ENOENT;
1650                 goto out;
1651         }
1652
1653         table = rt->rt6i_table;
1654         write_lock_bh(&table->tb6_lock);
1655         err = fib6_del(rt, info);
1656         write_unlock_bh(&table->tb6_lock);
1657
1658 out:
1659         ip6_rt_put(rt);
1660         return err;
1661 }
1662
1663 int ip6_del_rt(struct rt6_info *rt)
1664 {
1665         struct nl_info info = {
1666                 .nl_net = dev_net(rt->dst.dev),
1667         };
1668         return __ip6_del_rt(rt, &info);
1669 }
1670
1671 static int ip6_route_del(struct fib6_config *cfg)
1672 {
1673         struct fib6_table *table;
1674         struct fib6_node *fn;
1675         struct rt6_info *rt;
1676         int err = -ESRCH;
1677
1678         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1679         if (!table)
1680                 return err;
1681
1682         read_lock_bh(&table->tb6_lock);
1683
1684         fn = fib6_locate(&table->tb6_root,
1685                          &cfg->fc_dst, cfg->fc_dst_len,
1686                          &cfg->fc_src, cfg->fc_src_len);
1687
1688         if (fn) {
1689                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1690                         if (cfg->fc_ifindex &&
1691                             (!rt->dst.dev ||
1692                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1693                                 continue;
1694                         if (cfg->fc_flags & RTF_GATEWAY &&
1695                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1696                                 continue;
1697                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1698                                 continue;
1699                         dst_hold(&rt->dst);
1700                         read_unlock_bh(&table->tb6_lock);
1701
1702                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1703                 }
1704         }
1705         read_unlock_bh(&table->tb6_lock);
1706
1707         return err;
1708 }
1709
1710 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1711 {
1712         struct net *net = dev_net(skb->dev);
1713         struct netevent_redirect netevent;
1714         struct rt6_info *rt, *nrt = NULL;
1715         struct ndisc_options ndopts;
1716         struct neighbour *old_neigh;
1717         struct inet6_dev *in6_dev;
1718         struct neighbour *neigh;
1719         struct rd_msg *msg;
1720         int optlen, on_link;
1721         u8 *lladdr;
1722
1723         optlen = skb->tail - skb->transport_header;
1724         optlen -= sizeof(*msg);
1725
1726         if (optlen < 0) {
1727                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1728                 return;
1729         }
1730
1731         msg = (struct rd_msg *)icmp6_hdr(skb);
1732
1733         if (ipv6_addr_is_multicast(&msg->dest)) {
1734                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1735                 return;
1736         }
1737
1738         on_link = 0;
1739         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1740                 on_link = 1;
1741         } else if (ipv6_addr_type(&msg->target) !=
1742                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1743                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1744                 return;
1745         }
1746
1747         in6_dev = __in6_dev_get(skb->dev);
1748         if (!in6_dev)
1749                 return;
1750         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1751                 return;
1752
1753         /* RFC2461 8.1:
1754          *      The IP source address of the Redirect MUST be the same as the current
1755          *      first-hop router for the specified ICMP Destination Address.
1756          */
1757
1758         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1759                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1760                 return;
1761         }
1762
1763         lladdr = NULL;
1764         if (ndopts.nd_opts_tgt_lladdr) {
1765                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1766                                              skb->dev);
1767                 if (!lladdr) {
1768                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1769                         return;
1770                 }
1771         }
1772
1773         rt = (struct rt6_info *) dst;
1774         if (rt == net->ipv6.ip6_null_entry) {
1775                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1776                 return;
1777         }
1778
1779         /* Redirect received -> path was valid.
1780          * Look, redirects are sent only in response to data packets,
1781          * so that this nexthop apparently is reachable. --ANK
1782          */
1783         dst_confirm(&rt->dst);
1784
1785         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1786         if (!neigh)
1787                 return;
1788
1789         /* Duplicate redirect: silently ignore. */
1790         old_neigh = rt->n;
1791         if (neigh == old_neigh)
1792                 goto out;
1793
1794         /*
1795          *      We have finally decided to accept it.
1796          */
1797
1798         neigh_update(neigh, lladdr, NUD_STALE,
1799                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1800                      NEIGH_UPDATE_F_OVERRIDE|
1801                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1802                                      NEIGH_UPDATE_F_ISROUTER))
1803                      );
1804
1805         nrt = ip6_rt_copy(rt, &msg->dest);
1806         if (!nrt)
1807                 goto out;
1808
1809         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1810         if (on_link)
1811                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1812
1813         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1814         nrt->n = neigh_clone(neigh);
1815
1816         if (ip6_ins_rt(nrt))
1817                 goto out;
1818
1819         netevent.old = &rt->dst;
1820         netevent.new = &nrt->dst;
1821         netevent.daddr = &msg->dest;
1822         netevent.neigh = neigh;
1823         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1824
1825         if (rt->rt6i_flags & RTF_CACHE) {
1826                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1827                 ip6_del_rt(rt);
1828         }
1829
1830 out:
1831         neigh_release(neigh);
1832 }
1833
1834 /*
1835  *      Misc support functions
1836  */
1837
1838 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1839                                     const struct in6_addr *dest)
1840 {
1841         struct net *net = dev_net(ort->dst.dev);
1842         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1843                                             ort->rt6i_table);
1844
1845         if (rt) {
1846                 rt->dst.input = ort->dst.input;
1847                 rt->dst.output = ort->dst.output;
1848                 rt->dst.flags |= DST_HOST;
1849
1850                 rt->rt6i_dst.addr = *dest;
1851                 rt->rt6i_dst.plen = 128;
1852                 dst_copy_metrics(&rt->dst, &ort->dst);
1853                 rt->dst.error = ort->dst.error;
1854                 rt->rt6i_idev = ort->rt6i_idev;
1855                 if (rt->rt6i_idev)
1856                         in6_dev_hold(rt->rt6i_idev);
1857                 rt->dst.lastuse = jiffies;
1858
1859                 rt->rt6i_gateway = ort->rt6i_gateway;
1860                 rt->rt6i_flags = ort->rt6i_flags;
1861                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1862                     (RTF_DEFAULT | RTF_ADDRCONF))
1863                         rt6_set_from(rt, ort);
1864                 else
1865                         rt6_clean_expires(rt);
1866                 rt->rt6i_metric = 0;
1867
1868 #ifdef CONFIG_IPV6_SUBTREES
1869                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1870 #endif
1871                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1872                 rt->rt6i_table = ort->rt6i_table;
1873         }
1874         return rt;
1875 }
1876
1877 #ifdef CONFIG_IPV6_ROUTE_INFO
1878 static struct rt6_info *rt6_get_route_info(struct net *net,
1879                                            const struct in6_addr *prefix, int prefixlen,
1880                                            const struct in6_addr *gwaddr, int ifindex)
1881 {
1882         struct fib6_node *fn;
1883         struct rt6_info *rt = NULL;
1884         struct fib6_table *table;
1885
1886         table = fib6_get_table(net, RT6_TABLE_INFO);
1887         if (!table)
1888                 return NULL;
1889
1890         read_lock_bh(&table->tb6_lock);
1891         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1892         if (!fn)
1893                 goto out;
1894
1895         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1896                 if (rt->dst.dev->ifindex != ifindex)
1897                         continue;
1898                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1899                         continue;
1900                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1901                         continue;
1902                 dst_hold(&rt->dst);
1903                 break;
1904         }
1905 out:
1906         read_unlock_bh(&table->tb6_lock);
1907         return rt;
1908 }
1909
1910 static struct rt6_info *rt6_add_route_info(struct net *net,
1911                                            const struct in6_addr *prefix, int prefixlen,
1912                                            const struct in6_addr *gwaddr, int ifindex,
1913                                            unsigned int pref)
1914 {
1915         struct fib6_config cfg = {
1916                 .fc_table       = RT6_TABLE_INFO,
1917                 .fc_metric      = IP6_RT_PRIO_USER,
1918                 .fc_ifindex     = ifindex,
1919                 .fc_dst_len     = prefixlen,
1920                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1921                                   RTF_UP | RTF_PREF(pref),
1922                 .fc_nlinfo.portid = 0,
1923                 .fc_nlinfo.nlh = NULL,
1924                 .fc_nlinfo.nl_net = net,
1925         };
1926
1927         cfg.fc_dst = *prefix;
1928         cfg.fc_gateway = *gwaddr;
1929
1930         /* We should treat it as a default route if prefix length is 0. */
1931         if (!prefixlen)
1932                 cfg.fc_flags |= RTF_DEFAULT;
1933
1934         ip6_route_add(&cfg);
1935
1936         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1937 }
1938 #endif
1939
1940 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1941 {
1942         struct rt6_info *rt;
1943         struct fib6_table *table;
1944
1945         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1946         if (!table)
1947                 return NULL;
1948
1949         read_lock_bh(&table->tb6_lock);
1950         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1951                 if (dev == rt->dst.dev &&
1952                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1953                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1954                         break;
1955         }
1956         if (rt)
1957                 dst_hold(&rt->dst);
1958         read_unlock_bh(&table->tb6_lock);
1959         return rt;
1960 }
1961
1962 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1963                                      struct net_device *dev,
1964                                      unsigned int pref)
1965 {
1966         struct fib6_config cfg = {
1967                 .fc_table       = RT6_TABLE_DFLT,
1968                 .fc_metric      = IP6_RT_PRIO_USER,
1969                 .fc_ifindex     = dev->ifindex,
1970                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1971                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1972                 .fc_nlinfo.portid = 0,
1973                 .fc_nlinfo.nlh = NULL,
1974                 .fc_nlinfo.nl_net = dev_net(dev),
1975         };
1976
1977         cfg.fc_gateway = *gwaddr;
1978
1979         ip6_route_add(&cfg);
1980
1981         return rt6_get_dflt_router(gwaddr, dev);
1982 }
1983
1984 void rt6_purge_dflt_routers(struct net *net)
1985 {
1986         struct rt6_info *rt;
1987         struct fib6_table *table;
1988
1989         /* NOTE: Keep consistent with rt6_get_dflt_router */
1990         table = fib6_get_table(net, RT6_TABLE_DFLT);
1991         if (!table)
1992                 return;
1993
1994 restart:
1995         read_lock_bh(&table->tb6_lock);
1996         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1997                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1998                         dst_hold(&rt->dst);
1999                         read_unlock_bh(&table->tb6_lock);
2000                         ip6_del_rt(rt);
2001                         goto restart;
2002                 }
2003         }
2004         read_unlock_bh(&table->tb6_lock);
2005 }
2006
2007 static void rtmsg_to_fib6_config(struct net *net,
2008                                  struct in6_rtmsg *rtmsg,
2009                                  struct fib6_config *cfg)
2010 {
2011         memset(cfg, 0, sizeof(*cfg));
2012
2013         cfg->fc_table = RT6_TABLE_MAIN;
2014         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2015         cfg->fc_metric = rtmsg->rtmsg_metric;
2016         cfg->fc_expires = rtmsg->rtmsg_info;
2017         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2018         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2019         cfg->fc_flags = rtmsg->rtmsg_flags;
2020
2021         cfg->fc_nlinfo.nl_net = net;
2022
2023         cfg->fc_dst = rtmsg->rtmsg_dst;
2024         cfg->fc_src = rtmsg->rtmsg_src;
2025         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2026 }
2027
2028 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2029 {
2030         struct fib6_config cfg;
2031         struct in6_rtmsg rtmsg;
2032         int err;
2033
2034         switch(cmd) {
2035         case SIOCADDRT:         /* Add a route */
2036         case SIOCDELRT:         /* Delete a route */
2037                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2038                         return -EPERM;
2039                 err = copy_from_user(&rtmsg, arg,
2040                                      sizeof(struct in6_rtmsg));
2041                 if (err)
2042                         return -EFAULT;
2043
2044                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2045
2046                 rtnl_lock();
2047                 switch (cmd) {
2048                 case SIOCADDRT:
2049                         err = ip6_route_add(&cfg);
2050                         break;
2051                 case SIOCDELRT:
2052                         err = ip6_route_del(&cfg);
2053                         break;
2054                 default:
2055                         err = -EINVAL;
2056                 }
2057                 rtnl_unlock();
2058
2059                 return err;
2060         }
2061
2062         return -EINVAL;
2063 }
2064
2065 /*
2066  *      Drop the packet on the floor
2067  */
2068
2069 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2070 {
2071         int type;
2072         struct dst_entry *dst = skb_dst(skb);
2073         switch (ipstats_mib_noroutes) {
2074         case IPSTATS_MIB_INNOROUTES:
2075                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2076                 if (type == IPV6_ADDR_ANY) {
2077                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2078                                       IPSTATS_MIB_INADDRERRORS);
2079                         break;
2080                 }
2081                 /* FALLTHROUGH */
2082         case IPSTATS_MIB_OUTNOROUTES:
2083                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2084                               ipstats_mib_noroutes);
2085                 break;
2086         }
2087         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2088         kfree_skb(skb);
2089         return 0;
2090 }
2091
2092 static int ip6_pkt_discard(struct sk_buff *skb)
2093 {
2094         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2095 }
2096
2097 static int ip6_pkt_discard_out(struct sk_buff *skb)
2098 {
2099         skb->dev = skb_dst(skb)->dev;
2100         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2101 }
2102
2103 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2104
2105 static int ip6_pkt_prohibit(struct sk_buff *skb)
2106 {
2107         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2108 }
2109
2110 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2111 {
2112         skb->dev = skb_dst(skb)->dev;
2113         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2114 }
2115
2116 #endif
2117
2118 /*
2119  *      Allocate a dst for local (unicast / anycast) address.
2120  */
2121
2122 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2123                                     const struct in6_addr *addr,
2124                                     bool anycast)
2125 {
2126         struct net *net = dev_net(idev->dev);
2127         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2128         int err;
2129
2130         if (!rt) {
2131                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2132                 return ERR_PTR(-ENOMEM);
2133         }
2134
2135         in6_dev_hold(idev);
2136
2137         rt->dst.flags |= DST_HOST;
2138         rt->dst.input = ip6_input;
2139         rt->dst.output = ip6_output;
2140         rt->rt6i_idev = idev;
2141
2142         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2143         if (anycast)
2144                 rt->rt6i_flags |= RTF_ANYCAST;
2145         else
2146                 rt->rt6i_flags |= RTF_LOCAL;
2147         err = rt6_bind_neighbour(rt, rt->dst.dev);
2148         if (err) {
2149                 dst_free(&rt->dst);
2150                 return ERR_PTR(err);
2151         }
2152
2153         rt->rt6i_dst.addr = *addr;
2154         rt->rt6i_dst.plen = 128;
2155         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2156
2157         atomic_set(&rt->dst.__refcnt, 1);
2158
2159         return rt;
2160 }
2161
2162 int ip6_route_get_saddr(struct net *net,
2163                         struct rt6_info *rt,
2164                         const struct in6_addr *daddr,
2165                         unsigned int prefs,
2166                         struct in6_addr *saddr)
2167 {
2168         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2169         int err = 0;
2170         if (rt->rt6i_prefsrc.plen)
2171                 *saddr = rt->rt6i_prefsrc.addr;
2172         else
2173                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2174                                          daddr, prefs, saddr);
2175         return err;
2176 }
2177
2178 /* remove deleted ip from prefsrc entries */
2179 struct arg_dev_net_ip {
2180         struct net_device *dev;
2181         struct net *net;
2182         struct in6_addr *addr;
2183 };
2184
2185 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2186 {
2187         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2188         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2189         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2190
2191         if (((void *)rt->dst.dev == dev || !dev) &&
2192             rt != net->ipv6.ip6_null_entry &&
2193             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2194                 /* remove prefsrc entry */
2195                 rt->rt6i_prefsrc.plen = 0;
2196         }
2197         return 0;
2198 }
2199
2200 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2201 {
2202         struct net *net = dev_net(ifp->idev->dev);
2203         struct arg_dev_net_ip adni = {
2204                 .dev = ifp->idev->dev,
2205                 .net = net,
2206                 .addr = &ifp->addr,
2207         };
2208         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2209 }
2210
2211 struct arg_dev_net {
2212         struct net_device *dev;
2213         struct net *net;
2214 };
2215
2216 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2217 {
2218         const struct arg_dev_net *adn = arg;
2219         const struct net_device *dev = adn->dev;
2220
2221         if ((rt->dst.dev == dev || !dev) &&
2222             rt != adn->net->ipv6.ip6_null_entry)
2223                 return -1;
2224
2225         return 0;
2226 }
2227
2228 void rt6_ifdown(struct net *net, struct net_device *dev)
2229 {
2230         struct arg_dev_net adn = {
2231                 .dev = dev,
2232                 .net = net,
2233         };
2234
2235         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2236         icmp6_clean_all(fib6_ifdown, &adn);
2237 }
2238
2239 struct rt6_mtu_change_arg {
2240         struct net_device *dev;
2241         unsigned int mtu;
2242 };
2243
2244 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2245 {
2246         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2247         struct inet6_dev *idev;
2248
2249         /* In IPv6 pmtu discovery is not optional,
2250            so that RTAX_MTU lock cannot disable it.
2251            We still use this lock to block changes
2252            caused by addrconf/ndisc.
2253         */
2254
2255         idev = __in6_dev_get(arg->dev);
2256         if (!idev)
2257                 return 0;
2258
2259         /* For administrative MTU increase, there is no way to discover
2260            IPv6 PMTU increase, so PMTU increase should be updated here.
2261            Since RFC 1981 doesn't include administrative MTU increase
2262            update PMTU increase is a MUST. (i.e. jumbo frame)
2263          */
2264         /*
2265            If new MTU is less than route PMTU, this new MTU will be the
2266            lowest MTU in the path, update the route PMTU to reflect PMTU
2267            decreases; if new MTU is greater than route PMTU, and the
2268            old MTU is the lowest MTU in the path, update the route PMTU
2269            to reflect the increase. In this case if the other nodes' MTU
2270            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2271            PMTU discouvery.
2272          */
2273         if (rt->dst.dev == arg->dev &&
2274             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2275             (dst_mtu(&rt->dst) >= arg->mtu ||
2276              (dst_mtu(&rt->dst) < arg->mtu &&
2277               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2278                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2279         }
2280         return 0;
2281 }
2282
2283 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2284 {
2285         struct rt6_mtu_change_arg arg = {
2286                 .dev = dev,
2287                 .mtu = mtu,
2288         };
2289
2290         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2291 }
2292
2293 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2294         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2295         [RTA_OIF]               = { .type = NLA_U32 },
2296         [RTA_IIF]               = { .type = NLA_U32 },
2297         [RTA_PRIORITY]          = { .type = NLA_U32 },
2298         [RTA_METRICS]           = { .type = NLA_NESTED },
2299         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2300 };
2301
2302 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2303                               struct fib6_config *cfg)
2304 {
2305         struct rtmsg *rtm;
2306         struct nlattr *tb[RTA_MAX+1];
2307         int err;
2308
2309         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2310         if (err < 0)
2311                 goto errout;
2312
2313         err = -EINVAL;
2314         rtm = nlmsg_data(nlh);
2315         memset(cfg, 0, sizeof(*cfg));
2316
2317         cfg->fc_table = rtm->rtm_table;
2318         cfg->fc_dst_len = rtm->rtm_dst_len;
2319         cfg->fc_src_len = rtm->rtm_src_len;
2320         cfg->fc_flags = RTF_UP;
2321         cfg->fc_protocol = rtm->rtm_protocol;
2322         cfg->fc_type = rtm->rtm_type;
2323
2324         if (rtm->rtm_type == RTN_UNREACHABLE ||
2325             rtm->rtm_type == RTN_BLACKHOLE ||
2326             rtm->rtm_type == RTN_PROHIBIT ||
2327             rtm->rtm_type == RTN_THROW)
2328                 cfg->fc_flags |= RTF_REJECT;
2329
2330         if (rtm->rtm_type == RTN_LOCAL)
2331                 cfg->fc_flags |= RTF_LOCAL;
2332
2333         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2334         cfg->fc_nlinfo.nlh = nlh;
2335         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2336
2337         if (tb[RTA_GATEWAY]) {
2338                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2339                 cfg->fc_flags |= RTF_GATEWAY;
2340         }
2341
2342         if (tb[RTA_DST]) {
2343                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2344
2345                 if (nla_len(tb[RTA_DST]) < plen)
2346                         goto errout;
2347
2348                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2349         }
2350
2351         if (tb[RTA_SRC]) {
2352                 int plen = (rtm->rtm_src_len + 7) >> 3;
2353
2354                 if (nla_len(tb[RTA_SRC]) < plen)
2355                         goto errout;
2356
2357                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2358         }
2359
2360         if (tb[RTA_PREFSRC])
2361                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2362
2363         if (tb[RTA_OIF])
2364                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2365
2366         if (tb[RTA_PRIORITY])
2367                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2368
2369         if (tb[RTA_METRICS]) {
2370                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2371                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2372         }
2373
2374         if (tb[RTA_TABLE])
2375                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2376
2377         if (tb[RTA_MULTIPATH]) {
2378                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2379                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2380         }
2381
2382         err = 0;
2383 errout:
2384         return err;
2385 }
2386
2387 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2388 {
2389         struct fib6_config r_cfg;
2390         struct rtnexthop *rtnh;
2391         int remaining;
2392         int attrlen;
2393         int err = 0, last_err = 0;
2394
2395 beginning:
2396         rtnh = (struct rtnexthop *)cfg->fc_mp;
2397         remaining = cfg->fc_mp_len;
2398
2399         /* Parse a Multipath Entry */
2400         while (rtnh_ok(rtnh, remaining)) {
2401                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2402                 if (rtnh->rtnh_ifindex)
2403                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2404
2405                 attrlen = rtnh_attrlen(rtnh);
2406                 if (attrlen > 0) {
2407                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2408
2409                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2410                         if (nla) {
2411                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2412                                 r_cfg.fc_flags |= RTF_GATEWAY;
2413                         }
2414                 }
2415                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2416                 if (err) {
2417                         last_err = err;
2418                         /* If we are trying to remove a route, do not stop the
2419                          * loop when ip6_route_del() fails (because next hop is
2420                          * already gone), we should try to remove all next hops.
2421                          */
2422                         if (add) {
2423                                 /* If add fails, we should try to delete all
2424                                  * next hops that have been already added.
2425                                  */
2426                                 add = 0;
2427                                 goto beginning;
2428                         }
2429                 }
2430                 /* Because each route is added like a single route we remove
2431                  * this flag after the first nexthop (if there is a collision,
2432                  * we have already fail to add the first nexthop:
2433                  * fib6_add_rt2node() has reject it).
2434                  */
2435                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2436                 rtnh = rtnh_next(rtnh, &remaining);
2437         }
2438
2439         return last_err;
2440 }
2441
2442 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2443 {
2444         struct fib6_config cfg;
2445         int err;
2446
2447         err = rtm_to_fib6_config(skb, nlh, &cfg);
2448         if (err < 0)
2449                 return err;
2450
2451         if (cfg.fc_mp)
2452                 return ip6_route_multipath(&cfg, 0);
2453         else
2454                 return ip6_route_del(&cfg);
2455 }
2456
2457 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2458 {
2459         struct fib6_config cfg;
2460         int err;
2461
2462         err = rtm_to_fib6_config(skb, nlh, &cfg);
2463         if (err < 0)
2464                 return err;
2465
2466         if (cfg.fc_mp)
2467                 return ip6_route_multipath(&cfg, 1);
2468         else
2469                 return ip6_route_add(&cfg);
2470 }
2471
2472 static inline size_t rt6_nlmsg_size(void)
2473 {
2474         return NLMSG_ALIGN(sizeof(struct rtmsg))
2475                + nla_total_size(16) /* RTA_SRC */
2476                + nla_total_size(16) /* RTA_DST */
2477                + nla_total_size(16) /* RTA_GATEWAY */
2478                + nla_total_size(16) /* RTA_PREFSRC */
2479                + nla_total_size(4) /* RTA_TABLE */
2480                + nla_total_size(4) /* RTA_IIF */
2481                + nla_total_size(4) /* RTA_OIF */
2482                + nla_total_size(4) /* RTA_PRIORITY */
2483                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2484                + nla_total_size(sizeof(struct rta_cacheinfo));
2485 }
2486
2487 static int rt6_fill_node(struct net *net,
2488                          struct sk_buff *skb, struct rt6_info *rt,
2489                          struct in6_addr *dst, struct in6_addr *src,
2490                          int iif, int type, u32 portid, u32 seq,
2491                          int prefix, int nowait, unsigned int flags)
2492 {
2493         struct rtmsg *rtm;
2494         struct nlmsghdr *nlh;
2495         long expires;
2496         u32 table;
2497
2498         if (prefix) {   /* user wants prefix routes only */
2499                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2500                         /* success since this is not a prefix route */
2501                         return 1;
2502                 }
2503         }
2504
2505         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2506         if (!nlh)
2507                 return -EMSGSIZE;
2508
2509         rtm = nlmsg_data(nlh);
2510         rtm->rtm_family = AF_INET6;
2511         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2512         rtm->rtm_src_len = rt->rt6i_src.plen;
2513         rtm->rtm_tos = 0;
2514         if (rt->rt6i_table)
2515                 table = rt->rt6i_table->tb6_id;
2516         else
2517                 table = RT6_TABLE_UNSPEC;
2518         rtm->rtm_table = table;
2519         if (nla_put_u32(skb, RTA_TABLE, table))
2520                 goto nla_put_failure;
2521         if (rt->rt6i_flags & RTF_REJECT) {
2522                 switch (rt->dst.error) {
2523                 case -EINVAL:
2524                         rtm->rtm_type = RTN_BLACKHOLE;
2525                         break;
2526                 case -EACCES:
2527                         rtm->rtm_type = RTN_PROHIBIT;
2528                         break;
2529                 case -EAGAIN:
2530                         rtm->rtm_type = RTN_THROW;
2531                         break;
2532                 default:
2533                         rtm->rtm_type = RTN_UNREACHABLE;
2534                         break;
2535                 }
2536         }
2537         else if (rt->rt6i_flags & RTF_LOCAL)
2538                 rtm->rtm_type = RTN_LOCAL;
2539         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2540                 rtm->rtm_type = RTN_LOCAL;
2541         else
2542                 rtm->rtm_type = RTN_UNICAST;
2543         rtm->rtm_flags = 0;
2544         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2545         rtm->rtm_protocol = rt->rt6i_protocol;
2546         if (rt->rt6i_flags & RTF_DYNAMIC)
2547                 rtm->rtm_protocol = RTPROT_REDIRECT;
2548         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2549                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2550                         rtm->rtm_protocol = RTPROT_RA;
2551                 else
2552                         rtm->rtm_protocol = RTPROT_KERNEL;
2553         }
2554
2555         if (rt->rt6i_flags & RTF_CACHE)
2556                 rtm->rtm_flags |= RTM_F_CLONED;
2557
2558         if (dst) {
2559                 if (nla_put(skb, RTA_DST, 16, dst))
2560                         goto nla_put_failure;
2561                 rtm->rtm_dst_len = 128;
2562         } else if (rtm->rtm_dst_len)
2563                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2564                         goto nla_put_failure;
2565 #ifdef CONFIG_IPV6_SUBTREES
2566         if (src) {
2567                 if (nla_put(skb, RTA_SRC, 16, src))
2568                         goto nla_put_failure;
2569                 rtm->rtm_src_len = 128;
2570         } else if (rtm->rtm_src_len &&
2571                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2572                 goto nla_put_failure;
2573 #endif
2574         if (iif) {
2575 #ifdef CONFIG_IPV6_MROUTE
2576                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2577                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2578                         if (err <= 0) {
2579                                 if (!nowait) {
2580                                         if (err == 0)
2581                                                 return 0;
2582                                         goto nla_put_failure;
2583                                 } else {
2584                                         if (err == -EMSGSIZE)
2585                                                 goto nla_put_failure;
2586                                 }
2587                         }
2588                 } else
2589 #endif
2590                         if (nla_put_u32(skb, RTA_IIF, iif))
2591                                 goto nla_put_failure;
2592         } else if (dst) {
2593                 struct in6_addr saddr_buf;
2594                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2595                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2596                         goto nla_put_failure;
2597         }
2598
2599         if (rt->rt6i_prefsrc.plen) {
2600                 struct in6_addr saddr_buf;
2601                 saddr_buf = rt->rt6i_prefsrc.addr;
2602                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2603                         goto nla_put_failure;
2604         }
2605
2606         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2607                 goto nla_put_failure;
2608
2609         if (rt->rt6i_flags & RTF_GATEWAY) {
2610                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2611                         goto nla_put_failure;
2612         }
2613
2614         if (rt->dst.dev &&
2615             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2616                 goto nla_put_failure;
2617         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2618                 goto nla_put_failure;
2619
2620         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2621
2622         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2623                 goto nla_put_failure;
2624
2625         return nlmsg_end(skb, nlh);
2626
2627 nla_put_failure:
2628         nlmsg_cancel(skb, nlh);
2629         return -EMSGSIZE;
2630 }
2631
2632 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2633 {
2634         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2635         int prefix;
2636
2637         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2638                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2639                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2640         } else
2641                 prefix = 0;
2642
2643         return rt6_fill_node(arg->net,
2644                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2645                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2646                      prefix, 0, NLM_F_MULTI);
2647 }
2648
2649 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2650 {
2651         struct net *net = sock_net(in_skb->sk);
2652         struct nlattr *tb[RTA_MAX+1];
2653         struct rt6_info *rt;
2654         struct sk_buff *skb;
2655         struct rtmsg *rtm;
2656         struct flowi6 fl6;
2657         int err, iif = 0, oif = 0;
2658
2659         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2660         if (err < 0)
2661                 goto errout;
2662
2663         err = -EINVAL;
2664         memset(&fl6, 0, sizeof(fl6));
2665
2666         if (tb[RTA_SRC]) {
2667                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2668                         goto errout;
2669
2670                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2671         }
2672
2673         if (tb[RTA_DST]) {
2674                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2675                         goto errout;
2676
2677                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2678         }
2679
2680         if (tb[RTA_IIF])
2681                 iif = nla_get_u32(tb[RTA_IIF]);
2682
2683         if (tb[RTA_OIF])
2684                 oif = nla_get_u32(tb[RTA_OIF]);
2685
2686         if (iif) {
2687                 struct net_device *dev;
2688                 int flags = 0;
2689
2690                 dev = __dev_get_by_index(net, iif);
2691                 if (!dev) {
2692                         err = -ENODEV;
2693                         goto errout;
2694                 }
2695
2696                 fl6.flowi6_iif = iif;
2697
2698                 if (!ipv6_addr_any(&fl6.saddr))
2699                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2700
2701                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2702                                                                flags);
2703         } else {
2704                 fl6.flowi6_oif = oif;
2705
2706                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2707         }
2708
2709         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2710         if (!skb) {
2711                 ip6_rt_put(rt);
2712                 err = -ENOBUFS;
2713                 goto errout;
2714         }
2715
2716         /* Reserve room for dummy headers, this skb can pass
2717            through good chunk of routing engine.
2718          */
2719         skb_reset_mac_header(skb);
2720         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2721
2722         skb_dst_set(skb, &rt->dst);
2723
2724         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2725                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2726                             nlh->nlmsg_seq, 0, 0, 0);
2727         if (err < 0) {
2728                 kfree_skb(skb);
2729                 goto errout;
2730         }
2731
2732         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2733 errout:
2734         return err;
2735 }
2736
2737 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2738 {
2739         struct sk_buff *skb;
2740         struct net *net = info->nl_net;
2741         u32 seq;
2742         int err;
2743
2744         err = -ENOBUFS;
2745         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2746
2747         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2748         if (!skb)
2749                 goto errout;
2750
2751         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2752                                 event, info->portid, seq, 0, 0, 0);
2753         if (err < 0) {
2754                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2755                 WARN_ON(err == -EMSGSIZE);
2756                 kfree_skb(skb);
2757                 goto errout;
2758         }
2759         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2760                     info->nlh, gfp_any());
2761         return;
2762 errout:
2763         if (err < 0)
2764                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2765 }
2766
2767 static int ip6_route_dev_notify(struct notifier_block *this,
2768                                 unsigned long event, void *data)
2769 {
2770         struct net_device *dev = (struct net_device *)data;
2771         struct net *net = dev_net(dev);
2772
2773         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2774                 net->ipv6.ip6_null_entry->dst.dev = dev;
2775                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2776 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2777                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2778                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2779                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2780                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2781 #endif
2782         }
2783
2784         return NOTIFY_OK;
2785 }
2786
2787 /*
2788  *      /proc
2789  */
2790
2791 #ifdef CONFIG_PROC_FS
2792
/*
 * Buffer bookkeeping for a /proc route dump.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; confirm it is still referenced before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2801
2802 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2803 {
2804         struct seq_file *m = p_arg;
2805
2806         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2807
2808 #ifdef CONFIG_IPV6_SUBTREES
2809         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2810 #else
2811         seq_puts(m, "00000000000000000000000000000000 00 ");
2812 #endif
2813         if (rt->rt6i_flags & RTF_GATEWAY) {
2814                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2815         } else {
2816                 seq_puts(m, "00000000000000000000000000000000");
2817         }
2818         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2819                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2820                    rt->dst.__use, rt->rt6i_flags,
2821                    rt->dst.dev ? rt->dst.dev->name : "");
2822         return 0;
2823 }
2824
2825 static int ipv6_route_show(struct seq_file *m, void *v)
2826 {
2827         struct net *net = (struct net *)m->private;
2828         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2829         return 0;
2830 }
2831
2832 static int ipv6_route_open(struct inode *inode, struct file *file)
2833 {
2834         return single_open_net(inode, file, ipv6_route_show);
2835 }
2836
2837 static const struct file_operations ipv6_route_proc_fops = {
2838         .owner          = THIS_MODULE,
2839         .open           = ipv6_route_open,
2840         .read           = seq_read,
2841         .llseek         = seq_lseek,
2842         .release        = single_release_net,
2843 };
2844
2845 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2846 {
2847         struct net *net = (struct net *)seq->private;
2848         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2849                    net->ipv6.rt6_stats->fib_nodes,
2850                    net->ipv6.rt6_stats->fib_route_nodes,
2851                    net->ipv6.rt6_stats->fib_rt_alloc,
2852                    net->ipv6.rt6_stats->fib_rt_entries,
2853                    net->ipv6.rt6_stats->fib_rt_cache,
2854                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2855                    net->ipv6.rt6_stats->fib_discarded_routes);
2856
2857         return 0;
2858 }
2859
2860 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2861 {
2862         return single_open_net(inode, file, rt6_stats_seq_show);
2863 }
2864
2865 static const struct file_operations rt6_stats_seq_fops = {
2866         .owner   = THIS_MODULE,
2867         .open    = rt6_stats_seq_open,
2868         .read    = seq_read,
2869         .llseek  = seq_lseek,
2870         .release = single_release_net,
2871 };
2872 #endif  /* CONFIG_PROC_FS */
2873
2874 #ifdef CONFIG_SYSCTL
2875
2876 static
2877 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2878                               void __user *buffer, size_t *lenp, loff_t *ppos)
2879 {
2880         struct net *net;
2881         int delay;
2882         if (!write)
2883                 return -EINVAL;
2884
2885         net = (struct net *)ctl->extra1;
2886         delay = net->ipv6.sysctl.flush_delay;
2887         proc_dointvec(ctl, write, buffer, lenp, ppos);
2888         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2889         return 0;
2890 }
2891
2892 ctl_table ipv6_route_table_template[] = {
2893         {
2894                 .procname       =       "flush",
2895                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2896                 .maxlen         =       sizeof(int),
2897                 .mode           =       0200,
2898                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2899         },
2900         {
2901                 .procname       =       "gc_thresh",
2902                 .data           =       &ip6_dst_ops_template.gc_thresh,
2903                 .maxlen         =       sizeof(int),
2904                 .mode           =       0644,
2905                 .proc_handler   =       proc_dointvec,
2906         },
2907         {
2908                 .procname       =       "max_size",
2909                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2910                 .maxlen         =       sizeof(int),
2911                 .mode           =       0644,
2912                 .proc_handler   =       proc_dointvec,
2913         },
2914         {
2915                 .procname       =       "gc_min_interval",
2916                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2917                 .maxlen         =       sizeof(int),
2918                 .mode           =       0644,
2919                 .proc_handler   =       proc_dointvec_jiffies,
2920         },
2921         {
2922                 .procname       =       "gc_timeout",
2923                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2924                 .maxlen         =       sizeof(int),
2925                 .mode           =       0644,
2926                 .proc_handler   =       proc_dointvec_jiffies,
2927         },
2928         {
2929                 .procname       =       "gc_interval",
2930                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2931                 .maxlen         =       sizeof(int),
2932                 .mode           =       0644,
2933                 .proc_handler   =       proc_dointvec_jiffies,
2934         },
2935         {
2936                 .procname       =       "gc_elasticity",
2937                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2938                 .maxlen         =       sizeof(int),
2939                 .mode           =       0644,
2940                 .proc_handler   =       proc_dointvec,
2941         },
2942         {
2943                 .procname       =       "mtu_expires",
2944                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2945                 .maxlen         =       sizeof(int),
2946                 .mode           =       0644,
2947                 .proc_handler   =       proc_dointvec_jiffies,
2948         },
2949         {
2950                 .procname       =       "min_adv_mss",
2951                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2952                 .maxlen         =       sizeof(int),
2953                 .mode           =       0644,
2954                 .proc_handler   =       proc_dointvec,
2955         },
2956         {
2957                 .procname       =       "gc_min_interval_ms",
2958                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2959                 .maxlen         =       sizeof(int),
2960                 .mode           =       0644,
2961                 .proc_handler   =       proc_dointvec_ms_jiffies,
2962         },
2963         { }
2964 };
2965
2966 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2967 {
2968         struct ctl_table *table;
2969
2970         table = kmemdup(ipv6_route_table_template,
2971                         sizeof(ipv6_route_table_template),
2972                         GFP_KERNEL);
2973
2974         if (table) {
2975                 table[0].data = &net->ipv6.sysctl.flush_delay;
2976                 table[0].extra1 = net;
2977                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2978                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2979                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2980                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2981                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2982                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2983                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2984                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2985                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2986
2987                 /* Don't export sysctls to unprivileged users */
2988                 if (net->user_ns != &init_user_ns)
2989                         table[0].procname = NULL;
2990         }
2991
2992         return table;
2993 }
2994 #endif
2995
2996 static int __net_init ip6_route_net_init(struct net *net)
2997 {
2998         int ret = -ENOMEM;
2999
3000         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3001                sizeof(net->ipv6.ip6_dst_ops));
3002
3003         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3004                 goto out_ip6_dst_ops;
3005
3006         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3007                                            sizeof(*net->ipv6.ip6_null_entry),
3008                                            GFP_KERNEL);
3009         if (!net->ipv6.ip6_null_entry)
3010                 goto out_ip6_dst_entries;
3011         net->ipv6.ip6_null_entry->dst.path =
3012                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3013         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3014         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3015                          ip6_template_metrics, true);
3016
3017 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3018         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3019                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3020                                                GFP_KERNEL);
3021         if (!net->ipv6.ip6_prohibit_entry)
3022                 goto out_ip6_null_entry;
3023         net->ipv6.ip6_prohibit_entry->dst.path =
3024                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3025         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3026         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3027                          ip6_template_metrics, true);
3028
3029         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3030                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3031                                                GFP_KERNEL);
3032         if (!net->ipv6.ip6_blk_hole_entry)
3033                 goto out_ip6_prohibit_entry;
3034         net->ipv6.ip6_blk_hole_entry->dst.path =
3035                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3036         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3037         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3038                          ip6_template_metrics, true);
3039 #endif
3040
3041         net->ipv6.sysctl.flush_delay = 0;
3042         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3043         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3044         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3045         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3046         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3047         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3048         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3049
3050         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3051
3052         ret = 0;
3053 out:
3054         return ret;
3055
3056 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3057 out_ip6_prohibit_entry:
3058         kfree(net->ipv6.ip6_prohibit_entry);
3059 out_ip6_null_entry:
3060         kfree(net->ipv6.ip6_null_entry);
3061 #endif
3062 out_ip6_dst_entries:
3063         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3064 out_ip6_dst_ops:
3065         goto out;
3066 }
3067
3068 static void __net_exit ip6_route_net_exit(struct net *net)
3069 {
3070         kfree(net->ipv6.ip6_null_entry);
3071 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3072         kfree(net->ipv6.ip6_prohibit_entry);
3073         kfree(net->ipv6.ip6_blk_hole_entry);
3074 #endif
3075         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3076 }
3077
3078 static int __net_init ip6_route_net_init_late(struct net *net)
3079 {
3080 #ifdef CONFIG_PROC_FS
3081         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3082         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3083 #endif
3084         return 0;
3085 }
3086
3087 static void __net_exit ip6_route_net_exit_late(struct net *net)
3088 {
3089 #ifdef CONFIG_PROC_FS
3090         proc_net_remove(net, "ipv6_route");
3091         proc_net_remove(net, "rt6_stats");
3092 #endif
3093 }
3094
3095 static struct pernet_operations ip6_route_net_ops = {
3096         .init = ip6_route_net_init,
3097         .exit = ip6_route_net_exit,
3098 };
3099
3100 static int __net_init ipv6_inetpeer_init(struct net *net)
3101 {
3102         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3103
3104         if (!bp)
3105                 return -ENOMEM;
3106         inet_peer_base_init(bp);
3107         net->ipv6.peers = bp;
3108         return 0;
3109 }
3110
3111 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3112 {
3113         struct inet_peer_base *bp = net->ipv6.peers;
3114
3115         net->ipv6.peers = NULL;
3116         inetpeer_invalidate_tree(bp);
3117         kfree(bp);
3118 }
3119
3120 static struct pernet_operations ipv6_inetpeer_ops = {
3121         .init   =       ipv6_inetpeer_init,
3122         .exit   =       ipv6_inetpeer_exit,
3123 };
3124
3125 static struct pernet_operations ip6_route_net_late_ops = {
3126         .init = ip6_route_net_init_late,
3127         .exit = ip6_route_net_exit_late,
3128 };
3129
3130 static struct notifier_block ip6_route_dev_notifier = {
3131         .notifier_call = ip6_route_dev_notify,
3132         .priority = 0,
3133 };
3134
3135 int __init ip6_route_init(void)
3136 {
3137         int ret;
3138
3139         ret = -ENOMEM;
3140         ip6_dst_ops_template.kmem_cachep =
3141                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3142                                   SLAB_HWCACHE_ALIGN, NULL);
3143         if (!ip6_dst_ops_template.kmem_cachep)
3144                 goto out;
3145
3146         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3147         if (ret)
3148                 goto out_kmem_cache;
3149
3150         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3151         if (ret)
3152                 goto out_dst_entries;
3153
3154         ret = register_pernet_subsys(&ip6_route_net_ops);
3155         if (ret)
3156                 goto out_register_inetpeer;
3157
3158         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3159
3160         /* Registering of the loopback is done before this portion of code,
3161          * the loopback reference in rt6_info will not be taken, do it
3162          * manually for init_net */
3163         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3164         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3165   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3166         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3167         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3168         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3169         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3170   #endif
3171         ret = fib6_init();
3172         if (ret)
3173                 goto out_register_subsys;
3174
3175         ret = xfrm6_init();
3176         if (ret)
3177                 goto out_fib6_init;
3178
3179         ret = fib6_rules_init();
3180         if (ret)
3181                 goto xfrm6_init;
3182
3183         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3184         if (ret)
3185                 goto fib6_rules_init;
3186
3187         ret = -ENOBUFS;
3188         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3189             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3190             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3191                 goto out_register_late_subsys;
3192
3193         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3194         if (ret)
3195                 goto out_register_late_subsys;
3196
3197 out:
3198         return ret;
3199
3200 out_register_late_subsys:
3201         unregister_pernet_subsys(&ip6_route_net_late_ops);
3202 fib6_rules_init:
3203         fib6_rules_cleanup();
3204 xfrm6_init:
3205         xfrm6_fini();
3206 out_fib6_init:
3207         fib6_gc_cleanup();
3208 out_register_subsys:
3209         unregister_pernet_subsys(&ip6_route_net_ops);
3210 out_register_inetpeer:
3211         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3212 out_dst_entries:
3213         dst_entries_destroy(&ip6_dst_blackhole_ops);
3214 out_kmem_cache:
3215         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3216         goto out;
3217 }
3218
3219 void ip6_route_cleanup(void)
3220 {
3221         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3222         unregister_pernet_subsys(&ip6_route_net_late_ops);
3223         fib6_rules_cleanup();
3224         xfrm6_fini();
3225         fib6_gc_cleanup();
3226         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3227         unregister_pernet_subsys(&ip6_route_net_ops);
3228         dst_entries_destroy(&ip6_dst_blackhole_ops);
3229         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3230 }