1341f68e8009dc86b335c3baad51dec3d1189ced
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* dst_ops->redirect callback for blackhole dsts: a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
                                      struct sock *sk,
                                      struct sk_buff *skb)
{
        /* no-op */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= ipv6_addr_hash(&fl6->daddr);
392         val ^= ipv6_addr_hash(&fl6->saddr);
393
394         /* Work only if this not encapsulated */
395         switch (fl6->flowi6_proto) {
396         case IPPROTO_UDP:
397         case IPPROTO_TCP:
398         case IPPROTO_SCTP:
399                 val ^= (__force u16)fl6->fl6_sport;
400                 val ^= (__force u16)fl6->fl6_dport;
401                 break;
402
403         case IPPROTO_ICMPV6:
404                 val ^= (__force u16)fl6->fl6_icmp_type;
405                 val ^= (__force u16)fl6->fl6_icmp_code;
406                 break;
407         }
408         /* RFC6438 recommands to use flowlabel */
409         val ^= (__force u32)fl6->flowlabel;
410
411         /* Perhaps, we need to tune, this function? */
412         val = val ^ (val >> 7) ^ (val >> 12);
413         return val % candidate_count;
414 }
415
416 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
417                                              struct flowi6 *fl6)
418 {
419         struct rt6_info *sibling, *next_sibling;
420         int route_choosen;
421
422         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
423         /* Don't change the route, if route_choosen == 0
424          * (siblings does not include ourself)
425          */
426         if (route_choosen)
427                 list_for_each_entry_safe(sibling, next_sibling,
428                                 &match->rt6i_siblings, rt6i_siblings) {
429                         route_choosen--;
430                         if (route_choosen == 0) {
431                                 match = sibling;
432                                 break;
433                         }
434                 }
435         return match;
436 }
437
438 /*
439  *      Route lookup. Any table->tb6_lock is implied.
440  */
441
442 static inline struct rt6_info *rt6_device_match(struct net *net,
443                                                     struct rt6_info *rt,
444                                                     const struct in6_addr *saddr,
445                                                     int oif,
446                                                     int flags)
447 {
448         struct rt6_info *local = NULL;
449         struct rt6_info *sprt;
450
451         if (!oif && ipv6_addr_any(saddr))
452                 goto out;
453
454         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
455                 struct net_device *dev = sprt->dst.dev;
456
457                 if (oif) {
458                         if (dev->ifindex == oif)
459                                 return sprt;
460                         if (dev->flags & IFF_LOOPBACK) {
461                                 if (!sprt->rt6i_idev ||
462                                     sprt->rt6i_idev->dev->ifindex != oif) {
463                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
464                                                 continue;
465                                         if (local && (!oif ||
466                                                       local->rt6i_idev->dev->ifindex == oif))
467                                                 continue;
468                                 }
469                                 local = sprt;
470                         }
471                 } else {
472                         if (ipv6_chk_addr(net, saddr, dev,
473                                           flags & RT6_LOOKUP_F_IFACE))
474                                 return sprt;
475                 }
476         }
477
478         if (oif) {
479                 if (local)
480                         return local;
481
482                 if (flags & RT6_LOOKUP_F_IFACE)
483                         return net->ipv6.ip6_null_entry;
484         }
485 out:
486         return rt;
487 }
488
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Sends a neighbour solicitation for the route's bound neighbour to its
 * solicited-node multicast address, provided the neighbour is not in a
 * NUD_VALID state and more than rtr_probe_interval (from the route's
 * inet6_dev configuration) has passed since neigh->updated.  The
 * timestamp is refreshed under the neighbour lock before the lock is
 * dropped and the solicitation is sent.
 *
 * Router Reachability Probes MUST be rate-limited to no more than one
 * per minute.  It is not yet settled whether probing here is really
 * appropriate.
 *
 * A NULL @rt, or a route without a bound neighbour, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
        struct neighbour *neigh = rt ? rt->n : NULL;
        struct in6_addr mcaddr;
        struct in6_addr *target;

        if (!neigh)
                return;

        write_lock_bh(&neigh->lock);
        if ((neigh->nud_state & NUD_VALID) ||
            !time_after(jiffies,
                        neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
                write_unlock_bh(&neigh->lock);
                return;
        }
        neigh->updated = jiffies;
        write_unlock_bh(&neigh->lock);

        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
529
530 /*
531  * Default Router Selection (RFC 2461 6.3.6)
532  */
533 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
534 {
535         struct net_device *dev = rt->dst.dev;
536         if (!oif || dev->ifindex == oif)
537                 return 2;
538         if ((dev->flags & IFF_LOOPBACK) &&
539             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
540                 return 1;
541         return 0;
542 }
543
544 static inline bool rt6_check_neigh(struct rt6_info *rt)
545 {
546         struct neighbour *neigh;
547         bool ret = false;
548
549         neigh = rt->n;
550         if (rt->rt6i_flags & RTF_NONEXTHOP ||
551             !(rt->rt6i_flags & RTF_GATEWAY))
552                 ret = true;
553         else if (neigh) {
554                 read_lock_bh(&neigh->lock);
555                 if (neigh->nud_state & NUD_VALID)
556                         ret = true;
557 #ifdef CONFIG_IPV6_ROUTER_PREF
558                 else if (!(neigh->nud_state & NUD_FAILED))
559                         ret = true;
560 #endif
561                 read_unlock_bh(&neigh->lock);
562         }
563         return ret;
564 }
565
566 static int rt6_score_route(struct rt6_info *rt, int oif,
567                            int strict)
568 {
569         int m;
570
571         m = rt6_check_dev(rt, oif);
572         if (!m && (strict & RT6_LOOKUP_F_IFACE))
573                 return -1;
574 #ifdef CONFIG_IPV6_ROUTER_PREF
575         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
576 #endif
577         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
578                 return -1;
579         return m;
580 }
581
582 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
583                                    int *mpri, struct rt6_info *match)
584 {
585         int m;
586
587         if (rt6_check_expired(rt))
588                 goto out;
589
590         m = rt6_score_route(rt, oif, strict);
591         if (m < 0)
592                 goto out;
593
594         if (m > *mpri) {
595                 if (strict & RT6_LOOKUP_F_REACHABLE)
596                         rt6_probe(match);
597                 *mpri = m;
598                 match = rt;
599         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
600                 rt6_probe(rt);
601         }
602
603 out:
604         return match;
605 }
606
607 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
608                                      struct rt6_info *rr_head,
609                                      u32 metric, int oif, int strict)
610 {
611         struct rt6_info *rt, *match;
612         int mpri = -1;
613
614         match = NULL;
615         for (rt = rr_head; rt && rt->rt6i_metric == metric;
616              rt = rt->dst.rt6_next)
617                 match = find_match(rt, oif, strict, &mpri, match);
618         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
619              rt = rt->dst.rt6_next)
620                 match = find_match(rt, oif, strict, &mpri, match);
621
622         return match;
623 }
624
625 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
626 {
627         struct rt6_info *match, *rt0;
628         struct net *net;
629
630         rt0 = fn->rr_ptr;
631         if (!rt0)
632                 fn->rr_ptr = rt0 = fn->leaf;
633
634         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
635
636         if (!match &&
637             (strict & RT6_LOOKUP_F_REACHABLE)) {
638                 struct rt6_info *next = rt0->dst.rt6_next;
639
640                 /* no entries matched; do round-robin */
641                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
642                         next = fn->leaf;
643
644                 if (next != rt0)
645                         fn->rr_ptr = next;
646         }
647
648         net = dev_net(rt0->dst.dev);
649         return match ? match : net->ipv6.ip6_null_entry;
650 }
651
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address to use as the gateway for the route
 *
 * The option is rejected with -EINVAL when it is shorter than a
 * struct route_info, when its length field exceeds 3, when the prefix
 * length exceeds 128 or does not fit in the advertised option length,
 * or when the route preference is ICMPV6_ROUTER_PREF_INVALID.
 *
 * Otherwise an existing matching route is deleted when the lifetime is
 * zero, a missing route is added when the lifetime is non-zero, and a
 * remaining route gets its preference bits and expiry refreshed.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  const struct in6_addr *gwaddr)
{
        struct net *net = dev_net(dev);
        struct route_info *rinfo = (struct route_info *) opt;
        struct in6_addr masked_prefix;
        struct in6_addr *prefix;
        unsigned long lifetime;
        unsigned int pref;
        struct rt6_info *rt;

        if (len < sizeof(struct route_info))
                return -EINVAL;

        /* Sanity check for prefix_len and length */
        if (rinfo->length > 3 || rinfo->prefix_len > 128)
                return -EINVAL;
        if (rinfo->prefix_len > 64 && rinfo->length < 2)
                return -EINVAL;
        if (rinfo->prefix_len > 0 && rinfo->length < 1)
                return -EINVAL;

        pref = rinfo->route_pref;
        if (pref == ICMPV6_ROUTER_PREF_INVALID)
                return -EINVAL;

        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

        if (rinfo->length == 3) {
                prefix = (struct in6_addr *)rinfo->prefix;
        } else {
                /* this function is safe */
                ipv6_addr_prefix(&masked_prefix,
                                 (struct in6_addr *)rinfo->prefix,
                                 rinfo->prefix_len);
                prefix = &masked_prefix;
        }

        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
                                dev->ifindex);

        /* A zero lifetime withdraws an existing route. */
        if (rt && !lifetime) {
                ip6_del_rt(rt);
                rt = NULL;
        }

        if (!rt && lifetime) {
                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
                                        gwaddr, dev->ifindex, pref);
        } else if (rt) {
                rt->rt6i_flags = RTF_ROUTEINFO |
                                 (rt->rt6i_flags & ~RTF_PREF_MASK) |
                                 RTF_PREF(pref);
        }

        if (!rt)
                return 0;

        if (addrconf_finite_timeout(lifetime))
                rt6_set_expires(rt, jiffies + HZ * lifetime);
        else
                rt6_clean_expires(rt);

        ip6_rt_put(rt);
        return 0;
}
#endif
724
/*
 * BACKTRACK - climb the fib6 tree when the current lookup produced the
 * null entry.
 *
 * Relies on the expanding function for the variables "rt" (current
 * result) and "fn" (current node) and for the labels "restart" and "out".
 * Starting from fn, it moves to the parent, or into the parent's source
 * subtree via fib6_lookup() when the parent has one and fn is not that
 * subtree, until it reaches a node flagged RTN_RTINFO (jump to "restart")
 * or finds fn flagged RTN_TL_ROOT (jump to "out").
 */
#define BACKTRACK(__net, saddr)                 \
do { \
        if (rt == __net->ipv6.ip6_null_entry) { \
                struct fib6_node *pn; \
                for (;;) { \
                        if (fn->fn_flags & RTN_TL_ROOT) \
                                goto out; \
                        pn = fn->parent; \
                        fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
                                fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
                                pn; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
742
743 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
744                                              struct fib6_table *table,
745                                              struct flowi6 *fl6, int flags)
746 {
747         struct fib6_node *fn;
748         struct rt6_info *rt;
749
750         read_lock_bh(&table->tb6_lock);
751         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
752 restart:
753         rt = fn->leaf;
754         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
755         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
756                 rt = rt6_multipath_select(rt, fl6);
757         BACKTRACK(net, &fl6->saddr);
758 out:
759         dst_use(&rt->dst, jiffies);
760         read_unlock_bh(&table->tb6_lock);
761         return rt;
762
763 }
764
765 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
766                                     int flags)
767 {
768         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
769 }
770 EXPORT_SYMBOL_GPL(ip6_route_lookup);
771
772 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
773                             const struct in6_addr *saddr, int oif, int strict)
774 {
775         struct flowi6 fl6 = {
776                 .flowi6_oif = oif,
777                 .daddr = *daddr,
778         };
779         struct dst_entry *dst;
780         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
781
782         if (saddr) {
783                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
784                 flags |= RT6_LOOKUP_F_HAS_SADDR;
785         }
786
787         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
788         if (dst->error == 0)
789                 return (struct rt6_info *) dst;
790
791         dst_release(dst);
792
793         return NULL;
794 }
795
796 EXPORT_SYMBOL(rt6_lookup);
797
798 /* ip6_ins_rt is called with FREE table->tb6_lock.
799    It takes new route entry, the addition fails by any reason the
800    route is freed. In any case, if caller does not hold it, it may
801    be destroyed.
802  */
803
804 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
805 {
806         int err;
807         struct fib6_table *table;
808
809         table = rt->rt6i_table;
810         write_lock_bh(&table->tb6_lock);
811         err = fib6_add(&table->tb6_root, rt, info);
812         write_unlock_bh(&table->tb6_lock);
813
814         return err;
815 }
816
817 int ip6_ins_rt(struct rt6_info *rt)
818 {
819         struct nl_info info = {
820                 .nl_net = dev_net(rt->dst.dev),
821         };
822         return __ip6_ins_rt(rt, &info);
823 }
824
825 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
826                                       const struct in6_addr *daddr,
827                                       const struct in6_addr *saddr)
828 {
829         struct rt6_info *rt;
830
831         /*
832          *      Clone the route.
833          */
834
835         rt = ip6_rt_copy(ort, daddr);
836
837         if (rt) {
838                 int attempts = !in_softirq();
839
840                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
841                         if (ort->rt6i_dst.plen != 128 &&
842                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
843                                 rt->rt6i_flags |= RTF_ANYCAST;
844                         rt->rt6i_gateway = *daddr;
845                 }
846
847                 rt->rt6i_flags |= RTF_CACHE;
848
849 #ifdef CONFIG_IPV6_SUBTREES
850                 if (rt->rt6i_src.plen && saddr) {
851                         rt->rt6i_src.addr = *saddr;
852                         rt->rt6i_src.plen = 128;
853                 }
854 #endif
855
856         retry:
857                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
858                         struct net *net = dev_net(rt->dst.dev);
859                         int saved_rt_min_interval =
860                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
861                         int saved_rt_elasticity =
862                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
863
864                         if (attempts-- > 0) {
865                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
866                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
867
868                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
869
870                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
871                                         saved_rt_elasticity;
872                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
873                                         saved_rt_min_interval;
874                                 goto retry;
875                         }
876
877                         net_warn_ratelimited("Neighbour table overflow\n");
878                         dst_free(&rt->dst);
879                         return NULL;
880                 }
881         }
882
883         return rt;
884 }
885
886 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
887                                         const struct in6_addr *daddr)
888 {
889         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
890
891         if (rt) {
892                 rt->rt6i_flags |= RTF_CACHE;
893                 rt->n = neigh_clone(ort->n);
894         }
895         return rt;
896 }
897
/*
 * Look up a route for @fl6 in @table, restricted by interface index @oif.
 *
 * The lookup runs under the table read lock.  If the selected route is
 * the namespace's null entry or is already an RTF_CACHE entry, it is
 * returned as is.  Otherwise a per-destination copy is created
 * (rt6_alloc_cow() or rt6_alloc_clone()) and inserted with ip6_ins_rt();
 * when the insert fails the whole lookup is repeated, up to three
 * attempts in total.
 *
 * Unless devconf_all->forwarding is set, the first pass asks rt6_select()
 * for RT6_LOOKUP_F_REACHABLE; if that pass ends at the "out" label the
 * flag is cleared and the lookup is redone without it.
 *
 * Every returning path has taken a reference with dst_hold() on the
 * route that is returned.
 */
898 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
899                                       struct flowi6 *fl6, int flags)
900 {
901         struct fib6_node *fn;
902         struct rt6_info *rt, *nrt;
903         int strict = 0;
904         int attempts = 3;
905         int err;
906         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
907
908         strict |= flags & RT6_LOOKUP_F_IFACE;
909
910 relookup:
911         read_lock_bh(&table->tb6_lock);
912
913 restart_2:
914         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
915
916 restart:
917         rt = rt6_select(fn, oif, strict | reachable);
918         if (rt->rt6i_nsiblings && oif == 0)
919                 rt = rt6_multipath_select(rt, fl6);
            /*
             * NOTE(review): BACKTRACK is a macro defined outside this chunk.
             * The "restart" label has no other visible user, so the macro
             * presumably jumps to it (and possibly to "out") - confirm.
             */
920         BACKTRACK(net, &fl6->saddr);
921         if (rt == net->ipv6.ip6_null_entry ||
922             rt->rt6i_flags & RTF_CACHE)
923                 goto out;
924
            /* Pin the route before the table lock is dropped. */
925         dst_hold(&rt->dst);
926         read_unlock_bh(&table->tb6_lock);
927
928         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
929                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
930         else if (!(rt->dst.flags & DST_HOST))
931                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
932         else
933                 goto out2;
934
            /* Swap the reference over to the copy (or to the null entry). */
935         ip6_rt_put(rt);
936         rt = nrt ? : net->ipv6.ip6_null_entry;
937
938         dst_hold(&rt->dst);
939         if (nrt) {
940                 err = ip6_ins_rt(nrt);
941                 if (!err)
942                         goto out2;
943         }
944
945         if (--attempts <= 0)
946                 goto out2;
947
948         /*
949          * Race condition! In the gap, when table->tb6_lock was
950          * released someone could insert this route.  Relookup.
951          */
952         ip6_rt_put(rt);
953         goto relookup;
954
955 out:
            /* First pass demanded a reachable router: retry without it. */
956         if (reachable) {
957                 reachable = 0;
958                 goto restart_2;
959         }
960         dst_hold(&rt->dst);
961         read_unlock_bh(&table->tb6_lock);
962 out2:
963         rt->dst.lastuse = jiffies;
964         rt->dst.__use++;
965
966         return rt;
967 }
968
969 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
970                                             struct flowi6 *fl6, int flags)
971 {
972         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
973 }
974
975 static struct dst_entry *ip6_route_input_lookup(struct net *net,
976                                                 struct net_device *dev,
977                                                 struct flowi6 *fl6, int flags)
978 {
979         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
980                 flags |= RT6_LOOKUP_F_IFACE;
981
982         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
983 }
984
985 void ip6_route_input(struct sk_buff *skb)
986 {
987         const struct ipv6hdr *iph = ipv6_hdr(skb);
988         struct net *net = dev_net(skb->dev);
989         int flags = RT6_LOOKUP_F_HAS_SADDR;
990         struct flowi6 fl6 = {
991                 .flowi6_iif = skb->dev->ifindex,
992                 .daddr = iph->daddr,
993                 .saddr = iph->saddr,
994                 .flowlabel = ip6_flowinfo(iph),
995                 .flowi6_mark = skb->mark,
996                 .flowi6_proto = iph->nexthdr,
997         };
998
999         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1000 }
1001
1002 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1003                                              struct flowi6 *fl6, int flags)
1004 {
1005         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1006 }
1007
1008 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1009                                     struct flowi6 *fl6)
1010 {
1011         int flags = 0;
1012
1013         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1014
1015         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1016                 flags |= RT6_LOOKUP_F_IFACE;
1017
1018         if (!ipv6_addr_any(&fl6->saddr))
1019                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1020         else if (sk)
1021                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1022
1023         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1024 }
1025
1026 EXPORT_SYMBOL(ip6_route_output);
1027
1028 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1029 {
1030         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1031         struct dst_entry *new = NULL;
1032
1033         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1034         if (rt) {
1035                 new = &rt->dst;
1036
1037                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1038                 rt6_init_peer(rt, net->ipv6.peers);
1039
1040                 new->__use = 1;
1041                 new->input = dst_discard;
1042                 new->output = dst_discard;
1043
1044                 if (dst_metrics_read_only(&ort->dst))
1045                         new->_metrics = ort->dst._metrics;
1046                 else
1047                         dst_copy_metrics(new, &ort->dst);
1048                 rt->rt6i_idev = ort->rt6i_idev;
1049                 if (rt->rt6i_idev)
1050                         in6_dev_hold(rt->rt6i_idev);
1051
1052                 rt->rt6i_gateway = ort->rt6i_gateway;
1053                 rt->rt6i_flags = ort->rt6i_flags;
1054                 rt6_clean_expires(rt);
1055                 rt->rt6i_metric = 0;
1056
1057                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1058 #ifdef CONFIG_IPV6_SUBTREES
1059                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1060 #endif
1061
1062                 dst_free(new);
1063         }
1064
1065         dst_release(dst_orig);
1066         return new ? new : ERR_PTR(-ENOMEM);
1067 }
1068
1069 /*
1070  *      Destination cache support functions
1071  */
1072
1073 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1074 {
1075         struct rt6_info *rt;
1076
1077         rt = (struct rt6_info *) dst;
1078
1079         /* All IPV6 dsts are created with ->obsolete set to the value
1080          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1081          * into this function always.
1082          */
1083         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1084                 return NULL;
1085
1086         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1087                 return dst;
1088
1089         return NULL;
1090 }
1091
1092 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1093 {
1094         struct rt6_info *rt = (struct rt6_info *) dst;
1095
1096         if (rt) {
1097                 if (rt->rt6i_flags & RTF_CACHE) {
1098                         if (rt6_check_expired(rt)) {
1099                                 ip6_del_rt(rt);
1100                                 dst = NULL;
1101                         }
1102                 } else {
1103                         dst_release(dst);
1104                         dst = NULL;
1105                 }
1106         }
1107         return dst;
1108 }
1109
1110 static void ip6_link_failure(struct sk_buff *skb)
1111 {
1112         struct rt6_info *rt;
1113
1114         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1115
1116         rt = (struct rt6_info *) skb_dst(skb);
1117         if (rt) {
1118                 if (rt->rt6i_flags & RTF_CACHE)
1119                         rt6_update_expires(rt, 0);
1120                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1121                         rt->rt6i_node->fn_sernum = -1;
1122         }
1123 }
1124
1125 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1126                                struct sk_buff *skb, u32 mtu)
1127 {
1128         struct rt6_info *rt6 = (struct rt6_info*)dst;
1129
1130         dst_confirm(dst);
1131         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1132                 struct net *net = dev_net(dst->dev);
1133
1134                 rt6->rt6i_flags |= RTF_MODIFIED;
1135                 if (mtu < IPV6_MIN_MTU) {
1136                         u32 features = dst_metric(dst, RTAX_FEATURES);
1137                         mtu = IPV6_MIN_MTU;
1138                         features |= RTAX_FEATURE_ALLFRAG;
1139                         dst_metric_set(dst, RTAX_FEATURES, features);
1140                 }
1141                 dst_metric_set(dst, RTAX_MTU, mtu);
1142                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1143         }
1144 }
1145
1146 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1147                      int oif, u32 mark)
1148 {
1149         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1150         struct dst_entry *dst;
1151         struct flowi6 fl6;
1152
1153         memset(&fl6, 0, sizeof(fl6));
1154         fl6.flowi6_oif = oif;
1155         fl6.flowi6_mark = mark;
1156         fl6.flowi6_flags = 0;
1157         fl6.daddr = iph->daddr;
1158         fl6.saddr = iph->saddr;
1159         fl6.flowlabel = ip6_flowinfo(iph);
1160
1161         dst = ip6_route_output(net, NULL, &fl6);
1162         if (!dst->error)
1163                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1164         dst_release(dst);
1165 }
1166 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1167
1168 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1169 {
1170         ip6_update_pmtu(skb, sock_net(sk), mtu,
1171                         sk->sk_bound_dev_if, sk->sk_mark);
1172 }
1173 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1174
1175 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1176 {
1177         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1178         struct dst_entry *dst;
1179         struct flowi6 fl6;
1180
1181         memset(&fl6, 0, sizeof(fl6));
1182         fl6.flowi6_oif = oif;
1183         fl6.flowi6_mark = mark;
1184         fl6.flowi6_flags = 0;
1185         fl6.daddr = iph->daddr;
1186         fl6.saddr = iph->saddr;
1187         fl6.flowlabel = ip6_flowinfo(iph);
1188
1189         dst = ip6_route_output(net, NULL, &fl6);
1190         if (!dst->error)
1191                 rt6_do_redirect(dst, NULL, skb);
1192         dst_release(dst);
1193 }
1194 EXPORT_SYMBOL_GPL(ip6_redirect);
1195
1196 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1197 {
1198         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1199 }
1200 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1201
1202 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1203 {
1204         struct net_device *dev = dst->dev;
1205         unsigned int mtu = dst_mtu(dst);
1206         struct net *net = dev_net(dev);
1207
1208         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1209
1210         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1211                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1212
1213         /*
1214          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1215          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1216          * IPV6_MAXPLEN is also valid and means: "any MSS,
1217          * rely only on pmtu discovery"
1218          */
1219         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1220                 mtu = IPV6_MAXPLEN;
1221         return mtu;
1222 }
1223
1224 static unsigned int ip6_mtu(const struct dst_entry *dst)
1225 {
1226         struct inet6_dev *idev;
1227         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1228
1229         if (mtu)
1230                 return mtu;
1231
1232         mtu = IPV6_MIN_MTU;
1233
1234         rcu_read_lock();
1235         idev = __in6_dev_get(dst->dev);
1236         if (idev)
1237                 mtu = idev->cnf.mtu6;
1238         rcu_read_unlock();
1239
1240         return mtu;
1241 }
1242
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc(); walked by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
1243 static struct dst_entry *icmp6_dst_gc_list;
/* Taken (BH-disabling) around every access to icmp6_dst_gc_list. */
1244 static DEFINE_SPINLOCK(icmp6_dst_lock);
1245
1246 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1247                                   struct neighbour *neigh,
1248                                   struct flowi6 *fl6)
1249 {
1250         struct dst_entry *dst;
1251         struct rt6_info *rt;
1252         struct inet6_dev *idev = in6_dev_get(dev);
1253         struct net *net = dev_net(dev);
1254
1255         if (unlikely(!idev))
1256                 return ERR_PTR(-ENODEV);
1257
1258         rt = ip6_dst_alloc(net, dev, 0, NULL);
1259         if (unlikely(!rt)) {
1260                 in6_dev_put(idev);
1261                 dst = ERR_PTR(-ENOMEM);
1262                 goto out;
1263         }
1264
1265         if (neigh)
1266                 neigh_hold(neigh);
1267         else {
1268                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1269                 if (IS_ERR(neigh)) {
1270                         in6_dev_put(idev);
1271                         dst_free(&rt->dst);
1272                         return ERR_CAST(neigh);
1273                 }
1274         }
1275
1276         rt->dst.flags |= DST_HOST;
1277         rt->dst.output  = ip6_output;
1278         rt->n = neigh;
1279         atomic_set(&rt->dst.__refcnt, 1);
1280         rt->rt6i_dst.addr = fl6->daddr;
1281         rt->rt6i_dst.plen = 128;
1282         rt->rt6i_idev     = idev;
1283         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1284
1285         spin_lock_bh(&icmp6_dst_lock);
1286         rt->dst.next = icmp6_dst_gc_list;
1287         icmp6_dst_gc_list = &rt->dst;
1288         spin_unlock_bh(&icmp6_dst_lock);
1289
1290         fib6_force_start_gc(net);
1291
1292         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1293
1294 out:
1295         return dst;
1296 }
1297
1298 int icmp6_dst_gc(void)
1299 {
1300         struct dst_entry *dst, **pprev;
1301         int more = 0;
1302
1303         spin_lock_bh(&icmp6_dst_lock);
1304         pprev = &icmp6_dst_gc_list;
1305
1306         while ((dst = *pprev) != NULL) {
1307                 if (!atomic_read(&dst->__refcnt)) {
1308                         *pprev = dst->next;
1309                         dst_free(dst);
1310                 } else {
1311                         pprev = &dst->next;
1312                         ++more;
1313                 }
1314         }
1315
1316         spin_unlock_bh(&icmp6_dst_lock);
1317
1318         return more;
1319 }
1320
1321 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1322                             void *arg)
1323 {
1324         struct dst_entry *dst, **pprev;
1325
1326         spin_lock_bh(&icmp6_dst_lock);
1327         pprev = &icmp6_dst_gc_list;
1328         while ((dst = *pprev) != NULL) {
1329                 struct rt6_info *rt = (struct rt6_info *) dst;
1330                 if (func(rt, arg)) {
1331                         *pprev = dst->next;
1332                         dst_free(dst);
1333                 } else {
1334                         pprev = &dst->next;
1335                 }
1336         }
1337         spin_unlock_bh(&icmp6_dst_lock);
1338 }
1339
1340 static int ip6_dst_gc(struct dst_ops *ops)
1341 {
1342         unsigned long now = jiffies;
1343         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1344         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1345         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1346         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1347         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1348         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1349         int entries;
1350
1351         entries = dst_entries_get_fast(ops);
1352         if (time_after(rt_last_gc + rt_min_interval, now) &&
1353             entries <= rt_max_size)
1354                 goto out;
1355
1356         net->ipv6.ip6_rt_gc_expire++;
1357         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1358         net->ipv6.ip6_rt_last_gc = now;
1359         entries = dst_entries_get_slow(ops);
1360         if (entries < ops->gc_thresh)
1361                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1362 out:
1363         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1364         return entries > rt_max_size;
1365 }
1366
1367 int ip6_dst_hoplimit(struct dst_entry *dst)
1368 {
1369         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1370         if (hoplimit == 0) {
1371                 struct net_device *dev = dst->dev;
1372                 struct inet6_dev *idev;
1373
1374                 rcu_read_lock();
1375                 idev = __in6_dev_get(dev);
1376                 if (idev)
1377                         hoplimit = idev->cnf.hop_limit;
1378                 else
1379                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1380                 rcu_read_unlock();
1381         }
1382         return hoplimit;
1383 }
1384 EXPORT_SYMBOL(ip6_dst_hoplimit);
1385
1386 /*
1387  *
1388  */
1389
/*
 * Create a route from the configuration in @cfg and insert it into the
 * FIB.
 *
 * Outline:
 *  - reject prefix lengths above 128 (and, without CONFIG_IPV6_SUBTREES,
 *    any source prefix) with -EINVAL;
 *  - if an interface index is given, take references on the device and
 *    its inet6_dev (-ENODEV if either is missing);
 *  - pick or create the FIB table, allocate the route and fill it in;
 *  - reject routes, and non-loopback destinations routed via a loopback
 *    device without RTF_LOCAL, become discard routes on the loopback
 *    device;
 *  - gateway routes are validated (see the comment in that branch);
 *  - user-supplied metrics are copied, then the route is handed to
 *    __ip6_ins_rt(), whose result is returned.
 *
 * @cfg is modified: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and fc_nlinfo.nl_net is
 * overwritten with the device's namespace.
 *
 * On the error path ("out") the device and inet6_dev references are
 * dropped and the route, if allocated, is freed.  On the success path
 * those references are not dropped here; the pointers are stored in the
 * route.
 */
1390 int ip6_route_add(struct fib6_config *cfg)
1391 {
1392         int err;
1393         struct net *net = cfg->fc_nlinfo.nl_net;
1394         struct rt6_info *rt = NULL;
1395         struct net_device *dev = NULL;
1396         struct inet6_dev *idev = NULL;
1397         struct fib6_table *table;
1398         int addr_type;
1399
1400         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1401                 return -EINVAL;
1402 #ifndef CONFIG_IPV6_SUBTREES
1403         if (cfg->fc_src_len)
1404                 return -EINVAL;
1405 #endif
1406         if (cfg->fc_ifindex) {
1407                 err = -ENODEV;
1408                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1409                 if (!dev)
1410                         goto out;
1411                 idev = in6_dev_get(dev);
1412                 if (!idev)
1413                         goto out;
1414         }
1415
1416         if (cfg->fc_metric == 0)
1417                 cfg->fc_metric = IP6_RT_PRIO_USER;
1418
             /* Preset for the "no table" failure below. */
1419         err = -ENOBUFS;
1420         if (cfg->fc_nlinfo.nlh &&
1421             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
                     /*
                      * Netlink request without NLM_F_CREATE: use the existing
                      * table if there is one, otherwise warn and create it
                      * anyway.
                      */
1422                 table = fib6_get_table(net, cfg->fc_table);
1423                 if (!table) {
1424                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1425                         table = fib6_new_table(net, cfg->fc_table);
1426                 }
1427         } else {
1428                 table = fib6_new_table(net, cfg->fc_table);
1429         }
1430
1431         if (!table)
1432                 goto out;
1433
1434         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1435
1436         if (!rt) {
1437                 err = -ENOMEM;
1438                 goto out;
1439         }
1440
1441         if (cfg->fc_flags & RTF_EXPIRES)
1442                 rt6_set_expires(rt, jiffies +
1443                                 clock_t_to_jiffies(cfg->fc_expires));
1444         else
1445                 rt6_clean_expires(rt);
1446
1447         if (cfg->fc_protocol == RTPROT_UNSPEC)
1448                 cfg->fc_protocol = RTPROT_BOOT;
1449         rt->rt6i_protocol = cfg->fc_protocol;
1450
1451         addr_type = ipv6_addr_type(&cfg->fc_dst);
1452
             /* Input handler depends on the destination's address type. */
1453         if (addr_type & IPV6_ADDR_MULTICAST)
1454                 rt->dst.input = ip6_mc_input;
1455         else if (cfg->fc_flags & RTF_LOCAL)
1456                 rt->dst.input = ip6_input;
1457         else
1458                 rt->dst.input = ip6_forward;
1459
1460         rt->dst.output = ip6_output;
1461
1462         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1463         rt->rt6i_dst.plen = cfg->fc_dst_len;
1464         if (rt->rt6i_dst.plen == 128)
1465                rt->dst.flags |= DST_HOST;
1466
             /*
              * Non-host routes that come with metrics attributes get their
              * own zeroed metrics array.
              */
1467         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1468                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1469                 if (!metrics) {
1470                         err = -ENOMEM;
1471                         goto out;
1472                 }
1473                 dst_init_metrics(&rt->dst, metrics, 0);
1474         }
1475 #ifdef CONFIG_IPV6_SUBTREES
1476         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1477         rt->rt6i_src.plen = cfg->fc_src_len;
1478 #endif
1479
1480         rt->rt6i_metric = cfg->fc_metric;
1481
1482         /* We cannot add true routes via loopback here,
1483            they would result in kernel looping; promote them to reject routes
1484          */
1485         if ((cfg->fc_flags & RTF_REJECT) ||
1486             (dev && (dev->flags & IFF_LOOPBACK) &&
1487              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1488              !(cfg->fc_flags & RTF_LOCAL))) {
1489                 /* hold loopback dev/idev if we haven't done so. */
1490                 if (dev != net->loopback_dev) {
1491                         if (dev) {
1492                                 dev_put(dev);
1493                                 in6_dev_put(idev);
1494                         }
1495                         dev = net->loopback_dev;
1496                         dev_hold(dev);
1497                         idev = in6_dev_get(dev);
1498                         if (!idev) {
1499                                 err = -ENODEV;
1500                                 goto out;
1501                         }
1502                 }
1503                 rt->dst.output = ip6_pkt_discard_out;
1504                 rt->dst.input = ip6_pkt_discard;
1505                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error stored in the dst. */
1506                 switch (cfg->fc_type) {
1507                 case RTN_BLACKHOLE:
1508                         rt->dst.error = -EINVAL;
1509                         break;
1510                 case RTN_PROHIBIT:
1511                         rt->dst.error = -EACCES;
1512                         break;
1513                 case RTN_THROW:
1514                         rt->dst.error = -EAGAIN;
1515                         break;
1516                 default:
1517                         rt->dst.error = -ENETUNREACH;
1518                         break;
1519                 }
1520                 goto install_route;
1521         }
1522
1523         if (cfg->fc_flags & RTF_GATEWAY) {
1524                 const struct in6_addr *gw_addr;
1525                 int gwa_type;
1526
1527                 gw_addr = &cfg->fc_gateway;
1528                 rt->rt6i_gateway = *gw_addr;
1529                 gwa_type = ipv6_addr_type(gw_addr);
1530
1531                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1532                         struct rt6_info *grt;
1533
1534                         /* IPv6 strictly inhibits using not link-local
1535                            addresses as nexthop address.
1536                            Otherwise, router will not able to send redirects.
1537                            It is very good, but in some (rare!) circumstances
1538                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1539                            some exceptions. --ANK
1540                          */
1541                         err = -EINVAL;
1542                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1543                                 goto out;
1544
1545                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1546
1547                         err = -EHOSTUNREACH;
1548                         if (!grt)
1549                                 goto out;
1550                         if (dev) {
1551                                 if (dev != grt->dst.dev) {
1552                                         ip6_rt_put(grt);
1553                                         goto out;
1554                                 }
1555                         } else {
                                     /*
                                      * No device was given: adopt the one from
                                      * the route to the gateway, with references.
                                      */
1556                                 dev = grt->dst.dev;
1557                                 idev = grt->rt6i_idev;
1558                                 dev_hold(dev);
1559                                 in6_dev_hold(grt->rt6i_idev);
1560                         }
                             /*
                              * err is still -EHOSTUNREACH here; it is cleared
                              * only when the route to the gateway is not itself
                              * a gateway route.
                              */
1561                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1562                                 err = 0;
1563                         ip6_rt_put(grt);
1564
1565                         if (err)
1566                                 goto out;
1567                 }
1568                 err = -EINVAL;
1569                 if (!dev || (dev->flags & IFF_LOOPBACK))
1570                         goto out;
1571         }
1572
1573         err = -ENODEV;
1574         if (!dev)
1575                 goto out;
1576
             /* A preferred source address must pass ipv6_chk_addr() on dev. */
1577         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1578                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1579                         err = -EINVAL;
1580                         goto out;
1581                 }
1582                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1583                 rt->rt6i_prefsrc.plen = 128;
1584         } else
1585                 rt->rt6i_prefsrc.plen = 0;
1586
1587         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1588                 err = rt6_bind_neighbour(rt, dev);
1589                 if (err)
1590                         goto out;
1591         }
1592
1593         rt->rt6i_flags = cfg->fc_flags;
1594
1595 install_route:
             /*
              * Copy the metrics attributes: type 0 is skipped, a type above
              * RTAX_MAX fails the whole request with -EINVAL.
              */
1596         if (cfg->fc_mx) {
1597                 struct nlattr *nla;
1598                 int remaining;
1599
1600                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1601                         int type = nla_type(nla);
1602
1603                         if (type) {
1604                                 if (type > RTAX_MAX) {
1605                                         err = -EINVAL;
1606                                         goto out;
1607                                 }
1608
1609                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1610                         }
1611                 }
1612         }
1613
             /* Store the held dev/idev pointers in the route. */
1614         rt->dst.dev = dev;
1615         rt->rt6i_idev = idev;
1616         rt->rt6i_table = table;
1617
1618         cfg->fc_nlinfo.nl_net = dev_net(dev);
1619
1620         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1621
1622 out:
1623         if (dev)
1624                 dev_put(dev);
1625         if (idev)
1626                 in6_dev_put(idev);
1627         if (rt)
1628                 dst_free(&rt->dst);
1629         return err;
1630 }
1631
1632 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1633 {
1634         int err;
1635         struct fib6_table *table;
1636         struct net *net = dev_net(rt->dst.dev);
1637
1638         if (rt == net->ipv6.ip6_null_entry) {
1639                 err = -ENOENT;
1640                 goto out;
1641         }
1642
1643         table = rt->rt6i_table;
1644         write_lock_bh(&table->tb6_lock);
1645         err = fib6_del(rt, info);
1646         write_unlock_bh(&table->tb6_lock);
1647
1648 out:
1649         ip6_rt_put(rt);
1650         return err;
1651 }
1652
1653 int ip6_del_rt(struct rt6_info *rt)
1654 {
1655         struct nl_info info = {
1656                 .nl_net = dev_net(rt->dst.dev),
1657         };
1658         return __ip6_del_rt(rt, &info);
1659 }
1660
1661 static int ip6_route_del(struct fib6_config *cfg)
1662 {
1663         struct fib6_table *table;
1664         struct fib6_node *fn;
1665         struct rt6_info *rt;
1666         int err = -ESRCH;
1667
1668         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1669         if (!table)
1670                 return err;
1671
1672         read_lock_bh(&table->tb6_lock);
1673
1674         fn = fib6_locate(&table->tb6_root,
1675                          &cfg->fc_dst, cfg->fc_dst_len,
1676                          &cfg->fc_src, cfg->fc_src_len);
1677
1678         if (fn) {
1679                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1680                         if (cfg->fc_ifindex &&
1681                             (!rt->dst.dev ||
1682                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1683                                 continue;
1684                         if (cfg->fc_flags & RTF_GATEWAY &&
1685                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1686                                 continue;
1687                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1688                                 continue;
1689                         dst_hold(&rt->dst);
1690                         read_unlock_bh(&table->tb6_lock);
1691
1692                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1693                 }
1694         }
1695         read_unlock_bh(&table->tb6_lock);
1696
1697         return err;
1698 }
1699
1700 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1701 {
1702         struct net *net = dev_net(skb->dev);
1703         struct netevent_redirect netevent;
1704         struct rt6_info *rt, *nrt = NULL;
1705         struct ndisc_options ndopts;
1706         struct neighbour *old_neigh;
1707         struct inet6_dev *in6_dev;
1708         struct neighbour *neigh;
1709         struct rd_msg *msg;
1710         int optlen, on_link;
1711         u8 *lladdr;
1712
1713         optlen = skb->tail - skb->transport_header;
1714         optlen -= sizeof(*msg);
1715
1716         if (optlen < 0) {
1717                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1718                 return;
1719         }
1720
1721         msg = (struct rd_msg *)icmp6_hdr(skb);
1722
1723         if (ipv6_addr_is_multicast(&msg->dest)) {
1724                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1725                 return;
1726         }
1727
1728         on_link = 0;
1729         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1730                 on_link = 1;
1731         } else if (ipv6_addr_type(&msg->target) !=
1732                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1733                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1734                 return;
1735         }
1736
1737         in6_dev = __in6_dev_get(skb->dev);
1738         if (!in6_dev)
1739                 return;
1740         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1741                 return;
1742
1743         /* RFC2461 8.1:
1744          *      The IP source address of the Redirect MUST be the same as the current
1745          *      first-hop router for the specified ICMP Destination Address.
1746          */
1747
1748         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1749                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1750                 return;
1751         }
1752
1753         lladdr = NULL;
1754         if (ndopts.nd_opts_tgt_lladdr) {
1755                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1756                                              skb->dev);
1757                 if (!lladdr) {
1758                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1759                         return;
1760                 }
1761         }
1762
1763         rt = (struct rt6_info *) dst;
1764         if (rt == net->ipv6.ip6_null_entry) {
1765                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1766                 return;
1767         }
1768
1769         /* Redirect received -> path was valid.
1770          * Look, redirects are sent only in response to data packets,
1771          * so that this nexthop apparently is reachable. --ANK
1772          */
1773         dst_confirm(&rt->dst);
1774
1775         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1776         if (!neigh)
1777                 return;
1778
1779         /* Duplicate redirect: silently ignore. */
1780         old_neigh = rt->n;
1781         if (neigh == old_neigh)
1782                 goto out;
1783
1784         /*
1785          *      We have finally decided to accept it.
1786          */
1787
1788         neigh_update(neigh, lladdr, NUD_STALE,
1789                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1790                      NEIGH_UPDATE_F_OVERRIDE|
1791                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1792                                      NEIGH_UPDATE_F_ISROUTER))
1793                      );
1794
1795         nrt = ip6_rt_copy(rt, &msg->dest);
1796         if (!nrt)
1797                 goto out;
1798
1799         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1800         if (on_link)
1801                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1802
1803         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1804         nrt->n = neigh_clone(neigh);
1805
1806         if (ip6_ins_rt(nrt))
1807                 goto out;
1808
1809         netevent.old = &rt->dst;
1810         netevent.new = &nrt->dst;
1811         netevent.daddr = &msg->dest;
1812         netevent.neigh = neigh;
1813         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1814
1815         if (rt->rt6i_flags & RTF_CACHE) {
1816                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1817                 ip6_del_rt(rt);
1818         }
1819
1820 out:
1821         neigh_release(neigh);
1822 }
1823
1824 /*
1825  *      Misc support functions
1826  */
1827
1828 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1829                                     const struct in6_addr *dest)
1830 {
1831         struct net *net = dev_net(ort->dst.dev);
1832         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1833                                             ort->rt6i_table);
1834
1835         if (rt) {
1836                 rt->dst.input = ort->dst.input;
1837                 rt->dst.output = ort->dst.output;
1838                 rt->dst.flags |= DST_HOST;
1839
1840                 rt->rt6i_dst.addr = *dest;
1841                 rt->rt6i_dst.plen = 128;
1842                 dst_copy_metrics(&rt->dst, &ort->dst);
1843                 rt->dst.error = ort->dst.error;
1844                 rt->rt6i_idev = ort->rt6i_idev;
1845                 if (rt->rt6i_idev)
1846                         in6_dev_hold(rt->rt6i_idev);
1847                 rt->dst.lastuse = jiffies;
1848
1849                 rt->rt6i_gateway = ort->rt6i_gateway;
1850                 rt->rt6i_flags = ort->rt6i_flags;
1851                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1852                     (RTF_DEFAULT | RTF_ADDRCONF))
1853                         rt6_set_from(rt, ort);
1854                 else
1855                         rt6_clean_expires(rt);
1856                 rt->rt6i_metric = 0;
1857
1858 #ifdef CONFIG_IPV6_SUBTREES
1859                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1860 #endif
1861                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1862                 rt->rt6i_table = ort->rt6i_table;
1863         }
1864         return rt;
1865 }
1866
1867 #ifdef CONFIG_IPV6_ROUTE_INFO
1868 static struct rt6_info *rt6_get_route_info(struct net *net,
1869                                            const struct in6_addr *prefix, int prefixlen,
1870                                            const struct in6_addr *gwaddr, int ifindex)
1871 {
1872         struct fib6_node *fn;
1873         struct rt6_info *rt = NULL;
1874         struct fib6_table *table;
1875
1876         table = fib6_get_table(net, RT6_TABLE_INFO);
1877         if (!table)
1878                 return NULL;
1879
1880         read_lock_bh(&table->tb6_lock);
1881         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1882         if (!fn)
1883                 goto out;
1884
1885         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1886                 if (rt->dst.dev->ifindex != ifindex)
1887                         continue;
1888                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1889                         continue;
1890                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1891                         continue;
1892                 dst_hold(&rt->dst);
1893                 break;
1894         }
1895 out:
1896         read_unlock_bh(&table->tb6_lock);
1897         return rt;
1898 }
1899
1900 static struct rt6_info *rt6_add_route_info(struct net *net,
1901                                            const struct in6_addr *prefix, int prefixlen,
1902                                            const struct in6_addr *gwaddr, int ifindex,
1903                                            unsigned int pref)
1904 {
1905         struct fib6_config cfg = {
1906                 .fc_table       = RT6_TABLE_INFO,
1907                 .fc_metric      = IP6_RT_PRIO_USER,
1908                 .fc_ifindex     = ifindex,
1909                 .fc_dst_len     = prefixlen,
1910                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1911                                   RTF_UP | RTF_PREF(pref),
1912                 .fc_nlinfo.portid = 0,
1913                 .fc_nlinfo.nlh = NULL,
1914                 .fc_nlinfo.nl_net = net,
1915         };
1916
1917         cfg.fc_dst = *prefix;
1918         cfg.fc_gateway = *gwaddr;
1919
1920         /* We should treat it as a default route if prefix length is 0. */
1921         if (!prefixlen)
1922                 cfg.fc_flags |= RTF_DEFAULT;
1923
1924         ip6_route_add(&cfg);
1925
1926         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1927 }
1928 #endif
1929
1930 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1931 {
1932         struct rt6_info *rt;
1933         struct fib6_table *table;
1934
1935         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1936         if (!table)
1937                 return NULL;
1938
1939         read_lock_bh(&table->tb6_lock);
1940         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1941                 if (dev == rt->dst.dev &&
1942                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1943                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1944                         break;
1945         }
1946         if (rt)
1947                 dst_hold(&rt->dst);
1948         read_unlock_bh(&table->tb6_lock);
1949         return rt;
1950 }
1951
1952 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1953                                      struct net_device *dev,
1954                                      unsigned int pref)
1955 {
1956         struct fib6_config cfg = {
1957                 .fc_table       = RT6_TABLE_DFLT,
1958                 .fc_metric      = IP6_RT_PRIO_USER,
1959                 .fc_ifindex     = dev->ifindex,
1960                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1961                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1962                 .fc_nlinfo.portid = 0,
1963                 .fc_nlinfo.nlh = NULL,
1964                 .fc_nlinfo.nl_net = dev_net(dev),
1965         };
1966
1967         cfg.fc_gateway = *gwaddr;
1968
1969         ip6_route_add(&cfg);
1970
1971         return rt6_get_dflt_router(gwaddr, dev);
1972 }
1973
1974 void rt6_purge_dflt_routers(struct net *net)
1975 {
1976         struct rt6_info *rt;
1977         struct fib6_table *table;
1978
1979         /* NOTE: Keep consistent with rt6_get_dflt_router */
1980         table = fib6_get_table(net, RT6_TABLE_DFLT);
1981         if (!table)
1982                 return;
1983
1984 restart:
1985         read_lock_bh(&table->tb6_lock);
1986         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1987                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1988                         dst_hold(&rt->dst);
1989                         read_unlock_bh(&table->tb6_lock);
1990                         ip6_del_rt(rt);
1991                         goto restart;
1992                 }
1993         }
1994         read_unlock_bh(&table->tb6_lock);
1995 }
1996
1997 static void rtmsg_to_fib6_config(struct net *net,
1998                                  struct in6_rtmsg *rtmsg,
1999                                  struct fib6_config *cfg)
2000 {
2001         memset(cfg, 0, sizeof(*cfg));
2002
2003         cfg->fc_table = RT6_TABLE_MAIN;
2004         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2005         cfg->fc_metric = rtmsg->rtmsg_metric;
2006         cfg->fc_expires = rtmsg->rtmsg_info;
2007         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2008         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2009         cfg->fc_flags = rtmsg->rtmsg_flags;
2010
2011         cfg->fc_nlinfo.nl_net = net;
2012
2013         cfg->fc_dst = rtmsg->rtmsg_dst;
2014         cfg->fc_src = rtmsg->rtmsg_src;
2015         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2016 }
2017
2018 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2019 {
2020         struct fib6_config cfg;
2021         struct in6_rtmsg rtmsg;
2022         int err;
2023
2024         switch(cmd) {
2025         case SIOCADDRT:         /* Add a route */
2026         case SIOCDELRT:         /* Delete a route */
2027                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2028                         return -EPERM;
2029                 err = copy_from_user(&rtmsg, arg,
2030                                      sizeof(struct in6_rtmsg));
2031                 if (err)
2032                         return -EFAULT;
2033
2034                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2035
2036                 rtnl_lock();
2037                 switch (cmd) {
2038                 case SIOCADDRT:
2039                         err = ip6_route_add(&cfg);
2040                         break;
2041                 case SIOCDELRT:
2042                         err = ip6_route_del(&cfg);
2043                         break;
2044                 default:
2045                         err = -EINVAL;
2046                 }
2047                 rtnl_unlock();
2048
2049                 return err;
2050         }
2051
2052         return -EINVAL;
2053 }
2054
2055 /*
2056  *      Drop the packet on the floor
2057  */
2058
2059 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2060 {
2061         int type;
2062         struct dst_entry *dst = skb_dst(skb);
2063         switch (ipstats_mib_noroutes) {
2064         case IPSTATS_MIB_INNOROUTES:
2065                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2066                 if (type == IPV6_ADDR_ANY) {
2067                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2068                                       IPSTATS_MIB_INADDRERRORS);
2069                         break;
2070                 }
2071                 /* FALLTHROUGH */
2072         case IPSTATS_MIB_OUTNOROUTES:
2073                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2074                               ipstats_mib_noroutes);
2075                 break;
2076         }
2077         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2078         kfree_skb(skb);
2079         return 0;
2080 }
2081
2082 static int ip6_pkt_discard(struct sk_buff *skb)
2083 {
2084         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2085 }
2086
2087 static int ip6_pkt_discard_out(struct sk_buff *skb)
2088 {
2089         skb->dev = skb_dst(skb)->dev;
2090         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2091 }
2092
2093 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2094
2095 static int ip6_pkt_prohibit(struct sk_buff *skb)
2096 {
2097         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2098 }
2099
2100 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2101 {
2102         skb->dev = skb_dst(skb)->dev;
2103         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2104 }
2105
2106 #endif
2107
2108 /*
2109  *      Allocate a dst for local (unicast / anycast) address.
2110  */
2111
2112 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2113                                     const struct in6_addr *addr,
2114                                     bool anycast)
2115 {
2116         struct net *net = dev_net(idev->dev);
2117         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2118         int err;
2119
2120         if (!rt) {
2121                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2122                 return ERR_PTR(-ENOMEM);
2123         }
2124
2125         in6_dev_hold(idev);
2126
2127         rt->dst.flags |= DST_HOST;
2128         rt->dst.input = ip6_input;
2129         rt->dst.output = ip6_output;
2130         rt->rt6i_idev = idev;
2131
2132         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2133         if (anycast)
2134                 rt->rt6i_flags |= RTF_ANYCAST;
2135         else
2136                 rt->rt6i_flags |= RTF_LOCAL;
2137         err = rt6_bind_neighbour(rt, rt->dst.dev);
2138         if (err) {
2139                 dst_free(&rt->dst);
2140                 return ERR_PTR(err);
2141         }
2142
2143         rt->rt6i_dst.addr = *addr;
2144         rt->rt6i_dst.plen = 128;
2145         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2146
2147         atomic_set(&rt->dst.__refcnt, 1);
2148
2149         return rt;
2150 }
2151
2152 int ip6_route_get_saddr(struct net *net,
2153                         struct rt6_info *rt,
2154                         const struct in6_addr *daddr,
2155                         unsigned int prefs,
2156                         struct in6_addr *saddr)
2157 {
2158         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2159         int err = 0;
2160         if (rt->rt6i_prefsrc.plen)
2161                 *saddr = rt->rt6i_prefsrc.addr;
2162         else
2163                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2164                                          daddr, prefs, saddr);
2165         return err;
2166 }
2167
2168 /* remove deleted ip from prefsrc entries */
2169 struct arg_dev_net_ip {
2170         struct net_device *dev;
2171         struct net *net;
2172         struct in6_addr *addr;
2173 };
2174
2175 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2176 {
2177         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2178         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2179         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2180
2181         if (((void *)rt->dst.dev == dev || !dev) &&
2182             rt != net->ipv6.ip6_null_entry &&
2183             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2184                 /* remove prefsrc entry */
2185                 rt->rt6i_prefsrc.plen = 0;
2186         }
2187         return 0;
2188 }
2189
2190 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2191 {
2192         struct net *net = dev_net(ifp->idev->dev);
2193         struct arg_dev_net_ip adni = {
2194                 .dev = ifp->idev->dev,
2195                 .net = net,
2196                 .addr = &ifp->addr,
2197         };
2198         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2199 }
2200
2201 struct arg_dev_net {
2202         struct net_device *dev;
2203         struct net *net;
2204 };
2205
2206 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2207 {
2208         const struct arg_dev_net *adn = arg;
2209         const struct net_device *dev = adn->dev;
2210
2211         if ((rt->dst.dev == dev || !dev) &&
2212             rt != adn->net->ipv6.ip6_null_entry)
2213                 return -1;
2214
2215         return 0;
2216 }
2217
2218 void rt6_ifdown(struct net *net, struct net_device *dev)
2219 {
2220         struct arg_dev_net adn = {
2221                 .dev = dev,
2222                 .net = net,
2223         };
2224
2225         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2226         icmp6_clean_all(fib6_ifdown, &adn);
2227 }
2228
2229 struct rt6_mtu_change_arg {
2230         struct net_device *dev;
2231         unsigned int mtu;
2232 };
2233
2234 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2235 {
2236         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2237         struct inet6_dev *idev;
2238
2239         /* In IPv6 pmtu discovery is not optional,
2240            so that RTAX_MTU lock cannot disable it.
2241            We still use this lock to block changes
2242            caused by addrconf/ndisc.
2243         */
2244
2245         idev = __in6_dev_get(arg->dev);
2246         if (!idev)
2247                 return 0;
2248
2249         /* For administrative MTU increase, there is no way to discover
2250            IPv6 PMTU increase, so PMTU increase should be updated here.
2251            Since RFC 1981 doesn't include administrative MTU increase
2252            update PMTU increase is a MUST. (i.e. jumbo frame)
2253          */
2254         /*
2255            If new MTU is less than route PMTU, this new MTU will be the
2256            lowest MTU in the path, update the route PMTU to reflect PMTU
2257            decreases; if new MTU is greater than route PMTU, and the
2258            old MTU is the lowest MTU in the path, update the route PMTU
2259            to reflect the increase. In this case if the other nodes' MTU
2260            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2261            PMTU discouvery.
2262          */
2263         if (rt->dst.dev == arg->dev &&
2264             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2265             (dst_mtu(&rt->dst) >= arg->mtu ||
2266              (dst_mtu(&rt->dst) < arg->mtu &&
2267               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2268                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2269         }
2270         return 0;
2271 }
2272
2273 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2274 {
2275         struct rt6_mtu_change_arg arg = {
2276                 .dev = dev,
2277                 .mtu = mtu,
2278         };
2279
2280         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2281 }
2282
2283 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2284         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2285         [RTA_OIF]               = { .type = NLA_U32 },
2286         [RTA_IIF]               = { .type = NLA_U32 },
2287         [RTA_PRIORITY]          = { .type = NLA_U32 },
2288         [RTA_METRICS]           = { .type = NLA_NESTED },
2289         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2290 };
2291
2292 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2293                               struct fib6_config *cfg)
2294 {
2295         struct rtmsg *rtm;
2296         struct nlattr *tb[RTA_MAX+1];
2297         int err;
2298
2299         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2300         if (err < 0)
2301                 goto errout;
2302
2303         err = -EINVAL;
2304         rtm = nlmsg_data(nlh);
2305         memset(cfg, 0, sizeof(*cfg));
2306
2307         cfg->fc_table = rtm->rtm_table;
2308         cfg->fc_dst_len = rtm->rtm_dst_len;
2309         cfg->fc_src_len = rtm->rtm_src_len;
2310         cfg->fc_flags = RTF_UP;
2311         cfg->fc_protocol = rtm->rtm_protocol;
2312         cfg->fc_type = rtm->rtm_type;
2313
2314         if (rtm->rtm_type == RTN_UNREACHABLE ||
2315             rtm->rtm_type == RTN_BLACKHOLE ||
2316             rtm->rtm_type == RTN_PROHIBIT ||
2317             rtm->rtm_type == RTN_THROW)
2318                 cfg->fc_flags |= RTF_REJECT;
2319
2320         if (rtm->rtm_type == RTN_LOCAL)
2321                 cfg->fc_flags |= RTF_LOCAL;
2322
2323         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2324         cfg->fc_nlinfo.nlh = nlh;
2325         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2326
2327         if (tb[RTA_GATEWAY]) {
2328                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2329                 cfg->fc_flags |= RTF_GATEWAY;
2330         }
2331
2332         if (tb[RTA_DST]) {
2333                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2334
2335                 if (nla_len(tb[RTA_DST]) < plen)
2336                         goto errout;
2337
2338                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2339         }
2340
2341         if (tb[RTA_SRC]) {
2342                 int plen = (rtm->rtm_src_len + 7) >> 3;
2343
2344                 if (nla_len(tb[RTA_SRC]) < plen)
2345                         goto errout;
2346
2347                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2348         }
2349
2350         if (tb[RTA_PREFSRC])
2351                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2352
2353         if (tb[RTA_OIF])
2354                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2355
2356         if (tb[RTA_PRIORITY])
2357                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2358
2359         if (tb[RTA_METRICS]) {
2360                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2361                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2362         }
2363
2364         if (tb[RTA_TABLE])
2365                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2366
2367         if (tb[RTA_MULTIPATH]) {
2368                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2369                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2370         }
2371
2372         err = 0;
2373 errout:
2374         return err;
2375 }
2376
2377 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2378 {
2379         struct fib6_config r_cfg;
2380         struct rtnexthop *rtnh;
2381         int remaining;
2382         int attrlen;
2383         int err = 0, last_err = 0;
2384
2385 beginning:
2386         rtnh = (struct rtnexthop *)cfg->fc_mp;
2387         remaining = cfg->fc_mp_len;
2388
2389         /* Parse a Multipath Entry */
2390         while (rtnh_ok(rtnh, remaining)) {
2391                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2392                 if (rtnh->rtnh_ifindex)
2393                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2394
2395                 attrlen = rtnh_attrlen(rtnh);
2396                 if (attrlen > 0) {
2397                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2398
2399                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2400                         if (nla) {
2401                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2402                                 r_cfg.fc_flags |= RTF_GATEWAY;
2403                         }
2404                 }
2405                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2406                 if (err) {
2407                         last_err = err;
2408                         /* If we are trying to remove a route, do not stop the
2409                          * loop when ip6_route_del() fails (because next hop is
2410                          * already gone), we should try to remove all next hops.
2411                          */
2412                         if (add) {
2413                                 /* If add fails, we should try to delete all
2414                                  * next hops that have been already added.
2415                                  */
2416                                 add = 0;
2417                                 goto beginning;
2418                         }
2419                 }
2420                 /* Because each route is added like a single route we remove
2421                  * this flag after the first nexthop (if there is a collision,
2422                  * we have already fail to add the first nexthop:
2423                  * fib6_add_rt2node() has reject it).
2424                  */
2425                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2426                 rtnh = rtnh_next(rtnh, &remaining);
2427         }
2428
2429         return last_err;
2430 }
2431
2432 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2433 {
2434         struct fib6_config cfg;
2435         int err;
2436
2437         err = rtm_to_fib6_config(skb, nlh, &cfg);
2438         if (err < 0)
2439                 return err;
2440
2441         if (cfg.fc_mp)
2442                 return ip6_route_multipath(&cfg, 0);
2443         else
2444                 return ip6_route_del(&cfg);
2445 }
2446
2447 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2448 {
2449         struct fib6_config cfg;
2450         int err;
2451
2452         err = rtm_to_fib6_config(skb, nlh, &cfg);
2453         if (err < 0)
2454                 return err;
2455
2456         if (cfg.fc_mp)
2457                 return ip6_route_multipath(&cfg, 1);
2458         else
2459                 return ip6_route_add(&cfg);
2460 }
2461
2462 static inline size_t rt6_nlmsg_size(void)
2463 {
2464         return NLMSG_ALIGN(sizeof(struct rtmsg))
2465                + nla_total_size(16) /* RTA_SRC */
2466                + nla_total_size(16) /* RTA_DST */
2467                + nla_total_size(16) /* RTA_GATEWAY */
2468                + nla_total_size(16) /* RTA_PREFSRC */
2469                + nla_total_size(4) /* RTA_TABLE */
2470                + nla_total_size(4) /* RTA_IIF */
2471                + nla_total_size(4) /* RTA_OIF */
2472                + nla_total_size(4) /* RTA_PRIORITY */
2473                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2474                + nla_total_size(sizeof(struct rta_cacheinfo));
2475 }
2476
2477 static int rt6_fill_node(struct net *net,
2478                          struct sk_buff *skb, struct rt6_info *rt,
2479                          struct in6_addr *dst, struct in6_addr *src,
2480                          int iif, int type, u32 portid, u32 seq,
2481                          int prefix, int nowait, unsigned int flags)
2482 {
2483         struct rtmsg *rtm;
2484         struct nlmsghdr *nlh;
2485         long expires;
2486         u32 table;
2487         struct neighbour *n;
2488
2489         if (prefix) {   /* user wants prefix routes only */
2490                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2491                         /* success since this is not a prefix route */
2492                         return 1;
2493                 }
2494         }
2495
2496         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2497         if (!nlh)
2498                 return -EMSGSIZE;
2499
2500         rtm = nlmsg_data(nlh);
2501         rtm->rtm_family = AF_INET6;
2502         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2503         rtm->rtm_src_len = rt->rt6i_src.plen;
2504         rtm->rtm_tos = 0;
2505         if (rt->rt6i_table)
2506                 table = rt->rt6i_table->tb6_id;
2507         else
2508                 table = RT6_TABLE_UNSPEC;
2509         rtm->rtm_table = table;
2510         if (nla_put_u32(skb, RTA_TABLE, table))
2511                 goto nla_put_failure;
2512         if (rt->rt6i_flags & RTF_REJECT) {
2513                 switch (rt->dst.error) {
2514                 case -EINVAL:
2515                         rtm->rtm_type = RTN_BLACKHOLE;
2516                         break;
2517                 case -EACCES:
2518                         rtm->rtm_type = RTN_PROHIBIT;
2519                         break;
2520                 case -EAGAIN:
2521                         rtm->rtm_type = RTN_THROW;
2522                         break;
2523                 default:
2524                         rtm->rtm_type = RTN_UNREACHABLE;
2525                         break;
2526                 }
2527         }
2528         else if (rt->rt6i_flags & RTF_LOCAL)
2529                 rtm->rtm_type = RTN_LOCAL;
2530         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2531                 rtm->rtm_type = RTN_LOCAL;
2532         else
2533                 rtm->rtm_type = RTN_UNICAST;
2534         rtm->rtm_flags = 0;
2535         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2536         rtm->rtm_protocol = rt->rt6i_protocol;
2537         if (rt->rt6i_flags & RTF_DYNAMIC)
2538                 rtm->rtm_protocol = RTPROT_REDIRECT;
2539         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2540                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2541                         rtm->rtm_protocol = RTPROT_RA;
2542                 else
2543                         rtm->rtm_protocol = RTPROT_KERNEL;
2544         }
2545
2546         if (rt->rt6i_flags & RTF_CACHE)
2547                 rtm->rtm_flags |= RTM_F_CLONED;
2548
2549         if (dst) {
2550                 if (nla_put(skb, RTA_DST, 16, dst))
2551                         goto nla_put_failure;
2552                 rtm->rtm_dst_len = 128;
2553         } else if (rtm->rtm_dst_len)
2554                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2555                         goto nla_put_failure;
2556 #ifdef CONFIG_IPV6_SUBTREES
2557         if (src) {
2558                 if (nla_put(skb, RTA_SRC, 16, src))
2559                         goto nla_put_failure;
2560                 rtm->rtm_src_len = 128;
2561         } else if (rtm->rtm_src_len &&
2562                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2563                 goto nla_put_failure;
2564 #endif
2565         if (iif) {
2566 #ifdef CONFIG_IPV6_MROUTE
2567                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2568                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2569                         if (err <= 0) {
2570                                 if (!nowait) {
2571                                         if (err == 0)
2572                                                 return 0;
2573                                         goto nla_put_failure;
2574                                 } else {
2575                                         if (err == -EMSGSIZE)
2576                                                 goto nla_put_failure;
2577                                 }
2578                         }
2579                 } else
2580 #endif
2581                         if (nla_put_u32(skb, RTA_IIF, iif))
2582                                 goto nla_put_failure;
2583         } else if (dst) {
2584                 struct in6_addr saddr_buf;
2585                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2586                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2587                         goto nla_put_failure;
2588         }
2589
2590         if (rt->rt6i_prefsrc.plen) {
2591                 struct in6_addr saddr_buf;
2592                 saddr_buf = rt->rt6i_prefsrc.addr;
2593                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2594                         goto nla_put_failure;
2595         }
2596
2597         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2598                 goto nla_put_failure;
2599
2600         n = rt->n;
2601         if (n) {
2602                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2603                         goto nla_put_failure;
2604         }
2605
2606         if (rt->dst.dev &&
2607             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2608                 goto nla_put_failure;
2609         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2610                 goto nla_put_failure;
2611
2612         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2613
2614         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2615                 goto nla_put_failure;
2616
2617         return nlmsg_end(skb, nlh);
2618
2619 nla_put_failure:
2620         nlmsg_cancel(skb, nlh);
2621         return -EMSGSIZE;
2622 }
2623
2624 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2625 {
2626         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2627         int prefix;
2628
2629         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2630                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2631                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2632         } else
2633                 prefix = 0;
2634
2635         return rt6_fill_node(arg->net,
2636                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2637                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2638                      prefix, 0, NLM_F_MULTI);
2639 }
2640
2641 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2642 {
2643         struct net *net = sock_net(in_skb->sk);
2644         struct nlattr *tb[RTA_MAX+1];
2645         struct rt6_info *rt;
2646         struct sk_buff *skb;
2647         struct rtmsg *rtm;
2648         struct flowi6 fl6;
2649         int err, iif = 0, oif = 0;
2650
2651         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2652         if (err < 0)
2653                 goto errout;
2654
2655         err = -EINVAL;
2656         memset(&fl6, 0, sizeof(fl6));
2657
2658         if (tb[RTA_SRC]) {
2659                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2660                         goto errout;
2661
2662                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2663         }
2664
2665         if (tb[RTA_DST]) {
2666                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2667                         goto errout;
2668
2669                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2670         }
2671
2672         if (tb[RTA_IIF])
2673                 iif = nla_get_u32(tb[RTA_IIF]);
2674
2675         if (tb[RTA_OIF])
2676                 oif = nla_get_u32(tb[RTA_OIF]);
2677
2678         if (iif) {
2679                 struct net_device *dev;
2680                 int flags = 0;
2681
2682                 dev = __dev_get_by_index(net, iif);
2683                 if (!dev) {
2684                         err = -ENODEV;
2685                         goto errout;
2686                 }
2687
2688                 fl6.flowi6_iif = iif;
2689
2690                 if (!ipv6_addr_any(&fl6.saddr))
2691                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2692
2693                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2694                                                                flags);
2695         } else {
2696                 fl6.flowi6_oif = oif;
2697
2698                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2699         }
2700
2701         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2702         if (!skb) {
2703                 ip6_rt_put(rt);
2704                 err = -ENOBUFS;
2705                 goto errout;
2706         }
2707
2708         /* Reserve room for dummy headers, this skb can pass
2709            through good chunk of routing engine.
2710          */
2711         skb_reset_mac_header(skb);
2712         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2713
2714         skb_dst_set(skb, &rt->dst);
2715
2716         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2717                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2718                             nlh->nlmsg_seq, 0, 0, 0);
2719         if (err < 0) {
2720                 kfree_skb(skb);
2721                 goto errout;
2722         }
2723
2724         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2725 errout:
2726         return err;
2727 }
2728
2729 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2730 {
2731         struct sk_buff *skb;
2732         struct net *net = info->nl_net;
2733         u32 seq;
2734         int err;
2735
2736         err = -ENOBUFS;
2737         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2738
2739         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2740         if (!skb)
2741                 goto errout;
2742
2743         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2744                                 event, info->portid, seq, 0, 0, 0);
2745         if (err < 0) {
2746                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2747                 WARN_ON(err == -EMSGSIZE);
2748                 kfree_skb(skb);
2749                 goto errout;
2750         }
2751         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2752                     info->nlh, gfp_any());
2753         return;
2754 errout:
2755         if (err < 0)
2756                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2757 }
2758
2759 static int ip6_route_dev_notify(struct notifier_block *this,
2760                                 unsigned long event, void *data)
2761 {
2762         struct net_device *dev = (struct net_device *)data;
2763         struct net *net = dev_net(dev);
2764
2765         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2766                 net->ipv6.ip6_null_entry->dst.dev = dev;
2767                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2768 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2769                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2770                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2771                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2772                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2773 #endif
2774         }
2775
2776         return NOTIFY_OK;
2777 }
2778
2779 /*
2780  *      /proc
2781  */
2782
2783 #ifdef CONFIG_PROC_FS
2784
/*
 * Buffer bookkeeping for a /proc route dump.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the seq_file based handlers below do not reference it.  Confirm
 * whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2793
2794 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2795 {
2796         struct seq_file *m = p_arg;
2797         struct neighbour *n;
2798
2799         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2800
2801 #ifdef CONFIG_IPV6_SUBTREES
2802         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2803 #else
2804         seq_puts(m, "00000000000000000000000000000000 00 ");
2805 #endif
2806         n = rt->n;
2807         if (n) {
2808                 seq_printf(m, "%pi6", n->primary_key);
2809         } else {
2810                 seq_puts(m, "00000000000000000000000000000000");
2811         }
2812         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2813                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2814                    rt->dst.__use, rt->rt6i_flags,
2815                    rt->dst.dev ? rt->dst.dev->name : "");
2816         return 0;
2817 }
2818
2819 static int ipv6_route_show(struct seq_file *m, void *v)
2820 {
2821         struct net *net = (struct net *)m->private;
2822         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2823         return 0;
2824 }
2825
2826 static int ipv6_route_open(struct inode *inode, struct file *file)
2827 {
2828         return single_open_net(inode, file, ipv6_route_show);
2829 }
2830
2831 static const struct file_operations ipv6_route_proc_fops = {
2832         .owner          = THIS_MODULE,
2833         .open           = ipv6_route_open,
2834         .read           = seq_read,
2835         .llseek         = seq_lseek,
2836         .release        = single_release_net,
2837 };
2838
2839 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2840 {
2841         struct net *net = (struct net *)seq->private;
2842         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2843                    net->ipv6.rt6_stats->fib_nodes,
2844                    net->ipv6.rt6_stats->fib_route_nodes,
2845                    net->ipv6.rt6_stats->fib_rt_alloc,
2846                    net->ipv6.rt6_stats->fib_rt_entries,
2847                    net->ipv6.rt6_stats->fib_rt_cache,
2848                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2849                    net->ipv6.rt6_stats->fib_discarded_routes);
2850
2851         return 0;
2852 }
2853
2854 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2855 {
2856         return single_open_net(inode, file, rt6_stats_seq_show);
2857 }
2858
2859 static const struct file_operations rt6_stats_seq_fops = {
2860         .owner   = THIS_MODULE,
2861         .open    = rt6_stats_seq_open,
2862         .read    = seq_read,
2863         .llseek  = seq_lseek,
2864         .release = single_release_net,
2865 };
2866 #endif  /* CONFIG_PROC_FS */
2867
2868 #ifdef CONFIG_SYSCTL
2869
2870 static
2871 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2872                               void __user *buffer, size_t *lenp, loff_t *ppos)
2873 {
2874         struct net *net;
2875         int delay;
2876         if (!write)
2877                 return -EINVAL;
2878
2879         net = (struct net *)ctl->extra1;
2880         delay = net->ipv6.sysctl.flush_delay;
2881         proc_dointvec(ctl, write, buffer, lenp, ppos);
2882         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2883         return 0;
2884 }
2885
2886 ctl_table ipv6_route_table_template[] = {
2887         {
2888                 .procname       =       "flush",
2889                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2890                 .maxlen         =       sizeof(int),
2891                 .mode           =       0200,
2892                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2893         },
2894         {
2895                 .procname       =       "gc_thresh",
2896                 .data           =       &ip6_dst_ops_template.gc_thresh,
2897                 .maxlen         =       sizeof(int),
2898                 .mode           =       0644,
2899                 .proc_handler   =       proc_dointvec,
2900         },
2901         {
2902                 .procname       =       "max_size",
2903                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2904                 .maxlen         =       sizeof(int),
2905                 .mode           =       0644,
2906                 .proc_handler   =       proc_dointvec,
2907         },
2908         {
2909                 .procname       =       "gc_min_interval",
2910                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2911                 .maxlen         =       sizeof(int),
2912                 .mode           =       0644,
2913                 .proc_handler   =       proc_dointvec_jiffies,
2914         },
2915         {
2916                 .procname       =       "gc_timeout",
2917                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2918                 .maxlen         =       sizeof(int),
2919                 .mode           =       0644,
2920                 .proc_handler   =       proc_dointvec_jiffies,
2921         },
2922         {
2923                 .procname       =       "gc_interval",
2924                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2925                 .maxlen         =       sizeof(int),
2926                 .mode           =       0644,
2927                 .proc_handler   =       proc_dointvec_jiffies,
2928         },
2929         {
2930                 .procname       =       "gc_elasticity",
2931                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2932                 .maxlen         =       sizeof(int),
2933                 .mode           =       0644,
2934                 .proc_handler   =       proc_dointvec,
2935         },
2936         {
2937                 .procname       =       "mtu_expires",
2938                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2939                 .maxlen         =       sizeof(int),
2940                 .mode           =       0644,
2941                 .proc_handler   =       proc_dointvec_jiffies,
2942         },
2943         {
2944                 .procname       =       "min_adv_mss",
2945                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2946                 .maxlen         =       sizeof(int),
2947                 .mode           =       0644,
2948                 .proc_handler   =       proc_dointvec,
2949         },
2950         {
2951                 .procname       =       "gc_min_interval_ms",
2952                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2953                 .maxlen         =       sizeof(int),
2954                 .mode           =       0644,
2955                 .proc_handler   =       proc_dointvec_ms_jiffies,
2956         },
2957         { }
2958 };
2959
2960 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2961 {
2962         struct ctl_table *table;
2963
2964         table = kmemdup(ipv6_route_table_template,
2965                         sizeof(ipv6_route_table_template),
2966                         GFP_KERNEL);
2967
2968         if (table) {
2969                 table[0].data = &net->ipv6.sysctl.flush_delay;
2970                 table[0].extra1 = net;
2971                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2972                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2973                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2974                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2975                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2976                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2977                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2978                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2979                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2980
2981                 /* Don't export sysctls to unprivileged users */
2982                 if (net->user_ns != &init_user_ns)
2983                         table[0].procname = NULL;
2984         }
2985
2986         return table;
2987 }
2988 #endif
2989
2990 static int __net_init ip6_route_net_init(struct net *net)
2991 {
2992         int ret = -ENOMEM;
2993
2994         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2995                sizeof(net->ipv6.ip6_dst_ops));
2996
2997         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2998                 goto out_ip6_dst_ops;
2999
3000         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3001                                            sizeof(*net->ipv6.ip6_null_entry),
3002                                            GFP_KERNEL);
3003         if (!net->ipv6.ip6_null_entry)
3004                 goto out_ip6_dst_entries;
3005         net->ipv6.ip6_null_entry->dst.path =
3006                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3007         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3008         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3009                          ip6_template_metrics, true);
3010
3011 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3012         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3013                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3014                                                GFP_KERNEL);
3015         if (!net->ipv6.ip6_prohibit_entry)
3016                 goto out_ip6_null_entry;
3017         net->ipv6.ip6_prohibit_entry->dst.path =
3018                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3019         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3020         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3021                          ip6_template_metrics, true);
3022
3023         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3024                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3025                                                GFP_KERNEL);
3026         if (!net->ipv6.ip6_blk_hole_entry)
3027                 goto out_ip6_prohibit_entry;
3028         net->ipv6.ip6_blk_hole_entry->dst.path =
3029                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3030         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3031         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3032                          ip6_template_metrics, true);
3033 #endif
3034
3035         net->ipv6.sysctl.flush_delay = 0;
3036         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3037         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3038         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3039         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3040         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3041         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3042         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3043
3044         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3045
3046         ret = 0;
3047 out:
3048         return ret;
3049
3050 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3051 out_ip6_prohibit_entry:
3052         kfree(net->ipv6.ip6_prohibit_entry);
3053 out_ip6_null_entry:
3054         kfree(net->ipv6.ip6_null_entry);
3055 #endif
3056 out_ip6_dst_entries:
3057         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3058 out_ip6_dst_ops:
3059         goto out;
3060 }
3061
3062 static void __net_exit ip6_route_net_exit(struct net *net)
3063 {
3064         kfree(net->ipv6.ip6_null_entry);
3065 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3066         kfree(net->ipv6.ip6_prohibit_entry);
3067         kfree(net->ipv6.ip6_blk_hole_entry);
3068 #endif
3069         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3070 }
3071
3072 static int __net_init ip6_route_net_init_late(struct net *net)
3073 {
3074 #ifdef CONFIG_PROC_FS
3075         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3076         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3077 #endif
3078         return 0;
3079 }
3080
3081 static void __net_exit ip6_route_net_exit_late(struct net *net)
3082 {
3083 #ifdef CONFIG_PROC_FS
3084         proc_net_remove(net, "ipv6_route");
3085         proc_net_remove(net, "rt6_stats");
3086 #endif
3087 }
3088
3089 static struct pernet_operations ip6_route_net_ops = {
3090         .init = ip6_route_net_init,
3091         .exit = ip6_route_net_exit,
3092 };
3093
3094 static int __net_init ipv6_inetpeer_init(struct net *net)
3095 {
3096         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3097
3098         if (!bp)
3099                 return -ENOMEM;
3100         inet_peer_base_init(bp);
3101         net->ipv6.peers = bp;
3102         return 0;
3103 }
3104
3105 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3106 {
3107         struct inet_peer_base *bp = net->ipv6.peers;
3108
3109         net->ipv6.peers = NULL;
3110         inetpeer_invalidate_tree(bp);
3111         kfree(bp);
3112 }
3113
3114 static struct pernet_operations ipv6_inetpeer_ops = {
3115         .init   =       ipv6_inetpeer_init,
3116         .exit   =       ipv6_inetpeer_exit,
3117 };
3118
3119 static struct pernet_operations ip6_route_net_late_ops = {
3120         .init = ip6_route_net_init_late,
3121         .exit = ip6_route_net_exit_late,
3122 };
3123
3124 static struct notifier_block ip6_route_dev_notifier = {
3125         .notifier_call = ip6_route_dev_notify,
3126         .priority = 0,
3127 };
3128
3129 int __init ip6_route_init(void)
3130 {
3131         int ret;
3132
3133         ret = -ENOMEM;
3134         ip6_dst_ops_template.kmem_cachep =
3135                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3136                                   SLAB_HWCACHE_ALIGN, NULL);
3137         if (!ip6_dst_ops_template.kmem_cachep)
3138                 goto out;
3139
3140         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3141         if (ret)
3142                 goto out_kmem_cache;
3143
3144         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3145         if (ret)
3146                 goto out_dst_entries;
3147
3148         ret = register_pernet_subsys(&ip6_route_net_ops);
3149         if (ret)
3150                 goto out_register_inetpeer;
3151
3152         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3153
3154         /* Registering of the loopback is done before this portion of code,
3155          * the loopback reference in rt6_info will not be taken, do it
3156          * manually for init_net */
3157         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3158         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3159   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3160         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3161         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3162         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3163         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3164   #endif
3165         ret = fib6_init();
3166         if (ret)
3167                 goto out_register_subsys;
3168
3169         ret = xfrm6_init();
3170         if (ret)
3171                 goto out_fib6_init;
3172
3173         ret = fib6_rules_init();
3174         if (ret)
3175                 goto xfrm6_init;
3176
3177         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3178         if (ret)
3179                 goto fib6_rules_init;
3180
3181         ret = -ENOBUFS;
3182         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3183             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3184             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3185                 goto out_register_late_subsys;
3186
3187         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3188         if (ret)
3189                 goto out_register_late_subsys;
3190
3191 out:
3192         return ret;
3193
3194 out_register_late_subsys:
3195         unregister_pernet_subsys(&ip6_route_net_late_ops);
3196 fib6_rules_init:
3197         fib6_rules_cleanup();
3198 xfrm6_init:
3199         xfrm6_fini();
3200 out_fib6_init:
3201         fib6_gc_cleanup();
3202 out_register_subsys:
3203         unregister_pernet_subsys(&ip6_route_net_ops);
3204 out_register_inetpeer:
3205         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3206 out_dst_entries:
3207         dst_entries_destroy(&ip6_dst_blackhole_ops);
3208 out_kmem_cache:
3209         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3210         goto out;
3211 }
3212
3213 void ip6_route_cleanup(void)
3214 {
3215         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3216         unregister_pernet_subsys(&ip6_route_net_late_ops);
3217         fib6_rules_cleanup();
3218         xfrm6_fini();
3219         fib6_gc_cleanup();
3220         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3221         unregister_pernet_subsys(&ip6_route_net_ops);
3222         dst_entries_destroy(&ip6_dst_blackhole_ops);
3223         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3224 }