6238eb5037a7d99c0fd23b71cfca8e14a3ef3f6b
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* dst_ops->redirect hook for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
                                      struct sock *sk,
                                      struct sk_buff *skb)
{
        /* intentionally empty */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= (__force u32)fl6->daddr.s6_addr32[0];
392         val ^= (__force u32)fl6->daddr.s6_addr32[1];
393         val ^= (__force u32)fl6->daddr.s6_addr32[2];
394         val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396         val ^= (__force u32)fl6->saddr.s6_addr32[0];
397         val ^= (__force u32)fl6->saddr.s6_addr32[1];
398         val ^= (__force u32)fl6->saddr.s6_addr32[2];
399         val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401         /* Work only if this not encapsulated */
402         switch (fl6->flowi6_proto) {
403         case IPPROTO_UDP:
404         case IPPROTO_TCP:
405         case IPPROTO_SCTP:
406                 val ^= (__force u16)fl6->fl6_sport;
407                 val ^= (__force u16)fl6->fl6_dport;
408                 break;
409
410         case IPPROTO_ICMPV6:
411                 val ^= (__force u16)fl6->fl6_icmp_type;
412                 val ^= (__force u16)fl6->fl6_icmp_code;
413                 break;
414         }
415         /* RFC6438 recommands to use flowlabel */
416         val ^= (__force u32)fl6->flowlabel;
417
418         /* Perhaps, we need to tune, this function? */
419         val = val ^ (val >> 7) ^ (val >> 12);
420         return val % candidate_count;
421 }
422
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424                                              struct flowi6 *fl6)
425 {
426         struct rt6_info *sibling, *next_sibling;
427         int route_choosen;
428
429         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430         /* Don't change the route, if route_choosen == 0
431          * (siblings does not include ourself)
432          */
433         if (route_choosen)
434                 list_for_each_entry_safe(sibling, next_sibling,
435                                 &match->rt6i_siblings, rt6i_siblings) {
436                         route_choosen--;
437                         if (route_choosen == 0) {
438                                 match = sibling;
439                                 break;
440                         }
441                 }
442         return match;
443 }
444
445 /*
446  *      Route lookup. Any table->tb6_lock is implied.
447  */
448
449 static inline struct rt6_info *rt6_device_match(struct net *net,
450                                                     struct rt6_info *rt,
451                                                     const struct in6_addr *saddr,
452                                                     int oif,
453                                                     int flags)
454 {
455         struct rt6_info *local = NULL;
456         struct rt6_info *sprt;
457
458         if (!oif && ipv6_addr_any(saddr))
459                 goto out;
460
461         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462                 struct net_device *dev = sprt->dst.dev;
463
464                 if (oif) {
465                         if (dev->ifindex == oif)
466                                 return sprt;
467                         if (dev->flags & IFF_LOOPBACK) {
468                                 if (!sprt->rt6i_idev ||
469                                     sprt->rt6i_idev->dev->ifindex != oif) {
470                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
471                                                 continue;
472                                         if (local && (!oif ||
473                                                       local->rt6i_idev->dev->ifindex == oif))
474                                                 continue;
475                                 }
476                                 local = sprt;
477                         }
478                 } else {
479                         if (ipv6_chk_addr(net, saddr, dev,
480                                           flags & RT6_LOOKUP_F_IFACE))
481                                 return sprt;
482                 }
483         }
484
485         if (oif) {
486                 if (local)
487                         return local;
488
489                 if (flags & RT6_LOOKUP_F_IFACE)
490                         return net->ipv6.ip6_null_entry;
491         }
492 out:
493         return rt;
494 }
495
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour bound to @rt is not in a NUD_VALID state and the
 * last update is older than the interface's rtr_probe_interval, send a
 * Neighbour Solicitation to the solicited-node multicast address of
 * the neighbour.  Router Reachability Probes MUST be rate-limited, so
 * neigh->updated is refreshed before the probe is sent.
 *
 * The timestamp test and update are done under the write side of
 * neigh->lock: neigh->updated is modified here, and holding only the
 * read lock would let concurrent callers all pass the rate-limit test
 * and each send a probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
        struct neighbour *neigh;
        struct in6_addr mcaddr;
        struct in6_addr *target;
        bool send_probe = false;

        neigh = rt ? rt->n : NULL;
        if (!neigh || (neigh->nud_state & NUD_VALID))
                return;

        write_lock_bh(&neigh->lock);
        if (!(neigh->nud_state & NUD_VALID) &&
            time_after(jiffies,
                       neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
                neigh->updated = jiffies;
                send_probe = true;
        }
        write_unlock_bh(&neigh->lock);

        if (!send_probe)
                return;

        /* The solicitation itself is sent without holding neigh->lock. */
        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probes are sent. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
533 /*
534  * Default Router Selection (RFC 2461 6.3.6)
535  */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537 {
538         struct net_device *dev = rt->dst.dev;
539         if (!oif || dev->ifindex == oif)
540                 return 2;
541         if ((dev->flags & IFF_LOOPBACK) &&
542             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543                 return 1;
544         return 0;
545 }
546
547 static inline bool rt6_check_neigh(struct rt6_info *rt)
548 {
549         struct neighbour *neigh;
550         bool ret = false;
551
552         neigh = rt->n;
553         if (rt->rt6i_flags & RTF_NONEXTHOP ||
554             !(rt->rt6i_flags & RTF_GATEWAY))
555                 ret = true;
556         else if (neigh) {
557                 read_lock_bh(&neigh->lock);
558                 if (neigh->nud_state & NUD_VALID)
559                         ret = true;
560 #ifdef CONFIG_IPV6_ROUTER_PREF
561                 else if (!(neigh->nud_state & NUD_FAILED))
562                         ret = true;
563 #endif
564                 read_unlock_bh(&neigh->lock);
565         }
566         return ret;
567 }
568
569 static int rt6_score_route(struct rt6_info *rt, int oif,
570                            int strict)
571 {
572         int m;
573
574         m = rt6_check_dev(rt, oif);
575         if (!m && (strict & RT6_LOOKUP_F_IFACE))
576                 return -1;
577 #ifdef CONFIG_IPV6_ROUTER_PREF
578         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
579 #endif
580         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
581                 return -1;
582         return m;
583 }
584
585 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
586                                    int *mpri, struct rt6_info *match)
587 {
588         int m;
589
590         if (rt6_check_expired(rt))
591                 goto out;
592
593         m = rt6_score_route(rt, oif, strict);
594         if (m < 0)
595                 goto out;
596
597         if (m > *mpri) {
598                 if (strict & RT6_LOOKUP_F_REACHABLE)
599                         rt6_probe(match);
600                 *mpri = m;
601                 match = rt;
602         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
603                 rt6_probe(rt);
604         }
605
606 out:
607         return match;
608 }
609
610 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
611                                      struct rt6_info *rr_head,
612                                      u32 metric, int oif, int strict)
613 {
614         struct rt6_info *rt, *match;
615         int mpri = -1;
616
617         match = NULL;
618         for (rt = rr_head; rt && rt->rt6i_metric == metric;
619              rt = rt->dst.rt6_next)
620                 match = find_match(rt, oif, strict, &mpri, match);
621         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
622              rt = rt->dst.rt6_next)
623                 match = find_match(rt, oif, strict, &mpri, match);
624
625         return match;
626 }
627
628 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
629 {
630         struct rt6_info *match, *rt0;
631         struct net *net;
632
633         rt0 = fn->rr_ptr;
634         if (!rt0)
635                 fn->rr_ptr = rt0 = fn->leaf;
636
637         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
638
639         if (!match &&
640             (strict & RT6_LOOKUP_F_REACHABLE)) {
641                 struct rt6_info *next = rt0->dst.rt6_next;
642
643                 /* no entries matched; do round-robin */
644                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
645                         next = fn->leaf;
646
647                 if (next != rt0)
648                         fn->rr_ptr = next;
649         }
650
651         net = dev_net(rt0->dst.dev);
652         return match ? match : net->ipv6.ip6_null_entry;
653 }
654
655 #ifdef CONFIG_IPV6_ROUTE_INFO
656 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
657                   const struct in6_addr *gwaddr)
658 {
659         struct net *net = dev_net(dev);
660         struct route_info *rinfo = (struct route_info *) opt;
661         struct in6_addr prefix_buf, *prefix;
662         unsigned int pref;
663         unsigned long lifetime;
664         struct rt6_info *rt;
665
666         if (len < sizeof(struct route_info)) {
667                 return -EINVAL;
668         }
669
670         /* Sanity check for prefix_len and length */
671         if (rinfo->length > 3) {
672                 return -EINVAL;
673         } else if (rinfo->prefix_len > 128) {
674                 return -EINVAL;
675         } else if (rinfo->prefix_len > 64) {
676                 if (rinfo->length < 2) {
677                         return -EINVAL;
678                 }
679         } else if (rinfo->prefix_len > 0) {
680                 if (rinfo->length < 1) {
681                         return -EINVAL;
682                 }
683         }
684
685         pref = rinfo->route_pref;
686         if (pref == ICMPV6_ROUTER_PREF_INVALID)
687                 return -EINVAL;
688
689         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
690
691         if (rinfo->length == 3)
692                 prefix = (struct in6_addr *)rinfo->prefix;
693         else {
694                 /* this function is safe */
695                 ipv6_addr_prefix(&prefix_buf,
696                                  (struct in6_addr *)rinfo->prefix,
697                                  rinfo->prefix_len);
698                 prefix = &prefix_buf;
699         }
700
701         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
702                                 dev->ifindex);
703
704         if (rt && !lifetime) {
705                 ip6_del_rt(rt);
706                 rt = NULL;
707         }
708
709         if (!rt && lifetime)
710                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
711                                         pref);
712         else if (rt)
713                 rt->rt6i_flags = RTF_ROUTEINFO |
714                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
715
716         if (rt) {
717                 if (!addrconf_finite_timeout(lifetime))
718                         rt6_clean_expires(rt);
719                 else
720                         rt6_set_expires(rt, jiffies + HZ * lifetime);
721
722                 ip6_rt_put(rt);
723         }
724         return 0;
725 }
726 #endif
727
/*
 * BACKTRACK() - step back up the FIB tree after a failed match.
 *
 * Does nothing unless the current result is the namespace's null
 * entry.  Otherwise it climbs from the current node towards the root,
 * descending into a parent's source subtree (looked up with saddr)
 * when there is one that the walk did not just come from.  At the
 * first node flagged RTN_RTINFO it jumps to `restart`; on reaching a
 * node flagged RTN_TL_ROOT it jumps to `out`.
 *
 * The expansion site must provide the locals `rt` (struct rt6_info *)
 * and `fn` (struct fib6_node *) and the labels `restart` and `out`.
 */
#define BACKTRACK(__net, saddr) \
do { \
        if (rt == (__net)->ipv6.ip6_null_entry) { \
                struct fib6_node *pn; \
                for (;;) { \
                        if (fn->fn_flags & RTN_TL_ROOT) \
                                goto out; \
                        pn = fn->parent; \
                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
                        else \
                                fn = pn; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
745
746 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
747                                              struct fib6_table *table,
748                                              struct flowi6 *fl6, int flags)
749 {
750         struct fib6_node *fn;
751         struct rt6_info *rt;
752
753         read_lock_bh(&table->tb6_lock);
754         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
755 restart:
756         rt = fn->leaf;
757         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
758         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
759                 rt = rt6_multipath_select(rt, fl6);
760         BACKTRACK(net, &fl6->saddr);
761 out:
762         dst_use(&rt->dst, jiffies);
763         read_unlock_bh(&table->tb6_lock);
764         return rt;
765
766 }
767
768 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
769                                     int flags)
770 {
771         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
772 }
773 EXPORT_SYMBOL_GPL(ip6_route_lookup);
774
775 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
776                             const struct in6_addr *saddr, int oif, int strict)
777 {
778         struct flowi6 fl6 = {
779                 .flowi6_oif = oif,
780                 .daddr = *daddr,
781         };
782         struct dst_entry *dst;
783         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
784
785         if (saddr) {
786                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
787                 flags |= RT6_LOOKUP_F_HAS_SADDR;
788         }
789
790         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
791         if (dst->error == 0)
792                 return (struct rt6_info *) dst;
793
794         dst_release(dst);
795
796         return NULL;
797 }
798
799 EXPORT_SYMBOL(rt6_lookup);
800
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to the route, it may be destroyed.
 */
806
807 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
808 {
809         int err;
810         struct fib6_table *table;
811
812         table = rt->rt6i_table;
813         write_lock_bh(&table->tb6_lock);
814         err = fib6_add(&table->tb6_root, rt, info);
815         write_unlock_bh(&table->tb6_lock);
816
817         return err;
818 }
819
820 int ip6_ins_rt(struct rt6_info *rt)
821 {
822         struct nl_info info = {
823                 .nl_net = dev_net(rt->dst.dev),
824         };
825         return __ip6_ins_rt(rt, &info);
826 }
827
828 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
829                                       const struct in6_addr *daddr,
830                                       const struct in6_addr *saddr)
831 {
832         struct rt6_info *rt;
833
834         /*
835          *      Clone the route.
836          */
837
838         rt = ip6_rt_copy(ort, daddr);
839
840         if (rt) {
841                 int attempts = !in_softirq();
842
843                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
844                         if (ort->rt6i_dst.plen != 128 &&
845                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
846                                 rt->rt6i_flags |= RTF_ANYCAST;
847                         rt->rt6i_gateway = *daddr;
848                 }
849
850                 rt->rt6i_flags |= RTF_CACHE;
851
852 #ifdef CONFIG_IPV6_SUBTREES
853                 if (rt->rt6i_src.plen && saddr) {
854                         rt->rt6i_src.addr = *saddr;
855                         rt->rt6i_src.plen = 128;
856                 }
857 #endif
858
859         retry:
860                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
861                         struct net *net = dev_net(rt->dst.dev);
862                         int saved_rt_min_interval =
863                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
864                         int saved_rt_elasticity =
865                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
866
867                         if (attempts-- > 0) {
868                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
869                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
870
871                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
872
873                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
874                                         saved_rt_elasticity;
875                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
876                                         saved_rt_min_interval;
877                                 goto retry;
878                         }
879
880                         net_warn_ratelimited("Neighbour table overflow\n");
881                         dst_free(&rt->dst);
882                         return NULL;
883                 }
884         }
885
886         return rt;
887 }
888
889 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
890                                         const struct in6_addr *daddr)
891 {
892         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
893
894         if (rt) {
895                 rt->rt6i_flags |= RTF_CACHE;
896                 rt->n = neigh_clone(ort->n);
897         }
898         return rt;
899 }
900
901 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
902                                       struct flowi6 *fl6, int flags)
903 {
904         struct fib6_node *fn;
905         struct rt6_info *rt, *nrt;
906         int strict = 0;
907         int attempts = 3;
908         int err;
909         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
910
911         strict |= flags & RT6_LOOKUP_F_IFACE;
912
913 relookup:
914         read_lock_bh(&table->tb6_lock);
915
916 restart_2:
917         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
918
919 restart:
920         rt = rt6_select(fn, oif, strict | reachable);
921         if (rt->rt6i_nsiblings && oif == 0)
922                 rt = rt6_multipath_select(rt, fl6);
923         BACKTRACK(net, &fl6->saddr);
924         if (rt == net->ipv6.ip6_null_entry ||
925             rt->rt6i_flags & RTF_CACHE)
926                 goto out;
927
928         dst_hold(&rt->dst);
929         read_unlock_bh(&table->tb6_lock);
930
931         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
932                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
933         else if (!(rt->dst.flags & DST_HOST))
934                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
935         else
936                 goto out2;
937
938         ip6_rt_put(rt);
939         rt = nrt ? : net->ipv6.ip6_null_entry;
940
941         dst_hold(&rt->dst);
942         if (nrt) {
943                 err = ip6_ins_rt(nrt);
944                 if (!err)
945                         goto out2;
946         }
947
948         if (--attempts <= 0)
949                 goto out2;
950
951         /*
952          * Race condition! In the gap, when table->tb6_lock was
953          * released someone could insert this route.  Relookup.
954          */
955         ip6_rt_put(rt);
956         goto relookup;
957
958 out:
959         if (reachable) {
960                 reachable = 0;
961                 goto restart_2;
962         }
963         dst_hold(&rt->dst);
964         read_unlock_bh(&table->tb6_lock);
965 out2:
966         rt->dst.lastuse = jiffies;
967         rt->dst.__use++;
968
969         return rt;
970 }
971
972 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
973                                             struct flowi6 *fl6, int flags)
974 {
975         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
976 }
977
978 static struct dst_entry *ip6_route_input_lookup(struct net *net,
979                                                 struct net_device *dev,
980                                                 struct flowi6 *fl6, int flags)
981 {
982         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
983                 flags |= RT6_LOOKUP_F_IFACE;
984
985         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
986 }
987
988 void ip6_route_input(struct sk_buff *skb)
989 {
990         const struct ipv6hdr *iph = ipv6_hdr(skb);
991         struct net *net = dev_net(skb->dev);
992         int flags = RT6_LOOKUP_F_HAS_SADDR;
993         struct flowi6 fl6 = {
994                 .flowi6_iif = skb->dev->ifindex,
995                 .daddr = iph->daddr,
996                 .saddr = iph->saddr,
997                 .flowlabel = ip6_flowinfo(iph),
998                 .flowi6_mark = skb->mark,
999                 .flowi6_proto = iph->nexthdr,
1000         };
1001
1002         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1003 }
1004
1005 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1006                                              struct flowi6 *fl6, int flags)
1007 {
1008         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1009 }
1010
1011 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1012                                     struct flowi6 *fl6)
1013 {
1014         int flags = 0;
1015
1016         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1017
1018         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1019                 flags |= RT6_LOOKUP_F_IFACE;
1020
1021         if (!ipv6_addr_any(&fl6->saddr))
1022                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1023         else if (sk)
1024                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1025
1026         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1027 }
1028
1029 EXPORT_SYMBOL(ip6_route_output);
1030
1031 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1032 {
1033         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1034         struct dst_entry *new = NULL;
1035
1036         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1037         if (rt) {
1038                 new = &rt->dst;
1039
1040                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1041                 rt6_init_peer(rt, net->ipv6.peers);
1042
1043                 new->__use = 1;
1044                 new->input = dst_discard;
1045                 new->output = dst_discard;
1046
1047                 if (dst_metrics_read_only(&ort->dst))
1048                         new->_metrics = ort->dst._metrics;
1049                 else
1050                         dst_copy_metrics(new, &ort->dst);
1051                 rt->rt6i_idev = ort->rt6i_idev;
1052                 if (rt->rt6i_idev)
1053                         in6_dev_hold(rt->rt6i_idev);
1054
1055                 rt->rt6i_gateway = ort->rt6i_gateway;
1056                 rt->rt6i_flags = ort->rt6i_flags;
1057                 rt6_clean_expires(rt);
1058                 rt->rt6i_metric = 0;
1059
1060                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1061 #ifdef CONFIG_IPV6_SUBTREES
1062                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1063 #endif
1064
1065                 dst_free(new);
1066         }
1067
1068         dst_release(dst_orig);
1069         return new ? new : ERR_PTR(-ENOMEM);
1070 }
1071
1072 /*
1073  *      Destination cache support functions
1074  */
1075
1076 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1077 {
1078         struct rt6_info *rt;
1079
1080         rt = (struct rt6_info *) dst;
1081
1082         /* All IPV6 dsts are created with ->obsolete set to the value
1083          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1084          * into this function always.
1085          */
1086         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1087                 return NULL;
1088
1089         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1090                 return dst;
1091
1092         return NULL;
1093 }
1094
1095 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1096 {
1097         struct rt6_info *rt = (struct rt6_info *) dst;
1098
1099         if (rt) {
1100                 if (rt->rt6i_flags & RTF_CACHE) {
1101                         if (rt6_check_expired(rt)) {
1102                                 ip6_del_rt(rt);
1103                                 dst = NULL;
1104                         }
1105                 } else {
1106                         dst_release(dst);
1107                         dst = NULL;
1108                 }
1109         }
1110         return dst;
1111 }
1112
1113 static void ip6_link_failure(struct sk_buff *skb)
1114 {
1115         struct rt6_info *rt;
1116
1117         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1118
1119         rt = (struct rt6_info *) skb_dst(skb);
1120         if (rt) {
1121                 if (rt->rt6i_flags & RTF_CACHE)
1122                         rt6_update_expires(rt, 0);
1123                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1124                         rt->rt6i_node->fn_sernum = -1;
1125         }
1126 }
1127
1128 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1129                                struct sk_buff *skb, u32 mtu)
1130 {
1131         struct rt6_info *rt6 = (struct rt6_info*)dst;
1132
1133         dst_confirm(dst);
1134         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1135                 struct net *net = dev_net(dst->dev);
1136
1137                 rt6->rt6i_flags |= RTF_MODIFIED;
1138                 if (mtu < IPV6_MIN_MTU) {
1139                         u32 features = dst_metric(dst, RTAX_FEATURES);
1140                         mtu = IPV6_MIN_MTU;
1141                         features |= RTAX_FEATURE_ALLFRAG;
1142                         dst_metric_set(dst, RTAX_FEATURES, features);
1143                 }
1144                 dst_metric_set(dst, RTAX_MTU, mtu);
1145                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1146         }
1147 }
1148
1149 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1150                      int oif, u32 mark)
1151 {
1152         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1153         struct dst_entry *dst;
1154         struct flowi6 fl6;
1155
1156         memset(&fl6, 0, sizeof(fl6));
1157         fl6.flowi6_oif = oif;
1158         fl6.flowi6_mark = mark;
1159         fl6.flowi6_flags = 0;
1160         fl6.daddr = iph->daddr;
1161         fl6.saddr = iph->saddr;
1162         fl6.flowlabel = ip6_flowinfo(iph);
1163
1164         dst = ip6_route_output(net, NULL, &fl6);
1165         if (!dst->error)
1166                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1167         dst_release(dst);
1168 }
1169 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1170
1171 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1172 {
1173         ip6_update_pmtu(skb, sock_net(sk), mtu,
1174                         sk->sk_bound_dev_if, sk->sk_mark);
1175 }
1176 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1177
1178 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1179 {
1180         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1181         struct dst_entry *dst;
1182         struct flowi6 fl6;
1183
1184         memset(&fl6, 0, sizeof(fl6));
1185         fl6.flowi6_oif = oif;
1186         fl6.flowi6_mark = mark;
1187         fl6.flowi6_flags = 0;
1188         fl6.daddr = iph->daddr;
1189         fl6.saddr = iph->saddr;
1190         fl6.flowlabel = ip6_flowinfo(iph);
1191
1192         dst = ip6_route_output(net, NULL, &fl6);
1193         if (!dst->error)
1194                 rt6_do_redirect(dst, NULL, skb);
1195         dst_release(dst);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_redirect);
1198
1199 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1200 {
1201         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1202 }
1203 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1204
1205 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1206 {
1207         struct net_device *dev = dst->dev;
1208         unsigned int mtu = dst_mtu(dst);
1209         struct net *net = dev_net(dev);
1210
1211         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1212
1213         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1214                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1215
1216         /*
1217          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1218          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1219          * IPV6_MAXPLEN is also valid and means: "any MSS,
1220          * rely only on pmtu discovery"
1221          */
1222         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1223                 mtu = IPV6_MAXPLEN;
1224         return mtu;
1225 }
1226
1227 static unsigned int ip6_mtu(const struct dst_entry *dst)
1228 {
1229         struct inet6_dev *idev;
1230         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1231
1232         if (mtu)
1233                 return mtu;
1234
1235         mtu = IPV6_MIN_MTU;
1236
1237         rcu_read_lock();
1238         idev = __in6_dev_get(dst->dev);
1239         if (idev)
1240                 mtu = idev->cnf.mtu6;
1241         rcu_read_unlock();
1242
1243         return mtu;
1244 }
1245
1246 static struct dst_entry *icmp6_dst_gc_list;
1247 static DEFINE_SPINLOCK(icmp6_dst_lock);
1248
1249 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1250                                   struct neighbour *neigh,
1251                                   struct flowi6 *fl6)
1252 {
1253         struct dst_entry *dst;
1254         struct rt6_info *rt;
1255         struct inet6_dev *idev = in6_dev_get(dev);
1256         struct net *net = dev_net(dev);
1257
1258         if (unlikely(!idev))
1259                 return ERR_PTR(-ENODEV);
1260
1261         rt = ip6_dst_alloc(net, dev, 0, NULL);
1262         if (unlikely(!rt)) {
1263                 in6_dev_put(idev);
1264                 dst = ERR_PTR(-ENOMEM);
1265                 goto out;
1266         }
1267
1268         if (neigh)
1269                 neigh_hold(neigh);
1270         else {
1271                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1272                 if (IS_ERR(neigh)) {
1273                         in6_dev_put(idev);
1274                         dst_free(&rt->dst);
1275                         return ERR_CAST(neigh);
1276                 }
1277         }
1278
1279         rt->dst.flags |= DST_HOST;
1280         rt->dst.output  = ip6_output;
1281         rt->n = neigh;
1282         atomic_set(&rt->dst.__refcnt, 1);
1283         rt->rt6i_dst.addr = fl6->daddr;
1284         rt->rt6i_dst.plen = 128;
1285         rt->rt6i_idev     = idev;
1286         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1287
1288         spin_lock_bh(&icmp6_dst_lock);
1289         rt->dst.next = icmp6_dst_gc_list;
1290         icmp6_dst_gc_list = &rt->dst;
1291         spin_unlock_bh(&icmp6_dst_lock);
1292
1293         fib6_force_start_gc(net);
1294
1295         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1296
1297 out:
1298         return dst;
1299 }
1300
1301 int icmp6_dst_gc(void)
1302 {
1303         struct dst_entry *dst, **pprev;
1304         int more = 0;
1305
1306         spin_lock_bh(&icmp6_dst_lock);
1307         pprev = &icmp6_dst_gc_list;
1308
1309         while ((dst = *pprev) != NULL) {
1310                 if (!atomic_read(&dst->__refcnt)) {
1311                         *pprev = dst->next;
1312                         dst_free(dst);
1313                 } else {
1314                         pprev = &dst->next;
1315                         ++more;
1316                 }
1317         }
1318
1319         spin_unlock_bh(&icmp6_dst_lock);
1320
1321         return more;
1322 }
1323
1324 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1325                             void *arg)
1326 {
1327         struct dst_entry *dst, **pprev;
1328
1329         spin_lock_bh(&icmp6_dst_lock);
1330         pprev = &icmp6_dst_gc_list;
1331         while ((dst = *pprev) != NULL) {
1332                 struct rt6_info *rt = (struct rt6_info *) dst;
1333                 if (func(rt, arg)) {
1334                         *pprev = dst->next;
1335                         dst_free(dst);
1336                 } else {
1337                         pprev = &dst->next;
1338                 }
1339         }
1340         spin_unlock_bh(&icmp6_dst_lock);
1341 }
1342
1343 static int ip6_dst_gc(struct dst_ops *ops)
1344 {
1345         unsigned long now = jiffies;
1346         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1347         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1348         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1349         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1350         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1351         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1352         int entries;
1353
1354         entries = dst_entries_get_fast(ops);
1355         if (time_after(rt_last_gc + rt_min_interval, now) &&
1356             entries <= rt_max_size)
1357                 goto out;
1358
1359         net->ipv6.ip6_rt_gc_expire++;
1360         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1361         net->ipv6.ip6_rt_last_gc = now;
1362         entries = dst_entries_get_slow(ops);
1363         if (entries < ops->gc_thresh)
1364                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1365 out:
1366         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1367         return entries > rt_max_size;
1368 }
1369
1370 int ip6_dst_hoplimit(struct dst_entry *dst)
1371 {
1372         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1373         if (hoplimit == 0) {
1374                 struct net_device *dev = dst->dev;
1375                 struct inet6_dev *idev;
1376
1377                 rcu_read_lock();
1378                 idev = __in6_dev_get(dev);
1379                 if (idev)
1380                         hoplimit = idev->cnf.hop_limit;
1381                 else
1382                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1383                 rcu_read_unlock();
1384         }
1385         return hoplimit;
1386 }
1387 EXPORT_SYMBOL(ip6_dst_hoplimit);
1388
1389 /*
1390  *
1391  */
1392
1393 int ip6_route_add(struct fib6_config *cfg)
1394 {
1395         int err;
1396         struct net *net = cfg->fc_nlinfo.nl_net;
1397         struct rt6_info *rt = NULL;
1398         struct net_device *dev = NULL;
1399         struct inet6_dev *idev = NULL;
1400         struct fib6_table *table;
1401         int addr_type;
1402
1403         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1404                 return -EINVAL;
1405 #ifndef CONFIG_IPV6_SUBTREES
1406         if (cfg->fc_src_len)
1407                 return -EINVAL;
1408 #endif
1409         if (cfg->fc_ifindex) {
1410                 err = -ENODEV;
1411                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1412                 if (!dev)
1413                         goto out;
1414                 idev = in6_dev_get(dev);
1415                 if (!idev)
1416                         goto out;
1417         }
1418
1419         if (cfg->fc_metric == 0)
1420                 cfg->fc_metric = IP6_RT_PRIO_USER;
1421
1422         err = -ENOBUFS;
1423         if (cfg->fc_nlinfo.nlh &&
1424             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1425                 table = fib6_get_table(net, cfg->fc_table);
1426                 if (!table) {
1427                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1428                         table = fib6_new_table(net, cfg->fc_table);
1429                 }
1430         } else {
1431                 table = fib6_new_table(net, cfg->fc_table);
1432         }
1433
1434         if (!table)
1435                 goto out;
1436
1437         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1438
1439         if (!rt) {
1440                 err = -ENOMEM;
1441                 goto out;
1442         }
1443
1444         if (cfg->fc_flags & RTF_EXPIRES)
1445                 rt6_set_expires(rt, jiffies +
1446                                 clock_t_to_jiffies(cfg->fc_expires));
1447         else
1448                 rt6_clean_expires(rt);
1449
1450         if (cfg->fc_protocol == RTPROT_UNSPEC)
1451                 cfg->fc_protocol = RTPROT_BOOT;
1452         rt->rt6i_protocol = cfg->fc_protocol;
1453
1454         addr_type = ipv6_addr_type(&cfg->fc_dst);
1455
1456         if (addr_type & IPV6_ADDR_MULTICAST)
1457                 rt->dst.input = ip6_mc_input;
1458         else if (cfg->fc_flags & RTF_LOCAL)
1459                 rt->dst.input = ip6_input;
1460         else
1461                 rt->dst.input = ip6_forward;
1462
1463         rt->dst.output = ip6_output;
1464
1465         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1466         rt->rt6i_dst.plen = cfg->fc_dst_len;
1467         if (rt->rt6i_dst.plen == 128)
1468                rt->dst.flags |= DST_HOST;
1469
1470         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1471                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1472                 if (!metrics) {
1473                         err = -ENOMEM;
1474                         goto out;
1475                 }
1476                 dst_init_metrics(&rt->dst, metrics, 0);
1477         }
1478 #ifdef CONFIG_IPV6_SUBTREES
1479         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1480         rt->rt6i_src.plen = cfg->fc_src_len;
1481 #endif
1482
1483         rt->rt6i_metric = cfg->fc_metric;
1484
1485         /* We cannot add true routes via loopback here,
1486            they would result in kernel looping; promote them to reject routes
1487          */
1488         if ((cfg->fc_flags & RTF_REJECT) ||
1489             (dev && (dev->flags & IFF_LOOPBACK) &&
1490              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1491              !(cfg->fc_flags & RTF_LOCAL))) {
1492                 /* hold loopback dev/idev if we haven't done so. */
1493                 if (dev != net->loopback_dev) {
1494                         if (dev) {
1495                                 dev_put(dev);
1496                                 in6_dev_put(idev);
1497                         }
1498                         dev = net->loopback_dev;
1499                         dev_hold(dev);
1500                         idev = in6_dev_get(dev);
1501                         if (!idev) {
1502                                 err = -ENODEV;
1503                                 goto out;
1504                         }
1505                 }
1506                 rt->dst.output = ip6_pkt_discard_out;
1507                 rt->dst.input = ip6_pkt_discard;
1508                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1509                 switch (cfg->fc_type) {
1510                 case RTN_BLACKHOLE:
1511                         rt->dst.error = -EINVAL;
1512                         break;
1513                 case RTN_PROHIBIT:
1514                         rt->dst.error = -EACCES;
1515                         break;
1516                 case RTN_THROW:
1517                         rt->dst.error = -EAGAIN;
1518                         break;
1519                 default:
1520                         rt->dst.error = -ENETUNREACH;
1521                         break;
1522                 }
1523                 goto install_route;
1524         }
1525
1526         if (cfg->fc_flags & RTF_GATEWAY) {
1527                 const struct in6_addr *gw_addr;
1528                 int gwa_type;
1529
1530                 gw_addr = &cfg->fc_gateway;
1531                 rt->rt6i_gateway = *gw_addr;
1532                 gwa_type = ipv6_addr_type(gw_addr);
1533
1534                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1535                         struct rt6_info *grt;
1536
1537                         /* IPv6 strictly inhibits using not link-local
1538                            addresses as nexthop address.
1539                            Otherwise, router will not able to send redirects.
1540                            It is very good, but in some (rare!) circumstances
1541                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1542                            some exceptions. --ANK
1543                          */
1544                         err = -EINVAL;
1545                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1546                                 goto out;
1547
1548                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1549
1550                         err = -EHOSTUNREACH;
1551                         if (!grt)
1552                                 goto out;
1553                         if (dev) {
1554                                 if (dev != grt->dst.dev) {
1555                                         ip6_rt_put(grt);
1556                                         goto out;
1557                                 }
1558                         } else {
1559                                 dev = grt->dst.dev;
1560                                 idev = grt->rt6i_idev;
1561                                 dev_hold(dev);
1562                                 in6_dev_hold(grt->rt6i_idev);
1563                         }
1564                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1565                                 err = 0;
1566                         ip6_rt_put(grt);
1567
1568                         if (err)
1569                                 goto out;
1570                 }
1571                 err = -EINVAL;
1572                 if (!dev || (dev->flags & IFF_LOOPBACK))
1573                         goto out;
1574         }
1575
1576         err = -ENODEV;
1577         if (!dev)
1578                 goto out;
1579
1580         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1581                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1582                         err = -EINVAL;
1583                         goto out;
1584                 }
1585                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1586                 rt->rt6i_prefsrc.plen = 128;
1587         } else
1588                 rt->rt6i_prefsrc.plen = 0;
1589
1590         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1591                 err = rt6_bind_neighbour(rt, dev);
1592                 if (err)
1593                         goto out;
1594         }
1595
1596         rt->rt6i_flags = cfg->fc_flags;
1597
1598 install_route:
1599         if (cfg->fc_mx) {
1600                 struct nlattr *nla;
1601                 int remaining;
1602
1603                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1604                         int type = nla_type(nla);
1605
1606                         if (type) {
1607                                 if (type > RTAX_MAX) {
1608                                         err = -EINVAL;
1609                                         goto out;
1610                                 }
1611
1612                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1613                         }
1614                 }
1615         }
1616
1617         rt->dst.dev = dev;
1618         rt->rt6i_idev = idev;
1619         rt->rt6i_table = table;
1620
1621         cfg->fc_nlinfo.nl_net = dev_net(dev);
1622
1623         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1624
1625 out:
1626         if (dev)
1627                 dev_put(dev);
1628         if (idev)
1629                 in6_dev_put(idev);
1630         if (rt)
1631                 dst_free(&rt->dst);
1632         return err;
1633 }
1634
1635 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1636 {
1637         int err;
1638         struct fib6_table *table;
1639         struct net *net = dev_net(rt->dst.dev);
1640
1641         if (rt == net->ipv6.ip6_null_entry) {
1642                 err = -ENOENT;
1643                 goto out;
1644         }
1645
1646         table = rt->rt6i_table;
1647         write_lock_bh(&table->tb6_lock);
1648         err = fib6_del(rt, info);
1649         write_unlock_bh(&table->tb6_lock);
1650
1651 out:
1652         ip6_rt_put(rt);
1653         return err;
1654 }
1655
1656 int ip6_del_rt(struct rt6_info *rt)
1657 {
1658         struct nl_info info = {
1659                 .nl_net = dev_net(rt->dst.dev),
1660         };
1661         return __ip6_del_rt(rt, &info);
1662 }
1663
1664 static int ip6_route_del(struct fib6_config *cfg)
1665 {
1666         struct fib6_table *table;
1667         struct fib6_node *fn;
1668         struct rt6_info *rt;
1669         int err = -ESRCH;
1670
1671         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1672         if (!table)
1673                 return err;
1674
1675         read_lock_bh(&table->tb6_lock);
1676
1677         fn = fib6_locate(&table->tb6_root,
1678                          &cfg->fc_dst, cfg->fc_dst_len,
1679                          &cfg->fc_src, cfg->fc_src_len);
1680
1681         if (fn) {
1682                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1683                         if (cfg->fc_ifindex &&
1684                             (!rt->dst.dev ||
1685                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1686                                 continue;
1687                         if (cfg->fc_flags & RTF_GATEWAY &&
1688                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1689                                 continue;
1690                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1691                                 continue;
1692                         dst_hold(&rt->dst);
1693                         read_unlock_bh(&table->tb6_lock);
1694
1695                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1696                 }
1697         }
1698         read_unlock_bh(&table->tb6_lock);
1699
1700         return err;
1701 }
1702
1703 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1704 {
1705         struct net *net = dev_net(skb->dev);
1706         struct netevent_redirect netevent;
1707         struct rt6_info *rt, *nrt = NULL;
1708         struct ndisc_options ndopts;
1709         struct neighbour *old_neigh;
1710         struct inet6_dev *in6_dev;
1711         struct neighbour *neigh;
1712         struct rd_msg *msg;
1713         int optlen, on_link;
1714         u8 *lladdr;
1715
1716         optlen = skb->tail - skb->transport_header;
1717         optlen -= sizeof(*msg);
1718
1719         if (optlen < 0) {
1720                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1721                 return;
1722         }
1723
1724         msg = (struct rd_msg *)icmp6_hdr(skb);
1725
1726         if (ipv6_addr_is_multicast(&msg->dest)) {
1727                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1728                 return;
1729         }
1730
1731         on_link = 0;
1732         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1733                 on_link = 1;
1734         } else if (ipv6_addr_type(&msg->target) !=
1735                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1736                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1737                 return;
1738         }
1739
1740         in6_dev = __in6_dev_get(skb->dev);
1741         if (!in6_dev)
1742                 return;
1743         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1744                 return;
1745
1746         /* RFC2461 8.1:
1747          *      The IP source address of the Redirect MUST be the same as the current
1748          *      first-hop router for the specified ICMP Destination Address.
1749          */
1750
1751         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1752                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1753                 return;
1754         }
1755
1756         lladdr = NULL;
1757         if (ndopts.nd_opts_tgt_lladdr) {
1758                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1759                                              skb->dev);
1760                 if (!lladdr) {
1761                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1762                         return;
1763                 }
1764         }
1765
1766         rt = (struct rt6_info *) dst;
1767         if (rt == net->ipv6.ip6_null_entry) {
1768                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1769                 return;
1770         }
1771
1772         /* Redirect received -> path was valid.
1773          * Look, redirects are sent only in response to data packets,
1774          * so that this nexthop apparently is reachable. --ANK
1775          */
1776         dst_confirm(&rt->dst);
1777
1778         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1779         if (!neigh)
1780                 return;
1781
1782         /* Duplicate redirect: silently ignore. */
1783         old_neigh = rt->n;
1784         if (neigh == old_neigh)
1785                 goto out;
1786
1787         /*
1788          *      We have finally decided to accept it.
1789          */
1790
1791         neigh_update(neigh, lladdr, NUD_STALE,
1792                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1793                      NEIGH_UPDATE_F_OVERRIDE|
1794                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1795                                      NEIGH_UPDATE_F_ISROUTER))
1796                      );
1797
1798         nrt = ip6_rt_copy(rt, &msg->dest);
1799         if (!nrt)
1800                 goto out;
1801
1802         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1803         if (on_link)
1804                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1805
1806         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1807         nrt->n = neigh_clone(neigh);
1808
1809         if (ip6_ins_rt(nrt))
1810                 goto out;
1811
1812         netevent.old = &rt->dst;
1813         netevent.old_neigh = old_neigh;
1814         netevent.new = &nrt->dst;
1815         netevent.new_neigh = neigh;
1816         netevent.daddr = &msg->dest;
1817         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1818
1819         if (rt->rt6i_flags & RTF_CACHE) {
1820                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1821                 ip6_del_rt(rt);
1822         }
1823
1824 out:
1825         neigh_release(neigh);
1826 }
1827
1828 /*
1829  *      Misc support functions
1830  */
1831
1832 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1833                                     const struct in6_addr *dest)
1834 {
1835         struct net *net = dev_net(ort->dst.dev);
1836         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1837                                             ort->rt6i_table);
1838
1839         if (rt) {
1840                 rt->dst.input = ort->dst.input;
1841                 rt->dst.output = ort->dst.output;
1842                 rt->dst.flags |= DST_HOST;
1843
1844                 rt->rt6i_dst.addr = *dest;
1845                 rt->rt6i_dst.plen = 128;
1846                 dst_copy_metrics(&rt->dst, &ort->dst);
1847                 rt->dst.error = ort->dst.error;
1848                 rt->rt6i_idev = ort->rt6i_idev;
1849                 if (rt->rt6i_idev)
1850                         in6_dev_hold(rt->rt6i_idev);
1851                 rt->dst.lastuse = jiffies;
1852
1853                 rt->rt6i_gateway = ort->rt6i_gateway;
1854                 rt->rt6i_flags = ort->rt6i_flags;
1855                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1856                     (RTF_DEFAULT | RTF_ADDRCONF))
1857                         rt6_set_from(rt, ort);
1858                 else
1859                         rt6_clean_expires(rt);
1860                 rt->rt6i_metric = 0;
1861
1862 #ifdef CONFIG_IPV6_SUBTREES
1863                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1864 #endif
1865                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1866                 rt->rt6i_table = ort->rt6i_table;
1867         }
1868         return rt;
1869 }
1870
1871 #ifdef CONFIG_IPV6_ROUTE_INFO
1872 static struct rt6_info *rt6_get_route_info(struct net *net,
1873                                            const struct in6_addr *prefix, int prefixlen,
1874                                            const struct in6_addr *gwaddr, int ifindex)
1875 {
1876         struct fib6_node *fn;
1877         struct rt6_info *rt = NULL;
1878         struct fib6_table *table;
1879
1880         table = fib6_get_table(net, RT6_TABLE_INFO);
1881         if (!table)
1882                 return NULL;
1883
1884         read_lock_bh(&table->tb6_lock);
1885         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1886         if (!fn)
1887                 goto out;
1888
1889         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1890                 if (rt->dst.dev->ifindex != ifindex)
1891                         continue;
1892                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1893                         continue;
1894                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1895                         continue;
1896                 dst_hold(&rt->dst);
1897                 break;
1898         }
1899 out:
1900         read_unlock_bh(&table->tb6_lock);
1901         return rt;
1902 }
1903
1904 static struct rt6_info *rt6_add_route_info(struct net *net,
1905                                            const struct in6_addr *prefix, int prefixlen,
1906                                            const struct in6_addr *gwaddr, int ifindex,
1907                                            unsigned int pref)
1908 {
1909         struct fib6_config cfg = {
1910                 .fc_table       = RT6_TABLE_INFO,
1911                 .fc_metric      = IP6_RT_PRIO_USER,
1912                 .fc_ifindex     = ifindex,
1913                 .fc_dst_len     = prefixlen,
1914                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1915                                   RTF_UP | RTF_PREF(pref),
1916                 .fc_nlinfo.portid = 0,
1917                 .fc_nlinfo.nlh = NULL,
1918                 .fc_nlinfo.nl_net = net,
1919         };
1920
1921         cfg.fc_dst = *prefix;
1922         cfg.fc_gateway = *gwaddr;
1923
1924         /* We should treat it as a default route if prefix length is 0. */
1925         if (!prefixlen)
1926                 cfg.fc_flags |= RTF_DEFAULT;
1927
1928         ip6_route_add(&cfg);
1929
1930         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1931 }
1932 #endif
1933
1934 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1935 {
1936         struct rt6_info *rt;
1937         struct fib6_table *table;
1938
1939         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1940         if (!table)
1941                 return NULL;
1942
1943         read_lock_bh(&table->tb6_lock);
1944         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1945                 if (dev == rt->dst.dev &&
1946                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1947                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1948                         break;
1949         }
1950         if (rt)
1951                 dst_hold(&rt->dst);
1952         read_unlock_bh(&table->tb6_lock);
1953         return rt;
1954 }
1955
1956 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1957                                      struct net_device *dev,
1958                                      unsigned int pref)
1959 {
1960         struct fib6_config cfg = {
1961                 .fc_table       = RT6_TABLE_DFLT,
1962                 .fc_metric      = IP6_RT_PRIO_USER,
1963                 .fc_ifindex     = dev->ifindex,
1964                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1965                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1966                 .fc_nlinfo.portid = 0,
1967                 .fc_nlinfo.nlh = NULL,
1968                 .fc_nlinfo.nl_net = dev_net(dev),
1969         };
1970
1971         cfg.fc_gateway = *gwaddr;
1972
1973         ip6_route_add(&cfg);
1974
1975         return rt6_get_dflt_router(gwaddr, dev);
1976 }
1977
1978 void rt6_purge_dflt_routers(struct net *net)
1979 {
1980         struct rt6_info *rt;
1981         struct fib6_table *table;
1982
1983         /* NOTE: Keep consistent with rt6_get_dflt_router */
1984         table = fib6_get_table(net, RT6_TABLE_DFLT);
1985         if (!table)
1986                 return;
1987
1988 restart:
1989         read_lock_bh(&table->tb6_lock);
1990         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1991                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1992                         dst_hold(&rt->dst);
1993                         read_unlock_bh(&table->tb6_lock);
1994                         ip6_del_rt(rt);
1995                         goto restart;
1996                 }
1997         }
1998         read_unlock_bh(&table->tb6_lock);
1999 }
2000
2001 static void rtmsg_to_fib6_config(struct net *net,
2002                                  struct in6_rtmsg *rtmsg,
2003                                  struct fib6_config *cfg)
2004 {
2005         memset(cfg, 0, sizeof(*cfg));
2006
2007         cfg->fc_table = RT6_TABLE_MAIN;
2008         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2009         cfg->fc_metric = rtmsg->rtmsg_metric;
2010         cfg->fc_expires = rtmsg->rtmsg_info;
2011         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2012         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2013         cfg->fc_flags = rtmsg->rtmsg_flags;
2014
2015         cfg->fc_nlinfo.nl_net = net;
2016
2017         cfg->fc_dst = rtmsg->rtmsg_dst;
2018         cfg->fc_src = rtmsg->rtmsg_src;
2019         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2020 }
2021
2022 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2023 {
2024         struct fib6_config cfg;
2025         struct in6_rtmsg rtmsg;
2026         int err;
2027
2028         switch(cmd) {
2029         case SIOCADDRT:         /* Add a route */
2030         case SIOCDELRT:         /* Delete a route */
2031                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2032                         return -EPERM;
2033                 err = copy_from_user(&rtmsg, arg,
2034                                      sizeof(struct in6_rtmsg));
2035                 if (err)
2036                         return -EFAULT;
2037
2038                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2039
2040                 rtnl_lock();
2041                 switch (cmd) {
2042                 case SIOCADDRT:
2043                         err = ip6_route_add(&cfg);
2044                         break;
2045                 case SIOCDELRT:
2046                         err = ip6_route_del(&cfg);
2047                         break;
2048                 default:
2049                         err = -EINVAL;
2050                 }
2051                 rtnl_unlock();
2052
2053                 return err;
2054         }
2055
2056         return -EINVAL;
2057 }
2058
2059 /*
2060  *      Drop the packet on the floor
2061  */
2062
2063 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2064 {
2065         int type;
2066         struct dst_entry *dst = skb_dst(skb);
2067         switch (ipstats_mib_noroutes) {
2068         case IPSTATS_MIB_INNOROUTES:
2069                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2070                 if (type == IPV6_ADDR_ANY) {
2071                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2072                                       IPSTATS_MIB_INADDRERRORS);
2073                         break;
2074                 }
2075                 /* FALLTHROUGH */
2076         case IPSTATS_MIB_OUTNOROUTES:
2077                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2078                               ipstats_mib_noroutes);
2079                 break;
2080         }
2081         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2082         kfree_skb(skb);
2083         return 0;
2084 }
2085
2086 static int ip6_pkt_discard(struct sk_buff *skb)
2087 {
2088         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2089 }
2090
2091 static int ip6_pkt_discard_out(struct sk_buff *skb)
2092 {
2093         skb->dev = skb_dst(skb)->dev;
2094         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2095 }
2096
2097 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2098
2099 static int ip6_pkt_prohibit(struct sk_buff *skb)
2100 {
2101         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2102 }
2103
2104 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2105 {
2106         skb->dev = skb_dst(skb)->dev;
2107         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2108 }
2109
2110 #endif
2111
2112 /*
2113  *      Allocate a dst for local (unicast / anycast) address.
2114  */
2115
2116 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2117                                     const struct in6_addr *addr,
2118                                     bool anycast)
2119 {
2120         struct net *net = dev_net(idev->dev);
2121         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2122         int err;
2123
2124         if (!rt) {
2125                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2126                 return ERR_PTR(-ENOMEM);
2127         }
2128
2129         in6_dev_hold(idev);
2130
2131         rt->dst.flags |= DST_HOST;
2132         rt->dst.input = ip6_input;
2133         rt->dst.output = ip6_output;
2134         rt->rt6i_idev = idev;
2135
2136         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2137         if (anycast)
2138                 rt->rt6i_flags |= RTF_ANYCAST;
2139         else
2140                 rt->rt6i_flags |= RTF_LOCAL;
2141         err = rt6_bind_neighbour(rt, rt->dst.dev);
2142         if (err) {
2143                 dst_free(&rt->dst);
2144                 return ERR_PTR(err);
2145         }
2146
2147         rt->rt6i_dst.addr = *addr;
2148         rt->rt6i_dst.plen = 128;
2149         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2150
2151         atomic_set(&rt->dst.__refcnt, 1);
2152
2153         return rt;
2154 }
2155
2156 int ip6_route_get_saddr(struct net *net,
2157                         struct rt6_info *rt,
2158                         const struct in6_addr *daddr,
2159                         unsigned int prefs,
2160                         struct in6_addr *saddr)
2161 {
2162         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2163         int err = 0;
2164         if (rt->rt6i_prefsrc.plen)
2165                 *saddr = rt->rt6i_prefsrc.addr;
2166         else
2167                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2168                                          daddr, prefs, saddr);
2169         return err;
2170 }
2171
2172 /* remove deleted ip from prefsrc entries */
/* Argument bundle passed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared against rt6i_prefsrc */
};
2178
2179 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2180 {
2181         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2182         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2183         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2184
2185         if (((void *)rt->dst.dev == dev || !dev) &&
2186             rt != net->ipv6.ip6_null_entry &&
2187             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2188                 /* remove prefsrc entry */
2189                 rt->rt6i_prefsrc.plen = 0;
2190         }
2191         return 0;
2192 }
2193
2194 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2195 {
2196         struct net *net = dev_net(ifp->idev->dev);
2197         struct arg_dev_net_ip adni = {
2198                 .dev = ifp->idev->dev,
2199                 .net = net,
2200                 .addr = &ifp->addr,
2201         };
2202         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2203 }
2204
/* Argument bundle passed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
};
2209
2210 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2211 {
2212         const struct arg_dev_net *adn = arg;
2213         const struct net_device *dev = adn->dev;
2214
2215         if ((rt->dst.dev == dev || !dev) &&
2216             rt != adn->net->ipv6.ip6_null_entry)
2217                 return -1;
2218
2219         return 0;
2220 }
2221
2222 void rt6_ifdown(struct net *net, struct net_device *dev)
2223 {
2224         struct arg_dev_net adn = {
2225                 .dev = dev,
2226                 .net = net,
2227         };
2228
2229         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2230         icmp6_clean_all(fib6_ifdown, &adn);
2231 }
2232
/* Argument bundle passed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
2237
2238 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2239 {
2240         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2241         struct inet6_dev *idev;
2242
2243         /* In IPv6 pmtu discovery is not optional,
2244            so that RTAX_MTU lock cannot disable it.
2245            We still use this lock to block changes
2246            caused by addrconf/ndisc.
2247         */
2248
2249         idev = __in6_dev_get(arg->dev);
2250         if (!idev)
2251                 return 0;
2252
2253         /* For administrative MTU increase, there is no way to discover
2254            IPv6 PMTU increase, so PMTU increase should be updated here.
2255            Since RFC 1981 doesn't include administrative MTU increase
2256            update PMTU increase is a MUST. (i.e. jumbo frame)
2257          */
2258         /*
2259            If new MTU is less than route PMTU, this new MTU will be the
2260            lowest MTU in the path, update the route PMTU to reflect PMTU
2261            decreases; if new MTU is greater than route PMTU, and the
2262            old MTU is the lowest MTU in the path, update the route PMTU
2263            to reflect the increase. In this case if the other nodes' MTU
2264            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2265            PMTU discouvery.
2266          */
2267         if (rt->dst.dev == arg->dev &&
2268             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2269             (dst_mtu(&rt->dst) >= arg->mtu ||
2270              (dst_mtu(&rt->dst) < arg->mtu &&
2271               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2272                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2273         }
2274         return 0;
2275 }
2276
2277 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2278 {
2279         struct rt6_mtu_change_arg arg = {
2280                 .dev = dev,
2281                 .mtu = mtu,
2282         };
2283
2284         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2285 }
2286
2287 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2288         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2289         [RTA_OIF]               = { .type = NLA_U32 },
2290         [RTA_IIF]               = { .type = NLA_U32 },
2291         [RTA_PRIORITY]          = { .type = NLA_U32 },
2292         [RTA_METRICS]           = { .type = NLA_NESTED },
2293         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2294 };
2295
2296 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2297                               struct fib6_config *cfg)
2298 {
2299         struct rtmsg *rtm;
2300         struct nlattr *tb[RTA_MAX+1];
2301         int err;
2302
2303         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2304         if (err < 0)
2305                 goto errout;
2306
2307         err = -EINVAL;
2308         rtm = nlmsg_data(nlh);
2309         memset(cfg, 0, sizeof(*cfg));
2310
2311         cfg->fc_table = rtm->rtm_table;
2312         cfg->fc_dst_len = rtm->rtm_dst_len;
2313         cfg->fc_src_len = rtm->rtm_src_len;
2314         cfg->fc_flags = RTF_UP;
2315         cfg->fc_protocol = rtm->rtm_protocol;
2316         cfg->fc_type = rtm->rtm_type;
2317
2318         if (rtm->rtm_type == RTN_UNREACHABLE ||
2319             rtm->rtm_type == RTN_BLACKHOLE ||
2320             rtm->rtm_type == RTN_PROHIBIT ||
2321             rtm->rtm_type == RTN_THROW)
2322                 cfg->fc_flags |= RTF_REJECT;
2323
2324         if (rtm->rtm_type == RTN_LOCAL)
2325                 cfg->fc_flags |= RTF_LOCAL;
2326
2327         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2328         cfg->fc_nlinfo.nlh = nlh;
2329         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2330
2331         if (tb[RTA_GATEWAY]) {
2332                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2333                 cfg->fc_flags |= RTF_GATEWAY;
2334         }
2335
2336         if (tb[RTA_DST]) {
2337                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2338
2339                 if (nla_len(tb[RTA_DST]) < plen)
2340                         goto errout;
2341
2342                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2343         }
2344
2345         if (tb[RTA_SRC]) {
2346                 int plen = (rtm->rtm_src_len + 7) >> 3;
2347
2348                 if (nla_len(tb[RTA_SRC]) < plen)
2349                         goto errout;
2350
2351                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2352         }
2353
2354         if (tb[RTA_PREFSRC])
2355                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2356
2357         if (tb[RTA_OIF])
2358                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2359
2360         if (tb[RTA_PRIORITY])
2361                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2362
2363         if (tb[RTA_METRICS]) {
2364                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2365                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2366         }
2367
2368         if (tb[RTA_TABLE])
2369                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2370
2371         if (tb[RTA_MULTIPATH]) {
2372                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2373                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2374         }
2375
2376         err = 0;
2377 errout:
2378         return err;
2379 }
2380
2381 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2382 {
2383         struct fib6_config r_cfg;
2384         struct rtnexthop *rtnh;
2385         int remaining;
2386         int attrlen;
2387         int err = 0, last_err = 0;
2388
2389 beginning:
2390         rtnh = (struct rtnexthop *)cfg->fc_mp;
2391         remaining = cfg->fc_mp_len;
2392
2393         /* Parse a Multipath Entry */
2394         while (rtnh_ok(rtnh, remaining)) {
2395                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2396                 if (rtnh->rtnh_ifindex)
2397                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2398
2399                 attrlen = rtnh_attrlen(rtnh);
2400                 if (attrlen > 0) {
2401                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2402
2403                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2404                         if (nla) {
2405                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2406                                 r_cfg.fc_flags |= RTF_GATEWAY;
2407                         }
2408                 }
2409                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2410                 if (err) {
2411                         last_err = err;
2412                         /* If we are trying to remove a route, do not stop the
2413                          * loop when ip6_route_del() fails (because next hop is
2414                          * already gone), we should try to remove all next hops.
2415                          */
2416                         if (add) {
2417                                 /* If add fails, we should try to delete all
2418                                  * next hops that have been already added.
2419                                  */
2420                                 add = 0;
2421                                 goto beginning;
2422                         }
2423                 }
2424                 /* Because each route is added like a single route we remove
2425                  * this flag after the first nexthop (if there is a collision,
2426                  * we have already fail to add the first nexthop:
2427                  * fib6_add_rt2node() has reject it).
2428                  */
2429                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2430                 rtnh = rtnh_next(rtnh, &remaining);
2431         }
2432
2433         return last_err;
2434 }
2435
2436 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2437 {
2438         struct fib6_config cfg;
2439         int err;
2440
2441         err = rtm_to_fib6_config(skb, nlh, &cfg);
2442         if (err < 0)
2443                 return err;
2444
2445         if (cfg.fc_mp)
2446                 return ip6_route_multipath(&cfg, 0);
2447         else
2448                 return ip6_route_del(&cfg);
2449 }
2450
2451 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2452 {
2453         struct fib6_config cfg;
2454         int err;
2455
2456         err = rtm_to_fib6_config(skb, nlh, &cfg);
2457         if (err < 0)
2458                 return err;
2459
2460         if (cfg.fc_mp)
2461                 return ip6_route_multipath(&cfg, 1);
2462         else
2463                 return ip6_route_add(&cfg);
2464 }
2465
2466 static inline size_t rt6_nlmsg_size(void)
2467 {
2468         return NLMSG_ALIGN(sizeof(struct rtmsg))
2469                + nla_total_size(16) /* RTA_SRC */
2470                + nla_total_size(16) /* RTA_DST */
2471                + nla_total_size(16) /* RTA_GATEWAY */
2472                + nla_total_size(16) /* RTA_PREFSRC */
2473                + nla_total_size(4) /* RTA_TABLE */
2474                + nla_total_size(4) /* RTA_IIF */
2475                + nla_total_size(4) /* RTA_OIF */
2476                + nla_total_size(4) /* RTA_PRIORITY */
2477                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2478                + nla_total_size(sizeof(struct rta_cacheinfo));
2479 }
2480
2481 static int rt6_fill_node(struct net *net,
2482                          struct sk_buff *skb, struct rt6_info *rt,
2483                          struct in6_addr *dst, struct in6_addr *src,
2484                          int iif, int type, u32 portid, u32 seq,
2485                          int prefix, int nowait, unsigned int flags)
2486 {
2487         struct rtmsg *rtm;
2488         struct nlmsghdr *nlh;
2489         long expires;
2490         u32 table;
2491         struct neighbour *n;
2492
2493         if (prefix) {   /* user wants prefix routes only */
2494                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2495                         /* success since this is not a prefix route */
2496                         return 1;
2497                 }
2498         }
2499
2500         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2501         if (!nlh)
2502                 return -EMSGSIZE;
2503
2504         rtm = nlmsg_data(nlh);
2505         rtm->rtm_family = AF_INET6;
2506         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2507         rtm->rtm_src_len = rt->rt6i_src.plen;
2508         rtm->rtm_tos = 0;
2509         if (rt->rt6i_table)
2510                 table = rt->rt6i_table->tb6_id;
2511         else
2512                 table = RT6_TABLE_UNSPEC;
2513         rtm->rtm_table = table;
2514         if (nla_put_u32(skb, RTA_TABLE, table))
2515                 goto nla_put_failure;
2516         if (rt->rt6i_flags & RTF_REJECT) {
2517                 switch (rt->dst.error) {
2518                 case -EINVAL:
2519                         rtm->rtm_type = RTN_BLACKHOLE;
2520                         break;
2521                 case -EACCES:
2522                         rtm->rtm_type = RTN_PROHIBIT;
2523                         break;
2524                 case -EAGAIN:
2525                         rtm->rtm_type = RTN_THROW;
2526                         break;
2527                 default:
2528                         rtm->rtm_type = RTN_UNREACHABLE;
2529                         break;
2530                 }
2531         }
2532         else if (rt->rt6i_flags & RTF_LOCAL)
2533                 rtm->rtm_type = RTN_LOCAL;
2534         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2535                 rtm->rtm_type = RTN_LOCAL;
2536         else
2537                 rtm->rtm_type = RTN_UNICAST;
2538         rtm->rtm_flags = 0;
2539         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2540         rtm->rtm_protocol = rt->rt6i_protocol;
2541         if (rt->rt6i_flags & RTF_DYNAMIC)
2542                 rtm->rtm_protocol = RTPROT_REDIRECT;
2543         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2544                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2545                         rtm->rtm_protocol = RTPROT_RA;
2546                 else
2547                         rtm->rtm_protocol = RTPROT_KERNEL;
2548         }
2549
2550         if (rt->rt6i_flags & RTF_CACHE)
2551                 rtm->rtm_flags |= RTM_F_CLONED;
2552
2553         if (dst) {
2554                 if (nla_put(skb, RTA_DST, 16, dst))
2555                         goto nla_put_failure;
2556                 rtm->rtm_dst_len = 128;
2557         } else if (rtm->rtm_dst_len)
2558                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2559                         goto nla_put_failure;
2560 #ifdef CONFIG_IPV6_SUBTREES
2561         if (src) {
2562                 if (nla_put(skb, RTA_SRC, 16, src))
2563                         goto nla_put_failure;
2564                 rtm->rtm_src_len = 128;
2565         } else if (rtm->rtm_src_len &&
2566                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2567                 goto nla_put_failure;
2568 #endif
2569         if (iif) {
2570 #ifdef CONFIG_IPV6_MROUTE
2571                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2572                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2573                         if (err <= 0) {
2574                                 if (!nowait) {
2575                                         if (err == 0)
2576                                                 return 0;
2577                                         goto nla_put_failure;
2578                                 } else {
2579                                         if (err == -EMSGSIZE)
2580                                                 goto nla_put_failure;
2581                                 }
2582                         }
2583                 } else
2584 #endif
2585                         if (nla_put_u32(skb, RTA_IIF, iif))
2586                                 goto nla_put_failure;
2587         } else if (dst) {
2588                 struct in6_addr saddr_buf;
2589                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2590                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2591                         goto nla_put_failure;
2592         }
2593
2594         if (rt->rt6i_prefsrc.plen) {
2595                 struct in6_addr saddr_buf;
2596                 saddr_buf = rt->rt6i_prefsrc.addr;
2597                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2598                         goto nla_put_failure;
2599         }
2600
2601         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2602                 goto nla_put_failure;
2603
2604         n = rt->n;
2605         if (n) {
2606                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2607                         goto nla_put_failure;
2608         }
2609
2610         if (rt->dst.dev &&
2611             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2612                 goto nla_put_failure;
2613         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2614                 goto nla_put_failure;
2615
2616         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2617
2618         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2619                 goto nla_put_failure;
2620
2621         return nlmsg_end(skb, nlh);
2622
2623 nla_put_failure:
2624         nlmsg_cancel(skb, nlh);
2625         return -EMSGSIZE;
2626 }
2627
2628 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2629 {
2630         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2631         int prefix;
2632
2633         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2634                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2635                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2636         } else
2637                 prefix = 0;
2638
2639         return rt6_fill_node(arg->net,
2640                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2641                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2642                      prefix, 0, NLM_F_MULTI);
2643 }
2644
2645 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2646 {
2647         struct net *net = sock_net(in_skb->sk);
2648         struct nlattr *tb[RTA_MAX+1];
2649         struct rt6_info *rt;
2650         struct sk_buff *skb;
2651         struct rtmsg *rtm;
2652         struct flowi6 fl6;
2653         int err, iif = 0, oif = 0;
2654
2655         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2656         if (err < 0)
2657                 goto errout;
2658
2659         err = -EINVAL;
2660         memset(&fl6, 0, sizeof(fl6));
2661
2662         if (tb[RTA_SRC]) {
2663                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2664                         goto errout;
2665
2666                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2667         }
2668
2669         if (tb[RTA_DST]) {
2670                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2671                         goto errout;
2672
2673                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2674         }
2675
2676         if (tb[RTA_IIF])
2677                 iif = nla_get_u32(tb[RTA_IIF]);
2678
2679         if (tb[RTA_OIF])
2680                 oif = nla_get_u32(tb[RTA_OIF]);
2681
2682         if (iif) {
2683                 struct net_device *dev;
2684                 int flags = 0;
2685
2686                 dev = __dev_get_by_index(net, iif);
2687                 if (!dev) {
2688                         err = -ENODEV;
2689                         goto errout;
2690                 }
2691
2692                 fl6.flowi6_iif = iif;
2693
2694                 if (!ipv6_addr_any(&fl6.saddr))
2695                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2696
2697                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2698                                                                flags);
2699         } else {
2700                 fl6.flowi6_oif = oif;
2701
2702                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2703         }
2704
2705         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2706         if (!skb) {
2707                 ip6_rt_put(rt);
2708                 err = -ENOBUFS;
2709                 goto errout;
2710         }
2711
2712         /* Reserve room for dummy headers, this skb can pass
2713            through good chunk of routing engine.
2714          */
2715         skb_reset_mac_header(skb);
2716         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2717
2718         skb_dst_set(skb, &rt->dst);
2719
2720         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2721                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2722                             nlh->nlmsg_seq, 0, 0, 0);
2723         if (err < 0) {
2724                 kfree_skb(skb);
2725                 goto errout;
2726         }
2727
2728         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2729 errout:
2730         return err;
2731 }
2732
2733 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2734 {
2735         struct sk_buff *skb;
2736         struct net *net = info->nl_net;
2737         u32 seq;
2738         int err;
2739
2740         err = -ENOBUFS;
2741         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2742
2743         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2744         if (!skb)
2745                 goto errout;
2746
2747         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2748                                 event, info->portid, seq, 0, 0, 0);
2749         if (err < 0) {
2750                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2751                 WARN_ON(err == -EMSGSIZE);
2752                 kfree_skb(skb);
2753                 goto errout;
2754         }
2755         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2756                     info->nlh, gfp_any());
2757         return;
2758 errout:
2759         if (err < 0)
2760                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2761 }
2762
2763 static int ip6_route_dev_notify(struct notifier_block *this,
2764                                 unsigned long event, void *data)
2765 {
2766         struct net_device *dev = (struct net_device *)data;
2767         struct net *net = dev_net(dev);
2768
2769         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2770                 net->ipv6.ip6_null_entry->dst.dev = dev;
2771                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2772 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2773                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2774                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2775                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2776                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2777 #endif
2778         }
2779
2780         return NOTIFY_OK;
2781 }
2782
2783 /*
2784  *      /proc
2785  */
2786
2787 #ifdef CONFIG_PROC_FS
2788
/*
 * Bookkeeping for writing route text into a caller-supplied buffer.
 * NOTE(review): nothing in this part of the file references this struct;
 * the seq_file based /proc handlers below do not use it - confirm whether
 * it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2797
2798 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2799 {
2800         struct seq_file *m = p_arg;
2801         struct neighbour *n;
2802
2803         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2804
2805 #ifdef CONFIG_IPV6_SUBTREES
2806         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2807 #else
2808         seq_puts(m, "00000000000000000000000000000000 00 ");
2809 #endif
2810         n = rt->n;
2811         if (n) {
2812                 seq_printf(m, "%pi6", n->primary_key);
2813         } else {
2814                 seq_puts(m, "00000000000000000000000000000000");
2815         }
2816         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2817                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2818                    rt->dst.__use, rt->rt6i_flags,
2819                    rt->dst.dev ? rt->dst.dev->name : "");
2820         return 0;
2821 }
2822
2823 static int ipv6_route_show(struct seq_file *m, void *v)
2824 {
2825         struct net *net = (struct net *)m->private;
2826         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2827         return 0;
2828 }
2829
2830 static int ipv6_route_open(struct inode *inode, struct file *file)
2831 {
2832         return single_open_net(inode, file, ipv6_route_show);
2833 }
2834
2835 static const struct file_operations ipv6_route_proc_fops = {
2836         .owner          = THIS_MODULE,
2837         .open           = ipv6_route_open,
2838         .read           = seq_read,
2839         .llseek         = seq_lseek,
2840         .release        = single_release_net,
2841 };
2842
2843 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2844 {
2845         struct net *net = (struct net *)seq->private;
2846         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2847                    net->ipv6.rt6_stats->fib_nodes,
2848                    net->ipv6.rt6_stats->fib_route_nodes,
2849                    net->ipv6.rt6_stats->fib_rt_alloc,
2850                    net->ipv6.rt6_stats->fib_rt_entries,
2851                    net->ipv6.rt6_stats->fib_rt_cache,
2852                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2853                    net->ipv6.rt6_stats->fib_discarded_routes);
2854
2855         return 0;
2856 }
2857
2858 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2859 {
2860         return single_open_net(inode, file, rt6_stats_seq_show);
2861 }
2862
2863 static const struct file_operations rt6_stats_seq_fops = {
2864         .owner   = THIS_MODULE,
2865         .open    = rt6_stats_seq_open,
2866         .read    = seq_read,
2867         .llseek  = seq_lseek,
2868         .release = single_release_net,
2869 };
2870 #endif  /* CONFIG_PROC_FS */
2871
2872 #ifdef CONFIG_SYSCTL
2873
2874 static
2875 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2876                               void __user *buffer, size_t *lenp, loff_t *ppos)
2877 {
2878         struct net *net;
2879         int delay;
2880         if (!write)
2881                 return -EINVAL;
2882
2883         net = (struct net *)ctl->extra1;
2884         delay = net->ipv6.sysctl.flush_delay;
2885         proc_dointvec(ctl, write, buffer, lenp, ppos);
2886         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2887         return 0;
2888 }
2889
2890 ctl_table ipv6_route_table_template[] = {
2891         {
2892                 .procname       =       "flush",
2893                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2894                 .maxlen         =       sizeof(int),
2895                 .mode           =       0200,
2896                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2897         },
2898         {
2899                 .procname       =       "gc_thresh",
2900                 .data           =       &ip6_dst_ops_template.gc_thresh,
2901                 .maxlen         =       sizeof(int),
2902                 .mode           =       0644,
2903                 .proc_handler   =       proc_dointvec,
2904         },
2905         {
2906                 .procname       =       "max_size",
2907                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2908                 .maxlen         =       sizeof(int),
2909                 .mode           =       0644,
2910                 .proc_handler   =       proc_dointvec,
2911         },
2912         {
2913                 .procname       =       "gc_min_interval",
2914                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2915                 .maxlen         =       sizeof(int),
2916                 .mode           =       0644,
2917                 .proc_handler   =       proc_dointvec_jiffies,
2918         },
2919         {
2920                 .procname       =       "gc_timeout",
2921                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2922                 .maxlen         =       sizeof(int),
2923                 .mode           =       0644,
2924                 .proc_handler   =       proc_dointvec_jiffies,
2925         },
2926         {
2927                 .procname       =       "gc_interval",
2928                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2929                 .maxlen         =       sizeof(int),
2930                 .mode           =       0644,
2931                 .proc_handler   =       proc_dointvec_jiffies,
2932         },
2933         {
2934                 .procname       =       "gc_elasticity",
2935                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2936                 .maxlen         =       sizeof(int),
2937                 .mode           =       0644,
2938                 .proc_handler   =       proc_dointvec,
2939         },
2940         {
2941                 .procname       =       "mtu_expires",
2942                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2943                 .maxlen         =       sizeof(int),
2944                 .mode           =       0644,
2945                 .proc_handler   =       proc_dointvec_jiffies,
2946         },
2947         {
2948                 .procname       =       "min_adv_mss",
2949                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2950                 .maxlen         =       sizeof(int),
2951                 .mode           =       0644,
2952                 .proc_handler   =       proc_dointvec,
2953         },
2954         {
2955                 .procname       =       "gc_min_interval_ms",
2956                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2957                 .maxlen         =       sizeof(int),
2958                 .mode           =       0644,
2959                 .proc_handler   =       proc_dointvec_ms_jiffies,
2960         },
2961         { }
2962 };
2963
2964 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2965 {
2966         struct ctl_table *table;
2967
2968         table = kmemdup(ipv6_route_table_template,
2969                         sizeof(ipv6_route_table_template),
2970                         GFP_KERNEL);
2971
2972         if (table) {
2973                 table[0].data = &net->ipv6.sysctl.flush_delay;
2974                 table[0].extra1 = net;
2975                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2976                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2977                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2978                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2979                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2980                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2981                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2982                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2983                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2984
2985                 /* Don't export sysctls to unprivileged users */
2986                 if (net->user_ns != &init_user_ns)
2987                         table[0].procname = NULL;
2988         }
2989
2990         return table;
2991 }
2992 #endif
2993
2994 static int __net_init ip6_route_net_init(struct net *net)
2995 {
2996         int ret = -ENOMEM;
2997
2998         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2999                sizeof(net->ipv6.ip6_dst_ops));
3000
3001         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3002                 goto out_ip6_dst_ops;
3003
3004         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3005                                            sizeof(*net->ipv6.ip6_null_entry),
3006                                            GFP_KERNEL);
3007         if (!net->ipv6.ip6_null_entry)
3008                 goto out_ip6_dst_entries;
3009         net->ipv6.ip6_null_entry->dst.path =
3010                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3011         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3012         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3013                          ip6_template_metrics, true);
3014
3015 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3016         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3017                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3018                                                GFP_KERNEL);
3019         if (!net->ipv6.ip6_prohibit_entry)
3020                 goto out_ip6_null_entry;
3021         net->ipv6.ip6_prohibit_entry->dst.path =
3022                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3023         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3024         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3025                          ip6_template_metrics, true);
3026
3027         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3028                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3029                                                GFP_KERNEL);
3030         if (!net->ipv6.ip6_blk_hole_entry)
3031                 goto out_ip6_prohibit_entry;
3032         net->ipv6.ip6_blk_hole_entry->dst.path =
3033                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3034         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3035         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3036                          ip6_template_metrics, true);
3037 #endif
3038
3039         net->ipv6.sysctl.flush_delay = 0;
3040         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3041         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3042         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3043         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3044         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3045         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3046         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3047
3048         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3049
3050         ret = 0;
3051 out:
3052         return ret;
3053
3054 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3055 out_ip6_prohibit_entry:
3056         kfree(net->ipv6.ip6_prohibit_entry);
3057 out_ip6_null_entry:
3058         kfree(net->ipv6.ip6_null_entry);
3059 #endif
3060 out_ip6_dst_entries:
3061         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3062 out_ip6_dst_ops:
3063         goto out;
3064 }
3065
3066 static void __net_exit ip6_route_net_exit(struct net *net)
3067 {
3068         kfree(net->ipv6.ip6_null_entry);
3069 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3070         kfree(net->ipv6.ip6_prohibit_entry);
3071         kfree(net->ipv6.ip6_blk_hole_entry);
3072 #endif
3073         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3074 }
3075
3076 static int __net_init ip6_route_net_init_late(struct net *net)
3077 {
3078 #ifdef CONFIG_PROC_FS
3079         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3080         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3081 #endif
3082         return 0;
3083 }
3084
3085 static void __net_exit ip6_route_net_exit_late(struct net *net)
3086 {
3087 #ifdef CONFIG_PROC_FS
3088         proc_net_remove(net, "ipv6_route");
3089         proc_net_remove(net, "rt6_stats");
3090 #endif
3091 }
3092
3093 static struct pernet_operations ip6_route_net_ops = {
3094         .init = ip6_route_net_init,
3095         .exit = ip6_route_net_exit,
3096 };
3097
3098 static int __net_init ipv6_inetpeer_init(struct net *net)
3099 {
3100         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3101
3102         if (!bp)
3103                 return -ENOMEM;
3104         inet_peer_base_init(bp);
3105         net->ipv6.peers = bp;
3106         return 0;
3107 }
3108
3109 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3110 {
3111         struct inet_peer_base *bp = net->ipv6.peers;
3112
3113         net->ipv6.peers = NULL;
3114         inetpeer_invalidate_tree(bp);
3115         kfree(bp);
3116 }
3117
3118 static struct pernet_operations ipv6_inetpeer_ops = {
3119         .init   =       ipv6_inetpeer_init,
3120         .exit   =       ipv6_inetpeer_exit,
3121 };
3122
3123 static struct pernet_operations ip6_route_net_late_ops = {
3124         .init = ip6_route_net_init_late,
3125         .exit = ip6_route_net_exit_late,
3126 };
3127
3128 static struct notifier_block ip6_route_dev_notifier = {
3129         .notifier_call = ip6_route_dev_notify,
3130         .priority = 0,
3131 };
3132
3133 int __init ip6_route_init(void)
3134 {
3135         int ret;
3136
3137         ret = -ENOMEM;
3138         ip6_dst_ops_template.kmem_cachep =
3139                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3140                                   SLAB_HWCACHE_ALIGN, NULL);
3141         if (!ip6_dst_ops_template.kmem_cachep)
3142                 goto out;
3143
3144         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3145         if (ret)
3146                 goto out_kmem_cache;
3147
3148         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3149         if (ret)
3150                 goto out_dst_entries;
3151
3152         ret = register_pernet_subsys(&ip6_route_net_ops);
3153         if (ret)
3154                 goto out_register_inetpeer;
3155
3156         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3157
3158         /* Registering of the loopback is done before this portion of code,
3159          * the loopback reference in rt6_info will not be taken, do it
3160          * manually for init_net */
3161         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3162         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3163   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3164         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3165         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3166         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3167         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3168   #endif
3169         ret = fib6_init();
3170         if (ret)
3171                 goto out_register_subsys;
3172
3173         ret = xfrm6_init();
3174         if (ret)
3175                 goto out_fib6_init;
3176
3177         ret = fib6_rules_init();
3178         if (ret)
3179                 goto xfrm6_init;
3180
3181         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3182         if (ret)
3183                 goto fib6_rules_init;
3184
3185         ret = -ENOBUFS;
3186         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3187             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3188             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3189                 goto out_register_late_subsys;
3190
3191         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3192         if (ret)
3193                 goto out_register_late_subsys;
3194
3195 out:
3196         return ret;
3197
3198 out_register_late_subsys:
3199         unregister_pernet_subsys(&ip6_route_net_late_ops);
3200 fib6_rules_init:
3201         fib6_rules_cleanup();
3202 xfrm6_init:
3203         xfrm6_fini();
3204 out_fib6_init:
3205         fib6_gc_cleanup();
3206 out_register_subsys:
3207         unregister_pernet_subsys(&ip6_route_net_ops);
3208 out_register_inetpeer:
3209         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3210 out_dst_entries:
3211         dst_entries_destroy(&ip6_dst_blackhole_ops);
3212 out_kmem_cache:
3213         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3214         goto out;
3215 }
3216
3217 void ip6_route_cleanup(void)
3218 {
3219         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3220         unregister_pernet_subsys(&ip6_route_net_late_ops);
3221         fib6_rules_cleanup();
3222         xfrm6_fini();
3223         fib6_gc_cleanup();
3224         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3225         unregister_pernet_subsys(&ip6_route_net_ops);
3226         dst_entries_destroy(&ip6_dst_blackhole_ops);
3227         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3228 }