ce3b1754b4f5c5d83debb5de76632a9df94a7a54
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Outcome of the next-hop neighbour check used while scoring routes.
 * All failure codes are negative so callers can test "< 0".
 *
 *   RT6_NUD_SUCCEED    - the route may be used.
 *   RT6_NUD_FAIL_DO_RR - find_match() keeps the route at the lowest valid
 *                        score and requests round-robin.
 *   RT6_NUD_FAIL_PROBE - returned by rt6_check_neigh() for a NUD_FAILED
 *                        neighbour when router preference is enabled.
 *   RT6_NUD_FAIL_HARD  - find_match() skips the route entirely.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		=  1,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_HARD	= -3
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
97
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route Information helpers used by rt6_route_rcv(); defined later. */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
107
108 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
109 {
110         struct rt6_info *rt = (struct rt6_info *)dst;
111
112         if (rt->rt6i_flags & RTF_CACHE)
113                 return NULL;
114         else
115                 return dst_cow_metrics_generic(dst, old);
116 }
117
118 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
119                                              struct sk_buff *skb,
120                                              const void *daddr)
121 {
122         struct in6_addr *p = &rt->rt6i_gateway;
123
124         if (!ipv6_addr_any(p))
125                 return (const void *) p;
126         else if (skb)
127                 return &ipv6_hdr(skb)->daddr;
128         return daddr;
129 }
130
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
132                                           struct sk_buff *skb,
133                                           const void *daddr)
134 {
135         struct rt6_info *rt = (struct rt6_info *) dst;
136         struct neighbour *n;
137
138         daddr = choose_neigh_daddr(rt, skb, daddr);
139         n = __ipv6_neigh_lookup(dst->dev, daddr);
140         if (n)
141                 return n;
142         return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144
145 static struct dst_ops ip6_dst_ops_template = {
146         .family                 =       AF_INET6,
147         .gc                     =       ip6_dst_gc,
148         .gc_thresh              =       1024,
149         .check                  =       ip6_dst_check,
150         .default_advmss         =       ip6_default_advmss,
151         .mtu                    =       ip6_mtu,
152         .cow_metrics            =       ipv6_cow_metrics,
153         .destroy                =       ip6_dst_destroy,
154         .ifdown                 =       ip6_dst_ifdown,
155         .negative_advice        =       ip6_negative_advice,
156         .link_failure           =       ip6_link_failure,
157         .update_pmtu            =       ip6_rt_update_pmtu,
158         .redirect               =       rt6_do_redirect,
159         .local_out              =       __ip6_local_out,
160         .neigh_lookup           =       ip6_neigh_lookup,
161 };
162
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
164 {
165         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167         return mtu ? : dst->dev->mtu;
168 }
169
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171                                          struct sk_buff *skb, u32 mtu)
172 {
173 }
174
/* dst_ops->redirect handler for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
179
180 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
181                                          unsigned long old)
182 {
183         return NULL;
184 }
185
186 static struct dst_ops ip6_dst_blackhole_ops = {
187         .family                 =       AF_INET6,
188         .destroy                =       ip6_dst_destroy,
189         .check                  =       ip6_dst_check,
190         .mtu                    =       ip6_blackhole_mtu,
191         .default_advmss         =       ip6_default_advmss,
192         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
193         .redirect               =       ip6_rt_blackhole_redirect,
194         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
195         .neigh_lookup           =       ip6_neigh_lookup,
196 };
197
198 static const u32 ip6_template_metrics[RTAX_MAX] = {
199         [RTAX_HOPLIMIT - 1] = 0,
200 };
201
202 static const struct rt6_info ip6_null_entry_template = {
203         .dst = {
204                 .__refcnt       = ATOMIC_INIT(1),
205                 .__use          = 1,
206                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
207                 .error          = -ENETUNREACH,
208                 .input          = ip6_pkt_discard,
209                 .output         = ip6_pkt_discard_out,
210         },
211         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
212         .rt6i_protocol  = RTPROT_KERNEL,
213         .rt6i_metric    = ~(u32) 0,
214         .rt6i_ref       = ATOMIC_INIT(1),
215 };
216
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" route entry: same shape as the null entry,
 * but the dst reports -EACCES and uses the ip6_pkt_prohibit*() handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
	},
};

/*
 * Template for the "blackhole" route entry: the dst reports -EINVAL and
 * uses the generic dst_discard() / dst_discard_sk() handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard_sk,
		.error		= -EINVAL,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
	},
};

#endif
250
251 /* allocate dst with ip6_dst_ops */
252 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
253                                              struct net_device *dev,
254                                              int flags,
255                                              struct fib6_table *table)
256 {
257         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
258                                         0, DST_OBSOLETE_FORCE_CHK, flags);
259
260         if (rt) {
261                 struct dst_entry *dst = &rt->dst;
262
263                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
264                 INIT_LIST_HEAD(&rt->rt6i_siblings);
265         }
266         return rt;
267 }
268
269 static void ip6_dst_destroy(struct dst_entry *dst)
270 {
271         struct rt6_info *rt = (struct rt6_info *)dst;
272         struct inet6_dev *idev = rt->rt6i_idev;
273         struct dst_entry *from = dst->from;
274
275         dst_destroy_metrics_generic(dst);
276
277         if (idev) {
278                 rt->rt6i_idev = NULL;
279                 in6_dev_put(idev);
280         }
281
282         dst->from = NULL;
283         dst_release(from);
284 }
285
286 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
287                            int how)
288 {
289         struct rt6_info *rt = (struct rt6_info *)dst;
290         struct inet6_dev *idev = rt->rt6i_idev;
291         struct net_device *loopback_dev =
292                 dev_net(dev)->loopback_dev;
293
294         if (dev != loopback_dev) {
295                 if (idev && idev->dev == dev) {
296                         struct inet6_dev *loopback_idev =
297                                 in6_dev_get(loopback_dev);
298                         if (loopback_idev) {
299                                 rt->rt6i_idev = loopback_idev;
300                                 in6_dev_put(idev);
301                         }
302                 }
303         }
304 }
305
306 static bool rt6_check_expired(const struct rt6_info *rt)
307 {
308         if (rt->rt6i_flags & RTF_EXPIRES) {
309                 if (time_after(jiffies, rt->dst.expires))
310                         return true;
311         } else if (rt->dst.from) {
312                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
313         }
314         return false;
315 }
316
317 /* Multipath route selection:
318  *   Hash based function using packet header and flowlabel.
319  * Adapted from fib_info_hashfn()
320  */
321 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322                                const struct flowi6 *fl6)
323 {
324         unsigned int val = fl6->flowi6_proto;
325
326         val ^= ipv6_addr_hash(&fl6->daddr);
327         val ^= ipv6_addr_hash(&fl6->saddr);
328
329         /* Work only if this not encapsulated */
330         switch (fl6->flowi6_proto) {
331         case IPPROTO_UDP:
332         case IPPROTO_TCP:
333         case IPPROTO_SCTP:
334                 val ^= (__force u16)fl6->fl6_sport;
335                 val ^= (__force u16)fl6->fl6_dport;
336                 break;
337
338         case IPPROTO_ICMPV6:
339                 val ^= (__force u16)fl6->fl6_icmp_type;
340                 val ^= (__force u16)fl6->fl6_icmp_code;
341                 break;
342         }
343         /* RFC6438 recommands to use flowlabel */
344         val ^= (__force u32)fl6->flowlabel;
345
346         /* Perhaps, we need to tune, this function? */
347         val = val ^ (val >> 7) ^ (val >> 12);
348         return val % candidate_count;
349 }
350
351 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
352                                              struct flowi6 *fl6, int oif,
353                                              int strict)
354 {
355         struct rt6_info *sibling, *next_sibling;
356         int route_choosen;
357
358         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359         /* Don't change the route, if route_choosen == 0
360          * (siblings does not include ourself)
361          */
362         if (route_choosen)
363                 list_for_each_entry_safe(sibling, next_sibling,
364                                 &match->rt6i_siblings, rt6i_siblings) {
365                         route_choosen--;
366                         if (route_choosen == 0) {
367                                 if (rt6_score_route(sibling, oif, strict) < 0)
368                                         break;
369                                 match = sibling;
370                                 break;
371                         }
372                 }
373         return match;
374 }
375
376 /*
377  *      Route lookup. Any table->tb6_lock is implied.
378  */
379
380 static inline struct rt6_info *rt6_device_match(struct net *net,
381                                                     struct rt6_info *rt,
382                                                     const struct in6_addr *saddr,
383                                                     int oif,
384                                                     int flags)
385 {
386         struct rt6_info *local = NULL;
387         struct rt6_info *sprt;
388
389         if (!oif && ipv6_addr_any(saddr))
390                 goto out;
391
392         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
393                 struct net_device *dev = sprt->dst.dev;
394
395                 if (oif) {
396                         if (dev->ifindex == oif)
397                                 return sprt;
398                         if (dev->flags & IFF_LOOPBACK) {
399                                 if (!sprt->rt6i_idev ||
400                                     sprt->rt6i_idev->dev->ifindex != oif) {
401                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
402                                                 continue;
403                                         if (local && (!oif ||
404                                                       local->rt6i_idev->dev->ifindex == oif))
405                                                 continue;
406                                 }
407                                 local = sprt;
408                         }
409                 } else {
410                         if (ipv6_chk_addr(net, saddr, dev,
411                                           flags & RT6_LOOKUP_F_IFACE))
412                                 return sprt;
413                 }
414         }
415
416         if (oif) {
417                 if (local)
418                         return local;
419
420                 if (flags & RT6_LOOKUP_F_IFACE)
421                         return net->ipv6.ip6_null_entry;
422         }
423 out:
424         return rt;
425 }
426
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred probe request: which address to solicit and on which device. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;	/* reference held until the work has run */
};

/*
 * Work handler: send a neighbour solicitation for the stored target to the
 * multicast address derived by addrconf_addr_solict_mult(), then drop the
 * device reference and free the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct __rt6_probe_work *req =
		container_of(w, struct __rt6_probe_work, work);
	struct in6_addr mcaddr;

	addrconf_addr_solict_mult(&req->target, &mcaddr);
	ndisc_send_ns(req->dev, NULL, &req->target, &mcaddr, NULL);
	dev_put(req->dev);
	kfree(req);
}

/*
 * Router Reachability Probing.
 *
 * For a gateway route whose neighbour entry is missing, or is not NUD_VALID
 * and was last updated more than rtr_probe_interval ago, queue a deferred
 * neighbour solicitation towards the gateway.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  Note that when no neighbour entry exists there is no timestamp
 * to compare against, so the interval check is skipped on that path.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *req;
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		/* Nothing to do for a valid or recently updated neighbour. */
		if ((neigh->nud_state & NUD_VALID) ||
		    !time_after(jiffies, neigh->updated +
				rt->rt6i_idev->cnf.rtr_probe_interval)) {
			write_unlock(&neigh->lock);
			goto unlock_rcu;
		}
	}

	/* May run in atomic context with neigh->lock held. */
	req = kmalloc(sizeof(*req), GFP_ATOMIC);

	if (neigh) {
		if (req)
			__neigh_set_probe_once(neigh);
		write_unlock(&neigh->lock);
	}

	if (req) {
		INIT_WORK(&req->work, rt6_probe_deferred);
		req->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		req->dev = rt->dst.dev;
		schedule_work(&req->work);
	}

unlock_rcu:
	rcu_read_unlock_bh();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
497
498 /*
499  * Default Router Selection (RFC 2461 6.3.6)
500  */
501 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
502 {
503         struct net_device *dev = rt->dst.dev;
504         if (!oif || dev->ifindex == oif)
505                 return 2;
506         if ((dev->flags & IFF_LOOPBACK) &&
507             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
508                 return 1;
509         return 0;
510 }
511
512 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
513 {
514         struct neighbour *neigh;
515         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
516
517         if (rt->rt6i_flags & RTF_NONEXTHOP ||
518             !(rt->rt6i_flags & RTF_GATEWAY))
519                 return RT6_NUD_SUCCEED;
520
521         rcu_read_lock_bh();
522         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
523         if (neigh) {
524                 read_lock(&neigh->lock);
525                 if (neigh->nud_state & NUD_VALID)
526                         ret = RT6_NUD_SUCCEED;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528                 else if (!(neigh->nud_state & NUD_FAILED))
529                         ret = RT6_NUD_SUCCEED;
530                 else
531                         ret = RT6_NUD_FAIL_PROBE;
532 #endif
533                 read_unlock(&neigh->lock);
534         } else {
535                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
536                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
537         }
538         rcu_read_unlock_bh();
539
540         return ret;
541 }
542
543 static int rt6_score_route(struct rt6_info *rt, int oif,
544                            int strict)
545 {
546         int m;
547
548         m = rt6_check_dev(rt, oif);
549         if (!m && (strict & RT6_LOOKUP_F_IFACE))
550                 return RT6_NUD_FAIL_HARD;
551 #ifdef CONFIG_IPV6_ROUTER_PREF
552         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
553 #endif
554         if (strict & RT6_LOOKUP_F_REACHABLE) {
555                 int n = rt6_check_neigh(rt);
556                 if (n < 0)
557                         return n;
558         }
559         return m;
560 }
561
562 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
563                                    int *mpri, struct rt6_info *match,
564                                    bool *do_rr)
565 {
566         int m;
567         bool match_do_rr = false;
568
569         if (rt6_check_expired(rt))
570                 goto out;
571
572         m = rt6_score_route(rt, oif, strict);
573         if (m == RT6_NUD_FAIL_DO_RR) {
574                 match_do_rr = true;
575                 m = 0; /* lowest valid score */
576         } else if (m == RT6_NUD_FAIL_HARD) {
577                 goto out;
578         }
579
580         if (strict & RT6_LOOKUP_F_REACHABLE)
581                 rt6_probe(rt);
582
583         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
584         if (m > *mpri) {
585                 *do_rr = match_do_rr;
586                 *mpri = m;
587                 match = rt;
588         }
589 out:
590         return match;
591 }
592
593 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594                                      struct rt6_info *rr_head,
595                                      u32 metric, int oif, int strict,
596                                      bool *do_rr)
597 {
598         struct rt6_info *rt, *match, *cont;
599         int mpri = -1;
600
601         match = NULL;
602         cont = NULL;
603         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604                 if (rt->rt6i_metric != metric) {
605                         cont = rt;
606                         break;
607                 }
608
609                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
610         }
611
612         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613                 if (rt->rt6i_metric != metric) {
614                         cont = rt;
615                         break;
616                 }
617
618                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
619         }
620
621         if (match || !cont)
622                 return match;
623
624         for (rt = cont; rt; rt = rt->dst.rt6_next)
625                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
626
627         return match;
628 }
629
630 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
631 {
632         struct rt6_info *match, *rt0;
633         struct net *net;
634         bool do_rr = false;
635
636         rt0 = fn->rr_ptr;
637         if (!rt0)
638                 fn->rr_ptr = rt0 = fn->leaf;
639
640         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
641                              &do_rr);
642
643         if (do_rr) {
644                 struct rt6_info *next = rt0->dst.rt6_next;
645
646                 /* no entries matched; do round-robin */
647                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
648                         next = fn->leaf;
649
650                 if (next != rt0)
651                         fn->rr_ptr = next;
652         }
653
654         net = dev_net(rt0->dst.dev);
655         return match ? match : net->ipv6.ip6_null_entry;
656 }
657
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - handle a received Route Information option
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent the option
 *
 * Validates the option, then adds, refreshes or deletes the matching route:
 * a zero lifetime removes an existing route, a non-zero lifetime creates
 * the route if it is missing or updates the preference of an existing one.
 * The expiry is cleared for an infinite lifetime, otherwise set to
 * jiffies + HZ * lifetime.
 *
 * Returns 0 on success, or -EINVAL when the option is too short, its
 * length/prefix_len fields are inconsistent, or the preference is
 * ICMPV6_ROUTER_PREF_INVALID.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/*
	 * @len is signed while sizeof() is unsigned: a plain comparison would
	 * convert a negative length to a huge value and let it through, so
	 * reject negatives explicitly.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length (length is in 8-byte units) */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* A full 128-bit prefix is present in the option. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Copy only prefix_len bits; the length checks above make
		 * sure the option carries that many.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix refers to the default route via this router. */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
733
734 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
735                                         struct in6_addr *saddr)
736 {
737         struct fib6_node *pn;
738         while (1) {
739                 if (fn->fn_flags & RTN_TL_ROOT)
740                         return NULL;
741                 pn = fn->parent;
742                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
743                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
744                 else
745                         fn = pn;
746                 if (fn->fn_flags & RTN_RTINFO)
747                         return fn;
748         }
749 }
750
751 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
752                                              struct fib6_table *table,
753                                              struct flowi6 *fl6, int flags)
754 {
755         struct fib6_node *fn;
756         struct rt6_info *rt;
757
758         read_lock_bh(&table->tb6_lock);
759         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
760 restart:
761         rt = fn->leaf;
762         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
763         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
764                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
765         if (rt == net->ipv6.ip6_null_entry) {
766                 fn = fib6_backtrack(fn, &fl6->saddr);
767                 if (fn)
768                         goto restart;
769         }
770         dst_use(&rt->dst, jiffies);
771         read_unlock_bh(&table->tb6_lock);
772         return rt;
773
774 }
775
776 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
777                                     int flags)
778 {
779         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
780 }
781 EXPORT_SYMBOL_GPL(ip6_route_lookup);
782
783 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
784                             const struct in6_addr *saddr, int oif, int strict)
785 {
786         struct flowi6 fl6 = {
787                 .flowi6_oif = oif,
788                 .daddr = *daddr,
789         };
790         struct dst_entry *dst;
791         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
792
793         if (saddr) {
794                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
795                 flags |= RT6_LOOKUP_F_HAS_SADDR;
796         }
797
798         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
799         if (dst->error == 0)
800                 return (struct rt6_info *) dst;
801
802         dst_release(dst);
803
804         return NULL;
805 }
806 EXPORT_SYMBOL(rt6_lookup);
807
808 /* ip6_ins_rt is called with FREE table->tb6_lock.
809    It takes new route entry, the addition fails by any reason the
810    route is freed. In any case, if caller does not hold it, it may
811    be destroyed.
812  */
813
814 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
815                         struct mx6_config *mxc)
816 {
817         int err;
818         struct fib6_table *table;
819
820         table = rt->rt6i_table;
821         write_lock_bh(&table->tb6_lock);
822         err = fib6_add(&table->tb6_root, rt, info, mxc);
823         write_unlock_bh(&table->tb6_lock);
824
825         return err;
826 }
827
828 int ip6_ins_rt(struct rt6_info *rt)
829 {
830         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
831         struct mx6_config mxc = { .mx = NULL, };
832
833         return __ip6_ins_rt(rt, &info, &mxc);
834 }
835
836 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
837                                       const struct in6_addr *daddr,
838                                       const struct in6_addr *saddr)
839 {
840         struct rt6_info *rt;
841
842         /*
843          *      Clone the route.
844          */
845
846         rt = ip6_rt_copy(ort, daddr);
847
848         if (rt) {
849                 if (ort->rt6i_dst.plen != 128 &&
850                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
851                         rt->rt6i_flags |= RTF_ANYCAST;
852
853                 rt->rt6i_flags |= RTF_CACHE;
854
855 #ifdef CONFIG_IPV6_SUBTREES
856                 if (rt->rt6i_src.plen && saddr) {
857                         rt->rt6i_src.addr = *saddr;
858                         rt->rt6i_src.plen = 128;
859                 }
860 #endif
861         }
862
863         return rt;
864 }
865
866 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
867                                         const struct in6_addr *daddr)
868 {
869         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
870
871         if (rt)
872                 rt->rt6i_flags |= RTF_CACHE;
873         return rt;
874 }
875
876 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
877                                       struct flowi6 *fl6, int flags)
878 {
879         struct fib6_node *fn, *saved_fn;
880         struct rt6_info *rt, *nrt;
881         int strict = 0;
882         int attempts = 3;
883         int err;
884
885         strict |= flags & RT6_LOOKUP_F_IFACE;
886         if (net->ipv6.devconf_all->forwarding == 0)
887                 strict |= RT6_LOOKUP_F_REACHABLE;
888
889 redo_fib6_lookup_lock:
890         read_lock_bh(&table->tb6_lock);
891
892         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
893         saved_fn = fn;
894
895 redo_rt6_select:
896         rt = rt6_select(fn, oif, strict);
897         if (rt->rt6i_nsiblings)
898                 rt = rt6_multipath_select(rt, fl6, oif, strict);
899         if (rt == net->ipv6.ip6_null_entry) {
900                 fn = fib6_backtrack(fn, &fl6->saddr);
901                 if (fn)
902                         goto redo_rt6_select;
903                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
904                         /* also consider unreachable route */
905                         strict &= ~RT6_LOOKUP_F_REACHABLE;
906                         fn = saved_fn;
907                         goto redo_rt6_select;
908                 } else {
909                         dst_hold(&rt->dst);
910                         read_unlock_bh(&table->tb6_lock);
911                         goto out2;
912                 }
913         }
914
915         dst_hold(&rt->dst);
916         read_unlock_bh(&table->tb6_lock);
917
918         if (rt->rt6i_flags & RTF_CACHE)
919                 goto out2;
920
921         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
922                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
923         else if (!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
924                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
925         else
926                 goto out2;
927
928         ip6_rt_put(rt);
929         rt = nrt ? : net->ipv6.ip6_null_entry;
930
931         dst_hold(&rt->dst);
932         if (nrt) {
933                 err = ip6_ins_rt(nrt);
934                 if (!err)
935                         goto out2;
936         }
937
938         if (--attempts <= 0)
939                 goto out2;
940
941         /*
942          * Race condition! In the gap, when table->tb6_lock was
943          * released someone could insert this route.  Relookup.
944          */
945         ip6_rt_put(rt);
946         goto redo_fib6_lookup_lock;
947
948 out2:
949         rt6_dst_from_metrics_check(rt);
950         rt->dst.lastuse = jiffies;
951         rt->dst.__use++;
952
953         return rt;
954 }
955
956 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
957                                             struct flowi6 *fl6, int flags)
958 {
959         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
960 }
961
962 static struct dst_entry *ip6_route_input_lookup(struct net *net,
963                                                 struct net_device *dev,
964                                                 struct flowi6 *fl6, int flags)
965 {
966         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
967                 flags |= RT6_LOOKUP_F_IFACE;
968
969         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
970 }
971
972 void ip6_route_input(struct sk_buff *skb)
973 {
974         const struct ipv6hdr *iph = ipv6_hdr(skb);
975         struct net *net = dev_net(skb->dev);
976         int flags = RT6_LOOKUP_F_HAS_SADDR;
977         struct flowi6 fl6 = {
978                 .flowi6_iif = skb->dev->ifindex,
979                 .daddr = iph->daddr,
980                 .saddr = iph->saddr,
981                 .flowlabel = ip6_flowinfo(iph),
982                 .flowi6_mark = skb->mark,
983                 .flowi6_proto = iph->nexthdr,
984         };
985
986         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
987 }
988
989 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
990                                              struct flowi6 *fl6, int flags)
991 {
992         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
993 }
994
995 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
996                                     struct flowi6 *fl6)
997 {
998         int flags = 0;
999
1000         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1001
1002         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1003                 flags |= RT6_LOOKUP_F_IFACE;
1004
1005         if (!ipv6_addr_any(&fl6->saddr))
1006                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1007         else if (sk)
1008                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1009
1010         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1011 }
1012 EXPORT_SYMBOL(ip6_route_output);
1013
1014 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1015 {
1016         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1017         struct dst_entry *new = NULL;
1018
1019         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1020         if (rt) {
1021                 new = &rt->dst;
1022
1023                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1024
1025                 new->__use = 1;
1026                 new->input = dst_discard;
1027                 new->output = dst_discard_sk;
1028
1029                 if (dst_metrics_read_only(&ort->dst))
1030                         new->_metrics = ort->dst._metrics;
1031                 else
1032                         dst_copy_metrics(new, &ort->dst);
1033                 rt->rt6i_idev = ort->rt6i_idev;
1034                 if (rt->rt6i_idev)
1035                         in6_dev_hold(rt->rt6i_idev);
1036
1037                 rt->rt6i_gateway = ort->rt6i_gateway;
1038                 rt->rt6i_flags = ort->rt6i_flags;
1039                 rt->rt6i_metric = 0;
1040
1041                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1042 #ifdef CONFIG_IPV6_SUBTREES
1043                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1044 #endif
1045
1046                 dst_free(new);
1047         }
1048
1049         dst_release(dst_orig);
1050         return new ? new : ERR_PTR(-ENOMEM);
1051 }
1052
1053 /*
1054  *      Destination cache support functions
1055  */
1056
1057 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1058 {
1059         if (rt->dst.from &&
1060             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1061                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1062 }
1063
1064 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1065 {
1066         struct rt6_info *rt;
1067
1068         rt = (struct rt6_info *) dst;
1069
1070         /* All IPV6 dsts are created with ->obsolete set to the value
1071          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1072          * into this function always.
1073          */
1074         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1075                 return NULL;
1076
1077         if (rt6_check_expired(rt))
1078                 return NULL;
1079
1080         rt6_dst_from_metrics_check(rt);
1081
1082         return dst;
1083 }
1084
1085 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1086 {
1087         struct rt6_info *rt = (struct rt6_info *) dst;
1088
1089         if (rt) {
1090                 if (rt->rt6i_flags & RTF_CACHE) {
1091                         if (rt6_check_expired(rt)) {
1092                                 ip6_del_rt(rt);
1093                                 dst = NULL;
1094                         }
1095                 } else {
1096                         dst_release(dst);
1097                         dst = NULL;
1098                 }
1099         }
1100         return dst;
1101 }
1102
1103 static void ip6_link_failure(struct sk_buff *skb)
1104 {
1105         struct rt6_info *rt;
1106
1107         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1108
1109         rt = (struct rt6_info *) skb_dst(skb);
1110         if (rt) {
1111                 if (rt->rt6i_flags & RTF_CACHE) {
1112                         dst_hold(&rt->dst);
1113                         if (ip6_del_rt(rt))
1114                                 dst_free(&rt->dst);
1115                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1116                         rt->rt6i_node->fn_sernum = -1;
1117                 }
1118         }
1119 }
1120
1121 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1122                                struct sk_buff *skb, u32 mtu)
1123 {
1124         struct rt6_info *rt6 = (struct rt6_info *)dst;
1125
1126         dst_confirm(dst);
1127         if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1128                 struct net *net = dev_net(dst->dev);
1129
1130                 rt6->rt6i_flags |= RTF_MODIFIED;
1131                 if (mtu < IPV6_MIN_MTU)
1132                         mtu = IPV6_MIN_MTU;
1133
1134                 rt6->rt6i_pmtu = mtu;
1135                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1136         }
1137 }
1138
1139 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1140                      int oif, u32 mark)
1141 {
1142         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1143         struct dst_entry *dst;
1144         struct flowi6 fl6;
1145
1146         memset(&fl6, 0, sizeof(fl6));
1147         fl6.flowi6_oif = oif;
1148         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1149         fl6.daddr = iph->daddr;
1150         fl6.saddr = iph->saddr;
1151         fl6.flowlabel = ip6_flowinfo(iph);
1152
1153         dst = ip6_route_output(net, NULL, &fl6);
1154         if (!dst->error)
1155                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1156         dst_release(dst);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1159
1160 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1161 {
1162         ip6_update_pmtu(skb, sock_net(sk), mtu,
1163                         sk->sk_bound_dev_if, sk->sk_mark);
1164 }
1165 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1166
/* Handle redirects */

/*
 * Flow key used for redirect lookups: a regular flowi6 followed by the
 * address of the router that sent the redirect.
 *
 * fl6 must remain the first member: ip6_route_redirect() passes &rdfl.fl6
 * to the rule lookup and __ip6_route_redirect() casts that flowi6 pointer
 * back to struct ip6rd_flowi to reach @gateway.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* flow being redirected */
	struct in6_addr gateway;	/* address the redirect came from */
};
1172
1173 static struct rt6_info *__ip6_route_redirect(struct net *net,
1174                                              struct fib6_table *table,
1175                                              struct flowi6 *fl6,
1176                                              int flags)
1177 {
1178         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1179         struct rt6_info *rt;
1180         struct fib6_node *fn;
1181
1182         /* Get the "current" route for this destination and
1183          * check if the redirect has come from approriate router.
1184          *
1185          * RFC 4861 specifies that redirects should only be
1186          * accepted if they come from the nexthop to the target.
1187          * Due to the way the routes are chosen, this notion
1188          * is a bit fuzzy and one might need to check all possible
1189          * routes.
1190          */
1191
1192         read_lock_bh(&table->tb6_lock);
1193         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1194 restart:
1195         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1196                 if (rt6_check_expired(rt))
1197                         continue;
1198                 if (rt->dst.error)
1199                         break;
1200                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1201                         continue;
1202                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1203                         continue;
1204                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1205                         continue;
1206                 break;
1207         }
1208
1209         if (!rt)
1210                 rt = net->ipv6.ip6_null_entry;
1211         else if (rt->dst.error) {
1212                 rt = net->ipv6.ip6_null_entry;
1213                 goto out;
1214         }
1215
1216         if (rt == net->ipv6.ip6_null_entry) {
1217                 fn = fib6_backtrack(fn, &fl6->saddr);
1218                 if (fn)
1219                         goto restart;
1220         }
1221
1222 out:
1223         dst_hold(&rt->dst);
1224
1225         read_unlock_bh(&table->tb6_lock);
1226
1227         return rt;
1228 };
1229
1230 static struct dst_entry *ip6_route_redirect(struct net *net,
1231                                         const struct flowi6 *fl6,
1232                                         const struct in6_addr *gateway)
1233 {
1234         int flags = RT6_LOOKUP_F_HAS_SADDR;
1235         struct ip6rd_flowi rdfl;
1236
1237         rdfl.fl6 = *fl6;
1238         rdfl.gateway = *gateway;
1239
1240         return fib6_rule_lookup(net, &rdfl.fl6,
1241                                 flags, __ip6_route_redirect);
1242 }
1243
1244 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1245 {
1246         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1247         struct dst_entry *dst;
1248         struct flowi6 fl6;
1249
1250         memset(&fl6, 0, sizeof(fl6));
1251         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1252         fl6.flowi6_oif = oif;
1253         fl6.flowi6_mark = mark;
1254         fl6.daddr = iph->daddr;
1255         fl6.saddr = iph->saddr;
1256         fl6.flowlabel = ip6_flowinfo(iph);
1257
1258         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1259         rt6_do_redirect(dst, NULL, skb);
1260         dst_release(dst);
1261 }
1262 EXPORT_SYMBOL_GPL(ip6_redirect);
1263
1264 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1265                             u32 mark)
1266 {
1267         const struct ipv6hdr *iph = ipv6_hdr(skb);
1268         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1269         struct dst_entry *dst;
1270         struct flowi6 fl6;
1271
1272         memset(&fl6, 0, sizeof(fl6));
1273         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1274         fl6.flowi6_oif = oif;
1275         fl6.flowi6_mark = mark;
1276         fl6.daddr = msg->dest;
1277         fl6.saddr = iph->daddr;
1278
1279         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1280         rt6_do_redirect(dst, NULL, skb);
1281         dst_release(dst);
1282 }
1283
1284 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1285 {
1286         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1287 }
1288 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1289
1290 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1291 {
1292         struct net_device *dev = dst->dev;
1293         unsigned int mtu = dst_mtu(dst);
1294         struct net *net = dev_net(dev);
1295
1296         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1297
1298         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1299                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1300
1301         /*
1302          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1303          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1304          * IPV6_MAXPLEN is also valid and means: "any MSS,
1305          * rely only on pmtu discovery"
1306          */
1307         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1308                 mtu = IPV6_MAXPLEN;
1309         return mtu;
1310 }
1311
1312 static unsigned int ip6_mtu(const struct dst_entry *dst)
1313 {
1314         const struct rt6_info *rt = (const struct rt6_info *)dst;
1315         unsigned int mtu = rt->rt6i_pmtu;
1316         struct inet6_dev *idev;
1317
1318         if (mtu)
1319                 goto out;
1320
1321         mtu = dst_metric_raw(dst, RTAX_MTU);
1322         if (mtu)
1323                 goto out;
1324
1325         mtu = IPV6_MIN_MTU;
1326
1327         rcu_read_lock();
1328         idev = __in6_dev_get(dst->dev);
1329         if (idev)
1330                 mtu = idev->cnf.mtu6;
1331         rcu_read_unlock();
1332
1333 out:
1334         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1335 }
1336
/*
 * Singly linked list (through dst->next) of the dsts created by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and free
 * entries from it.  Every access in this file is made with icmp6_dst_lock
 * held (BH disabled).
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1339
1340 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1341                                   struct flowi6 *fl6)
1342 {
1343         struct dst_entry *dst;
1344         struct rt6_info *rt;
1345         struct inet6_dev *idev = in6_dev_get(dev);
1346         struct net *net = dev_net(dev);
1347
1348         if (unlikely(!idev))
1349                 return ERR_PTR(-ENODEV);
1350
1351         rt = ip6_dst_alloc(net, dev, 0, NULL);
1352         if (unlikely(!rt)) {
1353                 in6_dev_put(idev);
1354                 dst = ERR_PTR(-ENOMEM);
1355                 goto out;
1356         }
1357
1358         rt->dst.flags |= DST_HOST;
1359         rt->dst.output  = ip6_output;
1360         atomic_set(&rt->dst.__refcnt, 1);
1361         rt->rt6i_gateway  = fl6->daddr;
1362         rt->rt6i_dst.addr = fl6->daddr;
1363         rt->rt6i_dst.plen = 128;
1364         rt->rt6i_idev     = idev;
1365         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1366
1367         spin_lock_bh(&icmp6_dst_lock);
1368         rt->dst.next = icmp6_dst_gc_list;
1369         icmp6_dst_gc_list = &rt->dst;
1370         spin_unlock_bh(&icmp6_dst_lock);
1371
1372         fib6_force_start_gc(net);
1373
1374         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1375
1376 out:
1377         return dst;
1378 }
1379
1380 int icmp6_dst_gc(void)
1381 {
1382         struct dst_entry *dst, **pprev;
1383         int more = 0;
1384
1385         spin_lock_bh(&icmp6_dst_lock);
1386         pprev = &icmp6_dst_gc_list;
1387
1388         while ((dst = *pprev) != NULL) {
1389                 if (!atomic_read(&dst->__refcnt)) {
1390                         *pprev = dst->next;
1391                         dst_free(dst);
1392                 } else {
1393                         pprev = &dst->next;
1394                         ++more;
1395                 }
1396         }
1397
1398         spin_unlock_bh(&icmp6_dst_lock);
1399
1400         return more;
1401 }
1402
1403 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1404                             void *arg)
1405 {
1406         struct dst_entry *dst, **pprev;
1407
1408         spin_lock_bh(&icmp6_dst_lock);
1409         pprev = &icmp6_dst_gc_list;
1410         while ((dst = *pprev) != NULL) {
1411                 struct rt6_info *rt = (struct rt6_info *) dst;
1412                 if (func(rt, arg)) {
1413                         *pprev = dst->next;
1414                         dst_free(dst);
1415                 } else {
1416                         pprev = &dst->next;
1417                 }
1418         }
1419         spin_unlock_bh(&icmp6_dst_lock);
1420 }
1421
1422 static int ip6_dst_gc(struct dst_ops *ops)
1423 {
1424         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1425         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1426         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1427         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1428         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1429         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1430         int entries;
1431
1432         entries = dst_entries_get_fast(ops);
1433         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1434             entries <= rt_max_size)
1435                 goto out;
1436
1437         net->ipv6.ip6_rt_gc_expire++;
1438         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1439         entries = dst_entries_get_slow(ops);
1440         if (entries < ops->gc_thresh)
1441                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1442 out:
1443         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1444         return entries > rt_max_size;
1445 }
1446
1447 static int ip6_convert_metrics(struct mx6_config *mxc,
1448                                const struct fib6_config *cfg)
1449 {
1450         struct nlattr *nla;
1451         int remaining;
1452         u32 *mp;
1453
1454         if (!cfg->fc_mx)
1455                 return 0;
1456
1457         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1458         if (unlikely(!mp))
1459                 return -ENOMEM;
1460
1461         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1462                 int type = nla_type(nla);
1463
1464                 if (type) {
1465                         u32 val;
1466
1467                         if (unlikely(type > RTAX_MAX))
1468                                 goto err;
1469                         if (type == RTAX_CC_ALGO) {
1470                                 char tmp[TCP_CA_NAME_MAX];
1471
1472                                 nla_strlcpy(tmp, nla, sizeof(tmp));
1473                                 val = tcp_ca_get_key_by_name(tmp);
1474                                 if (val == TCP_CA_UNSPEC)
1475                                         goto err;
1476                         } else {
1477                                 val = nla_get_u32(nla);
1478                         }
1479
1480                         mp[type - 1] = val;
1481                         __set_bit(type - 1, mxc->mx_valid);
1482                 }
1483         }
1484
1485         mxc->mx = mp;
1486
1487         return 0;
1488  err:
1489         kfree(mp);
1490         return -EINVAL;
1491 }
1492
1493 int ip6_route_add(struct fib6_config *cfg)
1494 {
1495         int err;
1496         struct net *net = cfg->fc_nlinfo.nl_net;
1497         struct rt6_info *rt = NULL;
1498         struct net_device *dev = NULL;
1499         struct inet6_dev *idev = NULL;
1500         struct fib6_table *table;
1501         struct mx6_config mxc = { .mx = NULL, };
1502         int addr_type;
1503
1504         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1505                 return -EINVAL;
1506 #ifndef CONFIG_IPV6_SUBTREES
1507         if (cfg->fc_src_len)
1508                 return -EINVAL;
1509 #endif
1510         if (cfg->fc_ifindex) {
1511                 err = -ENODEV;
1512                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1513                 if (!dev)
1514                         goto out;
1515                 idev = in6_dev_get(dev);
1516                 if (!idev)
1517                         goto out;
1518         }
1519
1520         if (cfg->fc_metric == 0)
1521                 cfg->fc_metric = IP6_RT_PRIO_USER;
1522
1523         err = -ENOBUFS;
1524         if (cfg->fc_nlinfo.nlh &&
1525             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1526                 table = fib6_get_table(net, cfg->fc_table);
1527                 if (!table) {
1528                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1529                         table = fib6_new_table(net, cfg->fc_table);
1530                 }
1531         } else {
1532                 table = fib6_new_table(net, cfg->fc_table);
1533         }
1534
1535         if (!table)
1536                 goto out;
1537
1538         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1539
1540         if (!rt) {
1541                 err = -ENOMEM;
1542                 goto out;
1543         }
1544
1545         if (cfg->fc_flags & RTF_EXPIRES)
1546                 rt6_set_expires(rt, jiffies +
1547                                 clock_t_to_jiffies(cfg->fc_expires));
1548         else
1549                 rt6_clean_expires(rt);
1550
1551         if (cfg->fc_protocol == RTPROT_UNSPEC)
1552                 cfg->fc_protocol = RTPROT_BOOT;
1553         rt->rt6i_protocol = cfg->fc_protocol;
1554
1555         addr_type = ipv6_addr_type(&cfg->fc_dst);
1556
1557         if (addr_type & IPV6_ADDR_MULTICAST)
1558                 rt->dst.input = ip6_mc_input;
1559         else if (cfg->fc_flags & RTF_LOCAL)
1560                 rt->dst.input = ip6_input;
1561         else
1562                 rt->dst.input = ip6_forward;
1563
1564         rt->dst.output = ip6_output;
1565
1566         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1567         rt->rt6i_dst.plen = cfg->fc_dst_len;
1568         if (rt->rt6i_dst.plen == 128)
1569                 rt->dst.flags |= DST_HOST;
1570
1571 #ifdef CONFIG_IPV6_SUBTREES
1572         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1573         rt->rt6i_src.plen = cfg->fc_src_len;
1574 #endif
1575
1576         rt->rt6i_metric = cfg->fc_metric;
1577
1578         /* We cannot add true routes via loopback here,
1579            they would result in kernel looping; promote them to reject routes
1580          */
1581         if ((cfg->fc_flags & RTF_REJECT) ||
1582             (dev && (dev->flags & IFF_LOOPBACK) &&
1583              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1584              !(cfg->fc_flags & RTF_LOCAL))) {
1585                 /* hold loopback dev/idev if we haven't done so. */
1586                 if (dev != net->loopback_dev) {
1587                         if (dev) {
1588                                 dev_put(dev);
1589                                 in6_dev_put(idev);
1590                         }
1591                         dev = net->loopback_dev;
1592                         dev_hold(dev);
1593                         idev = in6_dev_get(dev);
1594                         if (!idev) {
1595                                 err = -ENODEV;
1596                                 goto out;
1597                         }
1598                 }
1599                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1600                 switch (cfg->fc_type) {
1601                 case RTN_BLACKHOLE:
1602                         rt->dst.error = -EINVAL;
1603                         rt->dst.output = dst_discard_sk;
1604                         rt->dst.input = dst_discard;
1605                         break;
1606                 case RTN_PROHIBIT:
1607                         rt->dst.error = -EACCES;
1608                         rt->dst.output = ip6_pkt_prohibit_out;
1609                         rt->dst.input = ip6_pkt_prohibit;
1610                         break;
1611                 case RTN_THROW:
1612                 default:
1613                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1614                                         : -ENETUNREACH;
1615                         rt->dst.output = ip6_pkt_discard_out;
1616                         rt->dst.input = ip6_pkt_discard;
1617                         break;
1618                 }
1619                 goto install_route;
1620         }
1621
1622         if (cfg->fc_flags & RTF_GATEWAY) {
1623                 const struct in6_addr *gw_addr;
1624                 int gwa_type;
1625
1626                 gw_addr = &cfg->fc_gateway;
1627
1628                 /* if gw_addr is local we will fail to detect this in case
1629                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1630                  * will return already-added prefix route via interface that
1631                  * prefix route was assigned to, which might be non-loopback.
1632                  */
1633                 err = -EINVAL;
1634                 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1635                         goto out;
1636
1637                 rt->rt6i_gateway = *gw_addr;
1638                 gwa_type = ipv6_addr_type(gw_addr);
1639
1640                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1641                         struct rt6_info *grt;
1642
1643                         /* IPv6 strictly inhibits using not link-local
1644                            addresses as nexthop address.
1645                            Otherwise, router will not able to send redirects.
1646                            It is very good, but in some (rare!) circumstances
1647                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1648                            some exceptions. --ANK
1649                          */
1650                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1651                                 goto out;
1652
1653                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1654
1655                         err = -EHOSTUNREACH;
1656                         if (!grt)
1657                                 goto out;
1658                         if (dev) {
1659                                 if (dev != grt->dst.dev) {
1660                                         ip6_rt_put(grt);
1661                                         goto out;
1662                                 }
1663                         } else {
1664                                 dev = grt->dst.dev;
1665                                 idev = grt->rt6i_idev;
1666                                 dev_hold(dev);
1667                                 in6_dev_hold(grt->rt6i_idev);
1668                         }
1669                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1670                                 err = 0;
1671                         ip6_rt_put(grt);
1672
1673                         if (err)
1674                                 goto out;
1675                 }
1676                 err = -EINVAL;
1677                 if (!dev || (dev->flags & IFF_LOOPBACK))
1678                         goto out;
1679         }
1680
1681         err = -ENODEV;
1682         if (!dev)
1683                 goto out;
1684
1685         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1686                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1687                         err = -EINVAL;
1688                         goto out;
1689                 }
1690                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1691                 rt->rt6i_prefsrc.plen = 128;
1692         } else
1693                 rt->rt6i_prefsrc.plen = 0;
1694
1695         rt->rt6i_flags = cfg->fc_flags;
1696
1697 install_route:
1698         rt->dst.dev = dev;
1699         rt->rt6i_idev = idev;
1700         rt->rt6i_table = table;
1701
1702         cfg->fc_nlinfo.nl_net = dev_net(dev);
1703
1704         err = ip6_convert_metrics(&mxc, cfg);
1705         if (err)
1706                 goto out;
1707
1708         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1709
1710         kfree(mxc.mx);
1711         return err;
1712 out:
1713         if (dev)
1714                 dev_put(dev);
1715         if (idev)
1716                 in6_dev_put(idev);
1717         if (rt)
1718                 dst_free(&rt->dst);
1719         return err;
1720 }
1721
1722 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1723 {
1724         int err;
1725         struct fib6_table *table;
1726         struct net *net = dev_net(rt->dst.dev);
1727
1728         if (rt == net->ipv6.ip6_null_entry) {
1729                 err = -ENOENT;
1730                 goto out;
1731         }
1732
1733         table = rt->rt6i_table;
1734         write_lock_bh(&table->tb6_lock);
1735         err = fib6_del(rt, info);
1736         write_unlock_bh(&table->tb6_lock);
1737
1738 out:
1739         ip6_rt_put(rt);
1740         return err;
1741 }
1742
1743 int ip6_del_rt(struct rt6_info *rt)
1744 {
1745         struct nl_info info = {
1746                 .nl_net = dev_net(rt->dst.dev),
1747         };
1748         return __ip6_del_rt(rt, &info);
1749 }
1750
1751 static int ip6_route_del(struct fib6_config *cfg)
1752 {
1753         struct fib6_table *table;
1754         struct fib6_node *fn;
1755         struct rt6_info *rt;
1756         int err = -ESRCH;
1757
1758         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1759         if (!table)
1760                 return err;
1761
1762         read_lock_bh(&table->tb6_lock);
1763
1764         fn = fib6_locate(&table->tb6_root,
1765                          &cfg->fc_dst, cfg->fc_dst_len,
1766                          &cfg->fc_src, cfg->fc_src_len);
1767
1768         if (fn) {
1769                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1770                         if ((rt->rt6i_flags & RTF_CACHE) &&
1771                             !(cfg->fc_flags & RTF_CACHE))
1772                                 continue;
1773                         if (cfg->fc_ifindex &&
1774                             (!rt->dst.dev ||
1775                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1776                                 continue;
1777                         if (cfg->fc_flags & RTF_GATEWAY &&
1778                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1779                                 continue;
1780                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1781                                 continue;
1782                         dst_hold(&rt->dst);
1783                         read_unlock_bh(&table->tb6_lock);
1784
1785                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1786                 }
1787         }
1788         read_unlock_bh(&table->tb6_lock);
1789
1790         return err;
1791 }
1792
1793 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1794 {
1795         struct net *net = dev_net(skb->dev);
1796         struct netevent_redirect netevent;
1797         struct rt6_info *rt, *nrt = NULL;
1798         struct ndisc_options ndopts;
1799         struct inet6_dev *in6_dev;
1800         struct neighbour *neigh;
1801         struct rd_msg *msg;
1802         int optlen, on_link;
1803         u8 *lladdr;
1804
1805         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1806         optlen -= sizeof(*msg);
1807
1808         if (optlen < 0) {
1809                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1810                 return;
1811         }
1812
1813         msg = (struct rd_msg *)icmp6_hdr(skb);
1814
1815         if (ipv6_addr_is_multicast(&msg->dest)) {
1816                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1817                 return;
1818         }
1819
1820         on_link = 0;
1821         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1822                 on_link = 1;
1823         } else if (ipv6_addr_type(&msg->target) !=
1824                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1825                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1826                 return;
1827         }
1828
1829         in6_dev = __in6_dev_get(skb->dev);
1830         if (!in6_dev)
1831                 return;
1832         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1833                 return;
1834
1835         /* RFC2461 8.1:
1836          *      The IP source address of the Redirect MUST be the same as the current
1837          *      first-hop router for the specified ICMP Destination Address.
1838          */
1839
1840         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1841                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1842                 return;
1843         }
1844
1845         lladdr = NULL;
1846         if (ndopts.nd_opts_tgt_lladdr) {
1847                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1848                                              skb->dev);
1849                 if (!lladdr) {
1850                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1851                         return;
1852                 }
1853         }
1854
1855         rt = (struct rt6_info *) dst;
1856         if (rt == net->ipv6.ip6_null_entry) {
1857                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1858                 return;
1859         }
1860
1861         /* Redirect received -> path was valid.
1862          * Look, redirects are sent only in response to data packets,
1863          * so that this nexthop apparently is reachable. --ANK
1864          */
1865         dst_confirm(&rt->dst);
1866
1867         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1868         if (!neigh)
1869                 return;
1870
1871         /*
1872          *      We have finally decided to accept it.
1873          */
1874
1875         neigh_update(neigh, lladdr, NUD_STALE,
1876                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1877                      NEIGH_UPDATE_F_OVERRIDE|
1878                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1879                                      NEIGH_UPDATE_F_ISROUTER))
1880                      );
1881
1882         nrt = ip6_rt_copy(rt, &msg->dest);
1883         if (!nrt)
1884                 goto out;
1885
1886         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1887         if (on_link)
1888                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1889
1890         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1891
1892         if (ip6_ins_rt(nrt))
1893                 goto out;
1894
1895         netevent.old = &rt->dst;
1896         netevent.new = &nrt->dst;
1897         netevent.daddr = &msg->dest;
1898         netevent.neigh = neigh;
1899         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1900
1901         if (rt->rt6i_flags & RTF_CACHE) {
1902                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1903                 ip6_del_rt(rt);
1904         }
1905
1906 out:
1907         neigh_release(neigh);
1908 }
1909
1910 /*
1911  *      Misc support functions
1912  */
1913
1914 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1915 {
1916         BUG_ON(from->dst.from);
1917
1918         rt->rt6i_flags &= ~RTF_EXPIRES;
1919         dst_hold(&from->dst);
1920         rt->dst.from = &from->dst;
1921         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1922 }
1923
1924 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1925                                     const struct in6_addr *dest)
1926 {
1927         struct net *net = dev_net(ort->dst.dev);
1928         struct rt6_info *rt;
1929
1930         if (ort->rt6i_flags & RTF_CACHE)
1931                 ort = (struct rt6_info *)ort->dst.from;
1932
1933         rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1934                            ort->rt6i_table);
1935
1936         if (rt) {
1937                 rt->dst.input = ort->dst.input;
1938                 rt->dst.output = ort->dst.output;
1939                 rt->dst.flags |= DST_HOST;
1940
1941                 rt->rt6i_dst.addr = *dest;
1942                 rt->rt6i_dst.plen = 128;
1943                 rt->dst.error = ort->dst.error;
1944                 rt->rt6i_idev = ort->rt6i_idev;
1945                 if (rt->rt6i_idev)
1946                         in6_dev_hold(rt->rt6i_idev);
1947                 rt->dst.lastuse = jiffies;
1948                 rt->rt6i_gateway = ort->rt6i_gateway;
1949                 rt->rt6i_flags = ort->rt6i_flags;
1950                 rt6_set_from(rt, ort);
1951                 rt->rt6i_metric = 0;
1952
1953 #ifdef CONFIG_IPV6_SUBTREES
1954                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1955 #endif
1956                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1957                 rt->rt6i_table = ort->rt6i_table;
1958         }
1959         return rt;
1960 }
1961
1962 #ifdef CONFIG_IPV6_ROUTE_INFO
1963 static struct rt6_info *rt6_get_route_info(struct net *net,
1964                                            const struct in6_addr *prefix, int prefixlen,
1965                                            const struct in6_addr *gwaddr, int ifindex)
1966 {
1967         struct fib6_node *fn;
1968         struct rt6_info *rt = NULL;
1969         struct fib6_table *table;
1970
1971         table = fib6_get_table(net, RT6_TABLE_INFO);
1972         if (!table)
1973                 return NULL;
1974
1975         read_lock_bh(&table->tb6_lock);
1976         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1977         if (!fn)
1978                 goto out;
1979
1980         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1981                 if (rt->dst.dev->ifindex != ifindex)
1982                         continue;
1983                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1984                         continue;
1985                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1986                         continue;
1987                 dst_hold(&rt->dst);
1988                 break;
1989         }
1990 out:
1991         read_unlock_bh(&table->tb6_lock);
1992         return rt;
1993 }
1994
1995 static struct rt6_info *rt6_add_route_info(struct net *net,
1996                                            const struct in6_addr *prefix, int prefixlen,
1997                                            const struct in6_addr *gwaddr, int ifindex,
1998                                            unsigned int pref)
1999 {
2000         struct fib6_config cfg = {
2001                 .fc_table       = RT6_TABLE_INFO,
2002                 .fc_metric      = IP6_RT_PRIO_USER,
2003                 .fc_ifindex     = ifindex,
2004                 .fc_dst_len     = prefixlen,
2005                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2006                                   RTF_UP | RTF_PREF(pref),
2007                 .fc_nlinfo.portid = 0,
2008                 .fc_nlinfo.nlh = NULL,
2009                 .fc_nlinfo.nl_net = net,
2010         };
2011
2012         cfg.fc_dst = *prefix;
2013         cfg.fc_gateway = *gwaddr;
2014
2015         /* We should treat it as a default route if prefix length is 0. */
2016         if (!prefixlen)
2017                 cfg.fc_flags |= RTF_DEFAULT;
2018
2019         ip6_route_add(&cfg);
2020
2021         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2022 }
2023 #endif
2024
2025 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2026 {
2027         struct rt6_info *rt;
2028         struct fib6_table *table;
2029
2030         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2031         if (!table)
2032                 return NULL;
2033
2034         read_lock_bh(&table->tb6_lock);
2035         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2036                 if (dev == rt->dst.dev &&
2037                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2038                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2039                         break;
2040         }
2041         if (rt)
2042                 dst_hold(&rt->dst);
2043         read_unlock_bh(&table->tb6_lock);
2044         return rt;
2045 }
2046
2047 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2048                                      struct net_device *dev,
2049                                      unsigned int pref)
2050 {
2051         struct fib6_config cfg = {
2052                 .fc_table       = RT6_TABLE_DFLT,
2053                 .fc_metric      = IP6_RT_PRIO_USER,
2054                 .fc_ifindex     = dev->ifindex,
2055                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2056                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2057                 .fc_nlinfo.portid = 0,
2058                 .fc_nlinfo.nlh = NULL,
2059                 .fc_nlinfo.nl_net = dev_net(dev),
2060         };
2061
2062         cfg.fc_gateway = *gwaddr;
2063
2064         ip6_route_add(&cfg);
2065
2066         return rt6_get_dflt_router(gwaddr, dev);
2067 }
2068
2069 void rt6_purge_dflt_routers(struct net *net)
2070 {
2071         struct rt6_info *rt;
2072         struct fib6_table *table;
2073
2074         /* NOTE: Keep consistent with rt6_get_dflt_router */
2075         table = fib6_get_table(net, RT6_TABLE_DFLT);
2076         if (!table)
2077                 return;
2078
2079 restart:
2080         read_lock_bh(&table->tb6_lock);
2081         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2082                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2083                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2084                         dst_hold(&rt->dst);
2085                         read_unlock_bh(&table->tb6_lock);
2086                         ip6_del_rt(rt);
2087                         goto restart;
2088                 }
2089         }
2090         read_unlock_bh(&table->tb6_lock);
2091 }
2092
2093 static void rtmsg_to_fib6_config(struct net *net,
2094                                  struct in6_rtmsg *rtmsg,
2095                                  struct fib6_config *cfg)
2096 {
2097         memset(cfg, 0, sizeof(*cfg));
2098
2099         cfg->fc_table = RT6_TABLE_MAIN;
2100         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2101         cfg->fc_metric = rtmsg->rtmsg_metric;
2102         cfg->fc_expires = rtmsg->rtmsg_info;
2103         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2104         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2105         cfg->fc_flags = rtmsg->rtmsg_flags;
2106
2107         cfg->fc_nlinfo.nl_net = net;
2108
2109         cfg->fc_dst = rtmsg->rtmsg_dst;
2110         cfg->fc_src = rtmsg->rtmsg_src;
2111         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2112 }
2113
2114 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2115 {
2116         struct fib6_config cfg;
2117         struct in6_rtmsg rtmsg;
2118         int err;
2119
2120         switch (cmd) {
2121         case SIOCADDRT:         /* Add a route */
2122         case SIOCDELRT:         /* Delete a route */
2123                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2124                         return -EPERM;
2125                 err = copy_from_user(&rtmsg, arg,
2126                                      sizeof(struct in6_rtmsg));
2127                 if (err)
2128                         return -EFAULT;
2129
2130                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2131
2132                 rtnl_lock();
2133                 switch (cmd) {
2134                 case SIOCADDRT:
2135                         err = ip6_route_add(&cfg);
2136                         break;
2137                 case SIOCDELRT:
2138                         err = ip6_route_del(&cfg);
2139                         break;
2140                 default:
2141                         err = -EINVAL;
2142                 }
2143                 rtnl_unlock();
2144
2145                 return err;
2146         }
2147
2148         return -EINVAL;
2149 }
2150
2151 /*
2152  *      Drop the packet on the floor
2153  */
2154
2155 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2156 {
2157         int type;
2158         struct dst_entry *dst = skb_dst(skb);
2159         switch (ipstats_mib_noroutes) {
2160         case IPSTATS_MIB_INNOROUTES:
2161                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2162                 if (type == IPV6_ADDR_ANY) {
2163                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2164                                       IPSTATS_MIB_INADDRERRORS);
2165                         break;
2166                 }
2167                 /* FALLTHROUGH */
2168         case IPSTATS_MIB_OUTNOROUTES:
2169                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2170                               ipstats_mib_noroutes);
2171                 break;
2172         }
2173         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2174         kfree_skb(skb);
2175         return 0;
2176 }
2177
2178 static int ip6_pkt_discard(struct sk_buff *skb)
2179 {
2180         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2181 }
2182
2183 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2184 {
2185         skb->dev = skb_dst(skb)->dev;
2186         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2187 }
2188
2189 static int ip6_pkt_prohibit(struct sk_buff *skb)
2190 {
2191         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2192 }
2193
2194 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2195 {
2196         skb->dev = skb_dst(skb)->dev;
2197         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2198 }
2199
2200 /*
2201  *      Allocate a dst for local (unicast / anycast) address.
2202  */
2203
2204 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2205                                     const struct in6_addr *addr,
2206                                     bool anycast)
2207 {
2208         struct net *net = dev_net(idev->dev);
2209         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2210                                             DST_NOCOUNT, NULL);
2211         if (!rt)
2212                 return ERR_PTR(-ENOMEM);
2213
2214         in6_dev_hold(idev);
2215
2216         rt->dst.flags |= DST_HOST;
2217         rt->dst.input = ip6_input;
2218         rt->dst.output = ip6_output;
2219         rt->rt6i_idev = idev;
2220
2221         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2222         if (anycast)
2223                 rt->rt6i_flags |= RTF_ANYCAST;
2224         else
2225                 rt->rt6i_flags |= RTF_LOCAL;
2226
2227         rt->rt6i_gateway  = *addr;
2228         rt->rt6i_dst.addr = *addr;
2229         rt->rt6i_dst.plen = 128;
2230         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2231
2232         atomic_set(&rt->dst.__refcnt, 1);
2233
2234         return rt;
2235 }
2236
2237 int ip6_route_get_saddr(struct net *net,
2238                         struct rt6_info *rt,
2239                         const struct in6_addr *daddr,
2240                         unsigned int prefs,
2241                         struct in6_addr *saddr)
2242 {
2243         struct inet6_dev *idev =
2244                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2245         int err = 0;
2246         if (rt && rt->rt6i_prefsrc.plen)
2247                 *saddr = rt->rt6i_prefsrc.addr;
2248         else
2249                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2250                                          daddr, prefs, saddr);
2251         return err;
2252 }
2253
/*
 * Remove a deleted IP address from prefsrc entries.
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
        struct net_device *dev; /* only routes on this device; NULL matches any */
        struct net *net;        /* namespace whose null entry is left untouched */
        struct in6_addr *addr;  /* address compared against each route's prefsrc */
};
2260
2261 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2262 {
2263         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2264         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2265         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2266
2267         if (((void *)rt->dst.dev == dev || !dev) &&
2268             rt != net->ipv6.ip6_null_entry &&
2269             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2270                 /* remove prefsrc entry */
2271                 rt->rt6i_prefsrc.plen = 0;
2272         }
2273         return 0;
2274 }
2275
2276 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2277 {
2278         struct net *net = dev_net(ifp->idev->dev);
2279         struct arg_dev_net_ip adni = {
2280                 .dev = ifp->idev->dev,
2281                 .net = net,
2282                 .addr = &ifp->addr,
2283         };
2284         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2285 }
2286
/*
 * Flag combinations tested by fib6_clean_tohost(); a route matches when
 * every flag of one of the sets is present.
 * NOTE(review): going by the names, RTF_RA_ROUTER presumably identifies
 * default routers learnt via router advertisements - confirm.
 */
#define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2289
2290 /* Remove routers and update dst entries when gateway turn into host. */
2291 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2292 {
2293         struct in6_addr *gateway = (struct in6_addr *)arg;
2294
2295         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2296              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2297              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2298                 return -1;
2299         }
2300         return 0;
2301 }
2302
2303 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2304 {
2305         fib6_clean_all(net, fib6_clean_tohost, gateway);
2306 }
2307
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
        struct net_device *dev; /* device to match; NULL matches every route */
        struct net *net;        /* namespace whose null entry is never matched */
};
2312
2313 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2314 {
2315         const struct arg_dev_net *adn = arg;
2316         const struct net_device *dev = adn->dev;
2317
2318         if ((rt->dst.dev == dev || !dev) &&
2319             rt != adn->net->ipv6.ip6_null_entry)
2320                 return -1;
2321
2322         return 0;
2323 }
2324
2325 void rt6_ifdown(struct net *net, struct net_device *dev)
2326 {
2327         struct arg_dev_net adn = {
2328                 .dev = dev,
2329                 .net = net,
2330         };
2331
2332         fib6_clean_all(net, fib6_ifdown, &adn);
2333         icmp6_clean_all(fib6_ifdown, &adn);
2334 }
2335
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
        struct net_device *dev; /* only routes on this device are updated */
        unsigned int mtu;       /* MTU value applied to matching routes */
};
2340
2341 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2342 {
2343         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2344         struct inet6_dev *idev;
2345
2346         /* In IPv6 pmtu discovery is not optional,
2347            so that RTAX_MTU lock cannot disable it.
2348            We still use this lock to block changes
2349            caused by addrconf/ndisc.
2350         */
2351
2352         idev = __in6_dev_get(arg->dev);
2353         if (!idev)
2354                 return 0;
2355
2356         /* For administrative MTU increase, there is no way to discover
2357            IPv6 PMTU increase, so PMTU increase should be updated here.
2358            Since RFC 1981 doesn't include administrative MTU increase
2359            update PMTU increase is a MUST. (i.e. jumbo frame)
2360          */
2361         /*
2362            If new MTU is less than route PMTU, this new MTU will be the
2363            lowest MTU in the path, update the route PMTU to reflect PMTU
2364            decreases; if new MTU is greater than route PMTU, and the
2365            old MTU is the lowest MTU in the path, update the route PMTU
2366            to reflect the increase. In this case if the other nodes' MTU
2367            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2368            PMTU discouvery.
2369          */
2370         if (rt->dst.dev == arg->dev &&
2371             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2372                 if (rt->rt6i_flags & RTF_CACHE) {
2373                         /* For RTF_CACHE with rt6i_pmtu == 0
2374                          * (i.e. a redirected route),
2375                          * the metrics of its rt->dst.from has already
2376                          * been updated.
2377                          */
2378                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2379                                 rt->rt6i_pmtu = arg->mtu;
2380                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2381                            (dst_mtu(&rt->dst) < arg->mtu &&
2382                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2383                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2384                 }
2385         }
2386         return 0;
2387 }
2388
2389 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2390 {
2391         struct rt6_mtu_change_arg arg = {
2392                 .dev = dev,
2393                 .mtu = mtu,
2394         };
2395
2396         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2397 }
2398
2399 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2400         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2401         [RTA_OIF]               = { .type = NLA_U32 },
2402         [RTA_IIF]               = { .type = NLA_U32 },
2403         [RTA_PRIORITY]          = { .type = NLA_U32 },
2404         [RTA_METRICS]           = { .type = NLA_NESTED },
2405         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2406         [RTA_PREF]              = { .type = NLA_U8 },
2407 };
2408
2409 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2410                               struct fib6_config *cfg)
2411 {
2412         struct rtmsg *rtm;
2413         struct nlattr *tb[RTA_MAX+1];
2414         unsigned int pref;
2415         int err;
2416
2417         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2418         if (err < 0)
2419                 goto errout;
2420
2421         err = -EINVAL;
2422         rtm = nlmsg_data(nlh);
2423         memset(cfg, 0, sizeof(*cfg));
2424
2425         cfg->fc_table = rtm->rtm_table;
2426         cfg->fc_dst_len = rtm->rtm_dst_len;
2427         cfg->fc_src_len = rtm->rtm_src_len;
2428         cfg->fc_flags = RTF_UP;
2429         cfg->fc_protocol = rtm->rtm_protocol;
2430         cfg->fc_type = rtm->rtm_type;
2431
2432         if (rtm->rtm_type == RTN_UNREACHABLE ||
2433             rtm->rtm_type == RTN_BLACKHOLE ||
2434             rtm->rtm_type == RTN_PROHIBIT ||
2435             rtm->rtm_type == RTN_THROW)
2436                 cfg->fc_flags |= RTF_REJECT;
2437
2438         if (rtm->rtm_type == RTN_LOCAL)
2439                 cfg->fc_flags |= RTF_LOCAL;
2440
2441         if (rtm->rtm_flags & RTM_F_CLONED)
2442                 cfg->fc_flags |= RTF_CACHE;
2443
2444         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2445         cfg->fc_nlinfo.nlh = nlh;
2446         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2447
2448         if (tb[RTA_GATEWAY]) {
2449                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2450                 cfg->fc_flags |= RTF_GATEWAY;
2451         }
2452
2453         if (tb[RTA_DST]) {
2454                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2455
2456                 if (nla_len(tb[RTA_DST]) < plen)
2457                         goto errout;
2458
2459                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2460         }
2461
2462         if (tb[RTA_SRC]) {
2463                 int plen = (rtm->rtm_src_len + 7) >> 3;
2464
2465                 if (nla_len(tb[RTA_SRC]) < plen)
2466                         goto errout;
2467
2468                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2469         }
2470
2471         if (tb[RTA_PREFSRC])
2472                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2473
2474         if (tb[RTA_OIF])
2475                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2476
2477         if (tb[RTA_PRIORITY])
2478                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2479
2480         if (tb[RTA_METRICS]) {
2481                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2482                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2483         }
2484
2485         if (tb[RTA_TABLE])
2486                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2487
2488         if (tb[RTA_MULTIPATH]) {
2489                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2490                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2491         }
2492
2493         if (tb[RTA_PREF]) {
2494                 pref = nla_get_u8(tb[RTA_PREF]);
2495                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2496                     pref != ICMPV6_ROUTER_PREF_HIGH)
2497                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2498                 cfg->fc_flags |= RTF_PREF(pref);
2499         }
2500
2501         err = 0;
2502 errout:
2503         return err;
2504 }
2505
2506 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2507 {
2508         struct fib6_config r_cfg;
2509         struct rtnexthop *rtnh;
2510         int remaining;
2511         int attrlen;
2512         int err = 0, last_err = 0;
2513
2514         remaining = cfg->fc_mp_len;
2515 beginning:
2516         rtnh = (struct rtnexthop *)cfg->fc_mp;
2517
2518         /* Parse a Multipath Entry */
2519         while (rtnh_ok(rtnh, remaining)) {
2520                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2521                 if (rtnh->rtnh_ifindex)
2522                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2523
2524                 attrlen = rtnh_attrlen(rtnh);
2525                 if (attrlen > 0) {
2526                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2527
2528                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2529                         if (nla) {
2530                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2531                                 r_cfg.fc_flags |= RTF_GATEWAY;
2532                         }
2533                 }
2534                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2535                 if (err) {
2536                         last_err = err;
2537                         /* If we are trying to remove a route, do not stop the
2538                          * loop when ip6_route_del() fails (because next hop is
2539                          * already gone), we should try to remove all next hops.
2540                          */
2541                         if (add) {
2542                                 /* If add fails, we should try to delete all
2543                                  * next hops that have been already added.
2544                                  */
2545                                 add = 0;
2546                                 remaining = cfg->fc_mp_len - remaining;
2547                                 goto beginning;
2548                         }
2549                 }
2550                 /* Because each route is added like a single route we remove
2551                  * these flags after the first nexthop: if there is a collision,
2552                  * we have already failed to add the first nexthop:
2553                  * fib6_add_rt2node() has rejected it; when replacing, old
2554                  * nexthops have been replaced by first new, the rest should
2555                  * be added to it.
2556                  */
2557                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2558                                                      NLM_F_REPLACE);
2559                 rtnh = rtnh_next(rtnh, &remaining);
2560         }
2561
2562         return last_err;
2563 }
2564
2565 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2566 {
2567         struct fib6_config cfg;
2568         int err;
2569
2570         err = rtm_to_fib6_config(skb, nlh, &cfg);
2571         if (err < 0)
2572                 return err;
2573
2574         if (cfg.fc_mp)
2575                 return ip6_route_multipath(&cfg, 0);
2576         else
2577                 return ip6_route_del(&cfg);
2578 }
2579
2580 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2581 {
2582         struct fib6_config cfg;
2583         int err;
2584
2585         err = rtm_to_fib6_config(skb, nlh, &cfg);
2586         if (err < 0)
2587                 return err;
2588
2589         if (cfg.fc_mp)
2590                 return ip6_route_multipath(&cfg, 1);
2591         else
2592                 return ip6_route_add(&cfg);
2593 }
2594
2595 static inline size_t rt6_nlmsg_size(void)
2596 {
2597         return NLMSG_ALIGN(sizeof(struct rtmsg))
2598                + nla_total_size(16) /* RTA_SRC */
2599                + nla_total_size(16) /* RTA_DST */
2600                + nla_total_size(16) /* RTA_GATEWAY */
2601                + nla_total_size(16) /* RTA_PREFSRC */
2602                + nla_total_size(4) /* RTA_TABLE */
2603                + nla_total_size(4) /* RTA_IIF */
2604                + nla_total_size(4) /* RTA_OIF */
2605                + nla_total_size(4) /* RTA_PRIORITY */
2606                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2607                + nla_total_size(sizeof(struct rta_cacheinfo))
2608                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2609                + nla_total_size(1); /* RTA_PREF */
2610 }
2611
2612 static int rt6_fill_node(struct net *net,
2613                          struct sk_buff *skb, struct rt6_info *rt,
2614                          struct in6_addr *dst, struct in6_addr *src,
2615                          int iif, int type, u32 portid, u32 seq,
2616                          int prefix, int nowait, unsigned int flags)
2617 {
2618         u32 metrics[RTAX_MAX];
2619         struct rtmsg *rtm;
2620         struct nlmsghdr *nlh;
2621         long expires;
2622         u32 table;
2623
2624         if (prefix) {   /* user wants prefix routes only */
2625                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2626                         /* success since this is not a prefix route */
2627                         return 1;
2628                 }
2629         }
2630
2631         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2632         if (!nlh)
2633                 return -EMSGSIZE;
2634
2635         rtm = nlmsg_data(nlh);
2636         rtm->rtm_family = AF_INET6;
2637         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2638         rtm->rtm_src_len = rt->rt6i_src.plen;
2639         rtm->rtm_tos = 0;
2640         if (rt->rt6i_table)
2641                 table = rt->rt6i_table->tb6_id;
2642         else
2643                 table = RT6_TABLE_UNSPEC;
2644         rtm->rtm_table = table;
2645         if (nla_put_u32(skb, RTA_TABLE, table))
2646                 goto nla_put_failure;
2647         if (rt->rt6i_flags & RTF_REJECT) {
2648                 switch (rt->dst.error) {
2649                 case -EINVAL:
2650                         rtm->rtm_type = RTN_BLACKHOLE;
2651                         break;
2652                 case -EACCES:
2653                         rtm->rtm_type = RTN_PROHIBIT;
2654                         break;
2655                 case -EAGAIN:
2656                         rtm->rtm_type = RTN_THROW;
2657                         break;
2658                 default:
2659                         rtm->rtm_type = RTN_UNREACHABLE;
2660                         break;
2661                 }
2662         }
2663         else if (rt->rt6i_flags & RTF_LOCAL)
2664                 rtm->rtm_type = RTN_LOCAL;
2665         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2666                 rtm->rtm_type = RTN_LOCAL;
2667         else
2668                 rtm->rtm_type = RTN_UNICAST;
2669         rtm->rtm_flags = 0;
2670         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2671         rtm->rtm_protocol = rt->rt6i_protocol;
2672         if (rt->rt6i_flags & RTF_DYNAMIC)
2673                 rtm->rtm_protocol = RTPROT_REDIRECT;
2674         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2675                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2676                         rtm->rtm_protocol = RTPROT_RA;
2677                 else
2678                         rtm->rtm_protocol = RTPROT_KERNEL;
2679         }
2680
2681         if (rt->rt6i_flags & RTF_CACHE)
2682                 rtm->rtm_flags |= RTM_F_CLONED;
2683
2684         if (dst) {
2685                 if (nla_put_in6_addr(skb, RTA_DST, dst))
2686                         goto nla_put_failure;
2687                 rtm->rtm_dst_len = 128;
2688         } else if (rtm->rtm_dst_len)
2689                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2690                         goto nla_put_failure;
2691 #ifdef CONFIG_IPV6_SUBTREES
2692         if (src) {
2693                 if (nla_put_in6_addr(skb, RTA_SRC, src))
2694                         goto nla_put_failure;
2695                 rtm->rtm_src_len = 128;
2696         } else if (rtm->rtm_src_len &&
2697                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2698                 goto nla_put_failure;
2699 #endif
2700         if (iif) {
2701 #ifdef CONFIG_IPV6_MROUTE
2702                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2703                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2704                         if (err <= 0) {
2705                                 if (!nowait) {
2706                                         if (err == 0)
2707                                                 return 0;
2708                                         goto nla_put_failure;
2709                                 } else {
2710                                         if (err == -EMSGSIZE)
2711                                                 goto nla_put_failure;
2712                                 }
2713                         }
2714                 } else
2715 #endif
2716                         if (nla_put_u32(skb, RTA_IIF, iif))
2717                                 goto nla_put_failure;
2718         } else if (dst) {
2719                 struct in6_addr saddr_buf;
2720                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2721                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2722                         goto nla_put_failure;
2723         }
2724
2725         if (rt->rt6i_prefsrc.plen) {
2726                 struct in6_addr saddr_buf;
2727                 saddr_buf = rt->rt6i_prefsrc.addr;
2728                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2729                         goto nla_put_failure;
2730         }
2731
2732         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2733         if (rt->rt6i_pmtu)
2734                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2735         if (rtnetlink_put_metrics(skb, metrics) < 0)
2736                 goto nla_put_failure;
2737
2738         if (rt->rt6i_flags & RTF_GATEWAY) {
2739                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2740                         goto nla_put_failure;
2741         }
2742
2743         if (rt->dst.dev &&
2744             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2745                 goto nla_put_failure;
2746         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2747                 goto nla_put_failure;
2748
2749         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2750
2751         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2752                 goto nla_put_failure;
2753
2754         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2755                 goto nla_put_failure;
2756
2757         nlmsg_end(skb, nlh);
2758         return 0;
2759
2760 nla_put_failure:
2761         nlmsg_cancel(skb, nlh);
2762         return -EMSGSIZE;
2763 }
2764
2765 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2766 {
2767         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2768         int prefix;
2769
2770         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2771                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2772                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2773         } else
2774                 prefix = 0;
2775
2776         return rt6_fill_node(arg->net,
2777                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2778                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2779                      prefix, 0, NLM_F_MULTI);
2780 }
2781
2782 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2783 {
2784         struct net *net = sock_net(in_skb->sk);
2785         struct nlattr *tb[RTA_MAX+1];
2786         struct rt6_info *rt;
2787         struct sk_buff *skb;
2788         struct rtmsg *rtm;
2789         struct flowi6 fl6;
2790         int err, iif = 0, oif = 0;
2791
2792         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2793         if (err < 0)
2794                 goto errout;
2795
2796         err = -EINVAL;
2797         memset(&fl6, 0, sizeof(fl6));
2798
2799         if (tb[RTA_SRC]) {
2800                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2801                         goto errout;
2802
2803                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2804         }
2805
2806         if (tb[RTA_DST]) {
2807                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2808                         goto errout;
2809
2810                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2811         }
2812
2813         if (tb[RTA_IIF])
2814                 iif = nla_get_u32(tb[RTA_IIF]);
2815
2816         if (tb[RTA_OIF])
2817                 oif = nla_get_u32(tb[RTA_OIF]);
2818
2819         if (tb[RTA_MARK])
2820                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2821
2822         if (iif) {
2823                 struct net_device *dev;
2824                 int flags = 0;
2825
2826                 dev = __dev_get_by_index(net, iif);
2827                 if (!dev) {
2828                         err = -ENODEV;
2829                         goto errout;
2830                 }
2831
2832                 fl6.flowi6_iif = iif;
2833
2834                 if (!ipv6_addr_any(&fl6.saddr))
2835                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2836
2837                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2838                                                                flags);
2839         } else {
2840                 fl6.flowi6_oif = oif;
2841
2842                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2843         }
2844
2845         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2846         if (!skb) {
2847                 ip6_rt_put(rt);
2848                 err = -ENOBUFS;
2849                 goto errout;
2850         }
2851
2852         /* Reserve room for dummy headers, this skb can pass
2853            through good chunk of routing engine.
2854          */
2855         skb_reset_mac_header(skb);
2856         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2857
2858         skb_dst_set(skb, &rt->dst);
2859
2860         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2861                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2862                             nlh->nlmsg_seq, 0, 0, 0);
2863         if (err < 0) {
2864                 kfree_skb(skb);
2865                 goto errout;
2866         }
2867
2868         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2869 errout:
2870         return err;
2871 }
2872
2873 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2874 {
2875         struct sk_buff *skb;
2876         struct net *net = info->nl_net;
2877         u32 seq;
2878         int err;
2879
2880         err = -ENOBUFS;
2881         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2882
2883         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2884         if (!skb)
2885                 goto errout;
2886
2887         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2888                                 event, info->portid, seq, 0, 0, 0);
2889         if (err < 0) {
2890                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2891                 WARN_ON(err == -EMSGSIZE);
2892                 kfree_skb(skb);
2893                 goto errout;
2894         }
2895         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2896                     info->nlh, gfp_any());
2897         return;
2898 errout:
2899         if (err < 0)
2900                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2901 }
2902
2903 static int ip6_route_dev_notify(struct notifier_block *this,
2904                                 unsigned long event, void *ptr)
2905 {
2906         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2907         struct net *net = dev_net(dev);
2908
2909         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2910                 net->ipv6.ip6_null_entry->dst.dev = dev;
2911                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2912 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2914                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2915                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2916                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2917 #endif
2918         }
2919
2920         return NOTIFY_OK;
2921 }
2922
2923 /*
2924  *      /proc
2925  */
2926
2927 #ifdef CONFIG_PROC_FS
2928
2929 static const struct file_operations ipv6_route_proc_fops = {
2930         .owner          = THIS_MODULE,
2931         .open           = ipv6_route_open,
2932         .read           = seq_read,
2933         .llseek         = seq_lseek,
2934         .release        = seq_release_net,
2935 };
2936
2937 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2938 {
2939         struct net *net = (struct net *)seq->private;
2940         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2941                    net->ipv6.rt6_stats->fib_nodes,
2942                    net->ipv6.rt6_stats->fib_route_nodes,
2943                    net->ipv6.rt6_stats->fib_rt_alloc,
2944                    net->ipv6.rt6_stats->fib_rt_entries,
2945                    net->ipv6.rt6_stats->fib_rt_cache,
2946                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2947                    net->ipv6.rt6_stats->fib_discarded_routes);
2948
2949         return 0;
2950 }
2951
2952 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2953 {
2954         return single_open_net(inode, file, rt6_stats_seq_show);
2955 }
2956
2957 static const struct file_operations rt6_stats_seq_fops = {
2958         .owner   = THIS_MODULE,
2959         .open    = rt6_stats_seq_open,
2960         .read    = seq_read,
2961         .llseek  = seq_lseek,
2962         .release = single_release_net,
2963 };
2964 #endif  /* CONFIG_PROC_FS */
2965
2966 #ifdef CONFIG_SYSCTL
2967
2968 static
2969 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2970                               void __user *buffer, size_t *lenp, loff_t *ppos)
2971 {
2972         struct net *net;
2973         int delay;
2974         if (!write)
2975                 return -EINVAL;
2976
2977         net = (struct net *)ctl->extra1;
2978         delay = net->ipv6.sysctl.flush_delay;
2979         proc_dointvec(ctl, write, buffer, lenp, ppos);
2980         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2981         return 0;
2982 }
2983
2984 struct ctl_table ipv6_route_table_template[] = {
2985         {
2986                 .procname       =       "flush",
2987                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2988                 .maxlen         =       sizeof(int),
2989                 .mode           =       0200,
2990                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2991         },
2992         {
2993                 .procname       =       "gc_thresh",
2994                 .data           =       &ip6_dst_ops_template.gc_thresh,
2995                 .maxlen         =       sizeof(int),
2996                 .mode           =       0644,
2997                 .proc_handler   =       proc_dointvec,
2998         },
2999         {
3000                 .procname       =       "max_size",
3001                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3002                 .maxlen         =       sizeof(int),
3003                 .mode           =       0644,
3004                 .proc_handler   =       proc_dointvec,
3005         },
3006         {
3007                 .procname       =       "gc_min_interval",
3008                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3009                 .maxlen         =       sizeof(int),
3010                 .mode           =       0644,
3011                 .proc_handler   =       proc_dointvec_jiffies,
3012         },
3013         {
3014                 .procname       =       "gc_timeout",
3015                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3016                 .maxlen         =       sizeof(int),
3017                 .mode           =       0644,
3018                 .proc_handler   =       proc_dointvec_jiffies,
3019         },
3020         {
3021                 .procname       =       "gc_interval",
3022                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3023                 .maxlen         =       sizeof(int),
3024                 .mode           =       0644,
3025                 .proc_handler   =       proc_dointvec_jiffies,
3026         },
3027         {
3028                 .procname       =       "gc_elasticity",
3029                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3030                 .maxlen         =       sizeof(int),
3031                 .mode           =       0644,
3032                 .proc_handler   =       proc_dointvec,
3033         },
3034         {
3035                 .procname       =       "mtu_expires",
3036                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3037                 .maxlen         =       sizeof(int),
3038                 .mode           =       0644,
3039                 .proc_handler   =       proc_dointvec_jiffies,
3040         },
3041         {
3042                 .procname       =       "min_adv_mss",
3043                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3044                 .maxlen         =       sizeof(int),
3045                 .mode           =       0644,
3046                 .proc_handler   =       proc_dointvec,
3047         },
3048         {
3049                 .procname       =       "gc_min_interval_ms",
3050                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3051                 .maxlen         =       sizeof(int),
3052                 .mode           =       0644,
3053                 .proc_handler   =       proc_dointvec_ms_jiffies,
3054         },
3055         { }
3056 };
3057
3058 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3059 {
3060         struct ctl_table *table;
3061
3062         table = kmemdup(ipv6_route_table_template,
3063                         sizeof(ipv6_route_table_template),
3064                         GFP_KERNEL);
3065
3066         if (table) {
3067                 table[0].data = &net->ipv6.sysctl.flush_delay;
3068                 table[0].extra1 = net;
3069                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3070                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3071                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3072                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3073                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3074                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3075                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3076                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3077                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3078
3079                 /* Don't export sysctls to unprivileged users */
3080                 if (net->user_ns != &init_user_ns)
3081                         table[0].procname = NULL;
3082         }
3083
3084         return table;
3085 }
3086 #endif
3087
3088 static int __net_init ip6_route_net_init(struct net *net)
3089 {
3090         int ret = -ENOMEM;
3091
3092         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3093                sizeof(net->ipv6.ip6_dst_ops));
3094
3095         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3096                 goto out_ip6_dst_ops;
3097
3098         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3099                                            sizeof(*net->ipv6.ip6_null_entry),
3100                                            GFP_KERNEL);
3101         if (!net->ipv6.ip6_null_entry)
3102                 goto out_ip6_dst_entries;
3103         net->ipv6.ip6_null_entry->dst.path =
3104                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3105         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3106         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3107                          ip6_template_metrics, true);
3108
3109 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3110         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3111                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3112                                                GFP_KERNEL);
3113         if (!net->ipv6.ip6_prohibit_entry)
3114                 goto out_ip6_null_entry;
3115         net->ipv6.ip6_prohibit_entry->dst.path =
3116                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3117         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3118         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3119                          ip6_template_metrics, true);
3120
3121         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3122                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3123                                                GFP_KERNEL);
3124         if (!net->ipv6.ip6_blk_hole_entry)
3125                 goto out_ip6_prohibit_entry;
3126         net->ipv6.ip6_blk_hole_entry->dst.path =
3127                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3128         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3129         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3130                          ip6_template_metrics, true);
3131 #endif
3132
3133         net->ipv6.sysctl.flush_delay = 0;
3134         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3135         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3136         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3137         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3138         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3139         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3140         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3141
3142         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3143
3144         ret = 0;
3145 out:
3146         return ret;
3147
3148 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3149 out_ip6_prohibit_entry:
3150         kfree(net->ipv6.ip6_prohibit_entry);
3151 out_ip6_null_entry:
3152         kfree(net->ipv6.ip6_null_entry);
3153 #endif
3154 out_ip6_dst_entries:
3155         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3156 out_ip6_dst_ops:
3157         goto out;
3158 }
3159
3160 static void __net_exit ip6_route_net_exit(struct net *net)
3161 {
3162         kfree(net->ipv6.ip6_null_entry);
3163 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3164         kfree(net->ipv6.ip6_prohibit_entry);
3165         kfree(net->ipv6.ip6_blk_hole_entry);
3166 #endif
3167         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3168 }
3169
3170 static int __net_init ip6_route_net_init_late(struct net *net)
3171 {
3172 #ifdef CONFIG_PROC_FS
3173         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3174         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3175 #endif
3176         return 0;
3177 }
3178
3179 static void __net_exit ip6_route_net_exit_late(struct net *net)
3180 {
3181 #ifdef CONFIG_PROC_FS
3182         remove_proc_entry("ipv6_route", net->proc_net);
3183         remove_proc_entry("rt6_stats", net->proc_net);
3184 #endif
3185 }
3186
3187 static struct pernet_operations ip6_route_net_ops = {
3188         .init = ip6_route_net_init,
3189         .exit = ip6_route_net_exit,
3190 };
3191
3192 static int __net_init ipv6_inetpeer_init(struct net *net)
3193 {
3194         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3195
3196         if (!bp)
3197                 return -ENOMEM;
3198         inet_peer_base_init(bp);
3199         net->ipv6.peers = bp;
3200         return 0;
3201 }
3202
3203 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3204 {
3205         struct inet_peer_base *bp = net->ipv6.peers;
3206
3207         net->ipv6.peers = NULL;
3208         inetpeer_invalidate_tree(bp);
3209         kfree(bp);
3210 }
3211
3212 static struct pernet_operations ipv6_inetpeer_ops = {
3213         .init   =       ipv6_inetpeer_init,
3214         .exit   =       ipv6_inetpeer_exit,
3215 };
3216
3217 static struct pernet_operations ip6_route_net_late_ops = {
3218         .init = ip6_route_net_init_late,
3219         .exit = ip6_route_net_exit_late,
3220 };
3221
3222 static struct notifier_block ip6_route_dev_notifier = {
3223         .notifier_call = ip6_route_dev_notify,
3224         .priority = 0,
3225 };
3226
3227 int __init ip6_route_init(void)
3228 {
3229         int ret;
3230
3231         ret = -ENOMEM;
3232         ip6_dst_ops_template.kmem_cachep =
3233                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3234                                   SLAB_HWCACHE_ALIGN, NULL);
3235         if (!ip6_dst_ops_template.kmem_cachep)
3236                 goto out;
3237
3238         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3239         if (ret)
3240                 goto out_kmem_cache;
3241
3242         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3243         if (ret)
3244                 goto out_dst_entries;
3245
3246         ret = register_pernet_subsys(&ip6_route_net_ops);
3247         if (ret)
3248                 goto out_register_inetpeer;
3249
3250         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3251
3252         /* Registering of the loopback is done before this portion of code,
3253          * the loopback reference in rt6_info will not be taken, do it
3254          * manually for init_net */
3255         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3256         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3257   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3258         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3259         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3260         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3261         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3262   #endif
3263         ret = fib6_init();
3264         if (ret)
3265                 goto out_register_subsys;
3266
3267         ret = xfrm6_init();
3268         if (ret)
3269                 goto out_fib6_init;
3270
3271         ret = fib6_rules_init();
3272         if (ret)
3273                 goto xfrm6_init;
3274
3275         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3276         if (ret)
3277                 goto fib6_rules_init;
3278
3279         ret = -ENOBUFS;
3280         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3281             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3282             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3283                 goto out_register_late_subsys;
3284
3285         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3286         if (ret)
3287                 goto out_register_late_subsys;
3288
3289 out:
3290         return ret;
3291
3292 out_register_late_subsys:
3293         unregister_pernet_subsys(&ip6_route_net_late_ops);
3294 fib6_rules_init:
3295         fib6_rules_cleanup();
3296 xfrm6_init:
3297         xfrm6_fini();
3298 out_fib6_init:
3299         fib6_gc_cleanup();
3300 out_register_subsys:
3301         unregister_pernet_subsys(&ip6_route_net_ops);
3302 out_register_inetpeer:
3303         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3304 out_dst_entries:
3305         dst_entries_destroy(&ip6_dst_blackhole_ops);
3306 out_kmem_cache:
3307         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3308         goto out;
3309 }
3310
3311 void ip6_route_cleanup(void)
3312 {
3313         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3314         unregister_pernet_subsys(&ip6_route_net_late_ops);
3315         fib6_rules_cleanup();
3316         xfrm6_fini();
3317         fib6_gc_cleanup();
3318         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3319         unregister_pernet_subsys(&ip6_route_net_ops);
3320         dst_entries_destroy(&ip6_dst_blackhole_ops);
3321         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3322 }