net/ipv6/route.c  (firefly-linux-kernel-4.4.55.git, merge of 'android-4.4' from https://android.googlesource.com/kernel/common)
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
72 enum rt6_nud_state {
73         RT6_NUD_FAIL_HARD = -3,
74         RT6_NUD_FAIL_PROBE = -2,
75         RT6_NUD_FAIL_DO_RR = -1,
76         RT6_NUD_SUCCEED = 1
77 };
78
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int      ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void             ip6_dst_destroy(struct dst_entry *);
85 static void             ip6_dst_ifdown(struct dst_entry *,
86                                        struct net_device *dev, int how);
87 static int               ip6_dst_gc(struct dst_ops *ops);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int              ip6_pkt_prohibit(struct sk_buff *skb);
92 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95                                            struct sk_buff *skb, u32 mtu);
96 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97                                         struct sk_buff *skb);
98 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
103                                            const struct in6_addr *prefix, int prefixlen,
104                                            const struct in6_addr *gwaddr, unsigned int pref);
105 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
106                                            const struct in6_addr *prefix, int prefixlen,
107                                            const struct in6_addr *gwaddr);
108 #endif
109
110 struct uncached_list {
111         spinlock_t              lock;
112         struct list_head        head;
113 };
114
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121         rt->dst.flags |= DST_NOCACHE;
122         rt->rt6i_uncached_list = ul;
123
124         spin_lock_bh(&ul->lock);
125         list_add_tail(&rt->rt6i_uncached, &ul->head);
126         spin_unlock_bh(&ul->lock);
127 }
128
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131         if (!list_empty(&rt->rt6i_uncached)) {
132                 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134                 spin_lock_bh(&ul->lock);
135                 list_del(&rt->rt6i_uncached);
136                 spin_unlock_bh(&ul->lock);
137         }
138 }
139
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142         struct net_device *loopback_dev = net->loopback_dev;
143         int cpu;
144
145         if (dev == loopback_dev)
146                 return;
147
148         for_each_possible_cpu(cpu) {
149                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150                 struct rt6_info *rt;
151
152                 spin_lock_bh(&ul->lock);
153                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154                         struct inet6_dev *rt_idev = rt->rt6i_idev;
155                         struct net_device *rt_dev = rt->dst.dev;
156
157                         if (rt_idev->dev == dev) {
158                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
159                                 in6_dev_put(rt_idev);
160                         }
161
162                         if (rt_dev == dev) {
163                                 rt->dst.dev = loopback_dev;
164                                 dev_hold(rt->dst.dev);
165                                 dev_put(rt_dev);
166                         }
167                 }
168                 spin_unlock_bh(&ul->lock);
169         }
170 }
171
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174         return dst_metrics_write_ptr(rt->dst.from);
175 }
176
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179         struct rt6_info *rt = (struct rt6_info *)dst;
180
181         if (rt->rt6i_flags & RTF_PCPU)
182                 return rt6_pcpu_cow_metrics(rt);
183         else if (rt->rt6i_flags & RTF_CACHE)
184                 return NULL;
185         else
186                 return dst_cow_metrics_generic(dst, old);
187 }
188
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190                                              struct sk_buff *skb,
191                                              const void *daddr)
192 {
193         struct in6_addr *p = &rt->rt6i_gateway;
194
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203                                           struct sk_buff *skb,
204                                           const void *daddr)
205 {
206         struct rt6_info *rt = (struct rt6_info *) dst;
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(rt, skb, daddr);
210         n = __ipv6_neigh_lookup(dst->dev, daddr);
211         if (n)
212                 return n;
213         return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215
216 static struct dst_ops ip6_dst_ops_template = {
217         .family                 =       AF_INET6,
218         .gc                     =       ip6_dst_gc,
219         .gc_thresh              =       1024,
220         .check                  =       ip6_dst_check,
221         .default_advmss         =       ip6_default_advmss,
222         .mtu                    =       ip6_mtu,
223         .cow_metrics            =       ipv6_cow_metrics,
224         .destroy                =       ip6_dst_destroy,
225         .ifdown                 =       ip6_dst_ifdown,
226         .negative_advice        =       ip6_negative_advice,
227         .link_failure           =       ip6_link_failure,
228         .update_pmtu            =       ip6_rt_update_pmtu,
229         .redirect               =       rt6_do_redirect,
230         .local_out              =       __ip6_local_out,
231         .neigh_lookup           =       ip6_neigh_lookup,
232 };
233
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238         return mtu ? : dst->dev->mtu;
239 }
240
241 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242                                          struct sk_buff *skb, u32 mtu)
243 {
244 }
245
246 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
247                                       struct sk_buff *skb)
248 {
249 }
250
251 static struct dst_ops ip6_dst_blackhole_ops = {
252         .family                 =       AF_INET6,
253         .destroy                =       ip6_dst_destroy,
254         .check                  =       ip6_dst_check,
255         .mtu                    =       ip6_blackhole_mtu,
256         .default_advmss         =       ip6_default_advmss,
257         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
258         .redirect               =       ip6_rt_blackhole_redirect,
259         .cow_metrics            =       dst_cow_metrics_generic,
260         .neigh_lookup           =       ip6_neigh_lookup,
261 };
262
263 static const u32 ip6_template_metrics[RTAX_MAX] = {
264         [RTAX_HOPLIMIT - 1] = 0,
265 };
266
267 static const struct rt6_info ip6_null_entry_template = {
268         .dst = {
269                 .__refcnt       = ATOMIC_INIT(1),
270                 .__use          = 1,
271                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
272                 .error          = -ENETUNREACH,
273                 .input          = ip6_pkt_discard,
274                 .output         = ip6_pkt_discard_out,
275         },
276         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
277         .rt6i_protocol  = RTPROT_KERNEL,
278         .rt6i_metric    = ~(u32) 0,
279         .rt6i_ref       = ATOMIC_INIT(1),
280 };
281
282 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
283
284 static const struct rt6_info ip6_prohibit_entry_template = {
285         .dst = {
286                 .__refcnt       = ATOMIC_INIT(1),
287                 .__use          = 1,
288                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
289                 .error          = -EACCES,
290                 .input          = ip6_pkt_prohibit,
291                 .output         = ip6_pkt_prohibit_out,
292         },
293         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
294         .rt6i_protocol  = RTPROT_KERNEL,
295         .rt6i_metric    = ~(u32) 0,
296         .rt6i_ref       = ATOMIC_INIT(1),
297 };
298
299 static const struct rt6_info ip6_blk_hole_entry_template = {
300         .dst = {
301                 .__refcnt       = ATOMIC_INIT(1),
302                 .__use          = 1,
303                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
304                 .error          = -EINVAL,
305                 .input          = dst_discard,
306                 .output         = dst_discard_out,
307         },
308         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
309         .rt6i_protocol  = RTPROT_KERNEL,
310         .rt6i_metric    = ~(u32) 0,
311         .rt6i_ref       = ATOMIC_INIT(1),
312 };
313
314 #endif
315
316 static void rt6_info_init(struct rt6_info *rt)
317 {
318         struct dst_entry *dst = &rt->dst;
319
320         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
321         INIT_LIST_HEAD(&rt->rt6i_siblings);
322         INIT_LIST_HEAD(&rt->rt6i_uncached);
323 }
324
325 /* allocate dst with ip6_dst_ops */
326 static struct rt6_info *__ip6_dst_alloc(struct net *net,
327                                         struct net_device *dev,
328                                         int flags)
329 {
330         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
331                                         0, DST_OBSOLETE_FORCE_CHK, flags);
332
333         if (rt)
334                 rt6_info_init(rt);
335
336         return rt;
337 }
338
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340                                       struct net_device *dev,
341                                       int flags)
342 {
343         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344
345         if (rt) {
346                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347                 if (rt->rt6i_pcpu) {
348                         int cpu;
349
350                         for_each_possible_cpu(cpu) {
351                                 struct rt6_info **p;
352
353                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354                                 /* no one shares rt */
355                                 *p =  NULL;
356                         }
357                 } else {
358                         dst_destroy((struct dst_entry *)rt);
359                         return NULL;
360                 }
361         }
362
363         return rt;
364 }
365
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368         struct rt6_info *rt = (struct rt6_info *)dst;
369         struct dst_entry *from = dst->from;
370         struct inet6_dev *idev;
371
372         dst_destroy_metrics_generic(dst);
373         free_percpu(rt->rt6i_pcpu);
374         rt6_uncached_list_del(rt);
375
376         idev = rt->rt6i_idev;
377         if (idev) {
378                 rt->rt6i_idev = NULL;
379                 in6_dev_put(idev);
380         }
381
382         dst->from = NULL;
383         dst_release(from);
384 }
385
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387                            int how)
388 {
389         struct rt6_info *rt = (struct rt6_info *)dst;
390         struct inet6_dev *idev = rt->rt6i_idev;
391         struct net_device *loopback_dev =
392                 dev_net(dev)->loopback_dev;
393
394         if (dev != loopback_dev) {
395                 if (idev && idev->dev == dev) {
396                         struct inet6_dev *loopback_idev =
397                                 in6_dev_get(loopback_dev);
398                         if (loopback_idev) {
399                                 rt->rt6i_idev = loopback_idev;
400                                 in6_dev_put(idev);
401                         }
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         if (rt->rt6i_flags & RTF_EXPIRES) {
417                 if (time_after(jiffies, rt->dst.expires))
418                         return true;
419         } else if (rt->dst.from) {
420                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
421         }
422         return false;
423 }
424
425 /* Multipath route selection:
426  *   Hash-based function using the packet header and flow label.
427  * Adapted from fib_info_hashfn().
428  */
429 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
430                                const struct flowi6 *fl6)
431 {
432         return get_hash_from_flowi6(fl6) % candidate_count;
433 }
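/*
 * For example: with three siblings, candidate_count is rt6i_nsiblings + 1
 * == 4, so a flow whose get_hash_from_flowi6() value is 6 maps to
 * 6 % 4 == 2 and rt6_multipath_select() below picks the second sibling;
 * a result of 0 keeps the route that already matched.
 */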
434
435 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
436                                              struct flowi6 *fl6, int oif,
437                                              int strict)
438 {
439         struct rt6_info *sibling, *next_sibling;
440         int route_choosen;
441
442         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
443         /* Don't change the route if route_choosen == 0
444          * (the sibling list does not include ourselves)
445          */
446         if (route_choosen)
447                 list_for_each_entry_safe(sibling, next_sibling,
448                                 &match->rt6i_siblings, rt6i_siblings) {
449                         route_choosen--;
450                         if (route_choosen == 0) {
451                                 if (rt6_score_route(sibling, oif, strict) < 0)
452                                         break;
453                                 match = sibling;
454                                 break;
455                         }
456                 }
457         return match;
458 }
459
460 /*
461  *      Route lookup.  The relevant table->tb6_lock is assumed to be held.
462  */
463
464 static inline struct rt6_info *rt6_device_match(struct net *net,
465                                                     struct rt6_info *rt,
466                                                     const struct in6_addr *saddr,
467                                                     int oif,
468                                                     int flags)
469 {
470         struct rt6_info *local = NULL;
471         struct rt6_info *sprt;
472
473         if (!oif && ipv6_addr_any(saddr))
474                 goto out;
475
476         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
477                 struct net_device *dev = sprt->dst.dev;
478
479                 if (oif) {
480                         if (dev->ifindex == oif)
481                                 return sprt;
482                         if (dev->flags & IFF_LOOPBACK) {
483                                 if (!sprt->rt6i_idev ||
484                                     sprt->rt6i_idev->dev->ifindex != oif) {
485                                         if (flags & RT6_LOOKUP_F_IFACE)
486                                                 continue;
487                                         if (local &&
488                                             local->rt6i_idev->dev->ifindex == oif)
489                                                 continue;
490                                 }
491                                 local = sprt;
492                         }
493                 } else {
494                         if (ipv6_chk_addr(net, saddr, dev,
495                                           flags & RT6_LOOKUP_F_IFACE))
496                                 return sprt;
497                 }
498         }
499
500         if (oif) {
501                 if (local)
502                         return local;
503
504                 if (flags & RT6_LOOKUP_F_IFACE)
505                         return net->ipv6.ip6_null_entry;
506         }
507 out:
508         return rt;
509 }
510
511 #ifdef CONFIG_IPV6_ROUTER_PREF
512 struct __rt6_probe_work {
513         struct work_struct work;
514         struct in6_addr target;
515         struct net_device *dev;
516 };
517
518 static void rt6_probe_deferred(struct work_struct *w)
519 {
520         struct in6_addr mcaddr;
521         struct __rt6_probe_work *work =
522                 container_of(w, struct __rt6_probe_work, work);
523
524         addrconf_addr_solict_mult(&work->target, &mcaddr);
525         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
526         dev_put(work->dev);
527         kfree(work);
528 }
529
530 static void rt6_probe(struct rt6_info *rt)
531 {
532         struct __rt6_probe_work *work;
533         struct neighbour *neigh;
534         /*
535          * This does not seem to be appropriate for now; however,
536          * we need to check whether it really is, aka Router
537          * Reachability Probing.
538          *
539          * Router Reachability Probe MUST be rate-limited
540          * to no more than one per minute.
541          */
542         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
543                 return;
544         rcu_read_lock_bh();
545         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
546         if (neigh) {
547                 if (neigh->nud_state & NUD_VALID)
548                         goto out;
549
550                 work = NULL;
551                 write_lock(&neigh->lock);
552                 if (!(neigh->nud_state & NUD_VALID) &&
553                     time_after(jiffies,
554                                neigh->updated +
555                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
556                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
557                         if (work)
558                                 __neigh_set_probe_once(neigh);
559                 }
560                 write_unlock(&neigh->lock);
561         } else {
562                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
563         }
564
565         if (work) {
566                 INIT_WORK(&work->work, rt6_probe_deferred);
567                 work->target = rt->rt6i_gateway;
568                 dev_hold(rt->dst.dev);
569                 work->dev = rt->dst.dev;
570                 schedule_work(&work->work);
571         }
572
573 out:
574         rcu_read_unlock_bh();
575 }
576 #else
577 static inline void rt6_probe(struct rt6_info *rt)
578 {
579 }
580 #endif
581
582 /*
583  * Default Router Selection (RFC 2461 6.3.6)
584  */
585 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
586 {
587         struct net_device *dev = rt->dst.dev;
588         if (!oif || dev->ifindex == oif)
589                 return 2;
590         if ((dev->flags & IFF_LOOPBACK) &&
591             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
592                 return 1;
593         return 0;
594 }
595
596 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
597 {
598         struct neighbour *neigh;
599         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
600
601         if (rt->rt6i_flags & RTF_NONEXTHOP ||
602             !(rt->rt6i_flags & RTF_GATEWAY))
603                 return RT6_NUD_SUCCEED;
604
605         rcu_read_lock_bh();
606         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
607         if (neigh) {
608                 read_lock(&neigh->lock);
609                 if (neigh->nud_state & NUD_VALID)
610                         ret = RT6_NUD_SUCCEED;
611 #ifdef CONFIG_IPV6_ROUTER_PREF
612                 else if (!(neigh->nud_state & NUD_FAILED))
613                         ret = RT6_NUD_SUCCEED;
614                 else
615                         ret = RT6_NUD_FAIL_PROBE;
616 #endif
617                 read_unlock(&neigh->lock);
618         } else {
619                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
620                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
621         }
622         rcu_read_unlock_bh();
623
624         return ret;
625 }
626
627 static int rt6_score_route(struct rt6_info *rt, int oif,
628                            int strict)
629 {
630         int m;
631
632         m = rt6_check_dev(rt, oif);
633         if (!m && (strict & RT6_LOOKUP_F_IFACE))
634                 return RT6_NUD_FAIL_HARD;
635 #ifdef CONFIG_IPV6_ROUTER_PREF
636         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
637 #endif
638         if (strict & RT6_LOOKUP_F_REACHABLE) {
639                 int n = rt6_check_neigh(rt);
640                 if (n < 0)
641                         return n;
642         }
643         return m;
644 }
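/*
 * The score built above is a small bit field: bits 0-1 come from
 * rt6_check_dev() (2 when the route's device matches the requested oif or
 * no oif was given, 1 for a loopback route whose idev matches the oif,
 * 0 otherwise).  With CONFIG_IPV6_ROUTER_PREF the decoded router preference
 * is shifted two bits up, so preference outweighs the interface match and
 * the low bits only break ties between routes of equal preference.
 */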
645
646 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
647                                    int *mpri, struct rt6_info *match,
648                                    bool *do_rr)
649 {
650         int m;
651         bool match_do_rr = false;
652         struct inet6_dev *idev = rt->rt6i_idev;
653         struct net_device *dev = rt->dst.dev;
654
655         if (dev && !netif_carrier_ok(dev) &&
656             idev->cnf.ignore_routes_with_linkdown)
657                 goto out;
658
659         if (rt6_check_expired(rt))
660                 goto out;
661
662         m = rt6_score_route(rt, oif, strict);
663         if (m == RT6_NUD_FAIL_DO_RR) {
664                 match_do_rr = true;
665                 m = 0; /* lowest valid score */
666         } else if (m == RT6_NUD_FAIL_HARD) {
667                 goto out;
668         }
669
670         if (strict & RT6_LOOKUP_F_REACHABLE)
671                 rt6_probe(rt);
672
673         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
674         if (m > *mpri) {
675                 *do_rr = match_do_rr;
676                 *mpri = m;
677                 match = rt;
678         }
679 out:
680         return match;
681 }
682
683 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
684                                      struct rt6_info *rr_head,
685                                      u32 metric, int oif, int strict,
686                                      bool *do_rr)
687 {
688         struct rt6_info *rt, *match, *cont;
689         int mpri = -1;
690
691         match = NULL;
692         cont = NULL;
693         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
694                 if (rt->rt6i_metric != metric) {
695                         cont = rt;
696                         break;
697                 }
698
699                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
700         }
701
702         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
703                 if (rt->rt6i_metric != metric) {
704                         cont = rt;
705                         break;
706                 }
707
708                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
709         }
710
711         if (match || !cont)
712                 return match;
713
714         for (rt = cont; rt; rt = rt->dst.rt6_next)
715                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716
717         return match;
718 }
719
720 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
721 {
722         struct rt6_info *match, *rt0;
723         struct net *net;
724         bool do_rr = false;
725
726         rt0 = fn->rr_ptr;
727         if (!rt0)
728                 fn->rr_ptr = rt0 = fn->leaf;
729
730         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
731                              &do_rr);
732
733         if (do_rr) {
734                 struct rt6_info *next = rt0->dst.rt6_next;
735
736                 /* no entries matched; do round-robin */
737                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
738                         next = fn->leaf;
739
740                 if (next != rt0)
741                         fn->rr_ptr = next;
742         }
743
744         net = dev_net(rt0->dst.dev);
745         return match ? match : net->ipv6.ip6_null_entry;
746 }
747
748 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
749 {
750         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
751 }
752
753 #ifdef CONFIG_IPV6_ROUTE_INFO
754 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
755                   const struct in6_addr *gwaddr)
756 {
757         struct route_info *rinfo = (struct route_info *) opt;
758         struct in6_addr prefix_buf, *prefix;
759         unsigned int pref;
760         unsigned long lifetime;
761         struct rt6_info *rt;
762
763         if (len < sizeof(struct route_info)) {
764                 return -EINVAL;
765         }
766
767         /* Sanity check for prefix_len and length */
768         if (rinfo->length > 3) {
769                 return -EINVAL;
770         } else if (rinfo->prefix_len > 128) {
771                 return -EINVAL;
772         } else if (rinfo->prefix_len > 64) {
773                 if (rinfo->length < 2) {
774                         return -EINVAL;
775                 }
776         } else if (rinfo->prefix_len > 0) {
777                 if (rinfo->length < 1) {
778                         return -EINVAL;
779                 }
780         }
781
782         pref = rinfo->route_pref;
783         if (pref == ICMPV6_ROUTER_PREF_INVALID)
784                 return -EINVAL;
785
786         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
787
788         if (rinfo->length == 3)
789                 prefix = (struct in6_addr *)rinfo->prefix;
790         else {
791                 /* this function is safe */
792                 ipv6_addr_prefix(&prefix_buf,
793                                  (struct in6_addr *)rinfo->prefix,
794                                  rinfo->prefix_len);
795                 prefix = &prefix_buf;
796         }
797
798         if (rinfo->prefix_len == 0)
799                 rt = rt6_get_dflt_router(gwaddr, dev);
800         else
801                 rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
802
803         if (rt && !lifetime) {
804                 ip6_del_rt(rt);
805                 rt = NULL;
806         }
807
808         if (!rt && lifetime)
809                 rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
810         else if (rt)
811                 rt->rt6i_flags = RTF_ROUTEINFO |
812                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
813
814         if (rt) {
815                 if (!addrconf_finite_timeout(lifetime))
816                         rt6_clean_expires(rt);
817                 else
818                         rt6_set_expires(rt, jiffies + HZ * lifetime);
819
820                 ip6_rt_put(rt);
821         }
822         return 0;
823 }
824 #endif
825
826 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
827                                         struct in6_addr *saddr)
828 {
829         struct fib6_node *pn;
830         while (1) {
831                 if (fn->fn_flags & RTN_TL_ROOT)
832                         return NULL;
833                 pn = fn->parent;
834                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
835                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
836                 else
837                         fn = pn;
838                 if (fn->fn_flags & RTN_RTINFO)
839                         return fn;
840         }
841 }
842
843 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
844                                              struct fib6_table *table,
845                                              struct flowi6 *fl6, int flags)
846 {
847         struct fib6_node *fn;
848         struct rt6_info *rt;
849
850         read_lock_bh(&table->tb6_lock);
851         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
852 restart:
853         rt = fn->leaf;
854         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
855         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
856                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
857         if (rt == net->ipv6.ip6_null_entry) {
858                 fn = fib6_backtrack(fn, &fl6->saddr);
859                 if (fn)
860                         goto restart;
861         }
862         dst_use(&rt->dst, jiffies);
863         read_unlock_bh(&table->tb6_lock);
864         return rt;
865
866 }
867
868 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
869                                     int flags)
870 {
871         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
872 }
873 EXPORT_SYMBOL_GPL(ip6_route_lookup);
874
875 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
876                             const struct in6_addr *saddr, int oif, int strict)
877 {
878         struct flowi6 fl6 = {
879                 .flowi6_oif = oif,
880                 .daddr = *daddr,
881         };
882         struct dst_entry *dst;
883         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
884
885         if (saddr) {
886                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
887                 flags |= RT6_LOOKUP_F_HAS_SADDR;
888         }
889
890         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
891         if (dst->error == 0)
892                 return (struct rt6_info *) dst;
893
894         dst_release(dst);
895
896         return NULL;
897 }
898 EXPORT_SYMBOL(rt6_lookup);
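
/*
 * Minimal usage sketch (hypothetical caller, for illustration only):
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt) {
 *		... use rt->dst, e.g. rt->dst.dev ...
 *		ip6_rt_put(rt);
 *	}
 *
 * rt6_lookup() returns NULL on error; on success the caller owns a
 * reference and must drop it with ip6_rt_put().
 */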
899
900 /* ip6_ins_rt is called with table->tb6_lock free (not held); it takes
901    the lock itself.  A new route entry is passed in; if the addition fails
902    for any reason, the route is freed.  In any case, if the caller does not
903    hold a reference to it, it may be destroyed.
904  */
905
906 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
907                         struct mx6_config *mxc)
908 {
909         int err;
910         struct fib6_table *table;
911
912         table = rt->rt6i_table;
913         write_lock_bh(&table->tb6_lock);
914         err = fib6_add(&table->tb6_root, rt, info, mxc);
915         write_unlock_bh(&table->tb6_lock);
916
917         return err;
918 }
919
920 int ip6_ins_rt(struct rt6_info *rt)
921 {
922         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
923         struct mx6_config mxc = { .mx = NULL, };
924
925         return __ip6_ins_rt(rt, &info, &mxc);
926 }
927
928 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
929                                            const struct in6_addr *daddr,
930                                            const struct in6_addr *saddr)
931 {
932         struct rt6_info *rt;
933
934         /*
935          *      Clone the route.
936          */
937
938         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
939                 ort = (struct rt6_info *)ort->dst.from;
940
941         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
942
943         if (!rt)
944                 return NULL;
945
946         ip6_rt_copy_init(rt, ort);
947         rt->rt6i_flags |= RTF_CACHE;
948         rt->rt6i_metric = 0;
949         rt->dst.flags |= DST_HOST;
950         rt->rt6i_dst.addr = *daddr;
951         rt->rt6i_dst.plen = 128;
952
953         if (!rt6_is_gw_or_nonexthop(ort)) {
954                 if (ort->rt6i_dst.plen != 128 &&
955                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
956                         rt->rt6i_flags |= RTF_ANYCAST;
957 #ifdef CONFIG_IPV6_SUBTREES
958                 if (rt->rt6i_src.plen && saddr) {
959                         rt->rt6i_src.addr = *saddr;
960                         rt->rt6i_src.plen = 128;
961                 }
962 #endif
963         }
964
965         return rt;
966 }
967
968 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
969 {
970         struct rt6_info *pcpu_rt;
971
972         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
973                                   rt->dst.dev, rt->dst.flags);
974
975         if (!pcpu_rt)
976                 return NULL;
977         ip6_rt_copy_init(pcpu_rt, rt);
978         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
979         pcpu_rt->rt6i_flags |= RTF_PCPU;
980         return pcpu_rt;
981 }
982
983 /* Must be called with read_lock_bh(&tb6_lock) held */
984 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
985 {
986         struct rt6_info *pcpu_rt, **p;
987
988         p = this_cpu_ptr(rt->rt6i_pcpu);
989         pcpu_rt = *p;
990
991         if (pcpu_rt) {
992                 dst_hold(&pcpu_rt->dst);
993                 rt6_dst_from_metrics_check(pcpu_rt);
994         }
995         return pcpu_rt;
996 }
997
998 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
999 {
1000         struct fib6_table *table = rt->rt6i_table;
1001         struct rt6_info *pcpu_rt, *prev, **p;
1002
1003         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1004         if (!pcpu_rt) {
1005                 struct net *net = dev_net(rt->dst.dev);
1006
1007                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1008                 return net->ipv6.ip6_null_entry;
1009         }
1010
1011         read_lock_bh(&table->tb6_lock);
1012         if (rt->rt6i_pcpu) {
1013                 p = this_cpu_ptr(rt->rt6i_pcpu);
1014                 prev = cmpxchg(p, NULL, pcpu_rt);
1015                 if (prev) {
1016                         /* If someone did it before us, return prev instead */
1017                         dst_destroy(&pcpu_rt->dst);
1018                         pcpu_rt = prev;
1019                 }
1020         } else {
1021                 /* rt has been removed from the fib6 tree
1022                  * before we have a chance to acquire the read_lock.
1023                  * In this case, don't bother to create a pcpu rt
1024                  * since rt is going away anyway.  The next
1025                  * dst_check() will trigger a re-lookup.
1026                  */
1027                 dst_destroy(&pcpu_rt->dst);
1028                 pcpu_rt = rt;
1029         }
1030         dst_hold(&pcpu_rt->dst);
1031         rt6_dst_from_metrics_check(pcpu_rt);
1032         read_unlock_bh(&table->tb6_lock);
1033         return pcpu_rt;
1034 }
1035
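/*
 * ip6_pol_route() returns one of three kinds of dst, each with a reference
 * already taken for the caller:
 *  - the null entry or an existing RTF_CACHE entry (via dst_use()),
 *  - an uncached RTF_CACHE clone, for FLOWI_FLAG_KNOWN_NH lookups on a
 *    route without a gateway, which is added to the per-cpu uncached list,
 *  - otherwise a per-cpu copy created on demand by rt6_make_pcpu_route().
 */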
1036 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1037                                       struct flowi6 *fl6, int flags)
1038 {
1039         struct fib6_node *fn, *saved_fn;
1040         struct rt6_info *rt;
1041         int strict = 0;
1042
1043         strict |= flags & RT6_LOOKUP_F_IFACE;
1044         if (net->ipv6.devconf_all->forwarding == 0)
1045                 strict |= RT6_LOOKUP_F_REACHABLE;
1046
1047         read_lock_bh(&table->tb6_lock);
1048
1049         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1050         saved_fn = fn;
1051
1052         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1053                 oif = 0;
1054
1055 redo_rt6_select:
1056         rt = rt6_select(fn, oif, strict);
1057         if (rt->rt6i_nsiblings)
1058                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1059         if (rt == net->ipv6.ip6_null_entry) {
1060                 fn = fib6_backtrack(fn, &fl6->saddr);
1061                 if (fn)
1062                         goto redo_rt6_select;
1063                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1064                         /* also consider unreachable route */
1065                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1066                         fn = saved_fn;
1067                         goto redo_rt6_select;
1068                 }
1069         }
1070
1071
1072         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1073                 dst_use(&rt->dst, jiffies);
1074                 read_unlock_bh(&table->tb6_lock);
1075
1076                 rt6_dst_from_metrics_check(rt);
1077                 return rt;
1078         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1079                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1080                 /* Create a RTF_CACHE clone which will not be
1081                  * owned by the fib6 tree.  It is for the special case where
1082                  * the daddr in the skb during the neighbor look-up is different
1083                  * from the fl6->daddr used to look up the route here.
1084                  */
1085
1086                 struct rt6_info *uncached_rt;
1087
1088                 dst_use(&rt->dst, jiffies);
1089                 read_unlock_bh(&table->tb6_lock);
1090
1091                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1092                 dst_release(&rt->dst);
1093
1094                 if (uncached_rt)
1095                         rt6_uncached_list_add(uncached_rt);
1096                 else
1097                         uncached_rt = net->ipv6.ip6_null_entry;
1098
1099                 dst_hold(&uncached_rt->dst);
1100                 return uncached_rt;
1101
1102         } else {
1103                 /* Get a percpu copy */
1104
1105                 struct rt6_info *pcpu_rt;
1106
1107                 rt->dst.lastuse = jiffies;
1108                 rt->dst.__use++;
1109                 pcpu_rt = rt6_get_pcpu_route(rt);
1110
1111                 if (pcpu_rt) {
1112                         read_unlock_bh(&table->tb6_lock);
1113                 } else {
1114                         /* We have to do the read_unlock first
1115                          * because rt6_make_pcpu_route() may trigger
1116                          * ip6_dst_gc() which will take the write_lock.
1117                          */
1118                         dst_hold(&rt->dst);
1119                         read_unlock_bh(&table->tb6_lock);
1120                         pcpu_rt = rt6_make_pcpu_route(rt);
1121                         dst_release(&rt->dst);
1122                 }
1123
1124                 return pcpu_rt;
1125
1126         }
1127 }
1128
1129 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1130                                             struct flowi6 *fl6, int flags)
1131 {
1132         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1133 }
1134
1135 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1136                                                 struct net_device *dev,
1137                                                 struct flowi6 *fl6, int flags)
1138 {
1139         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1140                 flags |= RT6_LOOKUP_F_IFACE;
1141
1142         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1143 }
1144
1145 void ip6_route_input(struct sk_buff *skb)
1146 {
1147         const struct ipv6hdr *iph = ipv6_hdr(skb);
1148         struct net *net = dev_net(skb->dev);
1149         int flags = RT6_LOOKUP_F_HAS_SADDR;
1150         struct ip_tunnel_info *tun_info;
1151         struct flowi6 fl6 = {
1152                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1153                 .daddr = iph->daddr,
1154                 .saddr = iph->saddr,
1155                 .flowlabel = ip6_flowinfo(iph),
1156                 .flowi6_mark = skb->mark,
1157                 .flowi6_proto = iph->nexthdr,
1158         };
1159
1160         tun_info = skb_tunnel_info(skb);
1161         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1162                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1163         skb_dst_drop(skb);
1164         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1165 }
1166
1167 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1168                                              struct flowi6 *fl6, int flags)
1169 {
1170         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1171 }
1172
1173 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1174                                     struct flowi6 *fl6)
1175 {
1176         struct dst_entry *dst;
1177         int flags = 0;
1178         bool any_src;
1179
1180         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1181         if (dst)
1182                 return dst;
1183
1184         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1185
1186         any_src = ipv6_addr_any(&fl6->saddr);
1187         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1188             (fl6->flowi6_oif && any_src))
1189                 flags |= RT6_LOOKUP_F_IFACE;
1190
1191         if (!any_src)
1192                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1193         else if (sk)
1194                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1195
1196         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1197 }
1198 EXPORT_SYMBOL(ip6_route_output);
1199
1200 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1201 {
1202         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1203         struct dst_entry *new = NULL;
1204
1205         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1206         if (rt) {
1207                 rt6_info_init(rt);
1208
1209                 new = &rt->dst;
1210                 new->__use = 1;
1211                 new->input = dst_discard;
1212                 new->output = dst_discard_out;
1213
1214                 dst_copy_metrics(new, &ort->dst);
1215                 rt->rt6i_idev = ort->rt6i_idev;
1216                 if (rt->rt6i_idev)
1217                         in6_dev_hold(rt->rt6i_idev);
1218
1219                 rt->rt6i_gateway = ort->rt6i_gateway;
1220                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1221                 rt->rt6i_metric = 0;
1222
1223                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1224 #ifdef CONFIG_IPV6_SUBTREES
1225                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1226 #endif
1227
1228                 dst_free(new);
1229         }
1230
1231         dst_release(dst_orig);
1232         return new ? new : ERR_PTR(-ENOMEM);
1233 }
1234
1235 /*
1236  *      Destination cache support functions
1237  */
1238
1239 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1240 {
1241         if (rt->dst.from &&
1242             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1243                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1244 }
1245
1246 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1247 {
1248         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1249                 return NULL;
1250
1251         if (rt6_check_expired(rt))
1252                 return NULL;
1253
1254         return &rt->dst;
1255 }
1256
1257 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1258 {
1259         if (!__rt6_check_expired(rt) &&
1260             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1261             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1262                 return &rt->dst;
1263         else
1264                 return NULL;
1265 }
1266
1267 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1268 {
1269         struct rt6_info *rt;
1270
1271         rt = (struct rt6_info *) dst;
1272
1273         /* All IPV6 dsts are created with ->obsolete set to the value
1274          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1275          * into this function always.
1276          */
1277
1278         rt6_dst_from_metrics_check(rt);
1279
1280         if (rt->rt6i_flags & RTF_PCPU ||
1281             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1282                 return rt6_dst_from_check(rt, cookie);
1283         else
1284                 return rt6_check(rt, cookie);
1285 }
1286
1287 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1288 {
1289         struct rt6_info *rt = (struct rt6_info *) dst;
1290
1291         if (rt) {
1292                 if (rt->rt6i_flags & RTF_CACHE) {
1293                         if (rt6_check_expired(rt)) {
1294                                 ip6_del_rt(rt);
1295                                 dst = NULL;
1296                         }
1297                 } else {
1298                         dst_release(dst);
1299                         dst = NULL;
1300                 }
1301         }
1302         return dst;
1303 }
1304
1305 static void ip6_link_failure(struct sk_buff *skb)
1306 {
1307         struct rt6_info *rt;
1308
1309         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1310
1311         rt = (struct rt6_info *) skb_dst(skb);
1312         if (rt) {
1313                 if (rt->rt6i_flags & RTF_CACHE) {
1314                         dst_hold(&rt->dst);
1315                         ip6_del_rt(rt);
1316                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1317                         rt->rt6i_node->fn_sernum = -1;
1318                 }
1319         }
1320 }
1321
1322 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1323 {
1324         struct net *net = dev_net(rt->dst.dev);
1325
1326         rt->rt6i_flags |= RTF_MODIFIED;
1327         rt->rt6i_pmtu = mtu;
1328         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1329 }
1330
1331 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1332 {
1333         return !(rt->rt6i_flags & RTF_CACHE) &&
1334                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1335 }
1336
1337 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1338                                  const struct ipv6hdr *iph, u32 mtu)
1339 {
1340         struct rt6_info *rt6 = (struct rt6_info *)dst;
1341
1342         if (rt6->rt6i_flags & RTF_LOCAL)
1343                 return;
1344
1345         dst_confirm(dst);
1346         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1347         if (mtu >= dst_mtu(dst))
1348                 return;
1349
1350         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1351                 rt6_do_update_pmtu(rt6, mtu);
1352         } else {
1353                 const struct in6_addr *daddr, *saddr;
1354                 struct rt6_info *nrt6;
1355
1356                 if (iph) {
1357                         daddr = &iph->daddr;
1358                         saddr = &iph->saddr;
1359                 } else if (sk) {
1360                         daddr = &sk->sk_v6_daddr;
1361                         saddr = &inet6_sk(sk)->saddr;
1362                 } else {
1363                         return;
1364                 }
1365                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1366                 if (nrt6) {
1367                         rt6_do_update_pmtu(nrt6, mtu);
1368
1369                         /* ip6_ins_rt(nrt6) will bump the
1370                          * rt6->rt6i_node->fn_sernum
1371                          * which will cause the next rt6_check() to fail and
1372                          * invalidate the sk->sk_dst_cache.
1373                          */
1374                         ip6_ins_rt(nrt6);
1375                 }
1376         }
1377 }
1378
1379 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1380                                struct sk_buff *skb, u32 mtu)
1381 {
1382         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1383 }
1384
1385 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1386                      int oif, u32 mark, kuid_t uid)
1387 {
1388         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1389         struct dst_entry *dst;
1390         struct flowi6 fl6;
1391
1392         memset(&fl6, 0, sizeof(fl6));
1393         fl6.flowi6_oif = oif;
1394         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1395         fl6.daddr = iph->daddr;
1396         fl6.saddr = iph->saddr;
1397         fl6.flowlabel = ip6_flowinfo(iph);
1398         fl6.flowi6_uid = uid;
1399
1400         dst = ip6_route_output(net, NULL, &fl6);
1401         if (!dst->error)
1402                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1403         dst_release(dst);
1404 }
1405 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1406
1407 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1408 {
1409         ip6_update_pmtu(skb, sock_net(sk), mtu,
1410                         sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk));
1411 }
1412 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1413
1414 /* Handle redirects */
1415 struct ip6rd_flowi {
1416         struct flowi6 fl6;
1417         struct in6_addr gateway;
1418 };
1419
1420 static struct rt6_info *__ip6_route_redirect(struct net *net,
1421                                              struct fib6_table *table,
1422                                              struct flowi6 *fl6,
1423                                              int flags)
1424 {
1425         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1426         struct rt6_info *rt;
1427         struct fib6_node *fn;
1428
1429         /* Get the "current" route for this destination and
1430          * check if the redirect came from an appropriate router.
1431          *
1432          * RFC 4861 specifies that redirects should only be
1433          * accepted if they come from the nexthop to the target.
1434          * Due to the way the routes are chosen, this notion
1435          * is a bit fuzzy and one might need to check all possible
1436          * routes.
1437          */
1438
1439         read_lock_bh(&table->tb6_lock);
1440         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1441 restart:
1442         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1443                 if (rt6_check_expired(rt))
1444                         continue;
1445                 if (rt->dst.error)
1446                         break;
1447                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1448                         continue;
1449                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1450                         continue;
1451                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1452                         continue;
1453                 break;
1454         }
1455
1456         if (!rt)
1457                 rt = net->ipv6.ip6_null_entry;
1458         else if (rt->dst.error) {
1459                 rt = net->ipv6.ip6_null_entry;
1460                 goto out;
1461         }
1462
1463         if (rt == net->ipv6.ip6_null_entry) {
1464                 fn = fib6_backtrack(fn, &fl6->saddr);
1465                 if (fn)
1466                         goto restart;
1467         }
1468
1469 out:
1470         dst_hold(&rt->dst);
1471
1472         read_unlock_bh(&table->tb6_lock);
1473
1474         return rt;
1475 };
1476
1477 static struct dst_entry *ip6_route_redirect(struct net *net,
1478                                         const struct flowi6 *fl6,
1479                                         const struct in6_addr *gateway)
1480 {
1481         int flags = RT6_LOOKUP_F_HAS_SADDR;
1482         struct ip6rd_flowi rdfl;
1483
1484         rdfl.fl6 = *fl6;
1485         rdfl.gateway = *gateway;
1486
1487         return fib6_rule_lookup(net, &rdfl.fl6,
1488                                 flags, __ip6_route_redirect);
1489 }
1490
1491 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1492 {
1493         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1494         struct dst_entry *dst;
1495         struct flowi6 fl6;
1496
1497         memset(&fl6, 0, sizeof(fl6));
1498         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1499         fl6.flowi6_oif = oif;
1500         fl6.flowi6_mark = mark;
1501         fl6.daddr = iph->daddr;
1502         fl6.saddr = iph->saddr;
1503         fl6.flowlabel = ip6_flowinfo(iph);
1504
1505         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1506         rt6_do_redirect(dst, NULL, skb);
1507         dst_release(dst);
1508 }
1509 EXPORT_SYMBOL_GPL(ip6_redirect);
1510
1511 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1512                             u32 mark)
1513 {
1514         const struct ipv6hdr *iph = ipv6_hdr(skb);
1515         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1516         struct dst_entry *dst;
1517         struct flowi6 fl6;
1518
1519         memset(&fl6, 0, sizeof(fl6));
1520         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1521         fl6.flowi6_oif = oif;
1522         fl6.flowi6_mark = mark;
1523         fl6.daddr = msg->dest;
1524         fl6.saddr = iph->daddr;
1525
1526         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1527         rt6_do_redirect(dst, NULL, skb);
1528         dst_release(dst);
1529 }
1530
1531 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1532 {
1533         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1534 }
1535 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1536
1537 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1538 {
1539         struct net_device *dev = dst->dev;
1540         unsigned int mtu = dst_mtu(dst);
1541         struct net *net = dev_net(dev);
1542
1543         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1544
1545         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1546                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1547
1548         /*
1549          * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and the
1550          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1551          * IPV6_MAXPLEN is also a valid value and means: "any MSS,
1552          * rely only on PMTU discovery".
1553          */
1554         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1555                 mtu = IPV6_MAXPLEN;
1556         return mtu;
1557 }
1558
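/*
 * Effective MTU for a route: the cached per-route PMTU if set, otherwise
 * the RTAX_MTU metric, otherwise the device's IPv6 MTU, capped at
 * IP6_MAX_MTU.
 */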
1559 static unsigned int ip6_mtu(const struct dst_entry *dst)
1560 {
1561         const struct rt6_info *rt = (const struct rt6_info *)dst;
1562         unsigned int mtu = rt->rt6i_pmtu;
1563         struct inet6_dev *idev;
1564
1565         if (mtu)
1566                 goto out;
1567
1568         mtu = dst_metric_raw(dst, RTAX_MTU);
1569         if (mtu)
1570                 goto out;
1571
1572         mtu = IPV6_MIN_MTU;
1573
1574         rcu_read_lock();
1575         idev = __in6_dev_get(dst->dev);
1576         if (idev)
1577                 mtu = idev->cnf.mtu6;
1578         rcu_read_unlock();
1579
1580 out:
1581         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1582 }
1583
1584 static struct dst_entry *icmp6_dst_gc_list;
1585 static DEFINE_SPINLOCK(icmp6_dst_lock);
1586
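/*
 * Allocate a dst for an outgoing ICMPv6/ndisc packet.  These entries are
 * never inserted into the FIB; they are chained on icmp6_dst_gc_list and
 * reclaimed by icmp6_dst_gc() once their refcount drops to zero.
 */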
1587 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1588                                   struct flowi6 *fl6)
1589 {
1590         struct dst_entry *dst;
1591         struct rt6_info *rt;
1592         struct inet6_dev *idev = in6_dev_get(dev);
1593         struct net *net = dev_net(dev);
1594
1595         if (unlikely(!idev))
1596                 return ERR_PTR(-ENODEV);
1597
1598         rt = ip6_dst_alloc(net, dev, 0);
1599         if (unlikely(!rt)) {
1600                 in6_dev_put(idev);
1601                 dst = ERR_PTR(-ENOMEM);
1602                 goto out;
1603         }
1604
1605         rt->dst.flags |= DST_HOST;
1606         rt->dst.output  = ip6_output;
1607         atomic_set(&rt->dst.__refcnt, 1);
1608         rt->rt6i_gateway  = fl6->daddr;
1609         rt->rt6i_dst.addr = fl6->daddr;
1610         rt->rt6i_dst.plen = 128;
1611         rt->rt6i_idev     = idev;
1612         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1613
1614         spin_lock_bh(&icmp6_dst_lock);
1615         rt->dst.next = icmp6_dst_gc_list;
1616         icmp6_dst_gc_list = &rt->dst;
1617         spin_unlock_bh(&icmp6_dst_lock);
1618
1619         fib6_force_start_gc(net);
1620
1621         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1622
1623 out:
1624         return dst;
1625 }
1626
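/*
 * Walk icmp6_dst_gc_list and free entries that are no longer referenced.
 * Returns the number of entries still in use (non-zero means more work
 * remains for a later pass).
 */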
1627 int icmp6_dst_gc(void)
1628 {
1629         struct dst_entry *dst, **pprev;
1630         int more = 0;
1631
1632         spin_lock_bh(&icmp6_dst_lock);
1633         pprev = &icmp6_dst_gc_list;
1634
1635         while ((dst = *pprev) != NULL) {
1636                 if (!atomic_read(&dst->__refcnt)) {
1637                         *pprev = dst->next;
1638                         dst_free(dst);
1639                 } else {
1640                         pprev = &dst->next;
1641                         ++more;
1642                 }
1643         }
1644
1645         spin_unlock_bh(&icmp6_dst_lock);
1646
1647         return more;
1648 }
1649
1650 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1651                             void *arg)
1652 {
1653         struct dst_entry *dst, **pprev;
1654
1655         spin_lock_bh(&icmp6_dst_lock);
1656         pprev = &icmp6_dst_gc_list;
1657         while ((dst = *pprev) != NULL) {
1658                 struct rt6_info *rt = (struct rt6_info *) dst;
1659                 if (func(rt, arg)) {
1660                         *pprev = dst->next;
1661                         dst_free(dst);
1662                 } else {
1663                         pprev = &dst->next;
1664                 }
1665         }
1666         spin_unlock_bh(&icmp6_dst_lock);
1667 }
1668
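/*
 * Route garbage collector.  The full fib6_run_gc() pass is skipped when
 * the previous run was recent and the number of dst entries is within
 * ip6_rt_max_size; returns non-zero while the table is still over that
 * limit.
 */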
1669 static int ip6_dst_gc(struct dst_ops *ops)
1670 {
1671         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1672         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1673         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1674         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1675         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1676         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1677         int entries;
1678
1679         entries = dst_entries_get_fast(ops);
1680         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1681             entries <= rt_max_size)
1682                 goto out;
1683
1684         net->ipv6.ip6_rt_gc_expire++;
1685         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1686         entries = dst_entries_get_slow(ops);
1687         if (entries < ops->gc_thresh)
1688                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1689 out:
1690         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1691         return entries > rt_max_size;
1692 }
1693
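/*
 * Convert the RTA_METRICS netlink attributes in @cfg into the metrics
 * array of @mxc.  RTAX_CC_ALGO is translated from a congestion-control
 * name to its key; on success the caller owns (and must kfree) mxc->mx.
 */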
1694 static int ip6_convert_metrics(struct mx6_config *mxc,
1695                                const struct fib6_config *cfg)
1696 {
1697         bool ecn_ca = false;
1698         struct nlattr *nla;
1699         int remaining;
1700         u32 *mp;
1701
1702         if (!cfg->fc_mx)
1703                 return 0;
1704
1705         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1706         if (unlikely(!mp))
1707                 return -ENOMEM;
1708
1709         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1710                 int type = nla_type(nla);
1711                 u32 val;
1712
1713                 if (!type)
1714                         continue;
1715                 if (unlikely(type > RTAX_MAX))
1716                         goto err;
1717
1718                 if (type == RTAX_CC_ALGO) {
1719                         char tmp[TCP_CA_NAME_MAX];
1720
1721                         nla_strlcpy(tmp, nla, sizeof(tmp));
1722                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1723                         if (val == TCP_CA_UNSPEC)
1724                                 goto err;
1725                 } else {
1726                         val = nla_get_u32(nla);
1727                 }
1728                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1729                         goto err;
1730
1731                 mp[type - 1] = val;
1732                 __set_bit(type - 1, mxc->mx_valid);
1733         }
1734
1735         if (ecn_ca) {
1736                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1737                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1738         }
1739
1740         mxc->mx = mp;
1741         return 0;
1742  err:
1743         kfree(mp);
1744         return -EINVAL;
1745 }
1746
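/*
 * Build (but do not insert) a rt6_info from a fib6_config: validate the
 * prefix lengths, resolve the output device and gateway, set up lwtunnel
 * state, and turn loopback/reject requests into the appropriate error
 * routes.  Returns the new route or an ERR_PTR().
 */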
1747 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1748 {
1749         struct net *net = cfg->fc_nlinfo.nl_net;
1750         struct rt6_info *rt = NULL;
1751         struct net_device *dev = NULL;
1752         struct inet6_dev *idev = NULL;
1753         struct fib6_table *table;
1754         int addr_type;
1755         int err = -EINVAL;
1756
1757         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1758                 goto out;
1759 #ifndef CONFIG_IPV6_SUBTREES
1760         if (cfg->fc_src_len)
1761                 goto out;
1762 #endif
1763         if (cfg->fc_ifindex) {
1764                 err = -ENODEV;
1765                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1766                 if (!dev)
1767                         goto out;
1768                 idev = in6_dev_get(dev);
1769                 if (!idev)
1770                         goto out;
1771         }
1772
1773         if (cfg->fc_metric == 0)
1774                 cfg->fc_metric = IP6_RT_PRIO_USER;
1775
1776         err = -ENOBUFS;
1777         if (cfg->fc_nlinfo.nlh &&
1778             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1779                 table = fib6_get_table(net, cfg->fc_table);
1780                 if (!table) {
1781                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1782                         table = fib6_new_table(net, cfg->fc_table);
1783                 }
1784         } else {
1785                 table = fib6_new_table(net, cfg->fc_table);
1786         }
1787
1788         if (!table)
1789                 goto out;
1790
1791         rt = ip6_dst_alloc(net, NULL,
1792                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1793
1794         if (!rt) {
1795                 err = -ENOMEM;
1796                 goto out;
1797         }
1798
1799         if (cfg->fc_flags & RTF_EXPIRES)
1800                 rt6_set_expires(rt, jiffies +
1801                                 clock_t_to_jiffies(cfg->fc_expires));
1802         else
1803                 rt6_clean_expires(rt);
1804
1805         if (cfg->fc_protocol == RTPROT_UNSPEC)
1806                 cfg->fc_protocol = RTPROT_BOOT;
1807         rt->rt6i_protocol = cfg->fc_protocol;
1808
1809         addr_type = ipv6_addr_type(&cfg->fc_dst);
1810
1811         if (addr_type & IPV6_ADDR_MULTICAST)
1812                 rt->dst.input = ip6_mc_input;
1813         else if (cfg->fc_flags & RTF_LOCAL)
1814                 rt->dst.input = ip6_input;
1815         else
1816                 rt->dst.input = ip6_forward;
1817
1818         rt->dst.output = ip6_output;
1819
1820         if (cfg->fc_encap) {
1821                 struct lwtunnel_state *lwtstate;
1822
1823                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1824                                            cfg->fc_encap, AF_INET6, cfg,
1825                                            &lwtstate);
1826                 if (err)
1827                         goto out;
1828                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1829                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1830                         rt->dst.lwtstate->orig_output = rt->dst.output;
1831                         rt->dst.output = lwtunnel_output;
1832                 }
1833                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1834                         rt->dst.lwtstate->orig_input = rt->dst.input;
1835                         rt->dst.input = lwtunnel_input;
1836                 }
1837         }
1838
1839         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1840         rt->rt6i_dst.plen = cfg->fc_dst_len;
1841         if (rt->rt6i_dst.plen == 128)
1842                 rt->dst.flags |= DST_HOST;
1843
1844 #ifdef CONFIG_IPV6_SUBTREES
1845         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1846         rt->rt6i_src.plen = cfg->fc_src_len;
1847 #endif
1848
1849         rt->rt6i_metric = cfg->fc_metric;
1850
1851         /* We cannot add true routes via loopback here;
1852            they would result in kernel looping.  Promote them to reject routes.
1853          */
1854         if ((cfg->fc_flags & RTF_REJECT) ||
1855             (dev && (dev->flags & IFF_LOOPBACK) &&
1856              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1857              !(cfg->fc_flags & RTF_LOCAL))) {
1858                 /* hold loopback dev/idev if we haven't done so. */
1859                 if (dev != net->loopback_dev) {
1860                         if (dev) {
1861                                 dev_put(dev);
1862                                 in6_dev_put(idev);
1863                         }
1864                         dev = net->loopback_dev;
1865                         dev_hold(dev);
1866                         idev = in6_dev_get(dev);
1867                         if (!idev) {
1868                                 err = -ENODEV;
1869                                 goto out;
1870                         }
1871                 }
1872                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1873                 switch (cfg->fc_type) {
1874                 case RTN_BLACKHOLE:
1875                         rt->dst.error = -EINVAL;
1876                         rt->dst.output = dst_discard_out;
1877                         rt->dst.input = dst_discard;
1878                         break;
1879                 case RTN_PROHIBIT:
1880                         rt->dst.error = -EACCES;
1881                         rt->dst.output = ip6_pkt_prohibit_out;
1882                         rt->dst.input = ip6_pkt_prohibit;
1883                         break;
1884                 case RTN_THROW:
1885                 case RTN_UNREACHABLE:
1886                 default:
1887                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1888                                         : (cfg->fc_type == RTN_UNREACHABLE)
1889                                         ? -EHOSTUNREACH : -ENETUNREACH;
1890                         rt->dst.output = ip6_pkt_discard_out;
1891                         rt->dst.input = ip6_pkt_discard;
1892                         break;
1893                 }
1894                 goto install_route;
1895         }
1896
1897         if (cfg->fc_flags & RTF_GATEWAY) {
1898                 const struct in6_addr *gw_addr;
1899                 int gwa_type;
1900
1901                 gw_addr = &cfg->fc_gateway;
1902                 gwa_type = ipv6_addr_type(gw_addr);
1903
1904                 /* if gw_addr is local we will fail to detect this in case
1905                  * the address is still TENTATIVE (DAD in progress). rt6_lookup()
1906                  * will return the already-added prefix route via the interface
1907                  * that the prefix route was assigned to, which might be non-loopback.
1908                  */
1909                 err = -EINVAL;
1910                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1911                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1912                                             dev : NULL, 0, 0))
1913                         goto out;
1914
1915                 rt->rt6i_gateway = *gw_addr;
1916
1917                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1918                         struct rt6_info *grt;
1919
1920                         /* IPv6 strictly inhibits using non-link-local
1921                            addresses as nexthop addresses.
1922                            Otherwise, the router will not be able to send redirects.
1923                            That is usually correct, but in some (rare!) circumstances
1924                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1925                            some exceptions. --ANK
1926                          */
1927                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1928                                 goto out;
1929
1930                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1931
1932                         err = -EHOSTUNREACH;
1933                         if (!grt)
1934                                 goto out;
1935                         if (dev) {
1936                                 if (dev != grt->dst.dev) {
1937                                         ip6_rt_put(grt);
1938                                         goto out;
1939                                 }
1940                         } else {
1941                                 dev = grt->dst.dev;
1942                                 idev = grt->rt6i_idev;
1943                                 dev_hold(dev);
1944                                 in6_dev_hold(grt->rt6i_idev);
1945                         }
1946                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1947                                 err = 0;
1948                         ip6_rt_put(grt);
1949
1950                         if (err)
1951                                 goto out;
1952                 }
1953                 err = -EINVAL;
1954                 if (!dev || (dev->flags & IFF_LOOPBACK))
1955                         goto out;
1956         }
1957
1958         err = -ENODEV;
1959         if (!dev)
1960                 goto out;
1961
1962         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1963                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1964                         err = -EINVAL;
1965                         goto out;
1966                 }
1967                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1968                 rt->rt6i_prefsrc.plen = 128;
1969         } else
1970                 rt->rt6i_prefsrc.plen = 0;
1971
1972         rt->rt6i_flags = cfg->fc_flags;
1973
1974 install_route:
1975         rt->dst.dev = dev;
1976         rt->rt6i_idev = idev;
1977         rt->rt6i_table = table;
1978
1979         cfg->fc_nlinfo.nl_net = dev_net(dev);
1980
1981         return rt;
1982 out:
1983         if (dev)
1984                 dev_put(dev);
1985         if (idev)
1986                 in6_dev_put(idev);
1987         if (rt)
1988                 dst_free(&rt->dst);
1989
1990         return ERR_PTR(err);
1991 }
1992
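/*
 * Create a route from @cfg, convert any netlink metrics, and insert it
 * into the FIB.
 */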
1993 int ip6_route_add(struct fib6_config *cfg)
1994 {
1995         struct mx6_config mxc = { .mx = NULL, };
1996         struct rt6_info *rt;
1997         int err;
1998
1999         rt = ip6_route_info_create(cfg);
2000         if (IS_ERR(rt)) {
2001                 err = PTR_ERR(rt);
2002                 rt = NULL;
2003                 goto out;
2004         }
2005
2006         err = ip6_convert_metrics(&mxc, cfg);
2007         if (err)
2008                 goto out;
2009
2010         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2011
2012         kfree(mxc.mx);
2013
2014         return err;
2015 out:
2016         if (rt)
2017                 dst_free(&rt->dst);
2018
2019         return err;
2020 }
2021
2022 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2023 {
2024         int err;
2025         struct fib6_table *table;
2026         struct net *net = dev_net(rt->dst.dev);
2027
2028         if (rt == net->ipv6.ip6_null_entry ||
2029             rt->dst.flags & DST_NOCACHE) {
2030                 err = -ENOENT;
2031                 goto out;
2032         }
2033
2034         table = rt->rt6i_table;
2035         write_lock_bh(&table->tb6_lock);
2036         err = fib6_del(rt, info);
2037         write_unlock_bh(&table->tb6_lock);
2038
2039 out:
2040         ip6_rt_put(rt);
2041         return err;
2042 }
2043
2044 int ip6_del_rt(struct rt6_info *rt)
2045 {
2046         struct nl_info info = {
2047                 .nl_net = dev_net(rt->dst.dev),
2048         };
2049         return __ip6_del_rt(rt, &info);
2050 }
2051
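/*
 * Delete the route matching @cfg (destination/source prefix and, when
 * given, output interface, gateway and metric) from its table.  Cached
 * clones are only matched when RTF_CACHE is requested.
 */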
2052 static int ip6_route_del(struct fib6_config *cfg)
2053 {
2054         struct fib6_table *table;
2055         struct fib6_node *fn;
2056         struct rt6_info *rt;
2057         int err = -ESRCH;
2058
2059         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2060         if (!table)
2061                 return err;
2062
2063         read_lock_bh(&table->tb6_lock);
2064
2065         fn = fib6_locate(&table->tb6_root,
2066                          &cfg->fc_dst, cfg->fc_dst_len,
2067                          &cfg->fc_src, cfg->fc_src_len);
2068
2069         if (fn) {
2070                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2071                         if ((rt->rt6i_flags & RTF_CACHE) &&
2072                             !(cfg->fc_flags & RTF_CACHE))
2073                                 continue;
2074                         if (cfg->fc_ifindex &&
2075                             (!rt->dst.dev ||
2076                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2077                                 continue;
2078                         if (cfg->fc_flags & RTF_GATEWAY &&
2079                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2080                                 continue;
2081                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2082                                 continue;
2083                         dst_hold(&rt->dst);
2084                         read_unlock_bh(&table->tb6_lock);
2085
2086                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2087                 }
2088         }
2089         read_unlock_bh(&table->tb6_lock);
2090
2091         return err;
2092 }
2093
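/*
 * Handle an ICMPv6 Redirect: validate the message and its ND options,
 * update the neighbour cache for the new first hop, and install an
 * RTF_CACHE clone pointing at it (clearing RTF_GATEWAY for on-link
 * targets); if the redirect hit an RTF_CACHE route, that old entry is
 * then removed.
 */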
2094 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2095 {
2096         struct netevent_redirect netevent;
2097         struct rt6_info *rt, *nrt = NULL;
2098         struct ndisc_options ndopts;
2099         struct inet6_dev *in6_dev;
2100         struct neighbour *neigh;
2101         struct rd_msg *msg;
2102         int optlen, on_link;
2103         u8 *lladdr;
2104
2105         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2106         optlen -= sizeof(*msg);
2107
2108         if (optlen < 0) {
2109                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2110                 return;
2111         }
2112
2113         msg = (struct rd_msg *)icmp6_hdr(skb);
2114
2115         if (ipv6_addr_is_multicast(&msg->dest)) {
2116                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2117                 return;
2118         }
2119
2120         on_link = 0;
2121         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2122                 on_link = 1;
2123         } else if (ipv6_addr_type(&msg->target) !=
2124                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2125                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2126                 return;
2127         }
2128
2129         in6_dev = __in6_dev_get(skb->dev);
2130         if (!in6_dev)
2131                 return;
2132         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2133                 return;
2134
2135         /* RFC2461 8.1:
2136          *      The IP source address of the Redirect MUST be the same as the current
2137          *      first-hop router for the specified ICMP Destination Address.
2138          */
2139
2140         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2141                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2142                 return;
2143         }
2144
2145         lladdr = NULL;
2146         if (ndopts.nd_opts_tgt_lladdr) {
2147                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2148                                              skb->dev);
2149                 if (!lladdr) {
2150                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2151                         return;
2152                 }
2153         }
2154
2155         rt = (struct rt6_info *) dst;
2156         if (rt->rt6i_flags & RTF_REJECT) {
2157                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2158                 return;
2159         }
2160
2161         /* Redirect received -> path was valid.
2162          * Look, redirects are sent only in response to data packets,
2163          * so this nexthop is apparently reachable. --ANK
2164          */
2165         dst_confirm(&rt->dst);
2166
2167         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2168         if (!neigh)
2169                 return;
2170
2171         /*
2172          *      We have finally decided to accept it.
2173          */
2174
2175         neigh_update(neigh, lladdr, NUD_STALE,
2176                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2177                      NEIGH_UPDATE_F_OVERRIDE|
2178                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2179                                      NEIGH_UPDATE_F_ISROUTER))
2180                      );
2181
2182         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2183         if (!nrt)
2184                 goto out;
2185
2186         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2187         if (on_link)
2188                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2189
2190         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2191
2192         if (ip6_ins_rt(nrt))
2193                 goto out;
2194
2195         netevent.old = &rt->dst;
2196         netevent.new = &nrt->dst;
2197         netevent.daddr = &msg->dest;
2198         netevent.neigh = neigh;
2199         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2200
2201         if (rt->rt6i_flags & RTF_CACHE) {
2202                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2203                 ip6_del_rt(rt);
2204         }
2205
2206 out:
2207         neigh_release(neigh);
2208 }
2209
2210 /*
2211  *      Misc support functions
2212  */
2213
2214 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2215 {
2216         BUG_ON(from->dst.from);
2217
2218         rt->rt6i_flags &= ~RTF_EXPIRES;
2219         dst_hold(&from->dst);
2220         rt->dst.from = &from->dst;
2221         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2222 }
2223
2224 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2225 {
2226         rt->dst.input = ort->dst.input;
2227         rt->dst.output = ort->dst.output;
2228         rt->rt6i_dst = ort->rt6i_dst;
2229         rt->dst.error = ort->dst.error;
2230         rt->rt6i_idev = ort->rt6i_idev;
2231         if (rt->rt6i_idev)
2232                 in6_dev_hold(rt->rt6i_idev);
2233         rt->dst.lastuse = jiffies;
2234         rt->rt6i_gateway = ort->rt6i_gateway;
2235         rt->rt6i_flags = ort->rt6i_flags;
2236         rt6_set_from(rt, ort);
2237         rt->rt6i_metric = ort->rt6i_metric;
2238 #ifdef CONFIG_IPV6_SUBTREES
2239         rt->rt6i_src = ort->rt6i_src;
2240 #endif
2241         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2242         rt->rt6i_table = ort->rt6i_table;
2243         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2244 }
2245
2246 #ifdef CONFIG_IPV6_ROUTE_INFO
2247 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
2248                                            const struct in6_addr *prefix, int prefixlen,
2249                                            const struct in6_addr *gwaddr)
2250 {
2251         struct fib6_node *fn;
2252         struct rt6_info *rt = NULL;
2253         struct fib6_table *table;
2254
2255         table = fib6_get_table(dev_net(dev),
2256                                addrconf_rt_table(dev, RT6_TABLE_INFO));
2257         if (!table)
2258                 return NULL;
2259
2260         read_lock_bh(&table->tb6_lock);
2261         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2262         if (!fn)
2263                 goto out;
2264
2265         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2266                 if (rt->dst.dev->ifindex != dev->ifindex)
2267                         continue;
2268                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2269                         continue;
2270                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2271                         continue;
2272                 dst_hold(&rt->dst);
2273                 break;
2274         }
2275 out:
2276         read_unlock_bh(&table->tb6_lock);
2277         return rt;
2278 }
2279
2280 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
2281                                            const struct in6_addr *prefix, int prefixlen,
2282                                            const struct in6_addr *gwaddr, unsigned int pref)
2283 {
2284         struct fib6_config cfg = {
2285                 .fc_metric      = IP6_RT_PRIO_USER,
2286                 .fc_ifindex     = dev->ifindex,
2287                 .fc_dst_len     = prefixlen,
2288                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2289                                   RTF_UP | RTF_PREF(pref),
2290                 .fc_nlinfo.portid = 0,
2291                 .fc_nlinfo.nlh = NULL,
2292                 .fc_nlinfo.nl_net = dev_net(dev),
2293         };
2294
2295         cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2296         cfg.fc_dst = *prefix;
2297         cfg.fc_gateway = *gwaddr;
2298
2299         /* We should treat it as a default route if prefix length is 0. */
2300         if (!prefixlen)
2301                 cfg.fc_flags |= RTF_DEFAULT;
2302
2303         ip6_route_add(&cfg);
2304
2305         return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
2306 }
2307 #endif
2308
2309 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2310 {
2311         struct rt6_info *rt;
2312         struct fib6_table *table;
2313
2314         table = fib6_get_table(dev_net(dev),
2315                                addrconf_rt_table(dev, RT6_TABLE_MAIN));
2316         if (!table)
2317                 return NULL;
2318
2319         read_lock_bh(&table->tb6_lock);
2320         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2321                 if (dev == rt->dst.dev &&
2322                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2323                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2324                         break;
2325         }
2326         if (rt)
2327                 dst_hold(&rt->dst);
2328         read_unlock_bh(&table->tb6_lock);
2329         return rt;
2330 }
2331
2332 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2333                                      struct net_device *dev,
2334                                      unsigned int pref)
2335 {
2336         struct fib6_config cfg = {
2337                 .fc_table       = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2338                 .fc_metric      = IP6_RT_PRIO_USER,
2339                 .fc_ifindex     = dev->ifindex,
2340                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2341                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2342                 .fc_nlinfo.portid = 0,
2343                 .fc_nlinfo.nlh = NULL,
2344                 .fc_nlinfo.nl_net = dev_net(dev),
2345         };
2346
2347         cfg.fc_gateway = *gwaddr;
2348
2349         ip6_route_add(&cfg);
2350
2351         return rt6_get_dflt_router(gwaddr, dev);
2352 }
2353
2354
2355 int rt6_addrconf_purge(struct rt6_info *rt, void *arg)
{
2356         if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2357             (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2358                 return -1;
2359         return 0;
2360 }
2361
2362 void rt6_purge_dflt_routers(struct net *net)
2363 {
2364         fib6_clean_all(net, rt6_addrconf_purge, NULL);
2365 }
2366
2367 static void rtmsg_to_fib6_config(struct net *net,
2368                                  struct in6_rtmsg *rtmsg,
2369                                  struct fib6_config *cfg)
2370 {
2371         memset(cfg, 0, sizeof(*cfg));
2372
2373         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2374                          : RT6_TABLE_MAIN;
2375         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2376         cfg->fc_metric = rtmsg->rtmsg_metric;
2377         cfg->fc_expires = rtmsg->rtmsg_info;
2378         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2379         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2380         cfg->fc_flags = rtmsg->rtmsg_flags;
2381
2382         cfg->fc_nlinfo.nl_net = net;
2383
2384         cfg->fc_dst = rtmsg->rtmsg_dst;
2385         cfg->fc_src = rtmsg->rtmsg_src;
2386         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2387 }
2388
2389 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2390 {
2391         struct fib6_config cfg;
2392         struct in6_rtmsg rtmsg;
2393         int err;
2394
2395         switch (cmd) {
2396         case SIOCADDRT:         /* Add a route */
2397         case SIOCDELRT:         /* Delete a route */
2398                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2399                         return -EPERM;
2400                 err = copy_from_user(&rtmsg, arg,
2401                                      sizeof(struct in6_rtmsg));
2402                 if (err)
2403                         return -EFAULT;
2404
2405                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2406
2407                 rtnl_lock();
2408                 switch (cmd) {
2409                 case SIOCADDRT:
2410                         err = ip6_route_add(&cfg);
2411                         break;
2412                 case SIOCDELRT:
2413                         err = ip6_route_del(&cfg);
2414                         break;
2415                 default:
2416                         err = -EINVAL;
2417                 }
2418                 rtnl_unlock();
2419
2420                 return err;
2421         }
2422
2423         return -EINVAL;
2424 }
2425
2426 /*
2427  *      Drop the packet on the floor
2428  */
2429
2430 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2431 {
2432         int type;
2433         struct dst_entry *dst = skb_dst(skb);
2434         switch (ipstats_mib_noroutes) {
2435         case IPSTATS_MIB_INNOROUTES:
2436                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2437                 if (type == IPV6_ADDR_ANY) {
2438                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2439                                       IPSTATS_MIB_INADDRERRORS);
2440                         break;
2441                 }
2442                 /* FALLTHROUGH */
2443         case IPSTATS_MIB_OUTNOROUTES:
2444                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2445                               ipstats_mib_noroutes);
2446                 break;
2447         }
2448         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2449         kfree_skb(skb);
2450         return 0;
2451 }
2452
2453 static int ip6_pkt_discard(struct sk_buff *skb)
2454 {
2455         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2456 }
2457
2458 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2459 {
2460         skb->dev = skb_dst(skb)->dev;
2461         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2462 }
2463
2464 static int ip6_pkt_prohibit(struct sk_buff *skb)
2465 {
2466         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2467 }
2468
2469 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2470 {
2471         skb->dev = skb_dst(skb)->dev;
2472         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2473 }
2474
2475 /*
2476  *      Allocate a dst for local (unicast / anycast) address.
2477  */
2478
2479 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2480                                     const struct in6_addr *addr,
2481                                     bool anycast)
2482 {
2483         u32 tb_id;
2484         struct net *net = dev_net(idev->dev);
2485         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2486                                             DST_NOCOUNT);
2487         if (!rt)
2488                 return ERR_PTR(-ENOMEM);
2489
2490         in6_dev_hold(idev);
2491
2492         rt->dst.flags |= DST_HOST;
2493         rt->dst.input = ip6_input;
2494         rt->dst.output = ip6_output;
2495         rt->rt6i_idev = idev;
2496
2497         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2498         if (anycast)
2499                 rt->rt6i_flags |= RTF_ANYCAST;
2500         else
2501                 rt->rt6i_flags |= RTF_LOCAL;
2502
2503         rt->rt6i_gateway  = *addr;
2504         rt->rt6i_dst.addr = *addr;
2505         rt->rt6i_dst.plen = 128;
2506         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2507         rt->rt6i_table = fib6_get_table(net, tb_id);
2508         rt->dst.flags |= DST_NOCACHE;
2509
2510         atomic_set(&rt->dst.__refcnt, 1);
2511
2512         return rt;
2513 }
2514
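/*
 * Choose a source address for @daddr: prefer the route's configured
 * prefsrc, otherwise fall back to standard source address selection on
 * the route's device.
 */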
2515 int ip6_route_get_saddr(struct net *net,
2516                         struct rt6_info *rt,
2517                         const struct in6_addr *daddr,
2518                         unsigned int prefs,
2519                         struct in6_addr *saddr)
2520 {
2521         struct inet6_dev *idev =
2522                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2523         int err = 0;
2524         if (rt && rt->rt6i_prefsrc.plen)
2525                 *saddr = rt->rt6i_prefsrc.addr;
2526         else
2527                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2528                                          daddr, prefs, saddr);
2529         return err;
2530 }
2531
2532 /* Remove a deleted IP address from prefsrc entries. */
2533 struct arg_dev_net_ip {
2534         struct net_device *dev;
2535         struct net *net;
2536         struct in6_addr *addr;
2537 };
2538
2539 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2540 {
2541         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2542         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2543         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2544
2545         if (((void *)rt->dst.dev == dev || !dev) &&
2546             rt != net->ipv6.ip6_null_entry &&
2547             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2548                 /* remove prefsrc entry */
2549                 rt->rt6i_prefsrc.plen = 0;
2550         }
2551         return 0;
2552 }
2553
2554 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2555 {
2556         struct net *net = dev_net(ifp->idev->dev);
2557         struct arg_dev_net_ip adni = {
2558                 .dev = ifp->idev->dev,
2559                 .net = net,
2560                 .addr = &ifp->addr,
2561         };
2562         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2563 }
2564
2565 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2566 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2567
2568 /* Remove routers and update dst entries when a gateway turns into a host. */
2569 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2570 {
2571         struct in6_addr *gateway = (struct in6_addr *)arg;
2572
2573         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2574              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2575              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2576                 return -1;
2577         }
2578         return 0;
2579 }
2580
2581 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2582 {
2583         fib6_clean_all(net, fib6_clean_tohost, gateway);
2584 }
2585
2586 struct arg_dev_net {
2587         struct net_device *dev;
2588         struct net *net;
2589 };
2590
2591 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2592 {
2593         const struct arg_dev_net *adn = arg;
2594         const struct net_device *dev = adn->dev;
2595
2596         if ((rt->dst.dev == dev || !dev) &&
2597             rt != adn->net->ipv6.ip6_null_entry)
2598                 return -1;
2599
2600         return 0;
2601 }
2602
2603 void rt6_ifdown(struct net *net, struct net_device *dev)
2604 {
2605         struct arg_dev_net adn = {
2606                 .dev = dev,
2607                 .net = net,
2608         };
2609
2610         fib6_clean_all(net, fib6_ifdown, &adn);
2611         icmp6_clean_all(fib6_ifdown, &adn);
2612         if (dev)
2613                 rt6_uncached_list_flush_dev(net, dev);
2614 }
2615
2616 struct rt6_mtu_change_arg {
2617         struct net_device *dev;
2618         unsigned int mtu;
2619 };
2620
2621 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2622 {
2623         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2624         struct inet6_dev *idev;
2625
2626         /* In IPv6, PMTU discovery is not optional,
2627            so the RTAX_MTU lock cannot disable it.
2628            We still use this lock to block changes
2629            caused by addrconf/ndisc.
2630         */
2631
2632         idev = __in6_dev_get(arg->dev);
2633         if (!idev)
2634                 return 0;
2635
2636         /* For an administrative MTU increase, there is no way to discover
2637            an IPv6 PMTU increase, so the PMTU increase should be updated here.
2638            Since RFC 1981 doesn't cover administrative MTU increases,
2639            updating the PMTU on increase is a MUST. (i.e. jumbo frame)
2640          */
2641         /*
2642            If the new MTU is less than the route PMTU, this new MTU is the
2643            lowest MTU in the path; update the route PMTU to reflect the PMTU
2644            decrease. If the new MTU is greater than the route PMTU, and the
2645            old MTU is the lowest MTU in the path, update the route PMTU
2646            to reflect the increase. In this case, if the other nodes' MTUs
2647            also equal the lowest MTU, a TOO BIG message will trigger
2648            PMTU discovery.
2649          */
2650         if (rt->dst.dev == arg->dev &&
2651             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2652                 if (rt->rt6i_flags & RTF_CACHE) {
2653                         /* For RTF_CACHE with rt6i_pmtu == 0
2654                          * (i.e. a redirected route),
2655                          * the metrics of its rt->dst.from has already
2656                          * been updated.
2657                          */
2658                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2659                                 rt->rt6i_pmtu = arg->mtu;
2660                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2661                            (dst_mtu(&rt->dst) < arg->mtu &&
2662                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2663                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2664                 }
2665         }
2666         return 0;
2667 }
2668
2669 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2670 {
2671         struct rt6_mtu_change_arg arg = {
2672                 .dev = dev,
2673                 .mtu = mtu,
2674         };
2675
2676         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2677 }
2678
2679 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2680         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2681         [RTA_OIF]               = { .type = NLA_U32 },
2682         [RTA_IIF]               = { .type = NLA_U32 },
2683         [RTA_PRIORITY]          = { .type = NLA_U32 },
2684         [RTA_METRICS]           = { .type = NLA_NESTED },
2685         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2686         [RTA_PREF]              = { .type = NLA_U8 },
2687         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2688         [RTA_ENCAP]             = { .type = NLA_NESTED },
2689         [RTA_UID]               = { .type = NLA_U32 },
2690 };
2691
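/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE request into a fib6_config,
 * copying the rtmsg header fields and the RTA_* attributes (gateway,
 * prefixes, metrics, multipath, preference, encap).
 */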
2692 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2693                               struct fib6_config *cfg)
2694 {
2695         struct rtmsg *rtm;
2696         struct nlattr *tb[RTA_MAX+1];
2697         unsigned int pref;
2698         int err;
2699
2700         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2701         if (err < 0)
2702                 goto errout;
2703
2704         err = -EINVAL;
2705         rtm = nlmsg_data(nlh);
2706         memset(cfg, 0, sizeof(*cfg));
2707
2708         cfg->fc_table = rtm->rtm_table;
2709         cfg->fc_dst_len = rtm->rtm_dst_len;
2710         cfg->fc_src_len = rtm->rtm_src_len;
2711         cfg->fc_flags = RTF_UP;
2712         cfg->fc_protocol = rtm->rtm_protocol;
2713         cfg->fc_type = rtm->rtm_type;
2714
2715         if (rtm->rtm_type == RTN_UNREACHABLE ||
2716             rtm->rtm_type == RTN_BLACKHOLE ||
2717             rtm->rtm_type == RTN_PROHIBIT ||
2718             rtm->rtm_type == RTN_THROW)
2719                 cfg->fc_flags |= RTF_REJECT;
2720
2721         if (rtm->rtm_type == RTN_LOCAL)
2722                 cfg->fc_flags |= RTF_LOCAL;
2723
2724         if (rtm->rtm_flags & RTM_F_CLONED)
2725                 cfg->fc_flags |= RTF_CACHE;
2726
2727         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2728         cfg->fc_nlinfo.nlh = nlh;
2729         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2730
2731         if (tb[RTA_GATEWAY]) {
2732                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2733                 cfg->fc_flags |= RTF_GATEWAY;
2734         }
2735
2736         if (tb[RTA_DST]) {
2737                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2738
2739                 if (nla_len(tb[RTA_DST]) < plen)
2740                         goto errout;
2741
2742                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2743         }
2744
2745         if (tb[RTA_SRC]) {
2746                 int plen = (rtm->rtm_src_len + 7) >> 3;
2747
2748                 if (nla_len(tb[RTA_SRC]) < plen)
2749                         goto errout;
2750
2751                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2752         }
2753
2754         if (tb[RTA_PREFSRC])
2755                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2756
2757         if (tb[RTA_OIF])
2758                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2759
2760         if (tb[RTA_PRIORITY])
2761                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2762
2763         if (tb[RTA_METRICS]) {
2764                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2765                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2766         }
2767
2768         if (tb[RTA_TABLE])
2769                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2770
2771         if (tb[RTA_MULTIPATH]) {
2772                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2773                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2774         }
2775
2776         if (tb[RTA_PREF]) {
2777                 pref = nla_get_u8(tb[RTA_PREF]);
2778                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2779                     pref != ICMPV6_ROUTER_PREF_HIGH)
2780                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2781                 cfg->fc_flags |= RTF_PREF(pref);
2782         }
2783
2784         if (tb[RTA_ENCAP])
2785                 cfg->fc_encap = tb[RTA_ENCAP];
2786
2787         if (tb[RTA_ENCAP_TYPE])
2788                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2789
2790         err = 0;
2791 errout:
2792         return err;
2793 }
2794
2795 struct rt6_nh {
2796         struct rt6_info *rt6_info;
2797         struct fib6_config r_cfg;
2798         struct mx6_config mxc;
2799         struct list_head next;
2800 };
2801
2802 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2803 {
2804         struct rt6_nh *nh;
2805
2806         list_for_each_entry(nh, rt6_nh_list, next) {
2807                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2808                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2809                         nh->r_cfg.fc_ifindex);
2810         }
2811 }
2812
2813 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2814                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2815 {
2816         struct rt6_nh *nh;
2817         struct rt6_info *rtnh;
2818         int err = -EEXIST;
2819
2820         list_for_each_entry(nh, rt6_nh_list, next) {
2821                 /* check if rt6_info already exists */
2822                 rtnh = nh->rt6_info;
2823
2824                 if (rtnh->dst.dev == rt->dst.dev &&
2825                     rtnh->rt6i_idev == rt->rt6i_idev &&
2826                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2827                                     &rt->rt6i_gateway))
2828                         return err;
2829         }
2830
2831         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2832         if (!nh)
2833                 return -ENOMEM;
2834         nh->rt6_info = rt;
2835         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2836         if (err) {
2837                 kfree(nh);
2838                 return err;
2839         }
2840         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2841         list_add_tail(&nh->next, rt6_nh_list);
2842
2843         return 0;
2844 }
2845
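/*
 * Add an RTA_MULTIPATH route: build one rt6_info per nexthop, insert them
 * one by one, and on failure delete the nexthops that were already
 * installed.
 */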
2846 static int ip6_route_multipath_add(struct fib6_config *cfg)
2847 {
2848         struct fib6_config r_cfg;
2849         struct rtnexthop *rtnh;
2850         struct rt6_info *rt;
2851         struct rt6_nh *err_nh;
2852         struct rt6_nh *nh, *nh_safe;
2853         int remaining;
2854         int attrlen;
2855         int err = 1;
2856         int nhn = 0;
2857         int replace = (cfg->fc_nlinfo.nlh &&
2858                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2859         LIST_HEAD(rt6_nh_list);
2860
2861         remaining = cfg->fc_mp_len;
2862         rtnh = (struct rtnexthop *)cfg->fc_mp;
2863
2864         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2865          * rt6_info structs per nexthop
2866          */
2867         while (rtnh_ok(rtnh, remaining)) {
2868                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2869                 if (rtnh->rtnh_ifindex)
2870                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2871
2872                 attrlen = rtnh_attrlen(rtnh);
2873                 if (attrlen > 0) {
2874                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2875
2876                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2877                         if (nla) {
2878                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2879                                 r_cfg.fc_flags |= RTF_GATEWAY;
2880                         }
2881                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2882                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2883                         if (nla)
2884                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2885                 }
2886
2887                 rt = ip6_route_info_create(&r_cfg);
2888                 if (IS_ERR(rt)) {
2889                         err = PTR_ERR(rt);
2890                         rt = NULL;
2891                         goto cleanup;
2892                 }
2893
2894                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2895                 if (err) {
2896                         dst_free(&rt->dst);
2897                         goto cleanup;
2898                 }
2899
2900                 rtnh = rtnh_next(rtnh, &remaining);
2901         }
2902
2903         err_nh = NULL;
2904         list_for_each_entry(nh, &rt6_nh_list, next) {
2905                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2906                 /* nh->rt6_info is used or freed at this point, reset to NULL */
2907                 nh->rt6_info = NULL;
2908                 if (err) {
2909                         if (replace && nhn)
2910                                 ip6_print_replace_route_err(&rt6_nh_list);
2911                         err_nh = nh;
2912                         goto add_errout;
2913                 }
2914
2915                 /* Because each route is added as a single route, we remove
2916                  * these flags after the first nexthop: if there is a collision,
2917                  * we have already failed to add the first nexthop:
2918                  * fib6_add_rt2node() has rejected it; when replacing, the old
2919                  * nexthops have been replaced by the first new one, and the
2920                  * rest should be appended to it.
2921                  */
2922                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2923                                                      NLM_F_REPLACE);
2924                 nhn++;
2925         }
2926
2927         goto cleanup;
2928
2929 add_errout:
2930         /* Delete routes that were already added */
2931         list_for_each_entry(nh, &rt6_nh_list, next) {
2932                 if (err_nh == nh)
2933                         break;
2934                 ip6_route_del(&nh->r_cfg);
2935         }
2936
2937 cleanup:
2938         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2939                 if (nh->rt6_info)
2940                         dst_free(&nh->rt6_info->dst);
2941                 kfree(nh->mxc.mx);
2942                 list_del(&nh->next);
2943                 kfree(nh);
2944         }
2945
2946         return err;
2947 }
2948
2949 static int ip6_route_multipath_del(struct fib6_config *cfg)
2950 {
2951         struct fib6_config r_cfg;
2952         struct rtnexthop *rtnh;
2953         int remaining;
2954         int attrlen;
2955         int err = 1, last_err = 0;
2956
2957         remaining = cfg->fc_mp_len;
2958         rtnh = (struct rtnexthop *)cfg->fc_mp;
2959
2960         /* Parse a Multipath Entry */
2961         while (rtnh_ok(rtnh, remaining)) {
2962                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2963                 if (rtnh->rtnh_ifindex)
2964                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2965
2966                 attrlen = rtnh_attrlen(rtnh);
2967                 if (attrlen > 0) {
2968                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2969
2970                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2971                         if (nla) {
2972                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2973                                 r_cfg.fc_flags |= RTF_GATEWAY;
2974                         }
2975                 }
2976                 err = ip6_route_del(&r_cfg);
2977                 if (err)
2978                         last_err = err;
2979
2980                 rtnh = rtnh_next(rtnh, &remaining);
2981         }
2982
2983         return last_err;
2984 }
2985
2986 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2987 {
2988         struct fib6_config cfg;
2989         int err;
2990
2991         err = rtm_to_fib6_config(skb, nlh, &cfg);
2992         if (err < 0)
2993                 return err;
2994
2995         if (cfg.fc_mp)
2996                 return ip6_route_multipath_del(&cfg);
2997         else
2998                 return ip6_route_del(&cfg);
2999 }
3000
3001 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3002 {
3003         struct fib6_config cfg;
3004         int err;
3005
3006         err = rtm_to_fib6_config(skb, nlh, &cfg);
3007         if (err < 0)
3008                 return err;
3009
3010         if (cfg.fc_mp)
3011                 return ip6_route_multipath_add(&cfg);
3012         else
3013                 return ip6_route_add(&cfg);
3014 }
3015
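/*
 * Worst-case netlink message size for one route, covering every attribute
 * rt6_fill_node() may emit.
 */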
3016 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3017 {
3018         return NLMSG_ALIGN(sizeof(struct rtmsg))
3019                + nla_total_size(16) /* RTA_SRC */
3020                + nla_total_size(16) /* RTA_DST */
3021                + nla_total_size(16) /* RTA_GATEWAY */
3022                + nla_total_size(16) /* RTA_PREFSRC */
3023                + nla_total_size(4) /* RTA_TABLE */
3024                + nla_total_size(4) /* RTA_IIF */
3025                + nla_total_size(4) /* RTA_OIF */
3026                + nla_total_size(4) /* RTA_PRIORITY */
3027                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3028                + nla_total_size(sizeof(struct rta_cacheinfo))
3029                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3030                + nla_total_size(1) /* RTA_PREF */
3031                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3032 }
3033
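/*
 * Fill one RTM_NEWROUTE message describing @rt.  When @prefix is set, only
 * RTF_PREFIX_RT routes are reported (other routes return 1 so the dump
 * skips them); returns -EMSGSIZE if the skb runs out of room.
 */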
3034 static int rt6_fill_node(struct net *net,
3035                          struct sk_buff *skb, struct rt6_info *rt,
3036                          struct in6_addr *dst, struct in6_addr *src,
3037                          int iif, int type, u32 portid, u32 seq,
3038                          int prefix, int nowait, unsigned int flags)
3039 {
3040         u32 metrics[RTAX_MAX];
3041         struct rtmsg *rtm;
3042         struct nlmsghdr *nlh;
3043         long expires;
3044         u32 table;
3045
3046         if (prefix) {   /* user wants prefix routes only */
3047                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3048                         /* success since this is not a prefix route */
3049                         return 1;
3050                 }
3051         }
3052
3053         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3054         if (!nlh)
3055                 return -EMSGSIZE;
3056
3057         rtm = nlmsg_data(nlh);
3058         rtm->rtm_family = AF_INET6;
3059         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3060         rtm->rtm_src_len = rt->rt6i_src.plen;
3061         rtm->rtm_tos = 0;
3062         if (rt->rt6i_table)
3063                 table = rt->rt6i_table->tb6_id;
3064         else
3065                 table = RT6_TABLE_UNSPEC;
3066         rtm->rtm_table = table;
3067         if (nla_put_u32(skb, RTA_TABLE, table))
3068                 goto nla_put_failure;
3069         if (rt->rt6i_flags & RTF_REJECT) {
3070                 switch (rt->dst.error) {
3071                 case -EINVAL:
3072                         rtm->rtm_type = RTN_BLACKHOLE;
3073                         break;
3074                 case -EACCES:
3075                         rtm->rtm_type = RTN_PROHIBIT;
3076                         break;
3077                 case -EAGAIN:
3078                         rtm->rtm_type = RTN_THROW;
3079                         break;
3080                 default:
3081                         rtm->rtm_type = RTN_UNREACHABLE;
3082                         break;
3083                 }
3084         } else if (rt->rt6i_flags & RTF_LOCAL)
3086                 rtm->rtm_type = RTN_LOCAL;
3087         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3088                 rtm->rtm_type = RTN_LOCAL;
3089         else
3090                 rtm->rtm_type = RTN_UNICAST;
3091         rtm->rtm_flags = 0;
3092         if (!netif_carrier_ok(rt->dst.dev)) {
3093                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3094                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3095                         rtm->rtm_flags |= RTNH_F_DEAD;
3096         }
3097         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3098         rtm->rtm_protocol = rt->rt6i_protocol;
3099         if (rt->rt6i_flags & RTF_DYNAMIC)
3100                 rtm->rtm_protocol = RTPROT_REDIRECT;
3101         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3102                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3103                         rtm->rtm_protocol = RTPROT_RA;
3104                 else
3105                         rtm->rtm_protocol = RTPROT_KERNEL;
3106         }
3107
3108         if (rt->rt6i_flags & RTF_CACHE)
3109                 rtm->rtm_flags |= RTM_F_CLONED;
3110
3111         if (dst) {
3112                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3113                         goto nla_put_failure;
3114                 rtm->rtm_dst_len = 128;
3115         } else if (rtm->rtm_dst_len)
3116                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3117                         goto nla_put_failure;
3118 #ifdef CONFIG_IPV6_SUBTREES
3119         if (src) {
3120                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3121                         goto nla_put_failure;
3122                 rtm->rtm_src_len = 128;
3123         } else if (rtm->rtm_src_len &&
3124                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3125                 goto nla_put_failure;
3126 #endif
3127         if (iif) {
3128 #ifdef CONFIG_IPV6_MROUTE
3129                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3130                         int err = ip6mr_get_route(net, skb, rtm, nowait);
3131                         if (err <= 0) {
3132                                 if (!nowait) {
3133                                         if (err == 0)
3134                                                 return 0;
3135                                         goto nla_put_failure;
3136                                 } else {
3137                                         if (err == -EMSGSIZE)
3138                                                 goto nla_put_failure;
3139                                 }
3140                         }
3141                 } else
3142 #endif
3143                         if (nla_put_u32(skb, RTA_IIF, iif))
3144                                 goto nla_put_failure;
3145         } else if (dst) {
3146                 struct in6_addr saddr_buf;
3147                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3148                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3149                         goto nla_put_failure;
3150         }
3151
3152         if (rt->rt6i_prefsrc.plen) {
3153                 struct in6_addr saddr_buf;
3154                 saddr_buf = rt->rt6i_prefsrc.addr;
3155                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3156                         goto nla_put_failure;
3157         }
3158
3159         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3160         if (rt->rt6i_pmtu)
3161                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3162         if (rtnetlink_put_metrics(skb, metrics) < 0)
3163                 goto nla_put_failure;
3164
3165         if (rt->rt6i_flags & RTF_GATEWAY) {
3166                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3167                         goto nla_put_failure;
3168         }
3169
3170         if (rt->dst.dev &&
3171             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3172                 goto nla_put_failure;
3173         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3174                 goto nla_put_failure;
3175
3176         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3177
3178         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3179                 goto nla_put_failure;
3180
3181         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3182                 goto nla_put_failure;
3183
3184         lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3185
3186         nlmsg_end(skb, nlh);
3187         return 0;
3188
3189 nla_put_failure:
3190         nlmsg_cancel(skb, nlh);
3191         return -EMSGSIZE;
3192 }
3193
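/* fib6 walker callback for route dumps: honour the RTM_F_PREFIX filter
 * from the request and emit one RTM_NEWROUTE entry per route.
 */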
3194 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3195 {
3196         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3197         int prefix;
3198
3199         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3200                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3201                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3202         } else
3203                 prefix = 0;
3204
3205         return rt6_fill_node(arg->net,
3206                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3207                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3208                      prefix, 0, NLM_F_MULTI);
3209 }
3210
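/* RTM_GETROUTE handler: build a flow from the RTA_* attributes, resolve
 * it through the input path (when RTA_IIF is given) or the output path,
 * and unicast a single RTM_NEWROUTE answer back to the requester.
 */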
3211 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3212 {
3213         struct net *net = sock_net(in_skb->sk);
3214         struct nlattr *tb[RTA_MAX+1];
3215         struct rt6_info *rt;
3216         struct sk_buff *skb;
3217         struct rtmsg *rtm;
3218         struct flowi6 fl6;
3219         int err, iif = 0, oif = 0;
3220
3221         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3222         if (err < 0)
3223                 goto errout;
3224
3225         err = -EINVAL;
3226         memset(&fl6, 0, sizeof(fl6));
3227
3228         if (tb[RTA_SRC]) {
3229                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3230                         goto errout;
3231
3232                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3233         }
3234
3235         if (tb[RTA_DST]) {
3236                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3237                         goto errout;
3238
3239                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3240         }
3241
3242         if (tb[RTA_IIF])
3243                 iif = nla_get_u32(tb[RTA_IIF]);
3244
3245         if (tb[RTA_OIF])
3246                 oif = nla_get_u32(tb[RTA_OIF]);
3247
3248         if (tb[RTA_MARK])
3249                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3250
3251         if (tb[RTA_UID])
3252                 fl6.flowi6_uid = make_kuid(current_user_ns(),
3253                                            nla_get_u32(tb[RTA_UID]));
3254         else
3255                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3256         if (iif) {
3257                 struct net_device *dev;
3258                 int flags = 0;
3259
3260                 dev = __dev_get_by_index(net, iif);
3261                 if (!dev) {
3262                         err = -ENODEV;
3263                         goto errout;
3264                 }
3265
3266                 fl6.flowi6_iif = iif;
3267
3268                 if (!ipv6_addr_any(&fl6.saddr))
3269                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3270
3271                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3272                                                                flags);
3273         } else {
3274                 fl6.flowi6_oif = oif;
3275
3276                 if (netif_index_is_l3_master(net, oif)) {
3277                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3278                                            FLOWI_FLAG_SKIP_NH_OIF;
3279                 }
3280
3281                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3282         }
3283
3284         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3285         if (!skb) {
3286                 ip6_rt_put(rt);
3287                 err = -ENOBUFS;
3288                 goto errout;
3289         }
3290
3291         /* Reserve room for dummy headers; this skb can pass
3292          * through a good chunk of the routing engine.
3293          */
3294         skb_reset_mac_header(skb);
3295         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3296
3297         skb_dst_set(skb, &rt->dst);
3298
3299         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3300                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3301                             nlh->nlmsg_seq, 0, 0, 0);
3302         if (err < 0) {
3303                 kfree_skb(skb);
3304                 goto errout;
3305         }
3306
3307         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3308 errout:
3309         return err;
3310 }
3311
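/* Broadcast a route change (@event is RTM_NEWROUTE or RTM_DELROUTE) to
 * RTNLGRP_IPV6_ROUTE listeners.
 */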
3312 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3313                      unsigned int nlm_flags)
3314 {
3315         struct sk_buff *skb;
3316         struct net *net = info->nl_net;
3317         u32 seq;
3318         int err;
3319
3320         err = -ENOBUFS;
3321         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3322
3323         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3324         if (!skb)
3325                 goto errout;
3326
3327         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3328                                 event, info->portid, seq, 0, 0, nlm_flags);
3329         if (err < 0) {
3330                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3331                 WARN_ON(err == -EMSGSIZE);
3332                 kfree_skb(skb);
3333                 goto errout;
3334         }
3335         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3336                     info->nlh, gfp_any());
3337         return;
3338 errout:
3339         if (err < 0)
3340                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3341 }
3342
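/* Netdevice notifier: once the loopback device registers, attach it to
 * the per-netns null (and, with multiple tables, prohibit and blackhole)
 * route entries so they always reference a live device.
 */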
3343 static int ip6_route_dev_notify(struct notifier_block *this,
3344                                 unsigned long event, void *ptr)
3345 {
3346         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3347         struct net *net = dev_net(dev);
3348
3349         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3350                 net->ipv6.ip6_null_entry->dst.dev = dev;
3351                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3352 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3353                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3354                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3355                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3356                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3357 #endif
3358         }
3359
3360         return NOTIFY_OK;
3361 }
3362
3363 /*
3364  *      /proc
3365  */
3366
3367 #ifdef CONFIG_PROC_FS
3368
3369 static const struct file_operations ipv6_route_proc_fops = {
3370         .owner          = THIS_MODULE,
3371         .open           = ipv6_route_open,
3372         .read           = seq_read,
3373         .llseek         = seq_lseek,
3374         .release        = seq_release_net,
3375 };
3376
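/* /proc/net/rt6_stats: fib nodes, route nodes, allocated routes, route
 * entries, cached routes, dst entries in use and discarded routes.
 */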
3377 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3378 {
3379         struct net *net = (struct net *)seq->private;
3380         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3381                    net->ipv6.rt6_stats->fib_nodes,
3382                    net->ipv6.rt6_stats->fib_route_nodes,
3383                    net->ipv6.rt6_stats->fib_rt_alloc,
3384                    net->ipv6.rt6_stats->fib_rt_entries,
3385                    net->ipv6.rt6_stats->fib_rt_cache,
3386                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3387                    net->ipv6.rt6_stats->fib_discarded_routes);
3388
3389         return 0;
3390 }
3391
3392 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3393 {
3394         return single_open_net(inode, file, rt6_stats_seq_show);
3395 }
3396
3397 static const struct file_operations rt6_stats_seq_fops = {
3398         .owner   = THIS_MODULE,
3399         .open    = rt6_stats_seq_open,
3400         .read    = seq_read,
3401         .llseek  = seq_lseek,
3402         .release = single_release_net,
3403 };
3404 #endif  /* CONFIG_PROC_FS */
3405
3406 #ifdef CONFIG_SYSCTL
3407
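/* net.ipv6.route.flush is write-only: store the new value and kick fib6
 * garbage collection using the delay that was configured before this
 * write.
 */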
3408 static
3409 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3410                               void __user *buffer, size_t *lenp, loff_t *ppos)
3411 {
3412         struct net *net;
3413         int delay;
3414         if (!write)
3415                 return -EINVAL;
3416
3417         net = (struct net *)ctl->extra1;
3418         delay = net->ipv6.sysctl.flush_delay;
3419         proc_dointvec(ctl, write, buffer, lenp, ppos);
3420         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3421         return 0;
3422 }
3423
3424 struct ctl_table ipv6_route_table_template[] = {
3425         {
3426                 .procname       =       "flush",
3427                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3428                 .maxlen         =       sizeof(int),
3429                 .mode           =       0200,
3430                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3431         },
3432         {
3433                 .procname       =       "gc_thresh",
3434                 .data           =       &ip6_dst_ops_template.gc_thresh,
3435                 .maxlen         =       sizeof(int),
3436                 .mode           =       0644,
3437                 .proc_handler   =       proc_dointvec,
3438         },
3439         {
3440                 .procname       =       "max_size",
3441                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3442                 .maxlen         =       sizeof(int),
3443                 .mode           =       0644,
3444                 .proc_handler   =       proc_dointvec,
3445         },
3446         {
3447                 .procname       =       "gc_min_interval",
3448                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3449                 .maxlen         =       sizeof(int),
3450                 .mode           =       0644,
3451                 .proc_handler   =       proc_dointvec_jiffies,
3452         },
3453         {
3454                 .procname       =       "gc_timeout",
3455                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3456                 .maxlen         =       sizeof(int),
3457                 .mode           =       0644,
3458                 .proc_handler   =       proc_dointvec_jiffies,
3459         },
3460         {
3461                 .procname       =       "gc_interval",
3462                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3463                 .maxlen         =       sizeof(int),
3464                 .mode           =       0644,
3465                 .proc_handler   =       proc_dointvec_jiffies,
3466         },
3467         {
3468                 .procname       =       "gc_elasticity",
3469                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3470                 .maxlen         =       sizeof(int),
3471                 .mode           =       0644,
3472                 .proc_handler   =       proc_dointvec,
3473         },
3474         {
3475                 .procname       =       "mtu_expires",
3476                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3477                 .maxlen         =       sizeof(int),
3478                 .mode           =       0644,
3479                 .proc_handler   =       proc_dointvec_jiffies,
3480         },
3481         {
3482                 .procname       =       "min_adv_mss",
3483                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3484                 .maxlen         =       sizeof(int),
3485                 .mode           =       0644,
3486                 .proc_handler   =       proc_dointvec,
3487         },
3488         {
3489                 .procname       =       "gc_min_interval_ms",
3490                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3491                 .maxlen         =       sizeof(int),
3492                 .mode           =       0644,
3493                 .proc_handler   =       proc_dointvec_ms_jiffies,
3494         },
3495         { }
3496 };
3497
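/* Duplicate the sysctl template for a new namespace and repoint each
 * entry at that namespace's data; the flush entry is hidden from
 * non-init user namespaces.
 */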
3498 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3499 {
3500         struct ctl_table *table;
3501
3502         table = kmemdup(ipv6_route_table_template,
3503                         sizeof(ipv6_route_table_template),
3504                         GFP_KERNEL);
3505
3506         if (table) {
3507                 table[0].data = &net->ipv6.sysctl.flush_delay;
3508                 table[0].extra1 = net;
3509                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3510                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3511                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3512                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3513                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3514                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3515                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3516                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3517                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3518
3519                 /* Don't export sysctls to unprivileged users */
3520                 if (net->user_ns != &init_user_ns)
3521                         table[0].procname = NULL;
3522         }
3523
3524         return table;
3525 }
3526 #endif
3527
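/* Per-namespace setup: clone the dst_ops template, allocate the null
 * (and, with CONFIG_IPV6_MULTIPLE_TABLES, prohibit and blackhole)
 * template routes and install the default route/GC sysctl values.
 */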
3528 static int __net_init ip6_route_net_init(struct net *net)
3529 {
3530         int ret = -ENOMEM;
3531
3532         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3533                sizeof(net->ipv6.ip6_dst_ops));
3534
3535         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3536                 goto out_ip6_dst_ops;
3537
3538         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3539                                            sizeof(*net->ipv6.ip6_null_entry),
3540                                            GFP_KERNEL);
3541         if (!net->ipv6.ip6_null_entry)
3542                 goto out_ip6_dst_entries;
3543         net->ipv6.ip6_null_entry->dst.path =
3544                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3545         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3546         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3547                          ip6_template_metrics, true);
3548
3549 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3550         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3551                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3552                                                GFP_KERNEL);
3553         if (!net->ipv6.ip6_prohibit_entry)
3554                 goto out_ip6_null_entry;
3555         net->ipv6.ip6_prohibit_entry->dst.path =
3556                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3557         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3558         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3559                          ip6_template_metrics, true);
3560
3561         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3562                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3563                                                GFP_KERNEL);
3564         if (!net->ipv6.ip6_blk_hole_entry)
3565                 goto out_ip6_prohibit_entry;
3566         net->ipv6.ip6_blk_hole_entry->dst.path =
3567                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3568         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3569         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3570                          ip6_template_metrics, true);
3571 #endif
3572
3573         net->ipv6.sysctl.flush_delay = 0;
3574         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3575         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3576         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3577         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3578         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3579         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3580         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3581
3582         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3583
3584         ret = 0;
3585 out:
3586         return ret;
3587
3588 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3589 out_ip6_prohibit_entry:
3590         kfree(net->ipv6.ip6_prohibit_entry);
3591 out_ip6_null_entry:
3592         kfree(net->ipv6.ip6_null_entry);
3593 #endif
3594 out_ip6_dst_entries:
3595         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3596 out_ip6_dst_ops:
3597         goto out;
3598 }
3599
3600 static void __net_exit ip6_route_net_exit(struct net *net)
3601 {
3602         kfree(net->ipv6.ip6_null_entry);
3603 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3604         kfree(net->ipv6.ip6_prohibit_entry);
3605         kfree(net->ipv6.ip6_blk_hole_entry);
3606 #endif
3607         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3608 }
3609
3610 static int __net_init ip6_route_net_init_late(struct net *net)
3611 {
3612 #ifdef CONFIG_PROC_FS
3613         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3614         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3615 #endif
3616         return 0;
3617 }
3618
3619 static void __net_exit ip6_route_net_exit_late(struct net *net)
3620 {
3621 #ifdef CONFIG_PROC_FS
3622         remove_proc_entry("ipv6_route", net->proc_net);
3623         remove_proc_entry("rt6_stats", net->proc_net);
3624 #endif
3625 }
3626
3627 static struct pernet_operations ip6_route_net_ops = {
3628         .init = ip6_route_net_init,
3629         .exit = ip6_route_net_exit,
3630 };
3631
3632 static int __net_init ipv6_inetpeer_init(struct net *net)
3633 {
3634         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3635
3636         if (!bp)
3637                 return -ENOMEM;
3638         inet_peer_base_init(bp);
3639         net->ipv6.peers = bp;
3640         return 0;
3641 }
3642
3643 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3644 {
3645         struct inet_peer_base *bp = net->ipv6.peers;
3646
3647         net->ipv6.peers = NULL;
3648         inetpeer_invalidate_tree(bp);
3649         kfree(bp);
3650 }
3651
3652 static struct pernet_operations ipv6_inetpeer_ops = {
3653         .init   =       ipv6_inetpeer_init,
3654         .exit   =       ipv6_inetpeer_exit,
3655 };
3656
3657 static struct pernet_operations ip6_route_net_late_ops = {
3658         .init = ip6_route_net_init_late,
3659         .exit = ip6_route_net_exit_late,
3660 };
3661
3662 static struct notifier_block ip6_route_dev_notifier = {
3663         .notifier_call = ip6_route_dev_notify,
3664         .priority = 0,
3665 };
3666
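/* Subsystem init: create the rt6_info slab cache, register the per-netns
 * operations, bring up fib6, xfrm6 and the policy-rule layer, register
 * the RTM_{NEW,DEL,GET}ROUTE handlers and the netdevice notifier, and
 * unwind everything in reverse order on failure.
 */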
3667 int __init ip6_route_init(void)
3668 {
3669         int ret;
3670         int cpu;
3671
3672         ret = -ENOMEM;
3673         ip6_dst_ops_template.kmem_cachep =
3674                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3675                                   SLAB_HWCACHE_ALIGN, NULL);
3676         if (!ip6_dst_ops_template.kmem_cachep)
3677                 goto out;
3678
3679         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3680         if (ret)
3681                 goto out_kmem_cache;
3682
3683         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3684         if (ret)
3685                 goto out_dst_entries;
3686
3687         ret = register_pernet_subsys(&ip6_route_net_ops);
3688         if (ret)
3689                 goto out_register_inetpeer;
3690
3691         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3692
3693         /* The loopback device is registered before this code runs, so its
3694          * reference in the rt6_info templates is not taken automatically;
3695          * take it manually for init_net. */
3696         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3697         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3698 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3699         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3700         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3701         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3702         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3703 #endif
3704         ret = fib6_init();
3705         if (ret)
3706                 goto out_register_subsys;
3707
3708         ret = xfrm6_init();
3709         if (ret)
3710                 goto out_fib6_init;
3711
3712         ret = fib6_rules_init();
3713         if (ret)
3714                 goto xfrm6_init;
3715
3716         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3717         if (ret)
3718                 goto fib6_rules_init;
3719
3720         ret = -ENOBUFS;
3721         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3722             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3723             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3724                 goto out_register_late_subsys;
3725
3726         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3727         if (ret)
3728                 goto out_register_late_subsys;
3729
3730         for_each_possible_cpu(cpu) {
3731                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3732
3733                 INIT_LIST_HEAD(&ul->head);
3734                 spin_lock_init(&ul->lock);
3735         }
3736
3737 out:
3738         return ret;
3739
3740 out_register_late_subsys:
3741         unregister_pernet_subsys(&ip6_route_net_late_ops);
3742 fib6_rules_init:
3743         fib6_rules_cleanup();
3744 xfrm6_init:
3745         xfrm6_fini();
3746 out_fib6_init:
3747         fib6_gc_cleanup();
3748 out_register_subsys:
3749         unregister_pernet_subsys(&ip6_route_net_ops);
3750 out_register_inetpeer:
3751         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3752 out_dst_entries:
3753         dst_entries_destroy(&ip6_dst_blackhole_ops);
3754 out_kmem_cache:
3755         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3756         goto out;
3757 }
3758
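/* Tear down what ip6_route_init() set up, in reverse order. */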
3759 void ip6_route_cleanup(void)
3760 {
3761         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3762         unregister_pernet_subsys(&ip6_route_net_late_ops);
3763         fib6_rules_cleanup();
3764         xfrm6_fini();
3765         fib6_gc_cleanup();
3766         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3767         unregister_pernet_subsys(&ip6_route_net_ops);
3768         dst_entries_destroy(&ip6_dst_blackhole_ops);
3769         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3770 }