net/ipv6/route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
72 enum rt6_nud_state {
73         RT6_NUD_FAIL_HARD = -3,
74         RT6_NUD_FAIL_PROBE = -2,
75         RT6_NUD_FAIL_DO_RR = -1,
76         RT6_NUD_SUCCEED = 1
77 };
78
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int      ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void             ip6_dst_destroy(struct dst_entry *);
85 static void             ip6_dst_ifdown(struct dst_entry *,
86                                        struct net_device *dev, int how);
87 static int               ip6_dst_gc(struct dst_ops *ops);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int              ip6_pkt_prohibit(struct sk_buff *skb);
92 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95                                            struct sk_buff *skb, u32 mtu);
96 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97                                         struct sk_buff *skb);
98 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
103                                            const struct in6_addr *prefix, int prefixlen,
104                                            const struct in6_addr *gwaddr, unsigned int pref);
105 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
106                                            const struct in6_addr *prefix, int prefixlen,
107                                            const struct in6_addr *gwaddr);
108 #endif
109
110 struct uncached_list {
111         spinlock_t              lock;
112         struct list_head        head;
113 };
114
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121         rt->dst.flags |= DST_NOCACHE;
122         rt->rt6i_uncached_list = ul;
123
124         spin_lock_bh(&ul->lock);
125         list_add_tail(&rt->rt6i_uncached, &ul->head);
126         spin_unlock_bh(&ul->lock);
127 }
128
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131         if (!list_empty(&rt->rt6i_uncached)) {
132                 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134                 spin_lock_bh(&ul->lock);
135                 list_del(&rt->rt6i_uncached);
136                 spin_unlock_bh(&ul->lock);
137         }
138 }
139
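/* A device is going away: repoint any uncached (DST_NOCACHE) routes that
 * still reference it, both rt6i_idev and dst.dev, at the loopback device
 * so the remaining references stay valid.
 */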
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142         struct net_device *loopback_dev = net->loopback_dev;
143         int cpu;
144
145         if (dev == loopback_dev)
146                 return;
147
148         for_each_possible_cpu(cpu) {
149                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150                 struct rt6_info *rt;
151
152                 spin_lock_bh(&ul->lock);
153                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154                         struct inet6_dev *rt_idev = rt->rt6i_idev;
155                         struct net_device *rt_dev = rt->dst.dev;
156
157                         if (rt_idev->dev == dev) {
158                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
159                                 in6_dev_put(rt_idev);
160                         }
161
162                         if (rt_dev == dev) {
163                                 rt->dst.dev = loopback_dev;
164                                 dev_hold(rt->dst.dev);
165                                 dev_put(rt_dev);
166                         }
167                 }
168                 spin_unlock_bh(&ul->lock);
169         }
170 }
171
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174         return dst_metrics_write_ptr(rt->dst.from);
175 }
176
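/* Copy-on-write metrics: RTF_PCPU clones write through to their parent's
 * (dst.from) metrics, RTF_CACHE clones never get a writable copy, and all
 * other routes fall back to the generic copy-on-write path.
 */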
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179         struct rt6_info *rt = (struct rt6_info *)dst;
180
181         if (rt->rt6i_flags & RTF_PCPU)
182                 return rt6_pcpu_cow_metrics(rt);
183         else if (rt->rt6i_flags & RTF_CACHE)
184                 return NULL;
185         else
186                 return dst_cow_metrics_generic(dst, old);
187 }
188
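/* Key used for neighbour lookups: the route's gateway when one is set,
 * otherwise the destination address of the skb (or the caller's daddr).
 */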
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190                                              struct sk_buff *skb,
191                                              const void *daddr)
192 {
193         struct in6_addr *p = &rt->rt6i_gateway;
194
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203                                           struct sk_buff *skb,
204                                           const void *daddr)
205 {
206         struct rt6_info *rt = (struct rt6_info *) dst;
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(rt, skb, daddr);
210         n = __ipv6_neigh_lookup(dst->dev, daddr);
211         if (n)
212                 return n;
213         return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215
216 static struct dst_ops ip6_dst_ops_template = {
217         .family                 =       AF_INET6,
218         .gc                     =       ip6_dst_gc,
219         .gc_thresh              =       1024,
220         .check                  =       ip6_dst_check,
221         .default_advmss         =       ip6_default_advmss,
222         .mtu                    =       ip6_mtu,
223         .cow_metrics            =       ipv6_cow_metrics,
224         .destroy                =       ip6_dst_destroy,
225         .ifdown                 =       ip6_dst_ifdown,
226         .negative_advice        =       ip6_negative_advice,
227         .link_failure           =       ip6_link_failure,
228         .update_pmtu            =       ip6_rt_update_pmtu,
229         .redirect               =       rt6_do_redirect,
230         .local_out              =       __ip6_local_out,
231         .neigh_lookup           =       ip6_neigh_lookup,
232 };
233
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238         return mtu ? : dst->dev->mtu;
239 }
240
241 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242                                          struct sk_buff *skb, u32 mtu)
243 {
244 }
245
246 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
247                                       struct sk_buff *skb)
248 {
249 }
250
251 static struct dst_ops ip6_dst_blackhole_ops = {
252         .family                 =       AF_INET6,
253         .destroy                =       ip6_dst_destroy,
254         .check                  =       ip6_dst_check,
255         .mtu                    =       ip6_blackhole_mtu,
256         .default_advmss         =       ip6_default_advmss,
257         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
258         .redirect               =       ip6_rt_blackhole_redirect,
259         .cow_metrics            =       dst_cow_metrics_generic,
260         .neigh_lookup           =       ip6_neigh_lookup,
261 };
262
263 static const u32 ip6_template_metrics[RTAX_MAX] = {
264         [RTAX_HOPLIMIT - 1] = 0,
265 };
266
267 static const struct rt6_info ip6_null_entry_template = {
268         .dst = {
269                 .__refcnt       = ATOMIC_INIT(1),
270                 .__use          = 1,
271                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
272                 .error          = -ENETUNREACH,
273                 .input          = ip6_pkt_discard,
274                 .output         = ip6_pkt_discard_out,
275         },
276         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
277         .rt6i_protocol  = RTPROT_KERNEL,
278         .rt6i_metric    = ~(u32) 0,
279         .rt6i_ref       = ATOMIC_INIT(1),
280 };
281
282 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
283
284 static const struct rt6_info ip6_prohibit_entry_template = {
285         .dst = {
286                 .__refcnt       = ATOMIC_INIT(1),
287                 .__use          = 1,
288                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
289                 .error          = -EACCES,
290                 .input          = ip6_pkt_prohibit,
291                 .output         = ip6_pkt_prohibit_out,
292         },
293         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
294         .rt6i_protocol  = RTPROT_KERNEL,
295         .rt6i_metric    = ~(u32) 0,
296         .rt6i_ref       = ATOMIC_INIT(1),
297 };
298
299 static const struct rt6_info ip6_blk_hole_entry_template = {
300         .dst = {
301                 .__refcnt       = ATOMIC_INIT(1),
302                 .__use          = 1,
303                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
304                 .error          = -EINVAL,
305                 .input          = dst_discard,
306                 .output         = dst_discard_out,
307         },
308         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
309         .rt6i_protocol  = RTPROT_KERNEL,
310         .rt6i_metric    = ~(u32) 0,
311         .rt6i_ref       = ATOMIC_INIT(1),
312 };
313
314 #endif
315
316 static void rt6_info_init(struct rt6_info *rt)
317 {
318         struct dst_entry *dst = &rt->dst;
319
320         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
321         INIT_LIST_HEAD(&rt->rt6i_siblings);
322         INIT_LIST_HEAD(&rt->rt6i_uncached);
323 }
324
325 /* allocate dst with ip6_dst_ops */
326 static struct rt6_info *__ip6_dst_alloc(struct net *net,
327                                         struct net_device *dev,
328                                         int flags)
329 {
330         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
331                                         0, DST_OBSOLETE_FORCE_CHK, flags);
332
333         if (rt)
334                 rt6_info_init(rt);
335
336         return rt;
337 }
338
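/* As __ip6_dst_alloc(), but also allocate the per-cpu array that will hold
 * this route's RTF_PCPU clones; the dst is destroyed if that allocation
 * fails.
 */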
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340                                       struct net_device *dev,
341                                       int flags)
342 {
343         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344
345         if (rt) {
346                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347                 if (rt->rt6i_pcpu) {
348                         int cpu;
349
350                         for_each_possible_cpu(cpu) {
351                                 struct rt6_info **p;
352
353                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354                                 /* no one shares rt */
355                                 *p =  NULL;
356                         }
357                 } else {
358                         dst_destroy((struct dst_entry *)rt);
359                         return NULL;
360                 }
361         }
362
363         return rt;
364 }
365
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368         struct rt6_info *rt = (struct rt6_info *)dst;
369         struct dst_entry *from = dst->from;
370         struct inet6_dev *idev;
371
372         dst_destroy_metrics_generic(dst);
373         free_percpu(rt->rt6i_pcpu);
374         rt6_uncached_list_del(rt);
375
376         idev = rt->rt6i_idev;
377         if (idev) {
378                 rt->rt6i_idev = NULL;
379                 in6_dev_put(idev);
380         }
381
382         dst->from = NULL;
383         dst_release(from);
384 }
385
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387                            int how)
388 {
389         struct rt6_info *rt = (struct rt6_info *)dst;
390         struct inet6_dev *idev = rt->rt6i_idev;
391         struct net_device *loopback_dev =
392                 dev_net(dev)->loopback_dev;
393
394         if (dev != loopback_dev) {
395                 if (idev && idev->dev == dev) {
396                         struct inet6_dev *loopback_idev =
397                                 in6_dev_get(loopback_dev);
398                         if (loopback_idev) {
399                                 rt->rt6i_idev = loopback_idev;
400                                 in6_dev_put(idev);
401                         }
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         if (rt->rt6i_flags & RTF_EXPIRES) {
417                 if (time_after(jiffies, rt->dst.expires))
418                         return true;
419         } else if (rt->dst.from) {
420                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
421         }
422         return false;
423 }
424
425 /* Multipath route selection:
426  *   Hash-based function using packet header and flowlabel.
427  * Adapted from fib_info_hashfn()
428  */
429 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
430                                const struct flowi6 *fl6)
431 {
432         return get_hash_from_flowi6(fl6) % candidate_count;
433 }
434
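/* ECMP: hash the flow onto one of @match's siblings.  Index 0 means keep
 * @match itself; a hashed sibling that scores negatively is not used and
 * @match is kept as well.
 */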
435 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
436                                              struct flowi6 *fl6, int oif,
437                                              int strict)
438 {
439         struct rt6_info *sibling, *next_sibling;
440         int route_chosen;
441
442         route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
443         /* Don't change the route if route_chosen == 0
444          * (the siblings list does not include ourselves)
445          */
446         if (route_chosen)
447                 list_for_each_entry_safe(sibling, next_sibling,
448                                 &match->rt6i_siblings, rt6i_siblings) {
449                         route_chosen--;
450                         if (route_chosen == 0) {
451                                 if (rt6_score_route(sibling, oif, strict) < 0)
452                                         break;
453                                 match = sibling;
454                                 break;
455                         }
456                 }
457         return match;
458 }
459
460 /*
461  *      Route lookup. Any table->tb6_lock is implied.
462  */
463
464 static inline struct rt6_info *rt6_device_match(struct net *net,
465                                                     struct rt6_info *rt,
466                                                     const struct in6_addr *saddr,
467                                                     int oif,
468                                                     int flags)
469 {
470         struct rt6_info *local = NULL;
471         struct rt6_info *sprt;
472
473         if (!oif && ipv6_addr_any(saddr))
474                 goto out;
475
476         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
477                 struct net_device *dev = sprt->dst.dev;
478
479                 if (oif) {
480                         if (dev->ifindex == oif)
481                                 return sprt;
482                         if (dev->flags & IFF_LOOPBACK) {
483                                 if (!sprt->rt6i_idev ||
484                                     sprt->rt6i_idev->dev->ifindex != oif) {
485                                         if (flags & RT6_LOOKUP_F_IFACE)
486                                                 continue;
487                                         if (local &&
488                                             local->rt6i_idev->dev->ifindex == oif)
489                                                 continue;
490                                 }
491                                 local = sprt;
492                         }
493                 } else {
494                         if (ipv6_chk_addr(net, saddr, dev,
495                                           flags & RT6_LOOKUP_F_IFACE))
496                                 return sprt;
497                 }
498         }
499
500         if (oif) {
501                 if (local)
502                         return local;
503
504                 if (flags & RT6_LOOKUP_F_IFACE)
505                         return net->ipv6.ip6_null_entry;
506         }
507 out:
508         return rt;
509 }
510
511 #ifdef CONFIG_IPV6_ROUTER_PREF
512 struct __rt6_probe_work {
513         struct work_struct work;
514         struct in6_addr target;
515         struct net_device *dev;
516 };
517
518 static void rt6_probe_deferred(struct work_struct *w)
519 {
520         struct in6_addr mcaddr;
521         struct __rt6_probe_work *work =
522                 container_of(w, struct __rt6_probe_work, work);
523
524         addrconf_addr_solict_mult(&work->target, &mcaddr);
525         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
526         dev_put(work->dev);
527         kfree(work);
528 }
529
530 static void rt6_probe(struct rt6_info *rt)
531 {
532         struct __rt6_probe_work *work;
533         struct neighbour *neigh;
534         /*
535          * Okay, this does not seem to be appropriate
536          * for now; however, we need to check whether it
537          * really is, aka Router Reachability Probing.
538          *
539          * Router Reachability Probe MUST be rate-limited
540          * to no more than one per minute.
541          */
542         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
543                 return;
544         rcu_read_lock_bh();
545         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
546         if (neigh) {
547                 if (neigh->nud_state & NUD_VALID)
548                         goto out;
549
550                 work = NULL;
551                 write_lock(&neigh->lock);
552                 if (!(neigh->nud_state & NUD_VALID) &&
553                     time_after(jiffies,
554                                neigh->updated +
555                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
556                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
557                         if (work)
558                                 __neigh_set_probe_once(neigh);
559                 }
560                 write_unlock(&neigh->lock);
561         } else {
562                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
563         }
564
565         if (work) {
566                 INIT_WORK(&work->work, rt6_probe_deferred);
567                 work->target = rt->rt6i_gateway;
568                 dev_hold(rt->dst.dev);
569                 work->dev = rt->dst.dev;
570                 schedule_work(&work->work);
571         }
572
573 out:
574         rcu_read_unlock_bh();
575 }
576 #else
577 static inline void rt6_probe(struct rt6_info *rt)
578 {
579 }
580 #endif
581
582 /*
583  * Default Router Selection (RFC 2461 6.3.6)
584  */
585 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
586 {
587         struct net_device *dev = rt->dst.dev;
588         if (!oif || dev->ifindex == oif)
589                 return 2;
590         if ((dev->flags & IFF_LOOPBACK) &&
591             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
592                 return 1;
593         return 0;
594 }
595
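/* Next-hop reachability for route scoring: routes without a gateway always
 * succeed; otherwise the neighbour's NUD state decides between SUCCEED,
 * FAIL_PROBE and FAIL_DO_RR.
 */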
596 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
597 {
598         struct neighbour *neigh;
599         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
600
601         if (rt->rt6i_flags & RTF_NONEXTHOP ||
602             !(rt->rt6i_flags & RTF_GATEWAY))
603                 return RT6_NUD_SUCCEED;
604
605         rcu_read_lock_bh();
606         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
607         if (neigh) {
608                 read_lock(&neigh->lock);
609                 if (neigh->nud_state & NUD_VALID)
610                         ret = RT6_NUD_SUCCEED;
611 #ifdef CONFIG_IPV6_ROUTER_PREF
612                 else if (!(neigh->nud_state & NUD_FAILED))
613                         ret = RT6_NUD_SUCCEED;
614                 else
615                         ret = RT6_NUD_FAIL_PROBE;
616 #endif
617                 read_unlock(&neigh->lock);
618         } else {
619                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
620                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
621         }
622         rcu_read_unlock_bh();
623
624         return ret;
625 }
626
627 static int rt6_score_route(struct rt6_info *rt, int oif,
628                            int strict)
629 {
630         int m;
631
632         m = rt6_check_dev(rt, oif);
633         if (!m && (strict & RT6_LOOKUP_F_IFACE))
634                 return RT6_NUD_FAIL_HARD;
635 #ifdef CONFIG_IPV6_ROUTER_PREF
636         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
637 #endif
638         if (strict & RT6_LOOKUP_F_REACHABLE) {
639                 int n = rt6_check_neigh(rt);
640                 if (n < 0)
641                         return n;
642         }
643         return m;
644 }
645
646 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
647                                    int *mpri, struct rt6_info *match,
648                                    bool *do_rr)
649 {
650         int m;
651         bool match_do_rr = false;
652         struct inet6_dev *idev = rt->rt6i_idev;
653         struct net_device *dev = rt->dst.dev;
654
655         if (dev && !netif_carrier_ok(dev) &&
656             idev->cnf.ignore_routes_with_linkdown)
657                 goto out;
658
659         if (rt6_check_expired(rt))
660                 goto out;
661
662         m = rt6_score_route(rt, oif, strict);
663         if (m == RT6_NUD_FAIL_DO_RR) {
664                 match_do_rr = true;
665                 m = 0; /* lowest valid score */
666         } else if (m == RT6_NUD_FAIL_HARD) {
667                 goto out;
668         }
669
670         if (strict & RT6_LOOKUP_F_REACHABLE)
671                 rt6_probe(rt);
672
673         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
674         if (m > *mpri) {
675                 *do_rr = match_do_rr;
676                 *mpri = m;
677                 match = rt;
678         }
679 out:
680         return match;
681 }
682
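/* Scan the routes sharing @metric, starting at @rr_head and wrapping around
 * via fn->leaf, and return the best-scoring one; routes with other metrics
 * are only considered if nothing matched.
 */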
683 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
684                                      struct rt6_info *rr_head,
685                                      u32 metric, int oif, int strict,
686                                      bool *do_rr)
687 {
688         struct rt6_info *rt, *match, *cont;
689         int mpri = -1;
690
691         match = NULL;
692         cont = NULL;
693         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
694                 if (rt->rt6i_metric != metric) {
695                         cont = rt;
696                         break;
697                 }
698
699                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
700         }
701
702         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
703                 if (rt->rt6i_metric != metric) {
704                         cont = rt;
705                         break;
706                 }
707
708                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
709         }
710
711         if (match || !cont)
712                 return match;
713
714         for (rt = cont; rt; rt = rt->dst.rt6_next)
715                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716
717         return match;
718 }
719
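/* Default router selection with round-robin: pick the best route under @fn
 * starting from fn->rr_ptr, and advance rr_ptr when the scoring code asked
 * for round-robin.
 */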
720 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
721 {
722         struct rt6_info *match, *rt0;
723         struct net *net;
724         bool do_rr = false;
725
726         rt0 = fn->rr_ptr;
727         if (!rt0)
728                 fn->rr_ptr = rt0 = fn->leaf;
729
730         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
731                              &do_rr);
732
733         if (do_rr) {
734                 struct rt6_info *next = rt0->dst.rt6_next;
735
736                 /* no entries matched; do round-robin */
737                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
738                         next = fn->leaf;
739
740                 if (next != rt0)
741                         fn->rr_ptr = next;
742         }
743
744         net = dev_net(rt0->dst.dev);
745         return match ? match : net->ipv6.ip6_null_entry;
746 }
747
748 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
749 {
750         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
751 }
752
753 #ifdef CONFIG_IPV6_ROUTE_INFO
754 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
755                   const struct in6_addr *gwaddr)
756 {
757         struct route_info *rinfo = (struct route_info *) opt;
758         struct in6_addr prefix_buf, *prefix;
759         unsigned int pref;
760         unsigned long lifetime;
761         struct rt6_info *rt;
762
763         if (len < sizeof(struct route_info)) {
764                 return -EINVAL;
765         }
766
767         /* Sanity check for prefix_len and length */
768         if (rinfo->length > 3) {
769                 return -EINVAL;
770         } else if (rinfo->prefix_len > 128) {
771                 return -EINVAL;
772         } else if (rinfo->prefix_len > 64) {
773                 if (rinfo->length < 2) {
774                         return -EINVAL;
775                 }
776         } else if (rinfo->prefix_len > 0) {
777                 if (rinfo->length < 1) {
778                         return -EINVAL;
779                 }
780         }
781
782         pref = rinfo->route_pref;
783         if (pref == ICMPV6_ROUTER_PREF_INVALID)
784                 return -EINVAL;
785
786         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
787
788         if (rinfo->length == 3)
789                 prefix = (struct in6_addr *)rinfo->prefix;
790         else {
791                 /* ipv6_addr_prefix() is safe here; prefix_len was validated above */
792                 ipv6_addr_prefix(&prefix_buf,
793                                  (struct in6_addr *)rinfo->prefix,
794                                  rinfo->prefix_len);
795                 prefix = &prefix_buf;
796         }
797
798         if (rinfo->prefix_len == 0)
799                 rt = rt6_get_dflt_router(gwaddr, dev);
800         else
801                 rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
802
803         if (rt && !lifetime) {
804                 ip6_del_rt(rt);
805                 rt = NULL;
806         }
807
808         if (!rt && lifetime)
809                 rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
810         else if (rt)
811                 rt->rt6i_flags = RTF_ROUTEINFO |
812                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
813
814         if (rt) {
815                 if (!addrconf_finite_timeout(lifetime))
816                         rt6_clean_expires(rt);
817                 else
818                         rt6_set_expires(rt, jiffies + HZ * lifetime);
819
820                 ip6_rt_put(rt);
821         }
822         return 0;
823 }
824 #endif
825
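/* Walk back up the fib6 tree from @fn, descending into source-address
 * subtrees where present, until a node that carries routes (RTN_RTINFO) is
 * found; returns NULL once the tree root is reached.
 */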
826 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
827                                         struct in6_addr *saddr)
828 {
829         struct fib6_node *pn;
830         while (1) {
831                 if (fn->fn_flags & RTN_TL_ROOT)
832                         return NULL;
833                 pn = fn->parent;
834                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
835                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
836                 else
837                         fn = pn;
838                 if (fn->fn_flags & RTN_RTINFO)
839                         return fn;
840         }
841 }
842
843 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
844                                              struct fib6_table *table,
845                                              struct flowi6 *fl6, int flags)
846 {
847         struct fib6_node *fn;
848         struct rt6_info *rt;
849
850         read_lock_bh(&table->tb6_lock);
851         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
852 restart:
853         rt = fn->leaf;
854         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
855         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
856                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
857         if (rt == net->ipv6.ip6_null_entry) {
858                 fn = fib6_backtrack(fn, &fl6->saddr);
859                 if (fn)
860                         goto restart;
861         }
862         dst_use(&rt->dst, jiffies);
863         read_unlock_bh(&table->tb6_lock);
864         return rt;
865
866 }
867
868 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
869                                     int flags)
870 {
871         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
872 }
873 EXPORT_SYMBOL_GPL(ip6_route_lookup);
874
875 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
876                             const struct in6_addr *saddr, int oif, int strict)
877 {
878         struct flowi6 fl6 = {
879                 .flowi6_oif = oif,
880                 .daddr = *daddr,
881         };
882         struct dst_entry *dst;
883         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
884
885         if (saddr) {
886                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
887                 flags |= RT6_LOOKUP_F_HAS_SADDR;
888         }
889
890         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
891         if (dst->error == 0)
892                 return (struct rt6_info *) dst;
893
894         dst_release(dst);
895
896         return NULL;
897 }
898 EXPORT_SYMBOL(rt6_lookup);
899
900 /* ip6_ins_rt is called with FREE table->tb6_lock.
901    It takes a new route entry; if the addition fails for any reason, the
902    route is freed. In any case, if the caller does not hold a reference,
903    the route may be destroyed.
904  */
905
906 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
907                         struct mx6_config *mxc)
908 {
909         int err;
910         struct fib6_table *table;
911
912         table = rt->rt6i_table;
913         write_lock_bh(&table->tb6_lock);
914         err = fib6_add(&table->tb6_root, rt, info, mxc);
915         write_unlock_bh(&table->tb6_lock);
916
917         return err;
918 }
919
920 int ip6_ins_rt(struct rt6_info *rt)
921 {
922         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
923         struct mx6_config mxc = { .mx = NULL, };
924
925         return __ip6_ins_rt(rt, &info, &mxc);
926 }
927
928 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
929                                            const struct in6_addr *daddr,
930                                            const struct in6_addr *saddr)
931 {
932         struct rt6_info *rt;
933
934         /*
935          *      Clone the route.
936          */
937
938         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
939                 ort = (struct rt6_info *)ort->dst.from;
940
941         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
942
943         if (!rt)
944                 return NULL;
945
946         ip6_rt_copy_init(rt, ort);
947         rt->rt6i_flags |= RTF_CACHE;
948         rt->rt6i_metric = 0;
949         rt->dst.flags |= DST_HOST;
950         rt->rt6i_dst.addr = *daddr;
951         rt->rt6i_dst.plen = 128;
952
953         if (!rt6_is_gw_or_nonexthop(ort)) {
954                 if (ort->rt6i_dst.plen != 128 &&
955                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
956                         rt->rt6i_flags |= RTF_ANYCAST;
957 #ifdef CONFIG_IPV6_SUBTREES
958                 if (rt->rt6i_src.plen && saddr) {
959                         rt->rt6i_src.addr = *saddr;
960                         rt->rt6i_src.plen = 128;
961                 }
962 #endif
963         }
964
965         return rt;
966 }
967
968 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
969 {
970         struct rt6_info *pcpu_rt;
971
972         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
973                                   rt->dst.dev, rt->dst.flags);
974
975         if (!pcpu_rt)
976                 return NULL;
977         ip6_rt_copy_init(pcpu_rt, rt);
978         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
979         pcpu_rt->rt6i_flags |= RTF_PCPU;
980         return pcpu_rt;
981 }
982
983 /* It should be called with read_lock_bh(&tb6_lock) acquired */
984 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
985 {
986         struct rt6_info *pcpu_rt, **p;
987
988         p = this_cpu_ptr(rt->rt6i_pcpu);
989         pcpu_rt = *p;
990
991         if (pcpu_rt) {
992                 dst_hold(&pcpu_rt->dst);
993                 rt6_dst_from_metrics_check(pcpu_rt);
994         }
995         return pcpu_rt;
996 }
997
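/* Create and install this CPU's RTF_PCPU clone of @rt.  A concurrent
 * installer wins the cmpxchg and its clone is returned instead; if @rt has
 * already been unlinked from the tree, fall back to @rt itself.
 */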
998 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
999 {
1000         struct fib6_table *table = rt->rt6i_table;
1001         struct rt6_info *pcpu_rt, *prev, **p;
1002
1003         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1004         if (!pcpu_rt) {
1005                 struct net *net = dev_net(rt->dst.dev);
1006
1007                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1008                 return net->ipv6.ip6_null_entry;
1009         }
1010
1011         read_lock_bh(&table->tb6_lock);
1012         if (rt->rt6i_pcpu) {
1013                 p = this_cpu_ptr(rt->rt6i_pcpu);
1014                 prev = cmpxchg(p, NULL, pcpu_rt);
1015                 if (prev) {
1016                         /* If someone did it before us, return prev instead */
1017                         dst_destroy(&pcpu_rt->dst);
1018                         pcpu_rt = prev;
1019                 }
1020         } else {
1021                 /* rt has been removed from the fib6 tree
1022                  * before we have a chance to acquire the read_lock.
1023                  * In this case, don't bother to create a pcpu rt
1024                  * since rt is going away anyway.  The next
1025                  * dst_check() will trigger a re-lookup.
1026                  */
1027                 dst_destroy(&pcpu_rt->dst);
1028                 pcpu_rt = rt;
1029         }
1030         dst_hold(&pcpu_rt->dst);
1031         rt6_dst_from_metrics_check(pcpu_rt);
1032         read_unlock_bh(&table->tb6_lock);
1033         return pcpu_rt;
1034 }
1035
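/* Core fib6 lookup used by both the input and output paths.  Returns either
 * the tree entry itself (null entry or RTF_CACHE route), an uncached
 * RTF_CACHE clone for FLOWI_FLAG_KNOWN_NH without a gateway, or this CPU's
 * pcpu clone of the matched route.
 */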
1036 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1037                                       struct flowi6 *fl6, int flags)
1038 {
1039         struct fib6_node *fn, *saved_fn;
1040         struct rt6_info *rt;
1041         int strict = 0;
1042
1043         strict |= flags & RT6_LOOKUP_F_IFACE;
1044         if (net->ipv6.devconf_all->forwarding == 0)
1045                 strict |= RT6_LOOKUP_F_REACHABLE;
1046
1047         read_lock_bh(&table->tb6_lock);
1048
1049         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1050         saved_fn = fn;
1051
1052         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1053                 oif = 0;
1054
1055 redo_rt6_select:
1056         rt = rt6_select(fn, oif, strict);
1057         if (rt->rt6i_nsiblings)
1058                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1059         if (rt == net->ipv6.ip6_null_entry) {
1060                 fn = fib6_backtrack(fn, &fl6->saddr);
1061                 if (fn)
1062                         goto redo_rt6_select;
1063                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1064                         /* also consider unreachable route */
1065                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1066                         fn = saved_fn;
1067                         goto redo_rt6_select;
1068                 }
1069         }
1070
1071
1072         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1073                 dst_use(&rt->dst, jiffies);
1074                 read_unlock_bh(&table->tb6_lock);
1075
1076                 rt6_dst_from_metrics_check(rt);
1077                 return rt;
1078         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1079                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1080                 /* Create a RTF_CACHE clone which will not be
1081                  * owned by the fib6 tree.  It is for the special case where
1082                  * the daddr in the skb during the neighbor look-up is different
1083                  * from the fl6->daddr used to look up the route here.
1084                  */
1085
1086                 struct rt6_info *uncached_rt;
1087
1088                 dst_use(&rt->dst, jiffies);
1089                 read_unlock_bh(&table->tb6_lock);
1090
1091                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1092                 dst_release(&rt->dst);
1093
1094                 if (uncached_rt)
1095                         rt6_uncached_list_add(uncached_rt);
1096                 else
1097                         uncached_rt = net->ipv6.ip6_null_entry;
1098
1099                 dst_hold(&uncached_rt->dst);
1100                 return uncached_rt;
1101
1102         } else {
1103                 /* Get a percpu copy */
1104
1105                 struct rt6_info *pcpu_rt;
1106
1107                 rt->dst.lastuse = jiffies;
1108                 rt->dst.__use++;
1109                 pcpu_rt = rt6_get_pcpu_route(rt);
1110
1111                 if (pcpu_rt) {
1112                         read_unlock_bh(&table->tb6_lock);
1113                 } else {
1114                         /* We have to do the read_unlock first
1115                          * because rt6_make_pcpu_route() may trigger
1116                          * ip6_dst_gc() which will take the write_lock.
1117                          */
1118                         dst_hold(&rt->dst);
1119                         read_unlock_bh(&table->tb6_lock);
1120                         pcpu_rt = rt6_make_pcpu_route(rt);
1121                         dst_release(&rt->dst);
1122                 }
1123
1124                 return pcpu_rt;
1125
1126         }
1127 }
1128
1129 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1130                                             struct flowi6 *fl6, int flags)
1131 {
1132         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1133 }
1134
1135 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1136                                                 struct net_device *dev,
1137                                                 struct flowi6 *fl6, int flags)
1138 {
1139         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1140                 flags |= RT6_LOOKUP_F_IFACE;
1141
1142         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1143 }
1144
1145 void ip6_route_input(struct sk_buff *skb)
1146 {
1147         const struct ipv6hdr *iph = ipv6_hdr(skb);
1148         struct net *net = dev_net(skb->dev);
1149         int flags = RT6_LOOKUP_F_HAS_SADDR;
1150         struct ip_tunnel_info *tun_info;
1151         struct flowi6 fl6 = {
1152                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1153                 .daddr = iph->daddr,
1154                 .saddr = iph->saddr,
1155                 .flowlabel = ip6_flowinfo(iph),
1156                 .flowi6_mark = skb->mark,
1157                 .flowi6_proto = iph->nexthdr,
1158         };
1159
1160         tun_info = skb_tunnel_info(skb);
1161         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1162                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1163         skb_dst_drop(skb);
1164         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1165 }
1166
1167 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1168                                              struct flowi6 *fl6, int flags)
1169 {
1170         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1171 }
1172
1173 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1174                                          struct flowi6 *fl6, int flags)
1175 {
1176         struct dst_entry *dst;
1177         bool any_src;
1178
1179         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1180         if (dst)
1181                 return dst;
1182
1183         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1184
1185         any_src = ipv6_addr_any(&fl6->saddr);
1186         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1187             (fl6->flowi6_oif && any_src))
1188                 flags |= RT6_LOOKUP_F_IFACE;
1189
1190         if (!any_src)
1191                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1192         else if (sk)
1193                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1194
1195         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1198
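/* Clone @dst_orig into a standalone blackhole dst whose input/output simply
 * discard packets; addressing and metrics are copied, the result is never
 * linked into the fib, and @dst_orig is released.
 */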
1199 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1200 {
1201         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1202         struct dst_entry *new = NULL;
1203
1204         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1205         if (rt) {
1206                 rt6_info_init(rt);
1207
1208                 new = &rt->dst;
1209                 new->__use = 1;
1210                 new->input = dst_discard;
1211                 new->output = dst_discard_out;
1212
1213                 dst_copy_metrics(new, &ort->dst);
1214                 rt->rt6i_idev = ort->rt6i_idev;
1215                 if (rt->rt6i_idev)
1216                         in6_dev_hold(rt->rt6i_idev);
1217
1218                 rt->rt6i_gateway = ort->rt6i_gateway;
1219                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1220                 rt->rt6i_metric = 0;
1221
1222                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1223 #ifdef CONFIG_IPV6_SUBTREES
1224                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1225 #endif
1226
1227                 dst_free(new);
1228         }
1229
1230         dst_release(dst_orig);
1231         return new ? new : ERR_PTR(-ENOMEM);
1232 }
1233
1234 /*
1235  *      Destination cache support functions
1236  */
1237
1238 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1239 {
1240         if (rt->dst.from &&
1241             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1242                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1243 }
1244
1245 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1246 {
1247         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1248                 return NULL;
1249
1250         if (rt6_check_expired(rt))
1251                 return NULL;
1252
1253         return &rt->dst;
1254 }
1255
1256 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1257 {
1258         if (!__rt6_check_expired(rt) &&
1259             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1260             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1261                 return &rt->dst;
1262         else
1263                 return NULL;
1264 }
1265
1266 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1267 {
1268         struct rt6_info *rt;
1269
1270         rt = (struct rt6_info *) dst;
1271
1272         /* All IPV6 dsts are created with ->obsolete set to the value
1273          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1274          * into this function always.
1275          */
1276
1277         rt6_dst_from_metrics_check(rt);
1278
1279         if (rt->rt6i_flags & RTF_PCPU ||
1280             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1281                 return rt6_dst_from_check(rt, cookie);
1282         else
1283                 return rt6_check(rt, cookie);
1284 }
1285
1286 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1287 {
1288         struct rt6_info *rt = (struct rt6_info *) dst;
1289
1290         if (rt) {
1291                 if (rt->rt6i_flags & RTF_CACHE) {
1292                         if (rt6_check_expired(rt)) {
1293                                 ip6_del_rt(rt);
1294                                 dst = NULL;
1295                         }
1296                 } else {
1297                         dst_release(dst);
1298                         dst = NULL;
1299                 }
1300         }
1301         return dst;
1302 }
1303
1304 static void ip6_link_failure(struct sk_buff *skb)
1305 {
1306         struct rt6_info *rt;
1307
1308         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1309
1310         rt = (struct rt6_info *) skb_dst(skb);
1311         if (rt) {
1312                 if (rt->rt6i_flags & RTF_CACHE) {
1313                         dst_hold(&rt->dst);
1314                         ip6_del_rt(rt);
1315                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1316                         rt->rt6i_node->fn_sernum = -1;
1317                 }
1318         }
1319 }
1320
1321 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 {
1323         struct net *net = dev_net(rt->dst.dev);
1324
1325         rt->rt6i_flags |= RTF_MODIFIED;
1326         rt->rt6i_pmtu = mtu;
1327         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1328 }
1329
1330 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1331 {
1332         return !(rt->rt6i_flags & RTF_CACHE) &&
1333                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1334 }
1335
1336 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1337                                  const struct ipv6hdr *iph, u32 mtu)
1338 {
1339         struct rt6_info *rt6 = (struct rt6_info *)dst;
1340
1341         if (rt6->rt6i_flags & RTF_LOCAL)
1342                 return;
1343
1344         dst_confirm(dst);
1345         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1346         if (mtu >= dst_mtu(dst))
1347                 return;
1348
1349         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1350                 rt6_do_update_pmtu(rt6, mtu);
1351         } else {
1352                 const struct in6_addr *daddr, *saddr;
1353                 struct rt6_info *nrt6;
1354
1355                 if (iph) {
1356                         daddr = &iph->daddr;
1357                         saddr = &iph->saddr;
1358                 } else if (sk) {
1359                         daddr = &sk->sk_v6_daddr;
1360                         saddr = &inet6_sk(sk)->saddr;
1361                 } else {
1362                         return;
1363                 }
1364                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1365                 if (nrt6) {
1366                         rt6_do_update_pmtu(nrt6, mtu);
1367
1368                         /* ip6_ins_rt(nrt6) will bump the
1369                          * rt6->rt6i_node->fn_sernum
1370                          * which will fail the next rt6_check() and
1371                          * invalidate the sk->sk_dst_cache.
1372                          */
1373                         ip6_ins_rt(nrt6);
1374                 }
1375         }
1376 }
1377
1378 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1379                                struct sk_buff *skb, u32 mtu)
1380 {
1381         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1382 }
1383
1384 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1385                      int oif, u32 mark, kuid_t uid)
1386 {
1387         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1388         struct dst_entry *dst;
1389         struct flowi6 fl6;
1390
1391         memset(&fl6, 0, sizeof(fl6));
1392         fl6.flowi6_oif = oif;
1393         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1394         fl6.daddr = iph->daddr;
1395         fl6.saddr = iph->saddr;
1396         fl6.flowlabel = ip6_flowinfo(iph);
1397         fl6.flowi6_uid = uid;
1398
1399         dst = ip6_route_output(net, NULL, &fl6);
1400         if (!dst->error)
1401                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1402         dst_release(dst);
1403 }
1404 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1405
1406 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1407 {
1408         ip6_update_pmtu(skb, sock_net(sk), mtu,
1409                         sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1410 }
1411 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1412
1413 /* Handle redirects */
1414 struct ip6rd_flowi {
1415         struct flowi6 fl6;
1416         struct in6_addr gateway;
1417 };
1418
1419 static struct rt6_info *__ip6_route_redirect(struct net *net,
1420                                              struct fib6_table *table,
1421                                              struct flowi6 *fl6,
1422                                              int flags)
1423 {
1424         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1425         struct rt6_info *rt;
1426         struct fib6_node *fn;
1427
1428         /* Get the "current" route for this destination and
1429          * check if the redirect has come from the appropriate router.
1430          *
1431          * RFC 4861 specifies that redirects should only be
1432          * accepted if they come from the nexthop to the target.
1433          * Due to the way the routes are chosen, this notion
1434          * is a bit fuzzy and one might need to check all possible
1435          * routes.
1436          */
1437
1438         read_lock_bh(&table->tb6_lock);
1439         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1440 restart:
1441         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1442                 if (rt6_check_expired(rt))
1443                         continue;
1444                 if (rt->dst.error)
1445                         break;
1446                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447                         continue;
1448                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1449                         continue;
1450                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1451                         continue;
1452                 break;
1453         }
1454
1455         if (!rt)
1456                 rt = net->ipv6.ip6_null_entry;
1457         else if (rt->dst.error) {
1458                 rt = net->ipv6.ip6_null_entry;
1459                 goto out;
1460         }
1461
1462         if (rt == net->ipv6.ip6_null_entry) {
1463                 fn = fib6_backtrack(fn, &fl6->saddr);
1464                 if (fn)
1465                         goto restart;
1466         }
1467
1468 out:
1469         dst_hold(&rt->dst);
1470
1471         read_unlock_bh(&table->tb6_lock);
1472
1473         return rt;
1474 };
1475
1476 static struct dst_entry *ip6_route_redirect(struct net *net,
1477                                         const struct flowi6 *fl6,
1478                                         const struct in6_addr *gateway)
1479 {
1480         int flags = RT6_LOOKUP_F_HAS_SADDR;
1481         struct ip6rd_flowi rdfl;
1482
1483         rdfl.fl6 = *fl6;
1484         rdfl.gateway = *gateway;
1485
1486         return fib6_rule_lookup(net, &rdfl.fl6,
1487                                 flags, __ip6_route_redirect);
1488 }
1489
1490 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1491                   kuid_t uid)
1492 {
1493         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1494         struct dst_entry *dst;
1495         struct flowi6 fl6;
1496
1497         memset(&fl6, 0, sizeof(fl6));
1498         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1499         fl6.flowi6_oif = oif;
1500         fl6.flowi6_mark = mark;
1501         fl6.daddr = iph->daddr;
1502         fl6.saddr = iph->saddr;
1503         fl6.flowlabel = ip6_flowinfo(iph);
1504         fl6.flowi6_uid = uid;
1505
1506         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1507         rt6_do_redirect(dst, NULL, skb);
1508         dst_release(dst);
1509 }
1510 EXPORT_SYMBOL_GPL(ip6_redirect);
1511
1512 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1513                             u32 mark)
1514 {
1515         const struct ipv6hdr *iph = ipv6_hdr(skb);
1516         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1517         struct dst_entry *dst;
1518         struct flowi6 fl6;
1519
1520         memset(&fl6, 0, sizeof(fl6));
1521         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1522         fl6.flowi6_oif = oif;
1523         fl6.flowi6_mark = mark;
1524         fl6.daddr = msg->dest;
1525         fl6.saddr = iph->daddr;
1526         fl6.flowi6_uid = sock_net_uid(net, NULL);
1527
1528         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1529         rt6_do_redirect(dst, NULL, skb);
1530         dst_release(dst);
1531 }
1532
1533 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1534 {
1535         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1536                      sk->sk_uid);
1537 }
1538 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1539
1540 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1541 {
1542         struct net_device *dev = dst->dev;
1543         unsigned int mtu = dst_mtu(dst);
1544         struct net *net = dev_net(dev);
1545
1546         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1547
1548         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1549                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1550
1551         /*
1552          * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and the
1553          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1554          * IPV6_MAXPLEN itself is also valid and means: "any MSS,
1555          * rely only on PMTU discovery".
1556          */
1557         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1558                 mtu = IPV6_MAXPLEN;
1559         return mtu;
1560 }
1561
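     /* MTU reported for this dst, in order of preference: the cached path
      * MTU (rt6i_pmtu), the RTAX_MTU metric, or the device's IPv6 MTU
      * (idev->cnf.mtu6), falling back to IPV6_MIN_MTU when no inet6_dev is
      * attached.  The result is always capped at IP6_MAX_MTU.
      */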
1562 static unsigned int ip6_mtu(const struct dst_entry *dst)
1563 {
1564         const struct rt6_info *rt = (const struct rt6_info *)dst;
1565         unsigned int mtu = rt->rt6i_pmtu;
1566         struct inet6_dev *idev;
1567
1568         if (mtu)
1569                 goto out;
1570
1571         mtu = dst_metric_raw(dst, RTAX_MTU);
1572         if (mtu)
1573                 goto out;
1574
1575         mtu = IPV6_MIN_MTU;
1576
1577         rcu_read_lock();
1578         idev = __in6_dev_get(dst->dev);
1579         if (idev)
1580                 mtu = idev->cnf.mtu6;
1581         rcu_read_unlock();
1582
1583 out:
1584         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1585 }
1586
1587 static struct dst_entry *icmp6_dst_gc_list;
1588 static DEFINE_SPINLOCK(icmp6_dst_lock);
1589
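     /* Allocate a throw-away dst for an outgoing ICMPv6/ndisc packet.  The
      * entry is never inserted into the FIB; instead it is chained on
      * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once its refcount
      * drops to zero.
      */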
1590 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1591                                   struct flowi6 *fl6)
1592 {
1593         struct dst_entry *dst;
1594         struct rt6_info *rt;
1595         struct inet6_dev *idev = in6_dev_get(dev);
1596         struct net *net = dev_net(dev);
1597
1598         if (unlikely(!idev))
1599                 return ERR_PTR(-ENODEV);
1600
1601         rt = ip6_dst_alloc(net, dev, 0);
1602         if (unlikely(!rt)) {
1603                 in6_dev_put(idev);
1604                 dst = ERR_PTR(-ENOMEM);
1605                 goto out;
1606         }
1607
1608         rt->dst.flags |= DST_HOST;
1609         rt->dst.output  = ip6_output;
1610         atomic_set(&rt->dst.__refcnt, 1);
1611         rt->rt6i_gateway  = fl6->daddr;
1612         rt->rt6i_dst.addr = fl6->daddr;
1613         rt->rt6i_dst.plen = 128;
1614         rt->rt6i_idev     = idev;
1615         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1616
1617         spin_lock_bh(&icmp6_dst_lock);
1618         rt->dst.next = icmp6_dst_gc_list;
1619         icmp6_dst_gc_list = &rt->dst;
1620         spin_unlock_bh(&icmp6_dst_lock);
1621
1622         fib6_force_start_gc(net);
1623
1624         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1625
1626 out:
1627         return dst;
1628 }
1629
1630 int icmp6_dst_gc(void)
1631 {
1632         struct dst_entry *dst, **pprev;
1633         int more = 0;
1634
1635         spin_lock_bh(&icmp6_dst_lock);
1636         pprev = &icmp6_dst_gc_list;
1637
1638         while ((dst = *pprev) != NULL) {
1639                 if (!atomic_read(&dst->__refcnt)) {
1640                         *pprev = dst->next;
1641                         dst_free(dst);
1642                 } else {
1643                         pprev = &dst->next;
1644                         ++more;
1645                 }
1646         }
1647
1648         spin_unlock_bh(&icmp6_dst_lock);
1649
1650         return more;
1651 }
1652
1653 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1654                             void *arg)
1655 {
1656         struct dst_entry *dst, **pprev;
1657
1658         spin_lock_bh(&icmp6_dst_lock);
1659         pprev = &icmp6_dst_gc_list;
1660         while ((dst = *pprev) != NULL) {
1661                 struct rt6_info *rt = (struct rt6_info *) dst;
1662                 if (func(rt, arg)) {
1663                         *pprev = dst->next;
1664                         dst_free(dst);
1665                 } else {
1666                         pprev = &dst->next;
1667                 }
1668         }
1669         spin_unlock_bh(&icmp6_dst_lock);
1670 }
1671
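     /* dst_ops garbage collector for the IPv6 routing cache.  The scan is
      * skipped when the previous run was less than ip6_rt_gc_min_interval
      * ago and the table holds no more than ip6_rt_max_size entries;
      * otherwise fib6_run_gc() is invoked with an expiry argument
      * (ip6_rt_gc_expire) that ramps up across runs and is decayed by
      * ip6_rt_gc_elasticity.  A non-zero return tells the caller the table
      * is still over ip6_rt_max_size.
      */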
1672 static int ip6_dst_gc(struct dst_ops *ops)
1673 {
1674         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1675         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1676         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1677         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1678         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1679         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1680         int entries;
1681
1682         entries = dst_entries_get_fast(ops);
1683         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1684             entries <= rt_max_size)
1685                 goto out;
1686
1687         net->ipv6.ip6_rt_gc_expire++;
1688         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1689         entries = dst_entries_get_slow(ops);
1690         if (entries < ops->gc_thresh)
1691                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1692 out:
1693         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1694         return entries > rt_max_size;
1695 }
1696
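     /* Convert the RTA_METRICS nest from a fib6_config into the flat
      * mx6_config array consumed by __ip6_ins_rt().  RTAX_CC_ALGO names are
      * translated to a TCP congestion-control key and RTAX_HOPLIMIT is
      * clamped to 255.  For example, a nest carrying RTAX_MTU = 1280 ends up
      * as mxc->mx[RTAX_MTU - 1] = 1280 with the matching bit set in
      * mxc->mx_valid.
      */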
1697 static int ip6_convert_metrics(struct mx6_config *mxc,
1698                                const struct fib6_config *cfg)
1699 {
1700         bool ecn_ca = false;
1701         struct nlattr *nla;
1702         int remaining;
1703         u32 *mp;
1704
1705         if (!cfg->fc_mx)
1706                 return 0;
1707
1708         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1709         if (unlikely(!mp))
1710                 return -ENOMEM;
1711
1712         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1713                 int type = nla_type(nla);
1714                 u32 val;
1715
1716                 if (!type)
1717                         continue;
1718                 if (unlikely(type > RTAX_MAX))
1719                         goto err;
1720
1721                 if (type == RTAX_CC_ALGO) {
1722                         char tmp[TCP_CA_NAME_MAX];
1723
1724                         nla_strlcpy(tmp, nla, sizeof(tmp));
1725                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1726                         if (val == TCP_CA_UNSPEC)
1727                                 goto err;
1728                 } else {
1729                         val = nla_get_u32(nla);
1730                 }
1731                 if (type == RTAX_HOPLIMIT && val > 255)
1732                         val = 255;
1733                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1734                         goto err;
1735
1736                 mp[type - 1] = val;
1737                 __set_bit(type - 1, mxc->mx_valid);
1738         }
1739
1740         if (ecn_ca) {
1741                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1742                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1743         }
1744
1745         mxc->mx = mp;
1746         return 0;
1747  err:
1748         kfree(mp);
1749         return -EINVAL;
1750 }
1751
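     /* Build (but do not insert) a rt6_info from a fib6_config: resolve the
      * output device, table and lwtunnel state, validate the gateway, and
      * promote loopback/reject configurations to reject routes.  The caller
      * inserts the result with __ip6_ins_rt(); on failure the half-built
      * route is freed and an ERR_PTR() is returned.
      */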
1752 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1753 {
1754         struct net *net = cfg->fc_nlinfo.nl_net;
1755         struct rt6_info *rt = NULL;
1756         struct net_device *dev = NULL;
1757         struct inet6_dev *idev = NULL;
1758         struct fib6_table *table;
1759         int addr_type;
1760         int err = -EINVAL;
1761
1762         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1763                 goto out;
1764 #ifndef CONFIG_IPV6_SUBTREES
1765         if (cfg->fc_src_len)
1766                 goto out;
1767 #endif
1768         if (cfg->fc_ifindex) {
1769                 err = -ENODEV;
1770                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1771                 if (!dev)
1772                         goto out;
1773                 idev = in6_dev_get(dev);
1774                 if (!idev)
1775                         goto out;
1776         }
1777
1778         if (cfg->fc_metric == 0)
1779                 cfg->fc_metric = IP6_RT_PRIO_USER;
1780
1781         err = -ENOBUFS;
1782         if (cfg->fc_nlinfo.nlh &&
1783             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1784                 table = fib6_get_table(net, cfg->fc_table);
1785                 if (!table) {
1786                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1787                         table = fib6_new_table(net, cfg->fc_table);
1788                 }
1789         } else {
1790                 table = fib6_new_table(net, cfg->fc_table);
1791         }
1792
1793         if (!table)
1794                 goto out;
1795
1796         rt = ip6_dst_alloc(net, NULL,
1797                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1798
1799         if (!rt) {
1800                 err = -ENOMEM;
1801                 goto out;
1802         }
1803
1804         if (cfg->fc_flags & RTF_EXPIRES)
1805                 rt6_set_expires(rt, jiffies +
1806                                 clock_t_to_jiffies(cfg->fc_expires));
1807         else
1808                 rt6_clean_expires(rt);
1809
1810         if (cfg->fc_protocol == RTPROT_UNSPEC)
1811                 cfg->fc_protocol = RTPROT_BOOT;
1812         rt->rt6i_protocol = cfg->fc_protocol;
1813
1814         addr_type = ipv6_addr_type(&cfg->fc_dst);
1815
1816         if (addr_type & IPV6_ADDR_MULTICAST)
1817                 rt->dst.input = ip6_mc_input;
1818         else if (cfg->fc_flags & RTF_LOCAL)
1819                 rt->dst.input = ip6_input;
1820         else
1821                 rt->dst.input = ip6_forward;
1822
1823         rt->dst.output = ip6_output;
1824
1825         if (cfg->fc_encap) {
1826                 struct lwtunnel_state *lwtstate;
1827
1828                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1829                                            cfg->fc_encap, AF_INET6, cfg,
1830                                            &lwtstate);
1831                 if (err)
1832                         goto out;
1833                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1834                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1835                         rt->dst.lwtstate->orig_output = rt->dst.output;
1836                         rt->dst.output = lwtunnel_output;
1837                 }
1838                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1839                         rt->dst.lwtstate->orig_input = rt->dst.input;
1840                         rt->dst.input = lwtunnel_input;
1841                 }
1842         }
1843
1844         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1845         rt->rt6i_dst.plen = cfg->fc_dst_len;
1846         if (rt->rt6i_dst.plen == 128)
1847                 rt->dst.flags |= DST_HOST;
1848
1849 #ifdef CONFIG_IPV6_SUBTREES
1850         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1851         rt->rt6i_src.plen = cfg->fc_src_len;
1852 #endif
1853
1854         rt->rt6i_metric = cfg->fc_metric;
1855
1856         /* We cannot add true routes via loopback here; they would result
1857            in the kernel looping, so promote them to reject routes.
1858          */
1859         if ((cfg->fc_flags & RTF_REJECT) ||
1860             (dev && (dev->flags & IFF_LOOPBACK) &&
1861              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1862              !(cfg->fc_flags & RTF_LOCAL))) {
1863                 /* hold loopback dev/idev if we haven't done so. */
1864                 if (dev != net->loopback_dev) {
1865                         if (dev) {
1866                                 dev_put(dev);
1867                                 in6_dev_put(idev);
1868                         }
1869                         dev = net->loopback_dev;
1870                         dev_hold(dev);
1871                         idev = in6_dev_get(dev);
1872                         if (!idev) {
1873                                 err = -ENODEV;
1874                                 goto out;
1875                         }
1876                 }
1877                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1878                 switch (cfg->fc_type) {
1879                 case RTN_BLACKHOLE:
1880                         rt->dst.error = -EINVAL;
1881                         rt->dst.output = dst_discard_out;
1882                         rt->dst.input = dst_discard;
1883                         break;
1884                 case RTN_PROHIBIT:
1885                         rt->dst.error = -EACCES;
1886                         rt->dst.output = ip6_pkt_prohibit_out;
1887                         rt->dst.input = ip6_pkt_prohibit;
1888                         break;
1889                 case RTN_THROW:
1890                 case RTN_UNREACHABLE:
1891                 default:
1892                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1893                                         : (cfg->fc_type == RTN_UNREACHABLE)
1894                                         ? -EHOSTUNREACH : -ENETUNREACH;
1895                         rt->dst.output = ip6_pkt_discard_out;
1896                         rt->dst.input = ip6_pkt_discard;
1897                         break;
1898                 }
1899                 goto install_route;
1900         }
1901
1902         if (cfg->fc_flags & RTF_GATEWAY) {
1903                 const struct in6_addr *gw_addr;
1904                 int gwa_type;
1905
1906                 gw_addr = &cfg->fc_gateway;
1907                 gwa_type = ipv6_addr_type(gw_addr);
1908
1909                 /* If gw_addr is local we will fail to detect this here when the
1910                  * address is still TENTATIVE (DAD in progress): rt6_lookup()
1911                  * will return the already-added prefix route via the interface
1912                  * the prefix route was assigned to, which might be non-loopback.
1913                  */
1914                 err = -EINVAL;
1915                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1916                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1917                                             dev : NULL, 0, 0))
1918                         goto out;
1919
1920                 rt->rt6i_gateway = *gw_addr;
1921
1922                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1923                         struct rt6_info *grt;
1924
1925                         /* IPv6 strictly inhibits using non-link-local
1926                            addresses as the nexthop address.
1927                            Otherwise, the router will not be able to send
1928                            redirects.  That is usually right, but in some
1929                            (rare!) circumstances (SIT, PtP, NBMA NOARP links)
1930                            it is handy to allow some exceptions. --ANK
1931                          */
1932                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1933                                 goto out;
1934
1935                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1936
1937                         err = -EHOSTUNREACH;
1938                         if (!grt)
1939                                 goto out;
1940                         if (dev) {
1941                                 if (dev != grt->dst.dev) {
1942                                         ip6_rt_put(grt);
1943                                         goto out;
1944                                 }
1945                         } else {
1946                                 dev = grt->dst.dev;
1947                                 idev = grt->rt6i_idev;
1948                                 dev_hold(dev);
1949                                 in6_dev_hold(grt->rt6i_idev);
1950                         }
1951                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1952                                 err = 0;
1953                         ip6_rt_put(grt);
1954
1955                         if (err)
1956                                 goto out;
1957                 }
1958                 err = -EINVAL;
1959                 if (!dev || (dev->flags & IFF_LOOPBACK))
1960                         goto out;
1961         }
1962
1963         err = -ENODEV;
1964         if (!dev)
1965                 goto out;
1966
1967         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1968                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1969                         err = -EINVAL;
1970                         goto out;
1971                 }
1972                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1973                 rt->rt6i_prefsrc.plen = 128;
1974         } else
1975                 rt->rt6i_prefsrc.plen = 0;
1976
1977         rt->rt6i_flags = cfg->fc_flags;
1978
1979 install_route:
1980         rt->dst.dev = dev;
1981         rt->rt6i_idev = idev;
1982         rt->rt6i_table = table;
1983
1984         cfg->fc_nlinfo.nl_net = dev_net(dev);
1985
1986         return rt;
1987 out:
1988         if (dev)
1989                 dev_put(dev);
1990         if (idev)
1991                 in6_dev_put(idev);
1992         if (rt)
1993                 dst_free(&rt->dst);
1994
1995         return ERR_PTR(err);
1996 }
1997
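     /* Create a route from @cfg and insert it into the FIB, converting any
      * netlink metrics along the way.  An illustrative sketch of an
      * in-kernel caller (not part of this file; the field values are only
      * examples -- see rt6_add_dflt_router() below for a real one):
      *
      *     struct fib6_config cfg = {
      *             .fc_table   = RT6_TABLE_MAIN,
      *             .fc_metric  = IP6_RT_PRIO_USER,
      *             .fc_ifindex = dev->ifindex,
      *             .fc_dst_len = 64,
      *             .fc_flags   = RTF_UP,
      *             .fc_nlinfo.nl_net = dev_net(dev),
      *     };
      *     cfg.fc_dst = prefix;            // some struct in6_addr
      *     err = ip6_route_add(&cfg);
      */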
1998 int ip6_route_add(struct fib6_config *cfg)
1999 {
2000         struct mx6_config mxc = { .mx = NULL, };
2001         struct rt6_info *rt;
2002         int err;
2003
2004         rt = ip6_route_info_create(cfg);
2005         if (IS_ERR(rt)) {
2006                 err = PTR_ERR(rt);
2007                 rt = NULL;
2008                 goto out;
2009         }
2010
2011         err = ip6_convert_metrics(&mxc, cfg);
2012         if (err)
2013                 goto out;
2014
2015         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2016
2017         kfree(mxc.mx);
2018
2019         return err;
2020 out:
2021         if (rt)
2022                 dst_free(&rt->dst);
2023
2024         return err;
2025 }
2026
2027 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2028 {
2029         int err;
2030         struct fib6_table *table;
2031         struct net *net = dev_net(rt->dst.dev);
2032
2033         if (rt == net->ipv6.ip6_null_entry ||
2034             rt->dst.flags & DST_NOCACHE) {
2035                 err = -ENOENT;
2036                 goto out;
2037         }
2038
2039         table = rt->rt6i_table;
2040         write_lock_bh(&table->tb6_lock);
2041         err = fib6_del(rt, info);
2042         write_unlock_bh(&table->tb6_lock);
2043
2044 out:
2045         ip6_rt_put(rt);
2046         return err;
2047 }
2048
2049 int ip6_del_rt(struct rt6_info *rt)
2050 {
2051         struct nl_info info = {
2052                 .nl_net = dev_net(rt->dst.dev),
2053         };
2054         return __ip6_del_rt(rt, &info);
2055 }
2056
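     /* Delete the first route under cfg->fc_table that matches the
      * destination/source prefix and the optional ifindex, gateway and
      * metric constraints in @cfg.  RTF_CACHE entries are only considered
      * when RTF_CACHE is set in cfg->fc_flags.  Returns -ESRCH when nothing
      * matches.
      */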
2057 static int ip6_route_del(struct fib6_config *cfg)
2058 {
2059         struct fib6_table *table;
2060         struct fib6_node *fn;
2061         struct rt6_info *rt;
2062         int err = -ESRCH;
2063
2064         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2065         if (!table)
2066                 return err;
2067
2068         read_lock_bh(&table->tb6_lock);
2069
2070         fn = fib6_locate(&table->tb6_root,
2071                          &cfg->fc_dst, cfg->fc_dst_len,
2072                          &cfg->fc_src, cfg->fc_src_len);
2073
2074         if (fn) {
2075                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2076                         if ((rt->rt6i_flags & RTF_CACHE) &&
2077                             !(cfg->fc_flags & RTF_CACHE))
2078                                 continue;
2079                         if (cfg->fc_ifindex &&
2080                             (!rt->dst.dev ||
2081                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2082                                 continue;
2083                         if (cfg->fc_flags & RTF_GATEWAY &&
2084                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2085                                 continue;
2086                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2087                                 continue;
2088                         dst_hold(&rt->dst);
2089                         read_unlock_bh(&table->tb6_lock);
2090
2091                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2092                 }
2093         }
2094         read_unlock_bh(&table->tb6_lock);
2095
2096         return err;
2097 }
2098
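     /* Process a received ICMPv6 Redirect (RFC 2461 section 8): validate the
      * message and its ndisc options, confirm the old dst, update the
      * neighbour cache entry for the new first hop, then clone an
      * RTF_CACHE|RTF_DYNAMIC route to the destination via
      * ip6_rt_cache_alloc(), insert it and raise a NETEVENT_REDIRECT
      * notification.  A previously cached route for the destination is
      * removed.
      */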
2099 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2100 {
2101         struct netevent_redirect netevent;
2102         struct rt6_info *rt, *nrt = NULL;
2103         struct ndisc_options ndopts;
2104         struct inet6_dev *in6_dev;
2105         struct neighbour *neigh;
2106         struct rd_msg *msg;
2107         int optlen, on_link;
2108         u8 *lladdr;
2109
2110         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2111         optlen -= sizeof(*msg);
2112
2113         if (optlen < 0) {
2114                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2115                 return;
2116         }
2117
2118         msg = (struct rd_msg *)icmp6_hdr(skb);
2119
2120         if (ipv6_addr_is_multicast(&msg->dest)) {
2121                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2122                 return;
2123         }
2124
2125         on_link = 0;
2126         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2127                 on_link = 1;
2128         } else if (ipv6_addr_type(&msg->target) !=
2129                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2130                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2131                 return;
2132         }
2133
2134         in6_dev = __in6_dev_get(skb->dev);
2135         if (!in6_dev)
2136                 return;
2137         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2138                 return;
2139
2140         /* RFC2461 8.1:
2141          *      The IP source address of the Redirect MUST be the same as the current
2142          *      first-hop router for the specified ICMP Destination Address.
2143          */
2144
2145         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2146                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2147                 return;
2148         }
2149
2150         lladdr = NULL;
2151         if (ndopts.nd_opts_tgt_lladdr) {
2152                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2153                                              skb->dev);
2154                 if (!lladdr) {
2155                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2156                         return;
2157                 }
2158         }
2159
2160         rt = (struct rt6_info *) dst;
2161         if (rt->rt6i_flags & RTF_REJECT) {
2162                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2163                 return;
2164         }
2165
2166         /* Redirect received -> path was valid.
2167          * Redirects are sent only in response to data packets,
2168          * so this nexthop is apparently reachable. --ANK
2169          */
2170         dst_confirm(&rt->dst);
2171
2172         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2173         if (!neigh)
2174                 return;
2175
2176         /*
2177          *      We have finally decided to accept it.
2178          */
2179
2180         neigh_update(neigh, lladdr, NUD_STALE,
2181                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2182                      NEIGH_UPDATE_F_OVERRIDE|
2183                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2184                                      NEIGH_UPDATE_F_ISROUTER))
2185                      );
2186
2187         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2188         if (!nrt)
2189                 goto out;
2190
2191         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2192         if (on_link)
2193                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2194
2195         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2196
2197         if (ip6_ins_rt(nrt))
2198                 goto out;
2199
2200         netevent.old = &rt->dst;
2201         netevent.new = &nrt->dst;
2202         netevent.daddr = &msg->dest;
2203         netevent.neigh = neigh;
2204         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2205
2206         if (rt->rt6i_flags & RTF_CACHE) {
2207                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2208                 ip6_del_rt(rt);
2209         }
2210
2211 out:
2212         neigh_release(neigh);
2213 }
2214
2215 /*
2216  *      Misc support functions
2217  */
2218
2219 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2220 {
2221         BUG_ON(from->dst.from);
2222
2223         rt->rt6i_flags &= ~RTF_EXPIRES;
2224         dst_hold(&from->dst);
2225         rt->dst.from = &from->dst;
2226         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2227 }
2228
2229 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2230 {
2231         rt->dst.input = ort->dst.input;
2232         rt->dst.output = ort->dst.output;
2233         rt->rt6i_dst = ort->rt6i_dst;
2234         rt->dst.error = ort->dst.error;
2235         rt->rt6i_idev = ort->rt6i_idev;
2236         if (rt->rt6i_idev)
2237                 in6_dev_hold(rt->rt6i_idev);
2238         rt->dst.lastuse = jiffies;
2239         rt->rt6i_gateway = ort->rt6i_gateway;
2240         rt->rt6i_flags = ort->rt6i_flags;
2241         rt6_set_from(rt, ort);
2242         rt->rt6i_metric = ort->rt6i_metric;
2243 #ifdef CONFIG_IPV6_SUBTREES
2244         rt->rt6i_src = ort->rt6i_src;
2245 #endif
2246         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2247         rt->rt6i_table = ort->rt6i_table;
2248         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2249 }
2250
2251 #ifdef CONFIG_IPV6_ROUTE_INFO
2252 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
2253                                            const struct in6_addr *prefix, int prefixlen,
2254                                            const struct in6_addr *gwaddr)
2255 {
2256         struct fib6_node *fn;
2257         struct rt6_info *rt = NULL;
2258         struct fib6_table *table;
2259
2260         table = fib6_get_table(dev_net(dev),
2261                                addrconf_rt_table(dev, RT6_TABLE_INFO));
2262         if (!table)
2263                 return NULL;
2264
2265         read_lock_bh(&table->tb6_lock);
2266         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2267         if (!fn)
2268                 goto out;
2269
2270         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2271                 if (rt->dst.dev->ifindex != dev->ifindex)
2272                         continue;
2273                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2274                         continue;
2275                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2276                         continue;
2277                 dst_hold(&rt->dst);
2278                 break;
2279         }
2280 out:
2281         read_unlock_bh(&table->tb6_lock);
2282         return rt;
2283 }
2284
2285 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
2286                                            const struct in6_addr *prefix, int prefixlen,
2287                                            const struct in6_addr *gwaddr, unsigned int pref)
2288 {
2289         struct fib6_config cfg = {
2290                 .fc_metric      = IP6_RT_PRIO_USER,
2291                 .fc_ifindex     = dev->ifindex,
2292                 .fc_dst_len     = prefixlen,
2293                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2294                                   RTF_UP | RTF_PREF(pref),
2295                 .fc_nlinfo.portid = 0,
2296                 .fc_nlinfo.nlh = NULL,
2297                 .fc_nlinfo.nl_net = dev_net(dev),
2298         };
2299
2300         cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2301         cfg.fc_dst = *prefix;
2302         cfg.fc_gateway = *gwaddr;
2303
2304         /* We should treat it as a default route if prefix length is 0. */
2305         if (!prefixlen)
2306                 cfg.fc_flags |= RTF_DEFAULT;
2307
2308         ip6_route_add(&cfg);
2309
2310         return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
2311 }
2312 #endif
2313
2314 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2315 {
2316         struct rt6_info *rt;
2317         struct fib6_table *table;
2318
2319         table = fib6_get_table(dev_net(dev),
2320                                addrconf_rt_table(dev, RT6_TABLE_MAIN));
2321         if (!table)
2322                 return NULL;
2323
2324         read_lock_bh(&table->tb6_lock);
2325         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2326                 if (dev == rt->dst.dev &&
2327                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2328                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2329                         break;
2330         }
2331         if (rt)
2332                 dst_hold(&rt->dst);
2333         read_unlock_bh(&table->tb6_lock);
2334         return rt;
2335 }
2336
2337 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2338                                      struct net_device *dev,
2339                                      unsigned int pref)
2340 {
2341         struct fib6_config cfg = {
2342                 .fc_table       = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2343                 .fc_metric      = IP6_RT_PRIO_USER,
2344                 .fc_ifindex     = dev->ifindex,
2345                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2346                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2347                 .fc_nlinfo.portid = 0,
2348                 .fc_nlinfo.nlh = NULL,
2349                 .fc_nlinfo.nl_net = dev_net(dev),
2350         };
2351
2352         cfg.fc_gateway = *gwaddr;
2353
2354         ip6_route_add(&cfg);
2355
2356         return rt6_get_dflt_router(gwaddr, dev);
2357 }
2358
2359
2360 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2361         if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2362             (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2363                 return -1;
2364         return 0;
2365 }
2366
2367 void rt6_purge_dflt_routers(struct net *net)
2368 {
2369         fib6_clean_all(net, rt6_addrconf_purge, NULL);
2370 }
2371
2372 static void rtmsg_to_fib6_config(struct net *net,
2373                                  struct in6_rtmsg *rtmsg,
2374                                  struct fib6_config *cfg)
2375 {
2376         memset(cfg, 0, sizeof(*cfg));
2377
2378         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2379                          : RT6_TABLE_MAIN;
2380         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2381         cfg->fc_metric = rtmsg->rtmsg_metric;
2382         cfg->fc_expires = rtmsg->rtmsg_info;
2383         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2384         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2385         cfg->fc_flags = rtmsg->rtmsg_flags;
2386
2387         cfg->fc_nlinfo.nl_net = net;
2388
2389         cfg->fc_dst = rtmsg->rtmsg_dst;
2390         cfg->fc_src = rtmsg->rtmsg_src;
2391         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2392 }
2393
2394 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2395 {
2396         struct fib6_config cfg;
2397         struct in6_rtmsg rtmsg;
2398         int err;
2399
2400         switch (cmd) {
2401         case SIOCADDRT:         /* Add a route */
2402         case SIOCDELRT:         /* Delete a route */
2403                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2404                         return -EPERM;
2405                 err = copy_from_user(&rtmsg, arg,
2406                                      sizeof(struct in6_rtmsg));
2407                 if (err)
2408                         return -EFAULT;
2409
2410                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2411
2412                 rtnl_lock();
2413                 switch (cmd) {
2414                 case SIOCADDRT:
2415                         err = ip6_route_add(&cfg);
2416                         break;
2417                 case SIOCDELRT:
2418                         err = ip6_route_del(&cfg);
2419                         break;
2420                 default:
2421                         err = -EINVAL;
2422                 }
2423                 rtnl_unlock();
2424
2425                 return err;
2426         }
2427
2428         return -EINVAL;
2429 }
2430
2431 /*
2432  *      Drop the packet on the floor
2433  */
2434
2435 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2436 {
2437         int type;
2438         struct dst_entry *dst = skb_dst(skb);
2439         switch (ipstats_mib_noroutes) {
2440         case IPSTATS_MIB_INNOROUTES:
2441                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2442                 if (type == IPV6_ADDR_ANY) {
2443                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2444                                       IPSTATS_MIB_INADDRERRORS);
2445                         break;
2446                 }
2447                 /* FALLTHROUGH */
2448         case IPSTATS_MIB_OUTNOROUTES:
2449                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2450                               ipstats_mib_noroutes);
2451                 break;
2452         }
2453         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2454         kfree_skb(skb);
2455         return 0;
2456 }
2457
2458 static int ip6_pkt_discard(struct sk_buff *skb)
2459 {
2460         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2461 }
2462
2463 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2464 {
2465         skb->dev = skb_dst(skb)->dev;
2466         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2467 }
2468
2469 static int ip6_pkt_prohibit(struct sk_buff *skb)
2470 {
2471         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2472 }
2473
2474 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2475 {
2476         skb->dev = skb_dst(skb)->dev;
2477         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2478 }
2479
2480 /*
2481  *      Allocate a dst for local (unicast / anycast) address.
2482  */
2483
2484 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2485                                     const struct in6_addr *addr,
2486                                     bool anycast)
2487 {
2488         u32 tb_id;
2489         struct net *net = dev_net(idev->dev);
2490         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2491                                             DST_NOCOUNT);
2492         if (!rt)
2493                 return ERR_PTR(-ENOMEM);
2494
2495         in6_dev_hold(idev);
2496
2497         rt->dst.flags |= DST_HOST;
2498         rt->dst.input = ip6_input;
2499         rt->dst.output = ip6_output;
2500         rt->rt6i_idev = idev;
2501
2502         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2503         if (anycast)
2504                 rt->rt6i_flags |= RTF_ANYCAST;
2505         else
2506                 rt->rt6i_flags |= RTF_LOCAL;
2507
2508         rt->rt6i_gateway  = *addr;
2509         rt->rt6i_dst.addr = *addr;
2510         rt->rt6i_dst.plen = 128;
2511         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2512         rt->rt6i_table = fib6_get_table(net, tb_id);
2513         rt->dst.flags |= DST_NOCACHE;
2514
2515         atomic_set(&rt->dst.__refcnt, 1);
2516
2517         return rt;
2518 }
2519
2520 int ip6_route_get_saddr(struct net *net,
2521                         struct rt6_info *rt,
2522                         const struct in6_addr *daddr,
2523                         unsigned int prefs,
2524                         struct in6_addr *saddr)
2525 {
2526         struct inet6_dev *idev =
2527                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2528         int err = 0;
2529         if (rt && rt->rt6i_prefsrc.plen)
2530                 *saddr = rt->rt6i_prefsrc.addr;
2531         else
2532                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2533                                          daddr, prefs, saddr);
2534         return err;
2535 }
2536
2537 /* Remove a deleted IP address from prefsrc entries. */
2538 struct arg_dev_net_ip {
2539         struct net_device *dev;
2540         struct net *net;
2541         struct in6_addr *addr;
2542 };
2543
2544 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2545 {
2546         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2547         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2548         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2549
2550         if (((void *)rt->dst.dev == dev || !dev) &&
2551             rt != net->ipv6.ip6_null_entry &&
2552             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2553                 /* remove prefsrc entry */
2554                 rt->rt6i_prefsrc.plen = 0;
2555         }
2556         return 0;
2557 }
2558
2559 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2560 {
2561         struct net *net = dev_net(ifp->idev->dev);
2562         struct arg_dev_net_ip adni = {
2563                 .dev = ifp->idev->dev,
2564                 .net = net,
2565                 .addr = &ifp->addr,
2566         };
2567         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2568 }
2569
2570 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2571 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2572
2573 /* Remove routers and update dst entries when a gateway turns into a host. */
2574 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2575 {
2576         struct in6_addr *gateway = (struct in6_addr *)arg;
2577
2578         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2579              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2580              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2581                 return -1;
2582         }
2583         return 0;
2584 }
2585
2586 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2587 {
2588         fib6_clean_all(net, fib6_clean_tohost, gateway);
2589 }
2590
2591 struct arg_dev_net {
2592         struct net_device *dev;
2593         struct net *net;
2594 };
2595
2596 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2597 {
2598         const struct arg_dev_net *adn = arg;
2599         const struct net_device *dev = adn->dev;
2600
2601         if ((rt->dst.dev == dev || !dev) &&
2602             rt != adn->net->ipv6.ip6_null_entry)
2603                 return -1;
2604
2605         return 0;
2606 }
2607
2608 void rt6_ifdown(struct net *net, struct net_device *dev)
2609 {
2610         struct arg_dev_net adn = {
2611                 .dev = dev,
2612                 .net = net,
2613         };
2614
2615         fib6_clean_all(net, fib6_ifdown, &adn);
2616         icmp6_clean_all(fib6_ifdown, &adn);
2617         if (dev)
2618                 rt6_uncached_list_flush_dev(net, dev);
2619 }
2620
2621 struct rt6_mtu_change_arg {
2622         struct net_device *dev;
2623         unsigned int mtu;
2624 };
2625
2626 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2627 {
2628         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2629         struct inet6_dev *idev;
2630
2631         /* In IPv6 PMTU discovery is not optional,
2632            so the RTAX_MTU lock cannot disable it.
2633            We still use this lock to block changes
2634            caused by addrconf/ndisc.
2635         */
2636
2637         idev = __in6_dev_get(arg->dev);
2638         if (!idev)
2639                 return 0;
2640
2641         /* For an administrative MTU increase there is no way to discover
2642            a corresponding IPv6 PMTU increase, so the PMTU must be updated
2643            here.  Since RFC 1981 doesn't cover administrative MTU increases,
2644            updating the PMTU on increase is a MUST (e.g. for jumbo frames).
2645          */
2646         /*
2647            If the new MTU is less than the route PMTU, the new MTU will be
2648            the lowest MTU in the path; update the route PMTU to reflect the
2649            decrease.  If the new MTU is greater than the route PMTU, and the
2650            old MTU was the lowest MTU in the path, update the route PMTU to
2651            reflect the increase.  If another node on the path still has a
2652            lower MTU, its Packet Too Big messages will drive PMTU discovery
2653            back down.
2654          */
2655         if (rt->dst.dev == arg->dev &&
2656             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2657                 if (rt->rt6i_flags & RTF_CACHE) {
2658                         /* For RTF_CACHE with rt6i_pmtu == 0
2659                          * (i.e. a redirected route),
2660                          * the metrics of its rt->dst.from have already
2661                          * been updated.
2662                          */
2663                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2664                                 rt->rt6i_pmtu = arg->mtu;
2665                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2666                            (dst_mtu(&rt->dst) < arg->mtu &&
2667                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2668                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2669                 }
2670         }
2671         return 0;
2672 }
2673
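     /* Called when a device's MTU changes: walk every route in the device's
      * namespace and let rt6_mtu_change_route() above adjust the per-route
      * MTU metric or cached PMTU where appropriate.
      */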
2674 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2675 {
2676         struct rt6_mtu_change_arg arg = {
2677                 .dev = dev,
2678                 .mtu = mtu,
2679         };
2680
2681         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2682 }
2683
2684 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2685         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2686         [RTA_OIF]               = { .type = NLA_U32 },
2687         [RTA_IIF]               = { .type = NLA_U32 },
2688         [RTA_PRIORITY]          = { .type = NLA_U32 },
2689         [RTA_METRICS]           = { .type = NLA_NESTED },
2690         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2691         [RTA_PREF]              = { .type = NLA_U8 },
2692         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2693         [RTA_ENCAP]             = { .type = NLA_NESTED },
2694         [RTA_UID]               = { .type = NLA_U32 },
2695 };
2696
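     /* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
      * fib6_config, validating the attributes against rtm_ipv6_policy above.
      */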
2697 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2698                               struct fib6_config *cfg)
2699 {
2700         struct rtmsg *rtm;
2701         struct nlattr *tb[RTA_MAX+1];
2702         unsigned int pref;
2703         int err;
2704
2705         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2706         if (err < 0)
2707                 goto errout;
2708
2709         err = -EINVAL;
2710         rtm = nlmsg_data(nlh);
2711         memset(cfg, 0, sizeof(*cfg));
2712
2713         cfg->fc_table = rtm->rtm_table;
2714         cfg->fc_dst_len = rtm->rtm_dst_len;
2715         cfg->fc_src_len = rtm->rtm_src_len;
2716         cfg->fc_flags = RTF_UP;
2717         cfg->fc_protocol = rtm->rtm_protocol;
2718         cfg->fc_type = rtm->rtm_type;
2719
2720         if (rtm->rtm_type == RTN_UNREACHABLE ||
2721             rtm->rtm_type == RTN_BLACKHOLE ||
2722             rtm->rtm_type == RTN_PROHIBIT ||
2723             rtm->rtm_type == RTN_THROW)
2724                 cfg->fc_flags |= RTF_REJECT;
2725
2726         if (rtm->rtm_type == RTN_LOCAL)
2727                 cfg->fc_flags |= RTF_LOCAL;
2728
2729         if (rtm->rtm_flags & RTM_F_CLONED)
2730                 cfg->fc_flags |= RTF_CACHE;
2731
2732         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2733         cfg->fc_nlinfo.nlh = nlh;
2734         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2735
2736         if (tb[RTA_GATEWAY]) {
2737                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2738                 cfg->fc_flags |= RTF_GATEWAY;
2739         }
2740
2741         if (tb[RTA_DST]) {
2742                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2743
2744                 if (nla_len(tb[RTA_DST]) < plen)
2745                         goto errout;
2746
2747                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2748         }
2749
2750         if (tb[RTA_SRC]) {
2751                 int plen = (rtm->rtm_src_len + 7) >> 3;
2752
2753                 if (nla_len(tb[RTA_SRC]) < plen)
2754                         goto errout;
2755
2756                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2757         }
2758
2759         if (tb[RTA_PREFSRC])
2760                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2761
2762         if (tb[RTA_OIF])
2763                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2764
2765         if (tb[RTA_PRIORITY])
2766                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2767
2768         if (tb[RTA_METRICS]) {
2769                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2770                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2771         }
2772
2773         if (tb[RTA_TABLE])
2774                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2775
2776         if (tb[RTA_MULTIPATH]) {
2777                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2778                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2779         }
2780
2781         if (tb[RTA_PREF]) {
2782                 pref = nla_get_u8(tb[RTA_PREF]);
2783                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2784                     pref != ICMPV6_ROUTER_PREF_HIGH)
2785                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2786                 cfg->fc_flags |= RTF_PREF(pref);
2787         }
2788
2789         if (tb[RTA_ENCAP])
2790                 cfg->fc_encap = tb[RTA_ENCAP];
2791
2792         if (tb[RTA_ENCAP_TYPE])
2793                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2794
2795         err = 0;
2796 errout:
2797         return err;
2798 }
2799
2800 struct rt6_nh {
2801         struct rt6_info *rt6_info;
2802         struct fib6_config r_cfg;
2803         struct mx6_config mxc;
2804         struct list_head next;
2805 };
2806
2807 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2808 {
2809         struct rt6_nh *nh;
2810
2811         list_for_each_entry(nh, rt6_nh_list, next) {
2812                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2813                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2814                         nh->r_cfg.fc_ifindex);
2815         }
2816 }
2817
2818 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2819                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2820 {
2821         struct rt6_nh *nh;
2822         struct rt6_info *rtnh;
2823         int err = -EEXIST;
2824
2825         list_for_each_entry(nh, rt6_nh_list, next) {
2826                 /* check if rt6_info already exists */
2827                 rtnh = nh->rt6_info;
2828
2829                 if (rtnh->dst.dev == rt->dst.dev &&
2830                     rtnh->rt6i_idev == rt->rt6i_idev &&
2831                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2832                                     &rt->rt6i_gateway))
2833                         return err;
2834         }
2835
2836         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2837         if (!nh)
2838                 return -ENOMEM;
2839         nh->rt6_info = rt;
2840         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2841         if (err) {
2842                 kfree(nh);
2843                 return err;
2844         }
2845         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2846         list_add_tail(&nh->next, rt6_nh_list);
2847
2848         return 0;
2849 }
2850
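     /* Add a multipath route: each rtnexthop in RTA_MULTIPATH is turned into
      * its own rt6_info via ip6_route_info_create() and collected on
      * rt6_nh_list, then the routes are inserted one by one.  If a later
      * insertion fails, the nexthops that were already added are deleted
      * again (add_errout) so the request is not left half-applied.
      */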
2851 static int ip6_route_multipath_add(struct fib6_config *cfg)
2852 {
2853         struct fib6_config r_cfg;
2854         struct rtnexthop *rtnh;
2855         struct rt6_info *rt;
2856         struct rt6_nh *err_nh;
2857         struct rt6_nh *nh, *nh_safe;
2858         int remaining;
2859         int attrlen;
2860         int err = 1;
2861         int nhn = 0;
2862         int replace = (cfg->fc_nlinfo.nlh &&
2863                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2864         LIST_HEAD(rt6_nh_list);
2865
2866         remaining = cfg->fc_mp_len;
2867         rtnh = (struct rtnexthop *)cfg->fc_mp;
2868
2869         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2870          * rt6_info structs per nexthop
2871          */
2872         while (rtnh_ok(rtnh, remaining)) {
2873                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2874                 if (rtnh->rtnh_ifindex)
2875                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2876
2877                 attrlen = rtnh_attrlen(rtnh);
2878                 if (attrlen > 0) {
2879                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2880
2881                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2882                         if (nla) {
2883                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2884                                 r_cfg.fc_flags |= RTF_GATEWAY;
2885                         }
2886                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2887                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2888                         if (nla)
2889                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2890                 }
2891
2892                 rt = ip6_route_info_create(&r_cfg);
2893                 if (IS_ERR(rt)) {
2894                         err = PTR_ERR(rt);
2895                         rt = NULL;
2896                         goto cleanup;
2897                 }
2898
2899                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2900                 if (err) {
2901                         dst_free(&rt->dst);
2902                         goto cleanup;
2903                 }
2904
2905                 rtnh = rtnh_next(rtnh, &remaining);
2906         }
2907
2908         err_nh = NULL;
2909         list_for_each_entry(nh, &rt6_nh_list, next) {
2910                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2911                 /* nh->rt6_info is used or freed at this point, reset to NULL */
2912                 nh->rt6_info = NULL;
2913                 if (err) {
2914                         if (replace && nhn)
2915                                 ip6_print_replace_route_err(&rt6_nh_list);
2916                         err_nh = nh;
2917                         goto add_errout;
2918                 }
2919
2920                 /* Because each route is added as if it were a single route, we
2921                  * clear these flags after the first nexthop: if there is a
2922                  * collision, we have already failed to add the first nexthop
2923                  * (fib6_add_rt2node() rejected it); when replacing, the old
2924                  * nexthops have been replaced by the first new one, and the
2925                  * rest should simply be added to it.
2926                  */
2927                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2928                                                      NLM_F_REPLACE);
2929                 nhn++;
2930         }
2931
2932         goto cleanup;
2933
2934 add_errout:
2935         /* Delete routes that were already added */
2936         list_for_each_entry(nh, &rt6_nh_list, next) {
2937                 if (err_nh == nh)
2938                         break;
2939                 ip6_route_del(&nh->r_cfg);
2940         }
2941
2942 cleanup:
2943         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2944                 if (nh->rt6_info)
2945                         dst_free(&nh->rt6_info->dst);
2946                 kfree(nh->mxc.mx);
2947                 list_del(&nh->next);
2948                 kfree(nh);
2949         }
2950
2951         return err;
2952 }
2953
2954 static int ip6_route_multipath_del(struct fib6_config *cfg)
2955 {
2956         struct fib6_config r_cfg;
2957         struct rtnexthop *rtnh;
2958         int remaining;
2959         int attrlen;
2960         int err = 1, last_err = 0;
2961
2962         remaining = cfg->fc_mp_len;
2963         rtnh = (struct rtnexthop *)cfg->fc_mp;
2964
2965         /* Parse a Multipath Entry */
2966         while (rtnh_ok(rtnh, remaining)) {
2967                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2968                 if (rtnh->rtnh_ifindex)
2969                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2970
2971                 attrlen = rtnh_attrlen(rtnh);
2972                 if (attrlen > 0) {
2973                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2974
2975                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2976                         if (nla) {
2977                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2978                                 r_cfg.fc_flags |= RTF_GATEWAY;
2979                         }
2980                 }
2981                 err = ip6_route_del(&r_cfg);
2982                 if (err)
2983                         last_err = err;
2984
2985                 rtnh = rtnh_next(rtnh, &remaining);
2986         }
2987
2988         return last_err;
2989 }
2990
2991 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2992 {
2993         struct fib6_config cfg;
2994         int err;
2995
2996         err = rtm_to_fib6_config(skb, nlh, &cfg);
2997         if (err < 0)
2998                 return err;
2999
3000         if (cfg.fc_mp)
3001                 return ip6_route_multipath_del(&cfg);
3002         else
3003                 return ip6_route_del(&cfg);
3004 }
3005
3006 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3007 {
3008         struct fib6_config cfg;
3009         int err;
3010
3011         err = rtm_to_fib6_config(skb, nlh, &cfg);
3012         if (err < 0)
3013                 return err;
3014
3015         if (cfg.fc_mp)
3016                 return ip6_route_multipath_add(&cfg);
3017         else
3018                 return ip6_route_add(&cfg);
3019 }
3020
3021 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3022 {
3023         return NLMSG_ALIGN(sizeof(struct rtmsg))
3024                + nla_total_size(16) /* RTA_SRC */
3025                + nla_total_size(16) /* RTA_DST */
3026                + nla_total_size(16) /* RTA_GATEWAY */
3027                + nla_total_size(16) /* RTA_PREFSRC */
3028                + nla_total_size(4) /* RTA_TABLE */
3029                + nla_total_size(4) /* RTA_IIF */
3030                + nla_total_size(4) /* RTA_OIF */
3031                + nla_total_size(4) /* RTA_PRIORITY */
3032                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3033                + nla_total_size(sizeof(struct rta_cacheinfo))
3034                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3035                + nla_total_size(1) /* RTA_PREF */
3036                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3037 }
3038
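     /* Serialize one rt6_info into a routing netlink message.  Reject routes
      * are mapped back to RTN_BLACKHOLE/RTN_PROHIBIT/RTN_THROW/RTN_UNREACHABLE
      * from dst.error; when @prefix is set, non-RTF_PREFIX_RT routes are
      * skipped (returning 1 means "nothing to dump", not an error).
      */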
3039 static int rt6_fill_node(struct net *net,
3040                          struct sk_buff *skb, struct rt6_info *rt,
3041                          struct in6_addr *dst, struct in6_addr *src,
3042                          int iif, int type, u32 portid, u32 seq,
3043                          int prefix, int nowait, unsigned int flags)
3044 {
3045         u32 metrics[RTAX_MAX];
3046         struct rtmsg *rtm;
3047         struct nlmsghdr *nlh;
3048         long expires;
3049         u32 table;
3050
3051         if (prefix) {   /* user wants prefix routes only */
3052                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3053                         /* success since this is not a prefix route */
3054                         return 1;
3055                 }
3056         }
3057
3058         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3059         if (!nlh)
3060                 return -EMSGSIZE;
3061
3062         rtm = nlmsg_data(nlh);
3063         rtm->rtm_family = AF_INET6;
3064         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3065         rtm->rtm_src_len = rt->rt6i_src.plen;
3066         rtm->rtm_tos = 0;
3067         if (rt->rt6i_table)
3068                 table = rt->rt6i_table->tb6_id;
3069         else
3070                 table = RT6_TABLE_UNSPEC;
3071         rtm->rtm_table = table;
3072         if (nla_put_u32(skb, RTA_TABLE, table))
3073                 goto nla_put_failure;
3074         if (rt->rt6i_flags & RTF_REJECT) {
3075                 switch (rt->dst.error) {
3076                 case -EINVAL:
3077                         rtm->rtm_type = RTN_BLACKHOLE;
3078                         break;
3079                 case -EACCES:
3080                         rtm->rtm_type = RTN_PROHIBIT;
3081                         break;
3082                 case -EAGAIN:
3083                         rtm->rtm_type = RTN_THROW;
3084                         break;
3085                 default:
3086                         rtm->rtm_type = RTN_UNREACHABLE;
3087                         break;
3088                 }
3089         }
3090         else if (rt->rt6i_flags & RTF_LOCAL)
3091                 rtm->rtm_type = RTN_LOCAL;
3092         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3093                 rtm->rtm_type = RTN_LOCAL;
3094         else
3095                 rtm->rtm_type = RTN_UNICAST;
3096         rtm->rtm_flags = 0;
3097         if (!netif_carrier_ok(rt->dst.dev)) {
3098                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3099                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3100                         rtm->rtm_flags |= RTNH_F_DEAD;
3101         }
3102         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3103         rtm->rtm_protocol = rt->rt6i_protocol;
3104         if (rt->rt6i_flags & RTF_DYNAMIC)
3105                 rtm->rtm_protocol = RTPROT_REDIRECT;
3106         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3107                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3108                         rtm->rtm_protocol = RTPROT_RA;
3109                 else
3110                         rtm->rtm_protocol = RTPROT_KERNEL;
3111         }
3112
3113         if (rt->rt6i_flags & RTF_CACHE)
3114                 rtm->rtm_flags |= RTM_F_CLONED;
3115
3116         if (dst) {
3117                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3118                         goto nla_put_failure;
3119                 rtm->rtm_dst_len = 128;
3120         } else if (rtm->rtm_dst_len)
3121                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3122                         goto nla_put_failure;
3123 #ifdef CONFIG_IPV6_SUBTREES
3124         if (src) {
3125                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3126                         goto nla_put_failure;
3127                 rtm->rtm_src_len = 128;
3128         } else if (rtm->rtm_src_len &&
3129                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3130                 goto nla_put_failure;
3131 #endif
3132         if (iif) {
3133 #ifdef CONFIG_IPV6_MROUTE
3134                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3135                         int err = ip6mr_get_route(net, skb, rtm, nowait,
3136                                                   portid);
3137
3138                         if (err <= 0) {
3139                                 if (!nowait) {
3140                                         if (err == 0)
3141                                                 return 0;
3142                                         goto nla_put_failure;
3143                                 } else {
3144                                         if (err == -EMSGSIZE)
3145                                                 goto nla_put_failure;
3146                                 }
3147                         }
3148                 } else
3149 #endif
3150                         if (nla_put_u32(skb, RTA_IIF, iif))
3151                                 goto nla_put_failure;
3152         } else if (dst) {
3153                 struct in6_addr saddr_buf;
3154                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3155                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3156                         goto nla_put_failure;
3157         }
3158
3159         if (rt->rt6i_prefsrc.plen) {
3160                 struct in6_addr saddr_buf;
3161                 saddr_buf = rt->rt6i_prefsrc.addr;
3162                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3163                         goto nla_put_failure;
3164         }
3165
3166         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3167         if (rt->rt6i_pmtu)
3168                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3169         if (rtnetlink_put_metrics(skb, metrics) < 0)
3170                 goto nla_put_failure;
3171
3172         if (rt->rt6i_flags & RTF_GATEWAY) {
3173                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3174                         goto nla_put_failure;
3175         }
3176
3177         if (rt->dst.dev &&
3178             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3179                 goto nla_put_failure;
3180         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3181                 goto nla_put_failure;
3182
3183         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3184
3185         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3186                 goto nla_put_failure;
3187
3188         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3189                 goto nla_put_failure;
3190
3191         if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3192                 goto nla_put_failure;
3193
3194         nlmsg_end(skb, nlh);
3195         return 0;
3196
3197 nla_put_failure:
3198         nlmsg_cancel(skb, nlh);
3199         return -EMSGSIZE;
3200 }
3201
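/* fib6 tree walker callback used for route dumps; honours the RTM_F_PREFIX
 * filter requested by the caller.
 */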
3202 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3203 {
3204         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3205         int prefix;
3206
3207         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3208                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3209                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3210         } else
3211                 prefix = 0;
3212
3213         return rt6_fill_node(arg->net,
3214                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3215                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3216                      prefix, 0, NLM_F_MULTI);
3217 }
3218
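/* Handle RTM_GETROUTE: perform an input or output route lookup based on the
 * supplied attributes and unicast the result back to the requester.
 */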
3219 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3220 {
3221         struct net *net = sock_net(in_skb->sk);
3222         struct nlattr *tb[RTA_MAX+1];
3223         struct rt6_info *rt;
3224         struct sk_buff *skb;
3225         struct rtmsg *rtm;
3226         struct flowi6 fl6;
3227         int err, iif = 0, oif = 0;
3228
3229         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3230         if (err < 0)
3231                 goto errout;
3232
3233         err = -EINVAL;
3234         memset(&fl6, 0, sizeof(fl6));
3235
3236         if (tb[RTA_SRC]) {
3237                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3238                         goto errout;
3239
3240                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3241         }
3242
3243         if (tb[RTA_DST]) {
3244                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3245                         goto errout;
3246
3247                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3248         }
3249
3250         if (tb[RTA_IIF])
3251                 iif = nla_get_u32(tb[RTA_IIF]);
3252
3253         if (tb[RTA_OIF])
3254                 oif = nla_get_u32(tb[RTA_OIF]);
3255
3256         if (tb[RTA_MARK])
3257                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3258
3259         if (tb[RTA_UID])
3260                 fl6.flowi6_uid = make_kuid(current_user_ns(),
3261                                            nla_get_u32(tb[RTA_UID]));
3262         else
3263                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3264
3265         if (iif) {
3266                 struct net_device *dev;
3267                 int flags = 0;
3268
3269                 dev = __dev_get_by_index(net, iif);
3270                 if (!dev) {
3271                         err = -ENODEV;
3272                         goto errout;
3273                 }
3274
3275                 fl6.flowi6_iif = iif;
3276
3277                 if (!ipv6_addr_any(&fl6.saddr))
3278                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3279
3280                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3281                                                                flags);
3282         } else {
3283                 fl6.flowi6_oif = oif;
3284
3285                 if (netif_index_is_l3_master(net, oif)) {
3286                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3287                                            FLOWI_FLAG_SKIP_NH_OIF;
3288                 }
3289
3290                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3291         }
3292
3293         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3294         if (!skb) {
3295                 ip6_rt_put(rt);
3296                 err = -ENOBUFS;
3297                 goto errout;
3298         }
3299
3300         /* Reserve room for dummy headers, this skb can pass
3301            through a good chunk of the routing engine.
3302          */
3303         skb_reset_mac_header(skb);
3304         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3305
3306         skb_dst_set(skb, &rt->dst);
3307
3308         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3309                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3310                             nlh->nlmsg_seq, 0, 0, 0);
3311         if (err < 0) {
3312                 kfree_skb(skb);
3313                 goto errout;
3314         }
3315
3316         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3317 errout:
3318         return err;
3319 }
3320
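/* Notify RTNLGRP_IPV6_ROUTE listeners that a route was added or removed. */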
3321 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3322                      unsigned int nlm_flags)
3323 {
3324         struct sk_buff *skb;
3325         struct net *net = info->nl_net;
3326         u32 seq;
3327         int err;
3328
3329         err = -ENOBUFS;
3330         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3331
3332         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3333         if (!skb)
3334                 goto errout;
3335
3336         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3337                                 event, info->portid, seq, 0, 0, nlm_flags);
3338         if (err < 0) {
3339                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3340                 WARN_ON(err == -EMSGSIZE);
3341                 kfree_skb(skb);
3342                 goto errout;
3343         }
3344         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3345                     info->nlh, gfp_any());
3346         return;
3347 errout:
3348         if (err < 0)
3349                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3350 }
3351
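/* When the per-netns loopback device registers, attach it to the special
 * null route (and, with multiple tables, the prohibit and blackhole routes)
 * so they hold a valid device and inet6_dev reference.
 */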
3352 static int ip6_route_dev_notify(struct notifier_block *this,
3353                                 unsigned long event, void *ptr)
3354 {
3355         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3356         struct net *net = dev_net(dev);
3357
3358         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3359                 net->ipv6.ip6_null_entry->dst.dev = dev;
3360                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3361 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3362                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3363                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3364                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3365                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3366 #endif
3367         }
3368
3369         return NOTIFY_OK;
3370 }
3371
3372 /*
3373  *      /proc
3374  */
3375
3376 #ifdef CONFIG_PROC_FS
3377
3378 static const struct file_operations ipv6_route_proc_fops = {
3379         .owner          = THIS_MODULE,
3380         .open           = ipv6_route_open,
3381         .read           = seq_read,
3382         .llseek         = seq_lseek,
3383         .release        = seq_release_net,
3384 };
3385
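/* Emit the /proc/net/rt6_stats line: fib nodes, route nodes, allocated
 * routes, route entries, cached routes, dst entries and discarded routes.
 */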
3386 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3387 {
3388         struct net *net = (struct net *)seq->private;
3389         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3390                    net->ipv6.rt6_stats->fib_nodes,
3391                    net->ipv6.rt6_stats->fib_route_nodes,
3392                    net->ipv6.rt6_stats->fib_rt_alloc,
3393                    net->ipv6.rt6_stats->fib_rt_entries,
3394                    net->ipv6.rt6_stats->fib_rt_cache,
3395                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3396                    net->ipv6.rt6_stats->fib_discarded_routes);
3397
3398         return 0;
3399 }
3400
3401 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3402 {
3403         return single_open_net(inode, file, rt6_stats_seq_show);
3404 }
3405
3406 static const struct file_operations rt6_stats_seq_fops = {
3407         .owner   = THIS_MODULE,
3408         .open    = rt6_stats_seq_open,
3409         .read    = seq_read,
3410         .llseek  = seq_lseek,
3411         .release = single_release_net,
3412 };
3413 #endif  /* CONFIG_PROC_FS */
3414
3415 #ifdef CONFIG_SYSCTL
3416
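/* Write-only handler for the route/flush sysctl: kick the fib6 garbage
 * collector using the configured flush delay.
 */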
3417 static
3418 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3419                               void __user *buffer, size_t *lenp, loff_t *ppos)
3420 {
3421         struct net *net;
3422         int delay;
3423         if (!write)
3424                 return -EINVAL;
3425
3426         net = (struct net *)ctl->extra1;
3427         delay = net->ipv6.sysctl.flush_delay;
3428         proc_dointvec(ctl, write, buffer, lenp, ppos);
3429         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3430         return 0;
3431 }
3432
3433 struct ctl_table ipv6_route_table_template[] = {
3434         {
3435                 .procname       =       "flush",
3436                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3437                 .maxlen         =       sizeof(int),
3438                 .mode           =       0200,
3439                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3440         },
3441         {
3442                 .procname       =       "gc_thresh",
3443                 .data           =       &ip6_dst_ops_template.gc_thresh,
3444                 .maxlen         =       sizeof(int),
3445                 .mode           =       0644,
3446                 .proc_handler   =       proc_dointvec,
3447         },
3448         {
3449                 .procname       =       "max_size",
3450                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3451                 .maxlen         =       sizeof(int),
3452                 .mode           =       0644,
3453                 .proc_handler   =       proc_dointvec,
3454         },
3455         {
3456                 .procname       =       "gc_min_interval",
3457                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3458                 .maxlen         =       sizeof(int),
3459                 .mode           =       0644,
3460                 .proc_handler   =       proc_dointvec_jiffies,
3461         },
3462         {
3463                 .procname       =       "gc_timeout",
3464                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3465                 .maxlen         =       sizeof(int),
3466                 .mode           =       0644,
3467                 .proc_handler   =       proc_dointvec_jiffies,
3468         },
3469         {
3470                 .procname       =       "gc_interval",
3471                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3472                 .maxlen         =       sizeof(int),
3473                 .mode           =       0644,
3474                 .proc_handler   =       proc_dointvec_jiffies,
3475         },
3476         {
3477                 .procname       =       "gc_elasticity",
3478                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3479                 .maxlen         =       sizeof(int),
3480                 .mode           =       0644,
3481                 .proc_handler   =       proc_dointvec,
3482         },
3483         {
3484                 .procname       =       "mtu_expires",
3485                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3486                 .maxlen         =       sizeof(int),
3487                 .mode           =       0644,
3488                 .proc_handler   =       proc_dointvec_jiffies,
3489         },
3490         {
3491                 .procname       =       "min_adv_mss",
3492                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3493                 .maxlen         =       sizeof(int),
3494                 .mode           =       0644,
3495                 .proc_handler   =       proc_dointvec,
3496         },
3497         {
3498                 .procname       =       "gc_min_interval_ms",
3499                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3500                 .maxlen         =       sizeof(int),
3501                 .mode           =       0644,
3502                 .proc_handler   =       proc_dointvec_ms_jiffies,
3503         },
3504         { }
3505 };
3506
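/* Clone the sysctl template for a namespace and point each entry at that
 * namespace's own values.
 */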
3507 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3508 {
3509         struct ctl_table *table;
3510
3511         table = kmemdup(ipv6_route_table_template,
3512                         sizeof(ipv6_route_table_template),
3513                         GFP_KERNEL);
3514
3515         if (table) {
3516                 table[0].data = &net->ipv6.sysctl.flush_delay;
3517                 table[0].extra1 = net;
3518                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3519                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3520                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3521                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3522                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3523                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3524                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3525                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3526                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3527
3528                 /* Don't export sysctls to unprivileged users */
3529                 if (net->user_ns != &init_user_ns)
3530                         table[0].procname = NULL;
3531         }
3532
3533         return table;
3534 }
3535 #endif
3536
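/* Per-namespace setup: copy the dst_ops template, allocate the null entry
 * (plus the prohibit and blackhole entries when multiple routing tables are
 * enabled) and set the per-namespace routing sysctl defaults.
 */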
3537 static int __net_init ip6_route_net_init(struct net *net)
3538 {
3539         int ret = -ENOMEM;
3540
3541         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3542                sizeof(net->ipv6.ip6_dst_ops));
3543
3544         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3545                 goto out_ip6_dst_ops;
3546
3547         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3548                                            sizeof(*net->ipv6.ip6_null_entry),
3549                                            GFP_KERNEL);
3550         if (!net->ipv6.ip6_null_entry)
3551                 goto out_ip6_dst_entries;
3552         net->ipv6.ip6_null_entry->dst.path =
3553                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3554         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3555         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3556                          ip6_template_metrics, true);
3557
3558 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3559         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3560                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3561                                                GFP_KERNEL);
3562         if (!net->ipv6.ip6_prohibit_entry)
3563                 goto out_ip6_null_entry;
3564         net->ipv6.ip6_prohibit_entry->dst.path =
3565                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3566         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3567         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3568                          ip6_template_metrics, true);
3569
3570         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3571                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3572                                                GFP_KERNEL);
3573         if (!net->ipv6.ip6_blk_hole_entry)
3574                 goto out_ip6_prohibit_entry;
3575         net->ipv6.ip6_blk_hole_entry->dst.path =
3576                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3577         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3578         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3579                          ip6_template_metrics, true);
3580 #endif
3581
3582         net->ipv6.sysctl.flush_delay = 0;
3583         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3584         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3585         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3586         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3587         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3588         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3589         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3590
3591         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3592
3593         ret = 0;
3594 out:
3595         return ret;
3596
3597 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3598 out_ip6_prohibit_entry:
3599         kfree(net->ipv6.ip6_prohibit_entry);
3600 out_ip6_null_entry:
3601         kfree(net->ipv6.ip6_null_entry);
3602 #endif
3603 out_ip6_dst_entries:
3604         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3605 out_ip6_dst_ops:
3606         goto out;
3607 }
3608
3609 static void __net_exit ip6_route_net_exit(struct net *net)
3610 {
3611         kfree(net->ipv6.ip6_null_entry);
3612 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3613         kfree(net->ipv6.ip6_prohibit_entry);
3614         kfree(net->ipv6.ip6_blk_hole_entry);
3615 #endif
3616         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3617 }
3618
3619 static int __net_init ip6_route_net_init_late(struct net *net)
3620 {
3621 #ifdef CONFIG_PROC_FS
3622         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3623         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3624 #endif
3625         return 0;
3626 }
3627
3628 static void __net_exit ip6_route_net_exit_late(struct net *net)
3629 {
3630 #ifdef CONFIG_PROC_FS
3631         remove_proc_entry("ipv6_route", net->proc_net);
3632         remove_proc_entry("rt6_stats", net->proc_net);
3633 #endif
3634 }
3635
3636 static struct pernet_operations ip6_route_net_ops = {
3637         .init = ip6_route_net_init,
3638         .exit = ip6_route_net_exit,
3639 };
3640
3641 static int __net_init ipv6_inetpeer_init(struct net *net)
3642 {
3643         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3644
3645         if (!bp)
3646                 return -ENOMEM;
3647         inet_peer_base_init(bp);
3648         net->ipv6.peers = bp;
3649         return 0;
3650 }
3651
3652 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3653 {
3654         struct inet_peer_base *bp = net->ipv6.peers;
3655
3656         net->ipv6.peers = NULL;
3657         inetpeer_invalidate_tree(bp);
3658         kfree(bp);
3659 }
3660
3661 static struct pernet_operations ipv6_inetpeer_ops = {
3662         .init   =       ipv6_inetpeer_init,
3663         .exit   =       ipv6_inetpeer_exit,
3664 };
3665
3666 static struct pernet_operations ip6_route_net_late_ops = {
3667         .init = ip6_route_net_init_late,
3668         .exit = ip6_route_net_exit_late,
3669 };
3670
3671 static struct notifier_block ip6_route_dev_notifier = {
3672         .notifier_call = ip6_route_dev_notify,
3673         .priority = 0,
3674 };
3675
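/* Subsystem init: create the rt6_info slab cache, register the pernet
 * operations, fib6 core, xfrm6, policy rules, rtnetlink handlers and the
 * netdevice notifier, and initialise the per-cpu uncached route lists.
 */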
3676 int __init ip6_route_init(void)
3677 {
3678         int ret;
3679         int cpu;
3680
3681         ret = -ENOMEM;
3682         ip6_dst_ops_template.kmem_cachep =
3683                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3684                                   SLAB_HWCACHE_ALIGN, NULL);
3685         if (!ip6_dst_ops_template.kmem_cachep)
3686                 goto out;
3687
3688         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3689         if (ret)
3690                 goto out_kmem_cache;
3691
3692         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3693         if (ret)
3694                 goto out_dst_entries;
3695
3696         ret = register_pernet_subsys(&ip6_route_net_ops);
3697         if (ret)
3698                 goto out_register_inetpeer;
3699
3700         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3701
3702         /* The loopback device is registered before this portion of code,
3703          * so the loopback reference in rt6_info is not taken automatically;
3704          * do it manually for init_net */
3705         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3706         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3707   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3708         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3709         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3710         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3711         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3712   #endif
3713         ret = fib6_init();
3714         if (ret)
3715                 goto out_register_subsys;
3716
3717         ret = xfrm6_init();
3718         if (ret)
3719                 goto out_fib6_init;
3720
3721         ret = fib6_rules_init();
3722         if (ret)
3723                 goto xfrm6_init;
3724
3725         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3726         if (ret)
3727                 goto fib6_rules_init;
3728
3729         ret = -ENOBUFS;
3730         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3731             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3732             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3733                 goto out_register_late_subsys;
3734
3735         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3736         if (ret)
3737                 goto out_register_late_subsys;
3738
3739         for_each_possible_cpu(cpu) {
3740                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3741
3742                 INIT_LIST_HEAD(&ul->head);
3743                 spin_lock_init(&ul->lock);
3744         }
3745
3746 out:
3747         return ret;
3748
3749 out_register_late_subsys:
3750         unregister_pernet_subsys(&ip6_route_net_late_ops);
3751 fib6_rules_init:
3752         fib6_rules_cleanup();
3753 xfrm6_init:
3754         xfrm6_fini();
3755 out_fib6_init:
3756         fib6_gc_cleanup();
3757 out_register_subsys:
3758         unregister_pernet_subsys(&ip6_route_net_ops);
3759 out_register_inetpeer:
3760         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3761 out_dst_entries:
3762         dst_entries_destroy(&ip6_dst_blackhole_ops);
3763 out_kmem_cache:
3764         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3765         goto out;
3766 }
3767
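/* Tear down everything set up by ip6_route_init(). */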
3768 void ip6_route_cleanup(void)
3769 {
3770         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3771         unregister_pernet_subsys(&ip6_route_net_late_ops);
3772         fib6_rules_cleanup();
3773         xfrm6_fini();
3774         fib6_gc_cleanup();
3775         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3776         unregister_pernet_subsys(&ip6_route_net_ops);
3777         dst_entries_destroy(&ip6_dst_blackhole_ops);
3778         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3779 }