[firefly-linux-kernel-4.4.55.git] net/ipv6/route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
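/* Result of the neighbour (NUD) check used when scoring routes in
 * rt6_check_neigh()/rt6_score_route(): the negative values signal
 * different failure modes (FAIL_HARD: skip the route, FAIL_DO_RR:
 * fall back to round-robin selection, FAIL_PROBE: the gateway's
 * neighbour entry is in a failed state), while RT6_NUD_SUCCEED means
 * the next hop is considered reachable.
 */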
72 enum rt6_nud_state {
73         RT6_NUD_FAIL_HARD = -3,
74         RT6_NUD_FAIL_PROBE = -2,
75         RT6_NUD_FAIL_DO_RR = -1,
76         RT6_NUD_SUCCEED = 1
77 };
78
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int      ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void             ip6_dst_destroy(struct dst_entry *);
85 static void             ip6_dst_ifdown(struct dst_entry *,
86                                        struct net_device *dev, int how);
87 static int               ip6_dst_gc(struct dst_ops *ops);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int              ip6_pkt_prohibit(struct sk_buff *skb);
92 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95                                            struct sk_buff *skb, u32 mtu);
96 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97                                         struct sk_buff *skb);
98 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
103                                            const struct in6_addr *prefix, int prefixlen,
104                                            const struct in6_addr *gwaddr, unsigned int pref);
105 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
106                                            const struct in6_addr *prefix, int prefixlen,
107                                            const struct in6_addr *gwaddr);
108 #endif
109
110 struct uncached_list {
111         spinlock_t              lock;
112         struct list_head        head;
113 };
114
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
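/* Uncached (DST_NOCACHE) routes are kept on a per-cpu list so that
 * rt6_uncached_list_flush_dev() can find them when their device is
 * going away and re-point them at the loopback device.
 */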
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121         rt->dst.flags |= DST_NOCACHE;
122         rt->rt6i_uncached_list = ul;
123
124         spin_lock_bh(&ul->lock);
125         list_add_tail(&rt->rt6i_uncached, &ul->head);
126         spin_unlock_bh(&ul->lock);
127 }
128
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131         if (!list_empty(&rt->rt6i_uncached)) {
132                 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134                 spin_lock_bh(&ul->lock);
135                 list_del(&rt->rt6i_uncached);
136                 spin_unlock_bh(&ul->lock);
137         }
138 }
139
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142         struct net_device *loopback_dev = net->loopback_dev;
143         int cpu;
144
145         if (dev == loopback_dev)
146                 return;
147
148         for_each_possible_cpu(cpu) {
149                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150                 struct rt6_info *rt;
151
152                 spin_lock_bh(&ul->lock);
153                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154                         struct inet6_dev *rt_idev = rt->rt6i_idev;
155                         struct net_device *rt_dev = rt->dst.dev;
156
157                         if (rt_idev->dev == dev) {
158                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
159                                 in6_dev_put(rt_idev);
160                         }
161
162                         if (rt_dev == dev) {
163                                 rt->dst.dev = loopback_dev;
164                                 dev_hold(rt->dst.dev);
165                                 dev_put(rt_dev);
166                         }
167                 }
168                 spin_unlock_bh(&ul->lock);
169         }
170 }
171
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174         return dst_metrics_write_ptr(rt->dst.from);
175 }
176
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179         struct rt6_info *rt = (struct rt6_info *)dst;
180
181         if (rt->rt6i_flags & RTF_PCPU)
182                 return rt6_pcpu_cow_metrics(rt);
183         else if (rt->rt6i_flags & RTF_CACHE)
184                 return NULL;
185         else
186                 return dst_cow_metrics_generic(dst, old);
187 }
188
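/* Pick the address used for the neighbour lookup: the configured
 * gateway if there is one, otherwise the destination of the packet
 * being sent, otherwise the daddr supplied by the caller.
 */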
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190                                              struct sk_buff *skb,
191                                              const void *daddr)
192 {
193         struct in6_addr *p = &rt->rt6i_gateway;
194
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203                                           struct sk_buff *skb,
204                                           const void *daddr)
205 {
206         struct rt6_info *rt = (struct rt6_info *) dst;
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(rt, skb, daddr);
210         n = __ipv6_neigh_lookup(dst->dev, daddr);
211         if (n)
212                 return n;
213         return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215
216 static struct dst_ops ip6_dst_ops_template = {
217         .family                 =       AF_INET6,
218         .gc                     =       ip6_dst_gc,
219         .gc_thresh              =       1024,
220         .check                  =       ip6_dst_check,
221         .default_advmss         =       ip6_default_advmss,
222         .mtu                    =       ip6_mtu,
223         .cow_metrics            =       ipv6_cow_metrics,
224         .destroy                =       ip6_dst_destroy,
225         .ifdown                 =       ip6_dst_ifdown,
226         .negative_advice        =       ip6_negative_advice,
227         .link_failure           =       ip6_link_failure,
228         .update_pmtu            =       ip6_rt_update_pmtu,
229         .redirect               =       rt6_do_redirect,
230         .local_out              =       __ip6_local_out,
231         .neigh_lookup           =       ip6_neigh_lookup,
232 };
233
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238         return mtu ? : dst->dev->mtu;
239 }
240
241 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242                                          struct sk_buff *skb, u32 mtu)
243 {
244 }
245
246 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
247                                       struct sk_buff *skb)
248 {
249 }
250
251 static struct dst_ops ip6_dst_blackhole_ops = {
252         .family                 =       AF_INET6,
253         .destroy                =       ip6_dst_destroy,
254         .check                  =       ip6_dst_check,
255         .mtu                    =       ip6_blackhole_mtu,
256         .default_advmss         =       ip6_default_advmss,
257         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
258         .redirect               =       ip6_rt_blackhole_redirect,
259         .cow_metrics            =       dst_cow_metrics_generic,
260         .neigh_lookup           =       ip6_neigh_lookup,
261 };
262
263 static const u32 ip6_template_metrics[RTAX_MAX] = {
264         [RTAX_HOPLIMIT - 1] = 0,
265 };
266
267 static const struct rt6_info ip6_null_entry_template = {
268         .dst = {
269                 .__refcnt       = ATOMIC_INIT(1),
270                 .__use          = 1,
271                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
272                 .error          = -ENETUNREACH,
273                 .input          = ip6_pkt_discard,
274                 .output         = ip6_pkt_discard_out,
275         },
276         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
277         .rt6i_protocol  = RTPROT_KERNEL,
278         .rt6i_metric    = ~(u32) 0,
279         .rt6i_ref       = ATOMIC_INIT(1),
280 };
281
282 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
283
284 static const struct rt6_info ip6_prohibit_entry_template = {
285         .dst = {
286                 .__refcnt       = ATOMIC_INIT(1),
287                 .__use          = 1,
288                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
289                 .error          = -EACCES,
290                 .input          = ip6_pkt_prohibit,
291                 .output         = ip6_pkt_prohibit_out,
292         },
293         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
294         .rt6i_protocol  = RTPROT_KERNEL,
295         .rt6i_metric    = ~(u32) 0,
296         .rt6i_ref       = ATOMIC_INIT(1),
297 };
298
299 static const struct rt6_info ip6_blk_hole_entry_template = {
300         .dst = {
301                 .__refcnt       = ATOMIC_INIT(1),
302                 .__use          = 1,
303                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
304                 .error          = -EINVAL,
305                 .input          = dst_discard,
306                 .output         = dst_discard_out,
307         },
308         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
309         .rt6i_protocol  = RTPROT_KERNEL,
310         .rt6i_metric    = ~(u32) 0,
311         .rt6i_ref       = ATOMIC_INIT(1),
312 };
313
314 #endif
315
316 static void rt6_info_init(struct rt6_info *rt)
317 {
318         struct dst_entry *dst = &rt->dst;
319
320         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
321         INIT_LIST_HEAD(&rt->rt6i_siblings);
322         INIT_LIST_HEAD(&rt->rt6i_uncached);
323 }
324
325 /* allocate dst with ip6_dst_ops */
326 static struct rt6_info *__ip6_dst_alloc(struct net *net,
327                                         struct net_device *dev,
328                                         int flags)
329 {
330         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
331                                         0, DST_OBSOLETE_FORCE_CHK, flags);
332
333         if (rt)
334                 rt6_info_init(rt);
335
336         return rt;
337 }
338
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340                                       struct net_device *dev,
341                                       int flags)
342 {
343         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344
345         if (rt) {
346                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347                 if (rt->rt6i_pcpu) {
348                         int cpu;
349
350                         for_each_possible_cpu(cpu) {
351                                 struct rt6_info **p;
352
353                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354                                 /* no one shares rt */
355                                 *p =  NULL;
356                         }
357                 } else {
358                         dst_destroy((struct dst_entry *)rt);
359                         return NULL;
360                 }
361         }
362
363         return rt;
364 }
365
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368         struct rt6_info *rt = (struct rt6_info *)dst;
369         struct dst_entry *from = dst->from;
370         struct inet6_dev *idev;
371
372         dst_destroy_metrics_generic(dst);
373         free_percpu(rt->rt6i_pcpu);
374         rt6_uncached_list_del(rt);
375
376         idev = rt->rt6i_idev;
377         if (idev) {
378                 rt->rt6i_idev = NULL;
379                 in6_dev_put(idev);
380         }
381
382         dst->from = NULL;
383         dst_release(from);
384 }
385
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387                            int how)
388 {
389         struct rt6_info *rt = (struct rt6_info *)dst;
390         struct inet6_dev *idev = rt->rt6i_idev;
391         struct net_device *loopback_dev =
392                 dev_net(dev)->loopback_dev;
393
394         if (dev != loopback_dev) {
395                 if (idev && idev->dev == dev) {
396                         struct inet6_dev *loopback_idev =
397                                 in6_dev_get(loopback_dev);
398                         if (loopback_idev) {
399                                 rt->rt6i_idev = loopback_idev;
400                                 in6_dev_put(idev);
401                         }
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         if (rt->rt6i_flags & RTF_EXPIRES) {
417                 if (time_after(jiffies, rt->dst.expires))
418                         return true;
419         } else if (rt->dst.from) {
420                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
421         }
422         return false;
423 }
424
425 /* Multipath route selection:
426  *   Hash-based function using the packet header and flow label.
427  * Adapted from fib_info_hashfn()
428  */
429 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
430                                const struct flowi6 *fl6)
431 {
432         return get_hash_from_flowi6(fl6) % candidate_count;
433 }
434
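/* Multipath selection: hash the flow to pick one of the equal-cost
 * siblings.  Index 0 keeps the route passed in; any other sibling is
 * only used if rt6_score_route() does not reject it.
 */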
435 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
436                                              struct flowi6 *fl6, int oif,
437                                              int strict)
438 {
439         struct rt6_info *sibling, *next_sibling;
440         int route_choosen;
441
442         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
443         /* Don't change the route if route_choosen == 0
444          * (the siblings list does not include ourselves)
445          */
446         if (route_choosen)
447                 list_for_each_entry_safe(sibling, next_sibling,
448                                 &match->rt6i_siblings, rt6i_siblings) {
449                         route_choosen--;
450                         if (route_choosen == 0) {
451                                 if (rt6_score_route(sibling, oif, strict) < 0)
452                                         break;
453                                 match = sibling;
454                                 break;
455                         }
456                 }
457         return match;
458 }
459
460 /*
461  *      Route lookup. Any table->tb6_lock is implied.
462  */
463
464 static inline struct rt6_info *rt6_device_match(struct net *net,
465                                                     struct rt6_info *rt,
466                                                     const struct in6_addr *saddr,
467                                                     int oif,
468                                                     int flags)
469 {
470         struct rt6_info *local = NULL;
471         struct rt6_info *sprt;
472
473         if (!oif && ipv6_addr_any(saddr))
474                 goto out;
475
476         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
477                 struct net_device *dev = sprt->dst.dev;
478
479                 if (oif) {
480                         if (dev->ifindex == oif)
481                                 return sprt;
482                         if (dev->flags & IFF_LOOPBACK) {
483                                 if (!sprt->rt6i_idev ||
484                                     sprt->rt6i_idev->dev->ifindex != oif) {
485                                         if (flags & RT6_LOOKUP_F_IFACE)
486                                                 continue;
487                                         if (local &&
488                                             local->rt6i_idev->dev->ifindex == oif)
489                                                 continue;
490                                 }
491                                 local = sprt;
492                         }
493                 } else {
494                         if (ipv6_chk_addr(net, saddr, dev,
495                                           flags & RT6_LOOKUP_F_IFACE))
496                                 return sprt;
497                 }
498         }
499
500         if (oif) {
501                 if (local)
502                         return local;
503
504                 if (flags & RT6_LOOKUP_F_IFACE)
505                         return net->ipv6.ip6_null_entry;
506         }
507 out:
508         return rt;
509 }
510
511 #ifdef CONFIG_IPV6_ROUTER_PREF
512 struct __rt6_probe_work {
513         struct work_struct work;
514         struct in6_addr target;
515         struct net_device *dev;
516 };
517
518 static void rt6_probe_deferred(struct work_struct *w)
519 {
520         struct in6_addr mcaddr;
521         struct __rt6_probe_work *work =
522                 container_of(w, struct __rt6_probe_work, work);
523
524         addrconf_addr_solict_mult(&work->target, &mcaddr);
525         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
526         dev_put(work->dev);
527         kfree(work);
528 }
529
530 static void rt6_probe(struct rt6_info *rt)
531 {
532         struct __rt6_probe_work *work;
533         struct neighbour *neigh;
534         /*
535          * Okay, this does not seem to be appropriate
536          * for now; however, we need to check whether it
537          * really is so, aka Router Reachability Probing.
538          *
539          * Router Reachability Probe MUST be rate-limited
540          * to no more than one per minute.
541          */
542         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
543                 return;
544         rcu_read_lock_bh();
545         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
546         if (neigh) {
547                 if (neigh->nud_state & NUD_VALID)
548                         goto out;
549
550                 work = NULL;
551                 write_lock(&neigh->lock);
552                 if (!(neigh->nud_state & NUD_VALID) &&
553                     time_after(jiffies,
554                                neigh->updated +
555                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
556                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
557                         if (work)
558                                 __neigh_set_probe_once(neigh);
559                 }
560                 write_unlock(&neigh->lock);
561         } else {
562                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
563         }
564
565         if (work) {
566                 INIT_WORK(&work->work, rt6_probe_deferred);
567                 work->target = rt->rt6i_gateway;
568                 dev_hold(rt->dst.dev);
569                 work->dev = rt->dst.dev;
570                 schedule_work(&work->work);
571         }
572
573 out:
574         rcu_read_unlock_bh();
575 }
576 #else
577 static inline void rt6_probe(struct rt6_info *rt)
578 {
579 }
580 #endif
581
582 /*
583  * Default Router Selection (RFC 2461 6.3.6)
584  */
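/* Score the outgoing device: 2 if it matches the requested oif (or no
 * oif was given), 1 for a loopback route whose inet6_dev matches the
 * oif, 0 otherwise.
 */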
585 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
586 {
587         struct net_device *dev = rt->dst.dev;
588         if (!oif || dev->ifindex == oif)
589                 return 2;
590         if ((dev->flags & IFF_LOOPBACK) &&
591             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
592                 return 1;
593         return 0;
594 }
595
596 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
597 {
598         struct neighbour *neigh;
599         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
600
601         if (rt->rt6i_flags & RTF_NONEXTHOP ||
602             !(rt->rt6i_flags & RTF_GATEWAY))
603                 return RT6_NUD_SUCCEED;
604
605         rcu_read_lock_bh();
606         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
607         if (neigh) {
608                 read_lock(&neigh->lock);
609                 if (neigh->nud_state & NUD_VALID)
610                         ret = RT6_NUD_SUCCEED;
611 #ifdef CONFIG_IPV6_ROUTER_PREF
612                 else if (!(neigh->nud_state & NUD_FAILED))
613                         ret = RT6_NUD_SUCCEED;
614                 else
615                         ret = RT6_NUD_FAIL_PROBE;
616 #endif
617                 read_unlock(&neigh->lock);
618         } else {
619                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
620                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
621         }
622         rcu_read_unlock_bh();
623
624         return ret;
625 }
626
627 static int rt6_score_route(struct rt6_info *rt, int oif,
628                            int strict)
629 {
630         int m;
631
632         m = rt6_check_dev(rt, oif);
633         if (!m && (strict & RT6_LOOKUP_F_IFACE))
634                 return RT6_NUD_FAIL_HARD;
635 #ifdef CONFIG_IPV6_ROUTER_PREF
636         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
637 #endif
638         if (strict & RT6_LOOKUP_F_REACHABLE) {
639                 int n = rt6_check_neigh(rt);
640                 if (n < 0)
641                         return n;
642         }
643         return m;
644 }
645
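/* Compare @rt against the current best @match: expired routes and
 * routes on link-down devices (when ignore_routes_with_linkdown is
 * set) are skipped, a RT6_NUD_FAIL_DO_RR score requests round-robin
 * via *do_rr, and the route with the highest score so far is kept.
 */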
646 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
647                                    int *mpri, struct rt6_info *match,
648                                    bool *do_rr)
649 {
650         int m;
651         bool match_do_rr = false;
652         struct inet6_dev *idev = rt->rt6i_idev;
653         struct net_device *dev = rt->dst.dev;
654
655         if (dev && !netif_carrier_ok(dev) &&
656             idev->cnf.ignore_routes_with_linkdown)
657                 goto out;
658
659         if (rt6_check_expired(rt))
660                 goto out;
661
662         m = rt6_score_route(rt, oif, strict);
663         if (m == RT6_NUD_FAIL_DO_RR) {
664                 match_do_rr = true;
665                 m = 0; /* lowest valid score */
666         } else if (m == RT6_NUD_FAIL_HARD) {
667                 goto out;
668         }
669
670         if (strict & RT6_LOOKUP_F_REACHABLE)
671                 rt6_probe(rt);
672
673         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
674         if (m > *mpri) {
675                 *do_rr = match_do_rr;
676                 *mpri = m;
677                 match = rt;
678         }
679 out:
680         return match;
681 }
682
683 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
684                                      struct rt6_info *rr_head,
685                                      u32 metric, int oif, int strict,
686                                      bool *do_rr)
687 {
688         struct rt6_info *rt, *match, *cont;
689         int mpri = -1;
690
691         match = NULL;
692         cont = NULL;
693         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
694                 if (rt->rt6i_metric != metric) {
695                         cont = rt;
696                         break;
697                 }
698
699                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
700         }
701
702         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
703                 if (rt->rt6i_metric != metric) {
704                         cont = rt;
705                         break;
706                 }
707
708                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
709         }
710
711         if (match || !cont)
712                 return match;
713
714         for (rt = cont; rt; rt = rt->dst.rt6_next)
715                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716
717         return match;
718 }
719
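/* Select the best route from the node's leaf list.  Entries with the
 * same metric as fn->rr_ptr are tried first; the remaining entries are
 * only considered if none of them matched.  When round-robin is
 * requested, fn->rr_ptr is advanced so that the next lookup starts
 * from a different route of the same metric.
 */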
720 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
721 {
722         struct rt6_info *match, *rt0;
723         struct net *net;
724         bool do_rr = false;
725
726         rt0 = fn->rr_ptr;
727         if (!rt0)
728                 fn->rr_ptr = rt0 = fn->leaf;
729
730         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
731                              &do_rr);
732
733         if (do_rr) {
734                 struct rt6_info *next = rt0->dst.rt6_next;
735
736                 /* no entries matched; do round-robin */
737                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
738                         next = fn->leaf;
739
740                 if (next != rt0)
741                         fn->rr_ptr = next;
742         }
743
744         net = dev_net(rt0->dst.dev);
745         return match ? match : net->ipv6.ip6_null_entry;
746 }
747
748 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
749 {
750         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
751 }
752
753 #ifdef CONFIG_IPV6_ROUTE_INFO
754 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
755                   const struct in6_addr *gwaddr)
756 {
757         struct route_info *rinfo = (struct route_info *) opt;
758         struct in6_addr prefix_buf, *prefix;
759         unsigned int pref;
760         unsigned long lifetime;
761         struct rt6_info *rt;
762
763         if (len < sizeof(struct route_info)) {
764                 return -EINVAL;
765         }
766
767         /* Sanity check for prefix_len and length */
768         if (rinfo->length > 3) {
769                 return -EINVAL;
770         } else if (rinfo->prefix_len > 128) {
771                 return -EINVAL;
772         } else if (rinfo->prefix_len > 64) {
773                 if (rinfo->length < 2) {
774                         return -EINVAL;
775                 }
776         } else if (rinfo->prefix_len > 0) {
777                 if (rinfo->length < 1) {
778                         return -EINVAL;
779                 }
780         }
781
782         pref = rinfo->route_pref;
783         if (pref == ICMPV6_ROUTER_PREF_INVALID)
784                 return -EINVAL;
785
786         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
787
788         if (rinfo->length == 3)
789                 prefix = (struct in6_addr *)rinfo->prefix;
790         else {
791                 /* safe: ipv6_addr_prefix() reads only prefix_len bits */
792                 ipv6_addr_prefix(&prefix_buf,
793                                  (struct in6_addr *)rinfo->prefix,
794                                  rinfo->prefix_len);
795                 prefix = &prefix_buf;
796         }
797
798         if (rinfo->prefix_len == 0)
799                 rt = rt6_get_dflt_router(gwaddr, dev);
800         else
801                 rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
802
803         if (rt && !lifetime) {
804                 ip6_del_rt(rt);
805                 rt = NULL;
806         }
807
808         if (!rt && lifetime)
809                 rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
810         else if (rt)
811                 rt->rt6i_flags = RTF_ROUTEINFO |
812                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
813
814         if (rt) {
815                 if (!addrconf_finite_timeout(lifetime))
816                         rt6_clean_expires(rt);
817                 else
818                         rt6_set_expires(rt, jiffies + HZ * lifetime);
819
820                 ip6_rt_put(rt);
821         }
822         return 0;
823 }
824 #endif
825
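/* Walk back up the fib6 tree from @fn, descending into a source
 * routing subtree when one is present, until a node that carries
 * route information (RTN_RTINFO) is found.  Returns NULL once the
 * tree root is reached.
 */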
826 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
827                                         struct in6_addr *saddr)
828 {
829         struct fib6_node *pn;
830         while (1) {
831                 if (fn->fn_flags & RTN_TL_ROOT)
832                         return NULL;
833                 pn = fn->parent;
834                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
835                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
836                 else
837                         fn = pn;
838                 if (fn->fn_flags & RTN_RTINFO)
839                         return fn;
840         }
841 }
842
843 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
844                                              struct fib6_table *table,
845                                              struct flowi6 *fl6, int flags)
846 {
847         struct fib6_node *fn;
848         struct rt6_info *rt;
849
850         read_lock_bh(&table->tb6_lock);
851         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
852 restart:
853         rt = fn->leaf;
854         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
855         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
856                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
857         if (rt == net->ipv6.ip6_null_entry) {
858                 fn = fib6_backtrack(fn, &fl6->saddr);
859                 if (fn)
860                         goto restart;
861         }
862         dst_use(&rt->dst, jiffies);
863         read_unlock_bh(&table->tb6_lock);
864         return rt;
865
866 }
867
868 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
869                                     int flags)
870 {
871         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
872 }
873 EXPORT_SYMBOL_GPL(ip6_route_lookup);
874
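/* Convenience wrapper around fib6_rule_lookup() that returns NULL
 * instead of an error route.  Illustrative usage (not from this file):
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt) {
 *		... use rt->dst ...
 *		ip6_rt_put(rt);
 *	}
 */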
875 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
876                             const struct in6_addr *saddr, int oif, int strict)
877 {
878         struct flowi6 fl6 = {
879                 .flowi6_oif = oif,
880                 .daddr = *daddr,
881         };
882         struct dst_entry *dst;
883         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
884
885         if (saddr) {
886                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
887                 flags |= RT6_LOOKUP_F_HAS_SADDR;
888         }
889
890         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
891         if (dst->error == 0)
892                 return (struct rt6_info *) dst;
893
894         dst_release(dst);
895
896         return NULL;
897 }
898 EXPORT_SYMBOL(rt6_lookup);
899
900 /* ip6_ins_rt is called with table->tb6_lock NOT held (i.e. free).
901    It takes a new route entry; if the addition fails for any reason,
902    the route is freed. In any case, if the caller does not hold a
903    reference, the route may be destroyed.
904  */
905
906 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
907                         struct mx6_config *mxc)
908 {
909         int err;
910         struct fib6_table *table;
911
912         table = rt->rt6i_table;
913         write_lock_bh(&table->tb6_lock);
914         err = fib6_add(&table->tb6_root, rt, info, mxc);
915         write_unlock_bh(&table->tb6_lock);
916
917         return err;
918 }
919
920 int ip6_ins_rt(struct rt6_info *rt)
921 {
922         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
923         struct mx6_config mxc = { .mx = NULL, };
924
925         return __ip6_ins_rt(rt, &info, &mxc);
926 }
927
928 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
929                                            const struct in6_addr *daddr,
930                                            const struct in6_addr *saddr)
931 {
932         struct rt6_info *rt;
933
934         /*
935          *      Clone the route.
936          */
937
938         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
939                 ort = (struct rt6_info *)ort->dst.from;
940
941         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
942
943         if (!rt)
944                 return NULL;
945
946         ip6_rt_copy_init(rt, ort);
947         rt->rt6i_flags |= RTF_CACHE;
948         rt->rt6i_metric = 0;
949         rt->dst.flags |= DST_HOST;
950         rt->rt6i_dst.addr = *daddr;
951         rt->rt6i_dst.plen = 128;
952
953         if (!rt6_is_gw_or_nonexthop(ort)) {
954                 if (ort->rt6i_dst.plen != 128 &&
955                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
956                         rt->rt6i_flags |= RTF_ANYCAST;
957 #ifdef CONFIG_IPV6_SUBTREES
958                 if (rt->rt6i_src.plen && saddr) {
959                         rt->rt6i_src.addr = *saddr;
960                         rt->rt6i_src.plen = 128;
961                 }
962 #endif
963         }
964
965         return rt;
966 }
967
968 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
969 {
970         struct rt6_info *pcpu_rt;
971
972         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
973                                   rt->dst.dev, rt->dst.flags);
974
975         if (!pcpu_rt)
976                 return NULL;
977         ip6_rt_copy_init(pcpu_rt, rt);
978         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
979         pcpu_rt->rt6i_flags |= RTF_PCPU;
980         return pcpu_rt;
981 }
982
983 /* It should be called with read_lock_bh(&tb6_lock) acquired */
984 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
985 {
986         struct rt6_info *pcpu_rt, **p;
987
988         p = this_cpu_ptr(rt->rt6i_pcpu);
989         pcpu_rt = *p;
990
991         if (pcpu_rt) {
992                 dst_hold(&pcpu_rt->dst);
993                 rt6_dst_from_metrics_check(pcpu_rt);
994         }
995         return pcpu_rt;
996 }
997
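/* Allocate a per-cpu clone of @rt and install it with cmpxchg(); if a
 * clone was installed in the meantime, the new one is destroyed and
 * the existing one is returned.  If @rt has already been removed from
 * the fib6 tree, @rt itself is returned instead.
 */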
998 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
999 {
1000         struct fib6_table *table = rt->rt6i_table;
1001         struct rt6_info *pcpu_rt, *prev, **p;
1002
1003         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1004         if (!pcpu_rt) {
1005                 struct net *net = dev_net(rt->dst.dev);
1006
1007                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1008                 return net->ipv6.ip6_null_entry;
1009         }
1010
1011         read_lock_bh(&table->tb6_lock);
1012         if (rt->rt6i_pcpu) {
1013                 p = this_cpu_ptr(rt->rt6i_pcpu);
1014                 prev = cmpxchg(p, NULL, pcpu_rt);
1015                 if (prev) {
1016                         /* If someone did it before us, return prev instead */
1017                         dst_destroy(&pcpu_rt->dst);
1018                         pcpu_rt = prev;
1019                 }
1020         } else {
1021                 /* rt has been removed from the fib6 tree
1022                  * before we have a chance to acquire the read_lock.
1023                  * In this case, don't bother to create a pcpu rt
1024                  * since rt is going away anyway.  The next
1025                  * dst_check() will trigger a re-lookup.
1026                  */
1027                 dst_destroy(&pcpu_rt->dst);
1028                 pcpu_rt = rt;
1029         }
1030         dst_hold(&pcpu_rt->dst);
1031         rt6_dst_from_metrics_check(pcpu_rt);
1032         read_unlock_bh(&table->tb6_lock);
1033         return pcpu_rt;
1034 }
1035
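/* Core policy routing lookup.  Three outcomes are possible: null and
 * RTF_CACHE entries are returned as-is, a FLOWI_FLAG_KNOWN_NH lookup
 * on a route without a gateway gets an uncached RTF_CACHE clone, and
 * everything else is served from the per-cpu route cache.
 */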
1036 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1037                                       struct flowi6 *fl6, int flags)
1038 {
1039         struct fib6_node *fn, *saved_fn;
1040         struct rt6_info *rt;
1041         int strict = 0;
1042
1043         strict |= flags & RT6_LOOKUP_F_IFACE;
1044         if (net->ipv6.devconf_all->forwarding == 0)
1045                 strict |= RT6_LOOKUP_F_REACHABLE;
1046
1047         read_lock_bh(&table->tb6_lock);
1048
1049         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1050         saved_fn = fn;
1051
1052         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1053                 oif = 0;
1054
1055 redo_rt6_select:
1056         rt = rt6_select(fn, oif, strict);
1057         if (rt->rt6i_nsiblings)
1058                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1059         if (rt == net->ipv6.ip6_null_entry) {
1060                 fn = fib6_backtrack(fn, &fl6->saddr);
1061                 if (fn)
1062                         goto redo_rt6_select;
1063                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1064                         /* also consider unreachable route */
1065                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1066                         fn = saved_fn;
1067                         goto redo_rt6_select;
1068                 }
1069         }
1070
1071
1072         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1073                 dst_use(&rt->dst, jiffies);
1074                 read_unlock_bh(&table->tb6_lock);
1075
1076                 rt6_dst_from_metrics_check(rt);
1077                 return rt;
1078         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1079                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1080                 /* Create a RTF_CACHE clone which will not be
1081                  * owned by the fib6 tree.  It is for the special case where
1082                  * the daddr in the skb during the neighbor look-up is different
1083                  * from the fl6->daddr used to look up the route here.
1084                  */
1085
1086                 struct rt6_info *uncached_rt;
1087
1088                 dst_use(&rt->dst, jiffies);
1089                 read_unlock_bh(&table->tb6_lock);
1090
1091                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1092                 dst_release(&rt->dst);
1093
1094                 if (uncached_rt)
1095                         rt6_uncached_list_add(uncached_rt);
1096                 else
1097                         uncached_rt = net->ipv6.ip6_null_entry;
1098
1099                 dst_hold(&uncached_rt->dst);
1100                 return uncached_rt;
1101
1102         } else {
1103                 /* Get a percpu copy */
1104
1105                 struct rt6_info *pcpu_rt;
1106
1107                 rt->dst.lastuse = jiffies;
1108                 rt->dst.__use++;
1109                 pcpu_rt = rt6_get_pcpu_route(rt);
1110
1111                 if (pcpu_rt) {
1112                         read_unlock_bh(&table->tb6_lock);
1113                 } else {
1114                         /* We have to do the read_unlock first
1115                          * because rt6_make_pcpu_route() may trigger
1116                          * ip6_dst_gc() which will take the write_lock.
1117                          */
1118                         dst_hold(&rt->dst);
1119                         read_unlock_bh(&table->tb6_lock);
1120                         pcpu_rt = rt6_make_pcpu_route(rt);
1121                         dst_release(&rt->dst);
1122                 }
1123
1124                 return pcpu_rt;
1125
1126         }
1127 }
1128
1129 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1130                                             struct flowi6 *fl6, int flags)
1131 {
1132         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1133 }
1134
1135 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1136                                                 struct net_device *dev,
1137                                                 struct flowi6 *fl6, int flags)
1138 {
1139         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1140                 flags |= RT6_LOOKUP_F_IFACE;
1141
1142         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1143 }
1144
1145 void ip6_route_input(struct sk_buff *skb)
1146 {
1147         const struct ipv6hdr *iph = ipv6_hdr(skb);
1148         struct net *net = dev_net(skb->dev);
1149         int flags = RT6_LOOKUP_F_HAS_SADDR;
1150         struct ip_tunnel_info *tun_info;
1151         struct flowi6 fl6 = {
1152                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1153                 .daddr = iph->daddr,
1154                 .saddr = iph->saddr,
1155                 .flowlabel = ip6_flowinfo(iph),
1156                 .flowi6_mark = skb->mark,
1157                 .flowi6_proto = iph->nexthdr,
1158         };
1159
1160         tun_info = skb_tunnel_info(skb);
1161         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1162                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1163         skb_dst_drop(skb);
1164         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1165 }
1166
1167 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1168                                              struct flowi6 *fl6, int flags)
1169 {
1170         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1171 }
1172
1173 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1174                                          struct flowi6 *fl6, int flags)
1175 {
1176         struct dst_entry *dst;
1177         bool any_src;
1178
1179         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1180         if (dst)
1181                 return dst;
1182
1183         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1184
1185         any_src = ipv6_addr_any(&fl6->saddr);
1186         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1187             (fl6->flowi6_oif && any_src))
1188                 flags |= RT6_LOOKUP_F_IFACE;
1189
1190         if (!any_src)
1191                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1192         else if (sk)
1193                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1194
1195         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1198
1199 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1200 {
1201         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1202         struct dst_entry *new = NULL;
1203
1204         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1205         if (rt) {
1206                 rt6_info_init(rt);
1207
1208                 new = &rt->dst;
1209                 new->__use = 1;
1210                 new->input = dst_discard;
1211                 new->output = dst_discard_out;
1212
1213                 dst_copy_metrics(new, &ort->dst);
1214                 rt->rt6i_idev = ort->rt6i_idev;
1215                 if (rt->rt6i_idev)
1216                         in6_dev_hold(rt->rt6i_idev);
1217
1218                 rt->rt6i_gateway = ort->rt6i_gateway;
1219                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1220                 rt->rt6i_metric = 0;
1221
1222                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1223 #ifdef CONFIG_IPV6_SUBTREES
1224                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1225 #endif
1226
1227                 dst_free(new);
1228         }
1229
1230         dst_release(dst_orig);
1231         return new ? new : ERR_PTR(-ENOMEM);
1232 }
1233
1234 /*
1235  *      Destination cache support functions
1236  */
1237
1238 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1239 {
1240         if (rt->dst.from &&
1241             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1242                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1243 }
1244
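/* A cached dst stays valid only while the fib6 node it came from keeps
 * the serial number recorded in the dst cookie and the route has not
 * expired; otherwise the caller must redo the route lookup.
 */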
1245 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1246 {
1247         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1248                 return NULL;
1249
1250         if (rt6_check_expired(rt))
1251                 return NULL;
1252
1253         return &rt->dst;
1254 }
1255
1256 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1257 {
1258         if (!__rt6_check_expired(rt) &&
1259             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1260             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1261                 return &rt->dst;
1262         else
1263                 return NULL;
1264 }
1265
1266 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1267 {
1268         struct rt6_info *rt;
1269
1270         rt = (struct rt6_info *) dst;
1271
1272         /* All IPV6 dsts are created with ->obsolete set to the value
1273          * DST_OBSOLETE_FORCE_CHK, which forces validation calls to
1274          * always come down into this function.
1275          */
1276
1277         rt6_dst_from_metrics_check(rt);
1278
1279         if (rt->rt6i_flags & RTF_PCPU ||
1280             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1281                 return rt6_dst_from_check(rt, cookie);
1282         else
1283                 return rt6_check(rt, cookie);
1284 }
1285
1286 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1287 {
1288         struct rt6_info *rt = (struct rt6_info *) dst;
1289
1290         if (rt) {
1291                 if (rt->rt6i_flags & RTF_CACHE) {
1292                         if (rt6_check_expired(rt)) {
1293                                 ip6_del_rt(rt);
1294                                 dst = NULL;
1295                         }
1296                 } else {
1297                         dst_release(dst);
1298                         dst = NULL;
1299                 }
1300         }
1301         return dst;
1302 }
1303
1304 static void ip6_link_failure(struct sk_buff *skb)
1305 {
1306         struct rt6_info *rt;
1307
1308         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1309
1310         rt = (struct rt6_info *) skb_dst(skb);
1311         if (rt) {
1312                 if (rt->rt6i_flags & RTF_CACHE) {
1313                         dst_hold(&rt->dst);
1314                         ip6_del_rt(rt);
1315                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1316                         rt->rt6i_node->fn_sernum = -1;
1317                 }
1318         }
1319 }
1320
1321 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 {
1323         struct net *net = dev_net(rt->dst.dev);
1324
1325         rt->rt6i_flags |= RTF_MODIFIED;
1326         rt->rt6i_pmtu = mtu;
1327         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1328 }
1329
1330 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1331 {
1332         return !(rt->rt6i_flags & RTF_CACHE) &&
1333                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1334 }
1335
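/* Record a new path MTU.  RTF_CACHE entries (and routes detached from
 * the tree) are updated in place; for everything else an RTF_CACHE
 * clone carrying the reduced MTU is created and inserted, which also
 * invalidates cached socket dsts via the node's serial number.
 */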
1336 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1337                                  const struct ipv6hdr *iph, u32 mtu)
1338 {
1339         struct rt6_info *rt6 = (struct rt6_info *)dst;
1340
1341         if (rt6->rt6i_flags & RTF_LOCAL)
1342                 return;
1343
1344         dst_confirm(dst);
1345         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1346         if (mtu >= dst_mtu(dst))
1347                 return;
1348
1349         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1350                 rt6_do_update_pmtu(rt6, mtu);
1351         } else {
1352                 const struct in6_addr *daddr, *saddr;
1353                 struct rt6_info *nrt6;
1354
1355                 if (iph) {
1356                         daddr = &iph->daddr;
1357                         saddr = &iph->saddr;
1358                 } else if (sk) {
1359                         daddr = &sk->sk_v6_daddr;
1360                         saddr = &inet6_sk(sk)->saddr;
1361                 } else {
1362                         return;
1363                 }
1364                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1365                 if (nrt6) {
1366                         rt6_do_update_pmtu(nrt6, mtu);
1367
1368                         /* ip6_ins_rt(nrt6) will bump the
1369                          * rt6->rt6i_node->fn_sernum
1370                          * which will fail the next rt6_check() and
1371                          * invalidate the sk->sk_dst_cache.
1372                          */
1373                         ip6_ins_rt(nrt6);
1374                 }
1375         }
1376 }
1377
1378 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1379                                struct sk_buff *skb, u32 mtu)
1380 {
1381         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1382 }
1383
1384 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1385                      int oif, u32 mark, kuid_t uid)
1386 {
1387         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1388         struct dst_entry *dst;
1389         struct flowi6 fl6;
1390
1391         memset(&fl6, 0, sizeof(fl6));
1392         fl6.flowi6_oif = oif;
1393         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1394         fl6.daddr = iph->daddr;
1395         fl6.saddr = iph->saddr;
1396         fl6.flowlabel = ip6_flowinfo(iph);
1397         fl6.flowi6_uid = uid;
1398
1399         dst = ip6_route_output(net, NULL, &fl6);
1400         if (!dst->error)
1401                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1402         dst_release(dst);
1403 }
1404 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1405
1406 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1407 {
1408         ip6_update_pmtu(skb, sock_net(sk), mtu,
1409                         sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk));
1410 }
1411 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1412
1413 /* Handle redirects */
1414 struct ip6rd_flowi {
1415         struct flowi6 fl6;
1416         struct in6_addr gateway;
1417 };
1418
1419 static struct rt6_info *__ip6_route_redirect(struct net *net,
1420                                              struct fib6_table *table,
1421                                              struct flowi6 *fl6,
1422                                              int flags)
1423 {
1424         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1425         struct rt6_info *rt;
1426         struct fib6_node *fn;
1427
1428         /* Get the "current" route for this destination and
1429          * check if the redirect has come from the appropriate router.
1430          *
1431          * RFC 4861 specifies that redirects should only be
1432          * accepted if they come from the nexthop to the target.
1433          * Due to the way the routes are chosen, this notion
1434          * is a bit fuzzy and one might need to check all possible
1435          * routes.
1436          */
1437
1438         read_lock_bh(&table->tb6_lock);
1439         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1440 restart:
1441         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1442                 if (rt6_check_expired(rt))
1443                         continue;
1444                 if (rt->dst.error)
1445                         break;
1446                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447                         continue;
1448                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1449                         continue;
1450                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1451                         continue;
1452                 break;
1453         }
1454
1455         if (!rt)
1456                 rt = net->ipv6.ip6_null_entry;
1457         else if (rt->dst.error) {
1458                 rt = net->ipv6.ip6_null_entry;
1459                 goto out;
1460         }
1461
1462         if (rt == net->ipv6.ip6_null_entry) {
1463                 fn = fib6_backtrack(fn, &fl6->saddr);
1464                 if (fn)
1465                         goto restart;
1466         }
1467
1468 out:
1469         dst_hold(&rt->dst);
1470
1471         read_unlock_bh(&table->tb6_lock);
1472
1473         return rt;
1474 };
1475
1476 static struct dst_entry *ip6_route_redirect(struct net *net,
1477                                         const struct flowi6 *fl6,
1478                                         const struct in6_addr *gateway)
1479 {
1480         int flags = RT6_LOOKUP_F_HAS_SADDR;
1481         struct ip6rd_flowi rdfl;
1482
1483         rdfl.fl6 = *fl6;
1484         rdfl.gateway = *gateway;
1485
1486         return fib6_rule_lookup(net, &rdfl.fl6,
1487                                 flags, __ip6_route_redirect);
1488 }
1489
1490 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1491 {
1492         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1493         struct dst_entry *dst;
1494         struct flowi6 fl6;
1495
1496         memset(&fl6, 0, sizeof(fl6));
1497         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1498         fl6.flowi6_oif = oif;
1499         fl6.flowi6_mark = mark;
1500         fl6.daddr = iph->daddr;
1501         fl6.saddr = iph->saddr;
1502         fl6.flowlabel = ip6_flowinfo(iph);
1503
1504         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1505         rt6_do_redirect(dst, NULL, skb);
1506         dst_release(dst);
1507 }
1508 EXPORT_SYMBOL_GPL(ip6_redirect);
1509
1510 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1511                             u32 mark)
1512 {
1513         const struct ipv6hdr *iph = ipv6_hdr(skb);
1514         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1515         struct dst_entry *dst;
1516         struct flowi6 fl6;
1517
1518         memset(&fl6, 0, sizeof(fl6));
1519         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1520         fl6.flowi6_oif = oif;
1521         fl6.flowi6_mark = mark;
1522         fl6.daddr = msg->dest;
1523         fl6.saddr = iph->daddr;
1524
1525         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1526         rt6_do_redirect(dst, NULL, skb);
1527         dst_release(dst);
1528 }
1529
1530 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1531 {
1532         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1533 }
1534 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1535
1536 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1537 {
1538         struct net_device *dev = dst->dev;
1539         unsigned int mtu = dst_mtu(dst);
1540         struct net *net = dev_net(dev);
1541
1542         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1543
1544         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1545                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1546
1547         /*
1548          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1549          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1550          * IPV6_MAXPLEN is also valid and means: "any MSS,
1551          * rely only on pmtu discovery"
1552          */
1553         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1554                 mtu = IPV6_MAXPLEN;
1555         return mtu;
1556 }
1557
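/*
 * Effective MTU for this dst: prefer the cached rt6i_pmtu, then the
 * RTAX_MTU metric, and finally the device's cnf.mtu6 (defaulting to
 * IPV6_MIN_MTU), always capped at IP6_MAX_MTU.
 */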
1558 static unsigned int ip6_mtu(const struct dst_entry *dst)
1559 {
1560         const struct rt6_info *rt = (const struct rt6_info *)dst;
1561         unsigned int mtu = rt->rt6i_pmtu;
1562         struct inet6_dev *idev;
1563
1564         if (mtu)
1565                 goto out;
1566
1567         mtu = dst_metric_raw(dst, RTAX_MTU);
1568         if (mtu)
1569                 goto out;
1570
1571         mtu = IPV6_MIN_MTU;
1572
1573         rcu_read_lock();
1574         idev = __in6_dev_get(dst->dev);
1575         if (idev)
1576                 mtu = idev->cnf.mtu6;
1577         rcu_read_unlock();
1578
1579 out:
1580         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1581 }
1582
1583 static struct dst_entry *icmp6_dst_gc_list;
1584 static DEFINE_SPINLOCK(icmp6_dst_lock);
1585
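/*
 * Allocate a standalone dst (never inserted into a FIB table) for
 * ICMPv6/ndisc output.  The entry is chained on icmp6_dst_gc_list so
 * that icmp6_dst_gc() can reclaim it once its refcount drops to zero;
 * the returned dst has already been passed through xfrm_lookup().
 */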
1586 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1587                                   struct flowi6 *fl6)
1588 {
1589         struct dst_entry *dst;
1590         struct rt6_info *rt;
1591         struct inet6_dev *idev = in6_dev_get(dev);
1592         struct net *net = dev_net(dev);
1593
1594         if (unlikely(!idev))
1595                 return ERR_PTR(-ENODEV);
1596
1597         rt = ip6_dst_alloc(net, dev, 0);
1598         if (unlikely(!rt)) {
1599                 in6_dev_put(idev);
1600                 dst = ERR_PTR(-ENOMEM);
1601                 goto out;
1602         }
1603
1604         rt->dst.flags |= DST_HOST;
1605         rt->dst.output  = ip6_output;
1606         atomic_set(&rt->dst.__refcnt, 1);
1607         rt->rt6i_gateway  = fl6->daddr;
1608         rt->rt6i_dst.addr = fl6->daddr;
1609         rt->rt6i_dst.plen = 128;
1610         rt->rt6i_idev     = idev;
1611         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1612
1613         spin_lock_bh(&icmp6_dst_lock);
1614         rt->dst.next = icmp6_dst_gc_list;
1615         icmp6_dst_gc_list = &rt->dst;
1616         spin_unlock_bh(&icmp6_dst_lock);
1617
1618         fib6_force_start_gc(net);
1619
1620         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1621
1622 out:
1623         return dst;
1624 }
1625
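/*
 * Walk icmp6_dst_gc_list and free every entry whose refcount has
 * dropped to zero.  Returns non-zero while referenced entries remain.
 */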
1626 int icmp6_dst_gc(void)
1627 {
1628         struct dst_entry *dst, **pprev;
1629         int more = 0;
1630
1631         spin_lock_bh(&icmp6_dst_lock);
1632         pprev = &icmp6_dst_gc_list;
1633
1634         while ((dst = *pprev) != NULL) {
1635                 if (!atomic_read(&dst->__refcnt)) {
1636                         *pprev = dst->next;
1637                         dst_free(dst);
1638                 } else {
1639                         pprev = &dst->next;
1640                         ++more;
1641                 }
1642         }
1643
1644         spin_unlock_bh(&icmp6_dst_lock);
1645
1646         return more;
1647 }
1648
1649 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1650                             void *arg)
1651 {
1652         struct dst_entry *dst, **pprev;
1653
1654         spin_lock_bh(&icmp6_dst_lock);
1655         pprev = &icmp6_dst_gc_list;
1656         while ((dst = *pprev) != NULL) {
1657                 struct rt6_info *rt = (struct rt6_info *) dst;
1658                 if (func(rt, arg)) {
1659                         *pprev = dst->next;
1660                         dst_free(dst);
1661                 } else {
1662                         pprev = &dst->next;
1663                 }
1664         }
1665         spin_unlock_bh(&icmp6_dst_lock);
1666 }
1667
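/*
 * dst_ops garbage collector.  Does nothing if the last GC ran less than
 * ip6_rt_gc_min_interval ago and the cache holds at most ip6_rt_max_size
 * entries; otherwise runs fib6_run_gc() with a growing expiry that is
 * decayed by ip6_rt_gc_elasticity.  Returns true while the cache is
 * still over ip6_rt_max_size.
 */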
1668 static int ip6_dst_gc(struct dst_ops *ops)
1669 {
1670         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1671         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1672         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1673         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1674         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1675         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1676         int entries;
1677
1678         entries = dst_entries_get_fast(ops);
1679         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1680             entries <= rt_max_size)
1681                 goto out;
1682
1683         net->ipv6.ip6_rt_gc_expire++;
1684         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1685         entries = dst_entries_get_slow(ops);
1686         if (entries < ops->gc_thresh)
1687                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1688 out:
1689         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1690         return entries > rt_max_size;
1691 }
1692
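/*
 * Convert the RTA_METRICS attributes in cfg->fc_mx into the metrics
 * array of @mxc.  RTAX_CC_ALGO is resolved by name via
 * tcp_ca_get_key_by_name(), RTAX_HOPLIMIT is clamped to 255, and unknown
 * metric types or unsupported RTAX_FEATURES bits are rejected.
 */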
1693 static int ip6_convert_metrics(struct mx6_config *mxc,
1694                                const struct fib6_config *cfg)
1695 {
1696         bool ecn_ca = false;
1697         struct nlattr *nla;
1698         int remaining;
1699         u32 *mp;
1700
1701         if (!cfg->fc_mx)
1702                 return 0;
1703
1704         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1705         if (unlikely(!mp))
1706                 return -ENOMEM;
1707
1708         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1709                 int type = nla_type(nla);
1710                 u32 val;
1711
1712                 if (!type)
1713                         continue;
1714                 if (unlikely(type > RTAX_MAX))
1715                         goto err;
1716
1717                 if (type == RTAX_CC_ALGO) {
1718                         char tmp[TCP_CA_NAME_MAX];
1719
1720                         nla_strlcpy(tmp, nla, sizeof(tmp));
1721                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1722                         if (val == TCP_CA_UNSPEC)
1723                                 goto err;
1724                 } else {
1725                         val = nla_get_u32(nla);
1726                 }
1727                 if (type == RTAX_HOPLIMIT && val > 255)
1728                         val = 255;
1729                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1730                         goto err;
1731
1732                 mp[type - 1] = val;
1733                 __set_bit(type - 1, mxc->mx_valid);
1734         }
1735
1736         if (ecn_ca) {
1737                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1738                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1739         }
1740
1741         mxc->mx = mp;
1742         return 0;
1743  err:
1744         kfree(mp);
1745         return -EINVAL;
1746 }
1747
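/*
 * Build an rt6_info from a fib6_config: resolve the output device and
 * routing table, validate the gateway, attach any lwtunnel state and
 * promote loopback routes to reject routes.  The route is returned
 * without being inserted; the caller adds it to the FIB or frees it.
 */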
1748 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1749 {
1750         struct net *net = cfg->fc_nlinfo.nl_net;
1751         struct rt6_info *rt = NULL;
1752         struct net_device *dev = NULL;
1753         struct inet6_dev *idev = NULL;
1754         struct fib6_table *table;
1755         int addr_type;
1756         int err = -EINVAL;
1757
1758         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1759                 goto out;
1760 #ifndef CONFIG_IPV6_SUBTREES
1761         if (cfg->fc_src_len)
1762                 goto out;
1763 #endif
1764         if (cfg->fc_ifindex) {
1765                 err = -ENODEV;
1766                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1767                 if (!dev)
1768                         goto out;
1769                 idev = in6_dev_get(dev);
1770                 if (!idev)
1771                         goto out;
1772         }
1773
1774         if (cfg->fc_metric == 0)
1775                 cfg->fc_metric = IP6_RT_PRIO_USER;
1776
1777         err = -ENOBUFS;
1778         if (cfg->fc_nlinfo.nlh &&
1779             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1780                 table = fib6_get_table(net, cfg->fc_table);
1781                 if (!table) {
1782                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1783                         table = fib6_new_table(net, cfg->fc_table);
1784                 }
1785         } else {
1786                 table = fib6_new_table(net, cfg->fc_table);
1787         }
1788
1789         if (!table)
1790                 goto out;
1791
1792         rt = ip6_dst_alloc(net, NULL,
1793                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1794
1795         if (!rt) {
1796                 err = -ENOMEM;
1797                 goto out;
1798         }
1799
1800         if (cfg->fc_flags & RTF_EXPIRES)
1801                 rt6_set_expires(rt, jiffies +
1802                                 clock_t_to_jiffies(cfg->fc_expires));
1803         else
1804                 rt6_clean_expires(rt);
1805
1806         if (cfg->fc_protocol == RTPROT_UNSPEC)
1807                 cfg->fc_protocol = RTPROT_BOOT;
1808         rt->rt6i_protocol = cfg->fc_protocol;
1809
1810         addr_type = ipv6_addr_type(&cfg->fc_dst);
1811
1812         if (addr_type & IPV6_ADDR_MULTICAST)
1813                 rt->dst.input = ip6_mc_input;
1814         else if (cfg->fc_flags & RTF_LOCAL)
1815                 rt->dst.input = ip6_input;
1816         else
1817                 rt->dst.input = ip6_forward;
1818
1819         rt->dst.output = ip6_output;
1820
1821         if (cfg->fc_encap) {
1822                 struct lwtunnel_state *lwtstate;
1823
1824                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1825                                            cfg->fc_encap, AF_INET6, cfg,
1826                                            &lwtstate);
1827                 if (err)
1828                         goto out;
1829                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1830                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1831                         rt->dst.lwtstate->orig_output = rt->dst.output;
1832                         rt->dst.output = lwtunnel_output;
1833                 }
1834                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1835                         rt->dst.lwtstate->orig_input = rt->dst.input;
1836                         rt->dst.input = lwtunnel_input;
1837                 }
1838         }
1839
1840         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1841         rt->rt6i_dst.plen = cfg->fc_dst_len;
1842         if (rt->rt6i_dst.plen == 128)
1843                 rt->dst.flags |= DST_HOST;
1844
1845 #ifdef CONFIG_IPV6_SUBTREES
1846         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1847         rt->rt6i_src.plen = cfg->fc_src_len;
1848 #endif
1849
1850         rt->rt6i_metric = cfg->fc_metric;
1851
1852         /* We cannot add true routes via loopback here;
1853            they would result in kernel looping. Promote them to reject routes.
1854          */
1855         if ((cfg->fc_flags & RTF_REJECT) ||
1856             (dev && (dev->flags & IFF_LOOPBACK) &&
1857              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1858              !(cfg->fc_flags & RTF_LOCAL))) {
1859                 /* hold loopback dev/idev if we haven't done so. */
1860                 if (dev != net->loopback_dev) {
1861                         if (dev) {
1862                                 dev_put(dev);
1863                                 in6_dev_put(idev);
1864                         }
1865                         dev = net->loopback_dev;
1866                         dev_hold(dev);
1867                         idev = in6_dev_get(dev);
1868                         if (!idev) {
1869                                 err = -ENODEV;
1870                                 goto out;
1871                         }
1872                 }
1873                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1874                 switch (cfg->fc_type) {
1875                 case RTN_BLACKHOLE:
1876                         rt->dst.error = -EINVAL;
1877                         rt->dst.output = dst_discard_out;
1878                         rt->dst.input = dst_discard;
1879                         break;
1880                 case RTN_PROHIBIT:
1881                         rt->dst.error = -EACCES;
1882                         rt->dst.output = ip6_pkt_prohibit_out;
1883                         rt->dst.input = ip6_pkt_prohibit;
1884                         break;
1885                 case RTN_THROW:
1886                 case RTN_UNREACHABLE:
1887                 default:
1888                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1889                                         : (cfg->fc_type == RTN_UNREACHABLE)
1890                                         ? -EHOSTUNREACH : -ENETUNREACH;
1891                         rt->dst.output = ip6_pkt_discard_out;
1892                         rt->dst.input = ip6_pkt_discard;
1893                         break;
1894                 }
1895                 goto install_route;
1896         }
1897
1898         if (cfg->fc_flags & RTF_GATEWAY) {
1899                 const struct in6_addr *gw_addr;
1900                 int gwa_type;
1901
1902                 gw_addr = &cfg->fc_gateway;
1903                 gwa_type = ipv6_addr_type(gw_addr);
1904
1905                 /* if gw_addr is local we will fail to detect this in case the
1906                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1907                  * will return the already-added prefix route via the interface
1908                  * that the prefix route was assigned to, which might be non-loopback.
1909                  */
1910                 err = -EINVAL;
1911                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1912                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1913                                             dev : NULL, 0, 0))
1914                         goto out;
1915
1916                 rt->rt6i_gateway = *gw_addr;
1917
1918                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1919                         struct rt6_info *grt;
1920
1921                         /* IPv6 strictly inhibits using non-link-local
1922                            addresses as a nexthop address.
1923                            Otherwise, a router will not be able to send redirects.
1924                            It is very good, but in some (rare!) circumstances
1925                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1926                            some exceptions. --ANK
1927                          */
1928                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1929                                 goto out;
1930
1931                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1932
1933                         err = -EHOSTUNREACH;
1934                         if (!grt)
1935                                 goto out;
1936                         if (dev) {
1937                                 if (dev != grt->dst.dev) {
1938                                         ip6_rt_put(grt);
1939                                         goto out;
1940                                 }
1941                         } else {
1942                                 dev = grt->dst.dev;
1943                                 idev = grt->rt6i_idev;
1944                                 dev_hold(dev);
1945                                 in6_dev_hold(grt->rt6i_idev);
1946                         }
1947                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1948                                 err = 0;
1949                         ip6_rt_put(grt);
1950
1951                         if (err)
1952                                 goto out;
1953                 }
1954                 err = -EINVAL;
1955                 if (!dev || (dev->flags & IFF_LOOPBACK))
1956                         goto out;
1957         }
1958
1959         err = -ENODEV;
1960         if (!dev)
1961                 goto out;
1962
1963         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1964                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1965                         err = -EINVAL;
1966                         goto out;
1967                 }
1968                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1969                 rt->rt6i_prefsrc.plen = 128;
1970         } else
1971                 rt->rt6i_prefsrc.plen = 0;
1972
1973         rt->rt6i_flags = cfg->fc_flags;
1974
1975 install_route:
1976         rt->dst.dev = dev;
1977         rt->rt6i_idev = idev;
1978         rt->rt6i_table = table;
1979
1980         cfg->fc_nlinfo.nl_net = dev_net(dev);
1981
1982         return rt;
1983 out:
1984         if (dev)
1985                 dev_put(dev);
1986         if (idev)
1987                 in6_dev_put(idev);
1988         if (rt)
1989                 dst_free(&rt->dst);
1990
1991         return ERR_PTR(err);
1992 }
1993
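/*
 * Create a route from @cfg, convert its netlink metrics and insert it
 * into the FIB via __ip6_ins_rt().
 */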
1994 int ip6_route_add(struct fib6_config *cfg)
1995 {
1996         struct mx6_config mxc = { .mx = NULL, };
1997         struct rt6_info *rt;
1998         int err;
1999
2000         rt = ip6_route_info_create(cfg);
2001         if (IS_ERR(rt)) {
2002                 err = PTR_ERR(rt);
2003                 rt = NULL;
2004                 goto out;
2005         }
2006
2007         err = ip6_convert_metrics(&mxc, cfg);
2008         if (err)
2009                 goto out;
2010
2011         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2012
2013         kfree(mxc.mx);
2014
2015         return err;
2016 out:
2017         if (rt)
2018                 dst_free(&rt->dst);
2019
2020         return err;
2021 }
2022
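/*
 * Unlink @rt from its fib6 table under tb6_lock and drop the caller's
 * reference.  The null entry and DST_NOCACHE routes are rejected with
 * -ENOENT.
 */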
2023 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2024 {
2025         int err;
2026         struct fib6_table *table;
2027         struct net *net = dev_net(rt->dst.dev);
2028
2029         if (rt == net->ipv6.ip6_null_entry ||
2030             rt->dst.flags & DST_NOCACHE) {
2031                 err = -ENOENT;
2032                 goto out;
2033         }
2034
2035         table = rt->rt6i_table;
2036         write_lock_bh(&table->tb6_lock);
2037         err = fib6_del(rt, info);
2038         write_unlock_bh(&table->tb6_lock);
2039
2040 out:
2041         ip6_rt_put(rt);
2042         return err;
2043 }
2044
2045 int ip6_del_rt(struct rt6_info *rt)
2046 {
2047         struct nl_info info = {
2048                 .nl_net = dev_net(rt->dst.dev),
2049         };
2050         return __ip6_del_rt(rt, &info);
2051 }
2052
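/*
 * Find the route in cfg->fc_table matching the destination prefix and,
 * if given, the interface, gateway and metric, then delete it via
 * __ip6_del_rt().  Returns -ESRCH when nothing matches.
 */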
2053 static int ip6_route_del(struct fib6_config *cfg)
2054 {
2055         struct fib6_table *table;
2056         struct fib6_node *fn;
2057         struct rt6_info *rt;
2058         int err = -ESRCH;
2059
2060         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2061         if (!table)
2062                 return err;
2063
2064         read_lock_bh(&table->tb6_lock);
2065
2066         fn = fib6_locate(&table->tb6_root,
2067                          &cfg->fc_dst, cfg->fc_dst_len,
2068                          &cfg->fc_src, cfg->fc_src_len);
2069
2070         if (fn) {
2071                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2072                         if ((rt->rt6i_flags & RTF_CACHE) &&
2073                             !(cfg->fc_flags & RTF_CACHE))
2074                                 continue;
2075                         if (cfg->fc_ifindex &&
2076                             (!rt->dst.dev ||
2077                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2078                                 continue;
2079                         if (cfg->fc_flags & RTF_GATEWAY &&
2080                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2081                                 continue;
2082                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2083                                 continue;
2084                         dst_hold(&rt->dst);
2085                         read_unlock_bh(&table->tb6_lock);
2086
2087                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2088                 }
2089         }
2090         read_unlock_bh(&table->tb6_lock);
2091
2092         return err;
2093 }
2094
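/*
 * Process an ICMPv6 redirect: validate the message (length, multicast
 * destination, on-link vs. link-local target, ND options), confirm the
 * old dst, update the neighbour entry for the new target, install an
 * RTF_CACHE clone pointing at it and raise NETEVENT_REDIRECT.
 */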
2095 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2096 {
2097         struct netevent_redirect netevent;
2098         struct rt6_info *rt, *nrt = NULL;
2099         struct ndisc_options ndopts;
2100         struct inet6_dev *in6_dev;
2101         struct neighbour *neigh;
2102         struct rd_msg *msg;
2103         int optlen, on_link;
2104         u8 *lladdr;
2105
2106         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2107         optlen -= sizeof(*msg);
2108
2109         if (optlen < 0) {
2110                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2111                 return;
2112         }
2113
2114         msg = (struct rd_msg *)icmp6_hdr(skb);
2115
2116         if (ipv6_addr_is_multicast(&msg->dest)) {
2117                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2118                 return;
2119         }
2120
2121         on_link = 0;
2122         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2123                 on_link = 1;
2124         } else if (ipv6_addr_type(&msg->target) !=
2125                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2126                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2127                 return;
2128         }
2129
2130         in6_dev = __in6_dev_get(skb->dev);
2131         if (!in6_dev)
2132                 return;
2133         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2134                 return;
2135
2136         /* RFC2461 8.1:
2137          *      The IP source address of the Redirect MUST be the same as the current
2138          *      first-hop router for the specified ICMP Destination Address.
2139          */
2140
2141         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2142                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2143                 return;
2144         }
2145
2146         lladdr = NULL;
2147         if (ndopts.nd_opts_tgt_lladdr) {
2148                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2149                                              skb->dev);
2150                 if (!lladdr) {
2151                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2152                         return;
2153                 }
2154         }
2155
2156         rt = (struct rt6_info *) dst;
2157         if (rt->rt6i_flags & RTF_REJECT) {
2158                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2159                 return;
2160         }
2161
2162         /* Redirect received -> path was valid.
2163          * Look, redirects are sent only in response to data packets,
2164          * so that this nexthop apparently is reachable. --ANK
2165          */
2166         dst_confirm(&rt->dst);
2167
2168         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2169         if (!neigh)
2170                 return;
2171
2172         /*
2173          *      We have finally decided to accept it.
2174          */
2175
2176         neigh_update(neigh, lladdr, NUD_STALE,
2177                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2178                      NEIGH_UPDATE_F_OVERRIDE|
2179                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2180                                      NEIGH_UPDATE_F_ISROUTER))
2181                      );
2182
2183         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2184         if (!nrt)
2185                 goto out;
2186
2187         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2188         if (on_link)
2189                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2190
2191         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2192
2193         if (ip6_ins_rt(nrt))
2194                 goto out;
2195
2196         netevent.old = &rt->dst;
2197         netevent.new = &nrt->dst;
2198         netevent.daddr = &msg->dest;
2199         netevent.neigh = neigh;
2200         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2201
2202         if (rt->rt6i_flags & RTF_CACHE) {
2203                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2204                 ip6_del_rt(rt);
2205         }
2206
2207 out:
2208         neigh_release(neigh);
2209 }
2210
2211 /*
2212  *      Misc support functions
2213  */
2214
2215 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2216 {
2217         BUG_ON(from->dst.from);
2218
2219         rt->rt6i_flags &= ~RTF_EXPIRES;
2220         dst_hold(&from->dst);
2221         rt->dst.from = &from->dst;
2222         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2223 }
2224
2225 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2226 {
2227         rt->dst.input = ort->dst.input;
2228         rt->dst.output = ort->dst.output;
2229         rt->rt6i_dst = ort->rt6i_dst;
2230         rt->dst.error = ort->dst.error;
2231         rt->rt6i_idev = ort->rt6i_idev;
2232         if (rt->rt6i_idev)
2233                 in6_dev_hold(rt->rt6i_idev);
2234         rt->dst.lastuse = jiffies;
2235         rt->rt6i_gateway = ort->rt6i_gateway;
2236         rt->rt6i_flags = ort->rt6i_flags;
2237         rt6_set_from(rt, ort);
2238         rt->rt6i_metric = ort->rt6i_metric;
2239 #ifdef CONFIG_IPV6_SUBTREES
2240         rt->rt6i_src = ort->rt6i_src;
2241 #endif
2242         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2243         rt->rt6i_table = ort->rt6i_table;
2244         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2245 }
2246
2247 #ifdef CONFIG_IPV6_ROUTE_INFO
2248 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
2249                                            const struct in6_addr *prefix, int prefixlen,
2250                                            const struct in6_addr *gwaddr)
2251 {
2252         struct fib6_node *fn;
2253         struct rt6_info *rt = NULL;
2254         struct fib6_table *table;
2255
2256         table = fib6_get_table(dev_net(dev),
2257                                addrconf_rt_table(dev, RT6_TABLE_INFO));
2258         if (!table)
2259                 return NULL;
2260
2261         read_lock_bh(&table->tb6_lock);
2262         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2263         if (!fn)
2264                 goto out;
2265
2266         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2267                 if (rt->dst.dev->ifindex != dev->ifindex)
2268                         continue;
2269                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2270                         continue;
2271                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2272                         continue;
2273                 dst_hold(&rt->dst);
2274                 break;
2275         }
2276 out:
2277         read_unlock_bh(&table->tb6_lock);
2278         return rt;
2279 }
2280
2281 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
2282                                            const struct in6_addr *prefix, int prefixlen,
2283                                            const struct in6_addr *gwaddr, unsigned int pref)
2284 {
2285         struct fib6_config cfg = {
2286                 .fc_metric      = IP6_RT_PRIO_USER,
2287                 .fc_ifindex     = dev->ifindex,
2288                 .fc_dst_len     = prefixlen,
2289                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2290                                   RTF_UP | RTF_PREF(pref),
2291                 .fc_nlinfo.portid = 0,
2292                 .fc_nlinfo.nlh = NULL,
2293                 .fc_nlinfo.nl_net = dev_net(dev),
2294         };
2295
2296         cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2297         cfg.fc_dst = *prefix;
2298         cfg.fc_gateway = *gwaddr;
2299
2300         /* We should treat it as a default route if prefix length is 0. */
2301         if (!prefixlen)
2302                 cfg.fc_flags |= RTF_DEFAULT;
2303
2304         ip6_route_add(&cfg);
2305
2306         return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
2307 }
2308 #endif
2309
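/*
 * Look up an RA-learned default router entry (RTF_ADDRCONF|RTF_DEFAULT)
 * for @addr on @dev in the table chosen by addrconf_rt_table().
 */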
2310 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2311 {
2312         struct rt6_info *rt;
2313         struct fib6_table *table;
2314
2315         table = fib6_get_table(dev_net(dev),
2316                                addrconf_rt_table(dev, RT6_TABLE_MAIN));
2317         if (!table)
2318                 return NULL;
2319
2320         read_lock_bh(&table->tb6_lock);
2321         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2322                 if (dev == rt->dst.dev &&
2323                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2324                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2325                         break;
2326         }
2327         if (rt)
2328                 dst_hold(&rt->dst);
2329         read_unlock_bh(&table->tb6_lock);
2330         return rt;
2331 }
2332
2333 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2334                                      struct net_device *dev,
2335                                      unsigned int pref)
2336 {
2337         struct fib6_config cfg = {
2338                 .fc_table       = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2339                 .fc_metric      = IP6_RT_PRIO_USER,
2340                 .fc_ifindex     = dev->ifindex,
2341                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2342                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2343                 .fc_nlinfo.portid = 0,
2344                 .fc_nlinfo.nlh = NULL,
2345                 .fc_nlinfo.nl_net = dev_net(dev),
2346         };
2347
2348         cfg.fc_gateway = *gwaddr;
2349
2350         ip6_route_add(&cfg);
2351
2352         return rt6_get_dflt_router(gwaddr, dev);
2353 }
2354
2355
2356 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2357         if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2358             (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2359                 return -1;
2360         return 0;
2361 }
2362
2363 void rt6_purge_dflt_routers(struct net *net)
2364 {
2365         fib6_clean_all(net, rt6_addrconf_purge, NULL);
2366 }
2367
2368 static void rtmsg_to_fib6_config(struct net *net,
2369                                  struct in6_rtmsg *rtmsg,
2370                                  struct fib6_config *cfg)
2371 {
2372         memset(cfg, 0, sizeof(*cfg));
2373
2374         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2375                          : RT6_TABLE_MAIN;
2376         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2377         cfg->fc_metric = rtmsg->rtmsg_metric;
2378         cfg->fc_expires = rtmsg->rtmsg_info;
2379         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2380         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2381         cfg->fc_flags = rtmsg->rtmsg_flags;
2382
2383         cfg->fc_nlinfo.nl_net = net;
2384
2385         cfg->fc_dst = rtmsg->rtmsg_dst;
2386         cfg->fc_src = rtmsg->rtmsg_src;
2387         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2388 }
2389
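/*
 * SIOCADDRT/SIOCDELRT handler: requires CAP_NET_ADMIN, copies an
 * in6_rtmsg from userspace, converts it to a fib6_config and adds or
 * deletes the route under the RTNL lock.
 */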
2390 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2391 {
2392         struct fib6_config cfg;
2393         struct in6_rtmsg rtmsg;
2394         int err;
2395
2396         switch (cmd) {
2397         case SIOCADDRT:         /* Add a route */
2398         case SIOCDELRT:         /* Delete a route */
2399                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2400                         return -EPERM;
2401                 err = copy_from_user(&rtmsg, arg,
2402                                      sizeof(struct in6_rtmsg));
2403                 if (err)
2404                         return -EFAULT;
2405
2406                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2407
2408                 rtnl_lock();
2409                 switch (cmd) {
2410                 case SIOCADDRT:
2411                         err = ip6_route_add(&cfg);
2412                         break;
2413                 case SIOCDELRT:
2414                         err = ip6_route_del(&cfg);
2415                         break;
2416                 default:
2417                         err = -EINVAL;
2418                 }
2419                 rtnl_unlock();
2420
2421                 return err;
2422         }
2423
2424         return -EINVAL;
2425 }
2426
2427 /*
2428  *      Drop the packet on the floor
2429  */
2430
2431 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2432 {
2433         int type;
2434         struct dst_entry *dst = skb_dst(skb);
2435         switch (ipstats_mib_noroutes) {
2436         case IPSTATS_MIB_INNOROUTES:
2437                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2438                 if (type == IPV6_ADDR_ANY) {
2439                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2440                                       IPSTATS_MIB_INADDRERRORS);
2441                         break;
2442                 }
2443                 /* FALLTHROUGH */
2444         case IPSTATS_MIB_OUTNOROUTES:
2445                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2446                               ipstats_mib_noroutes);
2447                 break;
2448         }
2449         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2450         kfree_skb(skb);
2451         return 0;
2452 }
2453
2454 static int ip6_pkt_discard(struct sk_buff *skb)
2455 {
2456         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2457 }
2458
2459 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2460 {
2461         skb->dev = skb_dst(skb)->dev;
2462         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2463 }
2464
2465 static int ip6_pkt_prohibit(struct sk_buff *skb)
2466 {
2467         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2468 }
2469
2470 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2471 {
2472         skb->dev = skb_dst(skb)->dev;
2473         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2474 }
2475
2476 /*
2477  *      Allocate a dst for local (unicast / anycast) address.
2478  */
2479
2480 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2481                                     const struct in6_addr *addr,
2482                                     bool anycast)
2483 {
2484         u32 tb_id;
2485         struct net *net = dev_net(idev->dev);
2486         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2487                                             DST_NOCOUNT);
2488         if (!rt)
2489                 return ERR_PTR(-ENOMEM);
2490
2491         in6_dev_hold(idev);
2492
2493         rt->dst.flags |= DST_HOST;
2494         rt->dst.input = ip6_input;
2495         rt->dst.output = ip6_output;
2496         rt->rt6i_idev = idev;
2497
2498         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2499         if (anycast)
2500                 rt->rt6i_flags |= RTF_ANYCAST;
2501         else
2502                 rt->rt6i_flags |= RTF_LOCAL;
2503
2504         rt->rt6i_gateway  = *addr;
2505         rt->rt6i_dst.addr = *addr;
2506         rt->rt6i_dst.plen = 128;
2507         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2508         rt->rt6i_table = fib6_get_table(net, tb_id);
2509         rt->dst.flags |= DST_NOCACHE;
2510
2511         atomic_set(&rt->dst.__refcnt, 1);
2512
2513         return rt;
2514 }
2515
2516 int ip6_route_get_saddr(struct net *net,
2517                         struct rt6_info *rt,
2518                         const struct in6_addr *daddr,
2519                         unsigned int prefs,
2520                         struct in6_addr *saddr)
2521 {
2522         struct inet6_dev *idev =
2523                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2524         int err = 0;
2525         if (rt && rt->rt6i_prefsrc.plen)
2526                 *saddr = rt->rt6i_prefsrc.addr;
2527         else
2528                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2529                                          daddr, prefs, saddr);
2530         return err;
2531 }
2532
2533 /* remove deleted ip from prefsrc entries */
2534 struct arg_dev_net_ip {
2535         struct net_device *dev;
2536         struct net *net;
2537         struct in6_addr *addr;
2538 };
2539
2540 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2541 {
2542         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2543         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2544         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2545
2546         if (((void *)rt->dst.dev == dev || !dev) &&
2547             rt != net->ipv6.ip6_null_entry &&
2548             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2549                 /* remove prefsrc entry */
2550                 rt->rt6i_prefsrc.plen = 0;
2551         }
2552         return 0;
2553 }
2554
2555 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2556 {
2557         struct net *net = dev_net(ifp->idev->dev);
2558         struct arg_dev_net_ip adni = {
2559                 .dev = ifp->idev->dev,
2560                 .net = net,
2561                 .addr = &ifp->addr,
2562         };
2563         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2564 }
2565
2566 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2567 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2568
2569 /* Remove routers and update dst entries when a gateway turns into a host. */
2570 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2571 {
2572         struct in6_addr *gateway = (struct in6_addr *)arg;
2573
2574         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2575              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2576              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2577                 return -1;
2578         }
2579         return 0;
2580 }
2581
2582 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2583 {
2584         fib6_clean_all(net, fib6_clean_tohost, gateway);
2585 }
2586
2587 struct arg_dev_net {
2588         struct net_device *dev;
2589         struct net *net;
2590 };
2591
2592 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2593 {
2594         const struct arg_dev_net *adn = arg;
2595         const struct net_device *dev = adn->dev;
2596
2597         if ((rt->dst.dev == dev || !dev) &&
2598             rt != adn->net->ipv6.ip6_null_entry)
2599                 return -1;
2600
2601         return 0;
2602 }
2603
2604 void rt6_ifdown(struct net *net, struct net_device *dev)
2605 {
2606         struct arg_dev_net adn = {
2607                 .dev = dev,
2608                 .net = net,
2609         };
2610
2611         fib6_clean_all(net, fib6_ifdown, &adn);
2612         icmp6_clean_all(fib6_ifdown, &adn);
2613         if (dev)
2614                 rt6_uncached_list_flush_dev(net, dev);
2615 }
2616
2617 struct rt6_mtu_change_arg {
2618         struct net_device *dev;
2619         unsigned int mtu;
2620 };
2621
2622 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2623 {
2624         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2625         struct inet6_dev *idev;
2626
2627         /* In IPv6 pmtu discovery is not optional,
2628            so the RTAX_MTU lock cannot disable it.
2629            We still use this lock to block changes
2630            caused by addrconf/ndisc.
2631         */
2632
2633         idev = __in6_dev_get(arg->dev);
2634         if (!idev)
2635                 return 0;
2636
2637         /* For an administrative MTU increase there is no way to discover
2638            an IPv6 PMTU increase, so the PMTU should be updated here.
2639            Since RFC 1981 doesn't cover administrative MTU increases,
2640            updating the PMTU on increase is a MUST (e.g. jumbo frames).
2641          */
2642         /*
2643            If the new MTU is less than the route PMTU, this new MTU will be
2644            the lowest MTU in the path; update the route PMTU to reflect the
2645            decrease. If the new MTU is greater than the route PMTU, and the
2646            old MTU was the lowest MTU in the path, update the route PMTU
2647            to reflect the increase. In that case, if another node's MTU is
2648            now the lowest in the path, a Packet Too Big message will trigger
2649            PMTU discovery.
2650          */
2651         if (rt->dst.dev == arg->dev &&
2652             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2653                 if (rt->rt6i_flags & RTF_CACHE) {
2654                         /* For RTF_CACHE with rt6i_pmtu == 0
2655                          * (i.e. a redirected route),
2656                          * the metrics of its rt->dst.from have already
2657                          * been updated.
2658                          */
2659                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2660                                 rt->rt6i_pmtu = arg->mtu;
2661                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2662                            (dst_mtu(&rt->dst) < arg->mtu &&
2663                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2664                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2665                 }
2666         }
2667         return 0;
2668 }
2669
2670 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2671 {
2672         struct rt6_mtu_change_arg arg = {
2673                 .dev = dev,
2674                 .mtu = mtu,
2675         };
2676
2677         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2678 }
2679
2680 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2681         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2682         [RTA_OIF]               = { .type = NLA_U32 },
2683         [RTA_IIF]               = { .type = NLA_U32 },
2684         [RTA_PRIORITY]          = { .type = NLA_U32 },
2685         [RTA_METRICS]           = { .type = NLA_NESTED },
2686         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2687         [RTA_PREF]              = { .type = NLA_U8 },
2688         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2689         [RTA_ENCAP]             = { .type = NLA_NESTED },
2690         [RTA_UID]               = { .type = NLA_U32 },
2691 };
2692
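/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * fib6_config: rtmsg header fields plus the RTA_* attributes accepted
 * by rtm_ipv6_policy above.
 */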
2693 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2694                               struct fib6_config *cfg)
2695 {
2696         struct rtmsg *rtm;
2697         struct nlattr *tb[RTA_MAX+1];
2698         unsigned int pref;
2699         int err;
2700
2701         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2702         if (err < 0)
2703                 goto errout;
2704
2705         err = -EINVAL;
2706         rtm = nlmsg_data(nlh);
2707         memset(cfg, 0, sizeof(*cfg));
2708
2709         cfg->fc_table = rtm->rtm_table;
2710         cfg->fc_dst_len = rtm->rtm_dst_len;
2711         cfg->fc_src_len = rtm->rtm_src_len;
2712         cfg->fc_flags = RTF_UP;
2713         cfg->fc_protocol = rtm->rtm_protocol;
2714         cfg->fc_type = rtm->rtm_type;
2715
2716         if (rtm->rtm_type == RTN_UNREACHABLE ||
2717             rtm->rtm_type == RTN_BLACKHOLE ||
2718             rtm->rtm_type == RTN_PROHIBIT ||
2719             rtm->rtm_type == RTN_THROW)
2720                 cfg->fc_flags |= RTF_REJECT;
2721
2722         if (rtm->rtm_type == RTN_LOCAL)
2723                 cfg->fc_flags |= RTF_LOCAL;
2724
2725         if (rtm->rtm_flags & RTM_F_CLONED)
2726                 cfg->fc_flags |= RTF_CACHE;
2727
2728         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2729         cfg->fc_nlinfo.nlh = nlh;
2730         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2731
2732         if (tb[RTA_GATEWAY]) {
2733                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2734                 cfg->fc_flags |= RTF_GATEWAY;
2735         }
2736
2737         if (tb[RTA_DST]) {
2738                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2739
2740                 if (nla_len(tb[RTA_DST]) < plen)
2741                         goto errout;
2742
2743                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2744         }
2745
2746         if (tb[RTA_SRC]) {
2747                 int plen = (rtm->rtm_src_len + 7) >> 3;
2748
2749                 if (nla_len(tb[RTA_SRC]) < plen)
2750                         goto errout;
2751
2752                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2753         }
2754
2755         if (tb[RTA_PREFSRC])
2756                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2757
2758         if (tb[RTA_OIF])
2759                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2760
2761         if (tb[RTA_PRIORITY])
2762                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2763
2764         if (tb[RTA_METRICS]) {
2765                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2766                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2767         }
2768
2769         if (tb[RTA_TABLE])
2770                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2771
2772         if (tb[RTA_MULTIPATH]) {
2773                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2774                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2775         }
2776
2777         if (tb[RTA_PREF]) {
2778                 pref = nla_get_u8(tb[RTA_PREF]);
2779                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2780                     pref != ICMPV6_ROUTER_PREF_HIGH)
2781                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2782                 cfg->fc_flags |= RTF_PREF(pref);
2783         }
2784
2785         if (tb[RTA_ENCAP])
2786                 cfg->fc_encap = tb[RTA_ENCAP];
2787
2788         if (tb[RTA_ENCAP_TYPE])
2789                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2790
2791         err = 0;
2792 errout:
2793         return err;
2794 }
2795
2796 struct rt6_nh {
2797         struct rt6_info *rt6_info;
2798         struct fib6_config r_cfg;
2799         struct mx6_config mxc;
2800         struct list_head next;
2801 };
2802
2803 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2804 {
2805         struct rt6_nh *nh;
2806
2807         list_for_each_entry(nh, rt6_nh_list, next) {
2808                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2809                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2810                         nh->r_cfg.fc_ifindex);
2811         }
2812 }
2813
2814 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2815                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2816 {
2817         struct rt6_nh *nh;
2818         struct rt6_info *rtnh;
2819         int err = -EEXIST;
2820
2821         list_for_each_entry(nh, rt6_nh_list, next) {
2822                 /* check if rt6_info already exists */
2823                 rtnh = nh->rt6_info;
2824
2825                 if (rtnh->dst.dev == rt->dst.dev &&
2826                     rtnh->rt6i_idev == rt->rt6i_idev &&
2827                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2828                                     &rt->rt6i_gateway))
2829                         return err;
2830         }
2831
2832         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2833         if (!nh)
2834                 return -ENOMEM;
2835         nh->rt6_info = rt;
2836         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2837         if (err) {
2838                 kfree(nh);
2839                 return err;
2840         }
2841         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2842         list_add_tail(&nh->next, rt6_nh_list);
2843
2844         return 0;
2845 }
2846
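/*
 * Add an RTA_MULTIPATH route: build one rt6_info per nexthop, insert
 * them one by one, and on failure delete the nexthops that were already
 * added before reporting the error.
 */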
2847 static int ip6_route_multipath_add(struct fib6_config *cfg)
2848 {
2849         struct fib6_config r_cfg;
2850         struct rtnexthop *rtnh;
2851         struct rt6_info *rt;
2852         struct rt6_nh *err_nh;
2853         struct rt6_nh *nh, *nh_safe;
2854         int remaining;
2855         int attrlen;
2856         int err = 1;
2857         int nhn = 0;
2858         int replace = (cfg->fc_nlinfo.nlh &&
2859                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2860         LIST_HEAD(rt6_nh_list);
2861
2862         remaining = cfg->fc_mp_len;
2863         rtnh = (struct rtnexthop *)cfg->fc_mp;
2864
2865         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2866          * rt6_info structs per nexthop
2867          */
2868         while (rtnh_ok(rtnh, remaining)) {
2869                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2870                 if (rtnh->rtnh_ifindex)
2871                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2872
2873                 attrlen = rtnh_attrlen(rtnh);
2874                 if (attrlen > 0) {
2875                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2876
2877                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2878                         if (nla) {
2879                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2880                                 r_cfg.fc_flags |= RTF_GATEWAY;
2881                         }
2882                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2883                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2884                         if (nla)
2885                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2886                 }
2887
2888                 rt = ip6_route_info_create(&r_cfg);
2889                 if (IS_ERR(rt)) {
2890                         err = PTR_ERR(rt);
2891                         rt = NULL;
2892                         goto cleanup;
2893                 }
2894
2895                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2896                 if (err) {
2897                         dst_free(&rt->dst);
2898                         goto cleanup;
2899                 }
2900
2901                 rtnh = rtnh_next(rtnh, &remaining);
2902         }
2903
2904         err_nh = NULL;
2905         list_for_each_entry(nh, &rt6_nh_list, next) {
2906                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2907                 /* nh->rt6_info is used or freed at this point, reset to NULL */
2908                 nh->rt6_info = NULL;
2909                 if (err) {
2910                         if (replace && nhn)
2911                                 ip6_print_replace_route_err(&rt6_nh_list);
2912                         err_nh = nh;
2913                         goto add_errout;
2914                 }
2915
2916                 /* Because each route is added like a single route, we remove
2917                  * these flags after the first nexthop: if there is a collision,
2918                  * we have already failed to add the first nexthop:
2919                  * fib6_add_rt2node() has rejected it; when replacing, the old
2920                  * nexthops have been replaced by the first new one, and the
2921                  * rest should be added to it.
2922                  */
2923                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2924                                                      NLM_F_REPLACE);
2925                 nhn++;
2926         }
2927
2928         goto cleanup;
2929
2930 add_errout:
2931         /* Delete routes that were already added */
2932         list_for_each_entry(nh, &rt6_nh_list, next) {
2933                 if (err_nh == nh)
2934                         break;
2935                 ip6_route_del(&nh->r_cfg);
2936         }
2937
2938 cleanup:
2939         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2940                 if (nh->rt6_info)
2941                         dst_free(&nh->rt6_info->dst);
2942                 kfree(nh->mxc.mx);
2943                 list_del(&nh->next);
2944                 kfree(nh);
2945         }
2946
2947         return err;
2948 }
2949
2950 static int ip6_route_multipath_del(struct fib6_config *cfg)
2951 {
2952         struct fib6_config r_cfg;
2953         struct rtnexthop *rtnh;
2954         int remaining;
2955         int attrlen;
2956         int err = 1, last_err = 0;
2957
2958         remaining = cfg->fc_mp_len;
2959         rtnh = (struct rtnexthop *)cfg->fc_mp;
2960
2961         /* Parse a Multipath Entry */
2962         while (rtnh_ok(rtnh, remaining)) {
2963                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2964                 if (rtnh->rtnh_ifindex)
2965                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2966
2967                 attrlen = rtnh_attrlen(rtnh);
2968                 if (attrlen > 0) {
2969                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2970
2971                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2972                         if (nla) {
2973                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2974                                 r_cfg.fc_flags |= RTF_GATEWAY;
2975                         }
2976                 }
2977                 err = ip6_route_del(&r_cfg);
2978                 if (err)
2979                         last_err = err;
2980
2981                 rtnh = rtnh_next(rtnh, &remaining);
2982         }
2983
2984         return last_err;
2985 }
2986
2987 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2988 {
2989         struct fib6_config cfg;
2990         int err;
2991
2992         err = rtm_to_fib6_config(skb, nlh, &cfg);
2993         if (err < 0)
2994                 return err;
2995
2996         if (cfg.fc_mp)
2997                 return ip6_route_multipath_del(&cfg);
2998         else
2999                 return ip6_route_del(&cfg);
3000 }
3001
3002 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3003 {
3004         struct fib6_config cfg;
3005         int err;
3006
3007         err = rtm_to_fib6_config(skb, nlh, &cfg);
3008         if (err < 0)
3009                 return err;
3010
3011         if (cfg.fc_mp)
3012                 return ip6_route_multipath_add(&cfg);
3013         else
3014                 return ip6_route_add(&cfg);
3015 }
3016
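/* Upper bound on the netlink message size needed to dump a single route. */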
3017 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3018 {
3019         return NLMSG_ALIGN(sizeof(struct rtmsg))
3020                + nla_total_size(16) /* RTA_SRC */
3021                + nla_total_size(16) /* RTA_DST */
3022                + nla_total_size(16) /* RTA_GATEWAY */
3023                + nla_total_size(16) /* RTA_PREFSRC */
3024                + nla_total_size(4) /* RTA_TABLE */
3025                + nla_total_size(4) /* RTA_IIF */
3026                + nla_total_size(4) /* RTA_OIF */
3027                + nla_total_size(4) /* RTA_PRIORITY */
3028                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3029                + nla_total_size(sizeof(struct rta_cacheinfo))
3030                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3031                + nla_total_size(1) /* RTA_PREF */
3032                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3033 }
3034
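/*
 * Fill one routing netlink message (rtmsg header plus RTA_* attributes)
 * for @rt into @skb.  With @prefix set, non-RTF_PREFIX_RT routes are
 * skipped; returns -EMSGSIZE if the message does not fit.
 */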
3035 static int rt6_fill_node(struct net *net,
3036                          struct sk_buff *skb, struct rt6_info *rt,
3037                          struct in6_addr *dst, struct in6_addr *src,
3038                          int iif, int type, u32 portid, u32 seq,
3039                          int prefix, int nowait, unsigned int flags)
3040 {
3041         u32 metrics[RTAX_MAX];
3042         struct rtmsg *rtm;
3043         struct nlmsghdr *nlh;
3044         long expires;
3045         u32 table;
3046
3047         if (prefix) {   /* user wants prefix routes only */
3048                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3049                         /* success since this is not a prefix route */
3050                         return 1;
3051                 }
3052         }
3053
3054         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3055         if (!nlh)
3056                 return -EMSGSIZE;
3057
3058         rtm = nlmsg_data(nlh);
3059         rtm->rtm_family = AF_INET6;
3060         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3061         rtm->rtm_src_len = rt->rt6i_src.plen;
3062         rtm->rtm_tos = 0;
3063         if (rt->rt6i_table)
3064                 table = rt->rt6i_table->tb6_id;
3065         else
3066                 table = RT6_TABLE_UNSPEC;
3067         rtm->rtm_table = table;
3068         if (nla_put_u32(skb, RTA_TABLE, table))
3069                 goto nla_put_failure;
3070         if (rt->rt6i_flags & RTF_REJECT) {
3071                 switch (rt->dst.error) {
3072                 case -EINVAL:
3073                         rtm->rtm_type = RTN_BLACKHOLE;
3074                         break;
3075                 case -EACCES:
3076                         rtm->rtm_type = RTN_PROHIBIT;
3077                         break;
3078                 case -EAGAIN:
3079                         rtm->rtm_type = RTN_THROW;
3080                         break;
3081                 default:
3082                         rtm->rtm_type = RTN_UNREACHABLE;
3083                         break;
3084                 }
3085         } else if (rt->rt6i_flags & RTF_LOCAL)
3087                 rtm->rtm_type = RTN_LOCAL;
3088         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3089                 rtm->rtm_type = RTN_LOCAL;
3090         else
3091                 rtm->rtm_type = RTN_UNICAST;
3092         rtm->rtm_flags = 0;
3093         if (!netif_carrier_ok(rt->dst.dev)) {
3094                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3095                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3096                         rtm->rtm_flags |= RTNH_F_DEAD;
3097         }
3098         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3099         rtm->rtm_protocol = rt->rt6i_protocol;
3100         if (rt->rt6i_flags & RTF_DYNAMIC)
3101                 rtm->rtm_protocol = RTPROT_REDIRECT;
3102         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3103                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3104                         rtm->rtm_protocol = RTPROT_RA;
3105                 else
3106                         rtm->rtm_protocol = RTPROT_KERNEL;
3107         }
3108
3109         if (rt->rt6i_flags & RTF_CACHE)
3110                 rtm->rtm_flags |= RTM_F_CLONED;
3111
3112         if (dst) {
3113                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3114                         goto nla_put_failure;
3115                 rtm->rtm_dst_len = 128;
3116         } else if (rtm->rtm_dst_len)
3117                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3118                         goto nla_put_failure;
3119 #ifdef CONFIG_IPV6_SUBTREES
3120         if (src) {
3121                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3122                         goto nla_put_failure;
3123                 rtm->rtm_src_len = 128;
3124         } else if (rtm->rtm_src_len &&
3125                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3126                 goto nla_put_failure;
3127 #endif
3128         if (iif) {
3129 #ifdef CONFIG_IPV6_MROUTE
3130                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3131                         int err = ip6mr_get_route(net, skb, rtm, nowait,
3132                                                   portid);
3133
3134                         if (err <= 0) {
3135                                 if (!nowait) {
3136                                         if (err == 0)
3137                                                 return 0;
3138                                         goto nla_put_failure;
3139                                 } else {
3140                                         if (err == -EMSGSIZE)
3141                                                 goto nla_put_failure;
3142                                 }
3143                         }
3144                 } else
3145 #endif
3146                         if (nla_put_u32(skb, RTA_IIF, iif))
3147                                 goto nla_put_failure;
3148         } else if (dst) {
3149                 struct in6_addr saddr_buf;
3150                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3151                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3152                         goto nla_put_failure;
3153         }
3154
3155         if (rt->rt6i_prefsrc.plen) {
3156                 struct in6_addr saddr_buf;
3157                 saddr_buf = rt->rt6i_prefsrc.addr;
3158                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3159                         goto nla_put_failure;
3160         }
3161
3162         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3163         if (rt->rt6i_pmtu)
3164                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3165         if (rtnetlink_put_metrics(skb, metrics) < 0)
3166                 goto nla_put_failure;
3167
3168         if (rt->rt6i_flags & RTF_GATEWAY) {
3169                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3170                         goto nla_put_failure;
3171         }
3172
3173         if (rt->dst.dev &&
3174             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3175                 goto nla_put_failure;
3176         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3177                 goto nla_put_failure;
3178
3179         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3180
3181         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3182                 goto nla_put_failure;
3183
3184         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3185                 goto nla_put_failure;
3186
3187         lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3188
3189         nlmsg_end(skb, nlh);
3190         return 0;
3191
3192 nla_put_failure:
3193         nlmsg_cancel(skb, nlh);
3194         return -EMSGSIZE;
3195 }
3196
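/* Per-route callback used while dumping the FIB over netlink
 * (e.g. "ip -6 route show").  RTM_F_PREFIX in the request restricts
 * the dump to prefix (RTF_PREFIX_RT) routes.
 */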
3197 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3198 {
3199         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3200         int prefix;
3201
3202         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3203                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3204                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3205         } else
3206                 prefix = 0;
3207
3208         return rt6_fill_node(arg->net,
3209                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3210                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3211                      prefix, 0, NLM_F_MULTI);
3212 }
3213
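/* RTM_GETROUTE handler (e.g. "ip -6 route get <addr>"): performs an
 * input or output route lookup, depending on whether RTA_IIF was
 * supplied, and unicasts the result back to the requester.
 */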
3214 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3215 {
3216         struct net *net = sock_net(in_skb->sk);
3217         struct nlattr *tb[RTA_MAX+1];
3218         struct rt6_info *rt;
3219         struct sk_buff *skb;
3220         struct rtmsg *rtm;
3221         struct flowi6 fl6;
3222         int err, iif = 0, oif = 0;
3223
3224         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3225         if (err < 0)
3226                 goto errout;
3227
3228         err = -EINVAL;
3229         memset(&fl6, 0, sizeof(fl6));
3230
3231         if (tb[RTA_SRC]) {
3232                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3233                         goto errout;
3234
3235                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3236         }
3237
3238         if (tb[RTA_DST]) {
3239                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3240                         goto errout;
3241
3242                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3243         }
3244
3245         if (tb[RTA_IIF])
3246                 iif = nla_get_u32(tb[RTA_IIF]);
3247
3248         if (tb[RTA_OIF])
3249                 oif = nla_get_u32(tb[RTA_OIF]);
3250
3251         if (tb[RTA_MARK])
3252                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3253
3254         if (tb[RTA_UID])
3255                 fl6.flowi6_uid = make_kuid(current_user_ns(),
3256                                            nla_get_u32(tb[RTA_UID]));
3257         else
3258                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3259         if (iif) {
3260                 struct net_device *dev;
3261                 int flags = 0;
3262
3263                 dev = __dev_get_by_index(net, iif);
3264                 if (!dev) {
3265                         err = -ENODEV;
3266                         goto errout;
3267                 }
3268
3269                 fl6.flowi6_iif = iif;
3270
3271                 if (!ipv6_addr_any(&fl6.saddr))
3272                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3273
3274                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3275                                                                flags);
3276         } else {
3277                 fl6.flowi6_oif = oif;
3278
3279                 if (netif_index_is_l3_master(net, oif)) {
3280                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3281                                            FLOWI_FLAG_SKIP_NH_OIF;
3282                 }
3283
3284                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3285         }
3286
3287         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3288         if (!skb) {
3289                 ip6_rt_put(rt);
3290                 err = -ENOBUFS;
3291                 goto errout;
3292         }
3293
3294         /* Reserve room for dummy headers; this skb can pass
3295          * through a good chunk of the routing engine.
3296          */
3297         skb_reset_mac_header(skb);
3298         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3299
3300         skb_dst_set(skb, &rt->dst);
3301
3302         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3303                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3304                             nlh->nlmsg_seq, 0, 0, 0);
3305         if (err < 0) {
3306                 kfree_skb(skb);
3307                 goto errout;
3308         }
3309
3310         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3311 errout:
3312         return err;
3313 }
3314
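/* Notify RTNLGRP_IPV6_ROUTE listeners about a route change.  The
 * message buffer is sized with rt6_nlmsg_size(), so a fill failure
 * here indicates a bug in that estimate.
 */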
3315 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3316                      unsigned int nlm_flags)
3317 {
3318         struct sk_buff *skb;
3319         struct net *net = info->nl_net;
3320         u32 seq;
3321         int err;
3322
3323         err = -ENOBUFS;
3324         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3325
3326         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3327         if (!skb)
3328                 goto errout;
3329
3330         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3331                                 event, info->portid, seq, 0, 0, nlm_flags);
3332         if (err < 0) {
3333                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3334                 WARN_ON(err == -EMSGSIZE);
3335                 kfree_skb(skb);
3336                 goto errout;
3337         }
3338         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3339                     info->nlh, gfp_any());
3340         return;
3341 errout:
3342         if (err < 0)
3343                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3344 }
3345
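/* Netdevice notifier: when the loopback device of a namespace is
 * registered, attach it to the special null (and, with policy
 * routing, prohibit/blackhole) route entries of that namespace.
 */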
3346 static int ip6_route_dev_notify(struct notifier_block *this,
3347                                 unsigned long event, void *ptr)
3348 {
3349         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3350         struct net *net = dev_net(dev);
3351
3352         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3353                 net->ipv6.ip6_null_entry->dst.dev = dev;
3354                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3355 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3356                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3357                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3358                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3359                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3360 #endif
3361         }
3362
3363         return NOTIFY_OK;
3364 }
3365
3366 /*
3367  *      /proc
3368  */
3369
3370 #ifdef CONFIG_PROC_FS
3371
3372 static const struct file_operations ipv6_route_proc_fops = {
3373         .owner          = THIS_MODULE,
3374         .open           = ipv6_route_open,
3375         .read           = seq_read,
3376         .llseek         = seq_lseek,
3377         .release        = seq_release_net,
3378 };
3379
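/* /proc/net/rt6_stats: seven hex fields - fib nodes, route nodes,
 * route allocations, route entries, cached routes, dst entries in
 * use, and discarded routes.
 */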
3380 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3381 {
3382         struct net *net = (struct net *)seq->private;
3383         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3384                    net->ipv6.rt6_stats->fib_nodes,
3385                    net->ipv6.rt6_stats->fib_route_nodes,
3386                    net->ipv6.rt6_stats->fib_rt_alloc,
3387                    net->ipv6.rt6_stats->fib_rt_entries,
3388                    net->ipv6.rt6_stats->fib_rt_cache,
3389                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3390                    net->ipv6.rt6_stats->fib_discarded_routes);
3391
3392         return 0;
3393 }
3394
3395 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3396 {
3397         return single_open_net(inode, file, rt6_stats_seq_show);
3398 }
3399
3400 static const struct file_operations rt6_stats_seq_fops = {
3401         .owner   = THIS_MODULE,
3402         .open    = rt6_stats_seq_open,
3403         .read    = seq_read,
3404         .llseek  = seq_lseek,
3405         .release = single_release_net,
3406 };
3407 #endif  /* CONFIG_PROC_FS */
3408
3409 #ifdef CONFIG_SYSCTL
3410
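/* Write-only "flush" sysctl: writing a delay value (typically via
 * /proc/sys/net/ipv6/route/flush) triggers an immediate run of the
 * fib6 garbage collector.
 */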
3411 static
3412 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3413                               void __user *buffer, size_t *lenp, loff_t *ppos)
3414 {
3415         struct net *net;
3416         int delay;
3417         if (!write)
3418                 return -EINVAL;
3419
3420         net = (struct net *)ctl->extra1;
3421         delay = net->ipv6.sysctl.flush_delay;
3422         proc_dointvec(ctl, write, buffer, lenp, ppos);
3423         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3424         return 0;
3425 }
3426
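/* Template for the per-namespace route sysctls; ipv6_route_sysctl_init()
 * below duplicates it and points each entry at the namespace's own data.
 */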
3427 struct ctl_table ipv6_route_table_template[] = {
3428         {
3429                 .procname       =       "flush",
3430                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3431                 .maxlen         =       sizeof(int),
3432                 .mode           =       0200,
3433                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3434         },
3435         {
3436                 .procname       =       "gc_thresh",
3437                 .data           =       &ip6_dst_ops_template.gc_thresh,
3438                 .maxlen         =       sizeof(int),
3439                 .mode           =       0644,
3440                 .proc_handler   =       proc_dointvec,
3441         },
3442         {
3443                 .procname       =       "max_size",
3444                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3445                 .maxlen         =       sizeof(int),
3446                 .mode           =       0644,
3447                 .proc_handler   =       proc_dointvec,
3448         },
3449         {
3450                 .procname       =       "gc_min_interval",
3451                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3452                 .maxlen         =       sizeof(int),
3453                 .mode           =       0644,
3454                 .proc_handler   =       proc_dointvec_jiffies,
3455         },
3456         {
3457                 .procname       =       "gc_timeout",
3458                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3459                 .maxlen         =       sizeof(int),
3460                 .mode           =       0644,
3461                 .proc_handler   =       proc_dointvec_jiffies,
3462         },
3463         {
3464                 .procname       =       "gc_interval",
3465                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3466                 .maxlen         =       sizeof(int),
3467                 .mode           =       0644,
3468                 .proc_handler   =       proc_dointvec_jiffies,
3469         },
3470         {
3471                 .procname       =       "gc_elasticity",
3472                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3473                 .maxlen         =       sizeof(int),
3474                 .mode           =       0644,
3475                 .proc_handler   =       proc_dointvec,
3476         },
3477         {
3478                 .procname       =       "mtu_expires",
3479                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3480                 .maxlen         =       sizeof(int),
3481                 .mode           =       0644,
3482                 .proc_handler   =       proc_dointvec_jiffies,
3483         },
3484         {
3485                 .procname       =       "min_adv_mss",
3486                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3487                 .maxlen         =       sizeof(int),
3488                 .mode           =       0644,
3489                 .proc_handler   =       proc_dointvec,
3490         },
3491         {
3492                 .procname       =       "gc_min_interval_ms",
3493                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3494                 .maxlen         =       sizeof(int),
3495                 .mode           =       0644,
3496                 .proc_handler   =       proc_dointvec_ms_jiffies,
3497         },
3498         { }
3499 };
3500
3501 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3502 {
3503         struct ctl_table *table;
3504
3505         table = kmemdup(ipv6_route_table_template,
3506                         sizeof(ipv6_route_table_template),
3507                         GFP_KERNEL);
3508
3509         if (table) {
3510                 table[0].data = &net->ipv6.sysctl.flush_delay;
3511                 table[0].extra1 = net;
3512                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3513                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3514                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3515                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3516                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3517                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3518                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3519                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3520                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3521
3522                 /* Don't export sysctls to unprivileged users */
3523                 if (net->user_ns != &init_user_ns)
3524                         table[0].procname = NULL;
3525         }
3526
3527         return table;
3528 }
3529 #endif
3530
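/* Per-namespace setup: clone the dst_ops template, allocate the null
 * (and optional prohibit/blackhole) route entries, and seed the sysctl
 * defaults and GC expiry.
 */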
3531 static int __net_init ip6_route_net_init(struct net *net)
3532 {
3533         int ret = -ENOMEM;
3534
3535         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3536                sizeof(net->ipv6.ip6_dst_ops));
3537
3538         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3539                 goto out_ip6_dst_ops;
3540
3541         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3542                                            sizeof(*net->ipv6.ip6_null_entry),
3543                                            GFP_KERNEL);
3544         if (!net->ipv6.ip6_null_entry)
3545                 goto out_ip6_dst_entries;
3546         net->ipv6.ip6_null_entry->dst.path =
3547                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3548         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3549         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3550                          ip6_template_metrics, true);
3551
3552 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3553         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3554                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3555                                                GFP_KERNEL);
3556         if (!net->ipv6.ip6_prohibit_entry)
3557                 goto out_ip6_null_entry;
3558         net->ipv6.ip6_prohibit_entry->dst.path =
3559                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3560         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3561         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3562                          ip6_template_metrics, true);
3563
3564         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3565                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3566                                                GFP_KERNEL);
3567         if (!net->ipv6.ip6_blk_hole_entry)
3568                 goto out_ip6_prohibit_entry;
3569         net->ipv6.ip6_blk_hole_entry->dst.path =
3570                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3571         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3572         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3573                          ip6_template_metrics, true);
3574 #endif
3575
3576         net->ipv6.sysctl.flush_delay = 0;
3577         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3578         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3579         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3580         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3581         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3582         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3583         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3584
3585         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3586
3587         ret = 0;
3588 out:
3589         return ret;
3590
3591 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3592 out_ip6_prohibit_entry:
3593         kfree(net->ipv6.ip6_prohibit_entry);
3594 out_ip6_null_entry:
3595         kfree(net->ipv6.ip6_null_entry);
3596 #endif
3597 out_ip6_dst_entries:
3598         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3599 out_ip6_dst_ops:
3600         goto out;
3601 }
3602
3603 static void __net_exit ip6_route_net_exit(struct net *net)
3604 {
3605         kfree(net->ipv6.ip6_null_entry);
3606 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3607         kfree(net->ipv6.ip6_prohibit_entry);
3608         kfree(net->ipv6.ip6_blk_hole_entry);
3609 #endif
3610         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3611 }
3612
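/* Late per-namespace init: create the /proc/net/ipv6_route and
 * /proc/net/rt6_stats entries.
 */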
3613 static int __net_init ip6_route_net_init_late(struct net *net)
3614 {
3615 #ifdef CONFIG_PROC_FS
3616         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3617         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3618 #endif
3619         return 0;
3620 }
3621
3622 static void __net_exit ip6_route_net_exit_late(struct net *net)
3623 {
3624 #ifdef CONFIG_PROC_FS
3625         remove_proc_entry("ipv6_route", net->proc_net);
3626         remove_proc_entry("rt6_stats", net->proc_net);
3627 #endif
3628 }
3629
3630 static struct pernet_operations ip6_route_net_ops = {
3631         .init = ip6_route_net_init,
3632         .exit = ip6_route_net_exit,
3633 };
3634
3635 static int __net_init ipv6_inetpeer_init(struct net *net)
3636 {
3637         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3638
3639         if (!bp)
3640                 return -ENOMEM;
3641         inet_peer_base_init(bp);
3642         net->ipv6.peers = bp;
3643         return 0;
3644 }
3645
3646 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3647 {
3648         struct inet_peer_base *bp = net->ipv6.peers;
3649
3650         net->ipv6.peers = NULL;
3651         inetpeer_invalidate_tree(bp);
3652         kfree(bp);
3653 }
3654
3655 static struct pernet_operations ipv6_inetpeer_ops = {
3656         .init   =       ipv6_inetpeer_init,
3657         .exit   =       ipv6_inetpeer_exit,
3658 };
3659
3660 static struct pernet_operations ip6_route_net_late_ops = {
3661         .init = ip6_route_net_init_late,
3662         .exit = ip6_route_net_exit_late,
3663 };
3664
3665 static struct notifier_block ip6_route_dev_notifier = {
3666         .notifier_call = ip6_route_dev_notify,
3667         .priority = 0,
3668 };
3669
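/* Subsystem init.  Ordering matters: the dst kmem cache and pernet
 * state come first, then fib6, xfrm6 and policy rules, and only then
 * the rtnetlink handlers and netdevice notifier; the error path
 * unwinds in strict reverse order.
 */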
3670 int __init ip6_route_init(void)
3671 {
3672         int ret;
3673         int cpu;
3674
3675         ret = -ENOMEM;
3676         ip6_dst_ops_template.kmem_cachep =
3677                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3678                                   SLAB_HWCACHE_ALIGN, NULL);
3679         if (!ip6_dst_ops_template.kmem_cachep)
3680                 goto out;
3681
3682         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3683         if (ret)
3684                 goto out_kmem_cache;
3685
3686         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3687         if (ret)
3688                 goto out_dst_entries;
3689
3690         ret = register_pernet_subsys(&ip6_route_net_ops);
3691         if (ret)
3692                 goto out_register_inetpeer;
3693
3694         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3695
3696         /* The loopback device is registered before this code runs, so the
3697          * loopback reference in rt6_info is not taken automatically; take it
3698          * manually for init_net */
3699         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3700         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3701   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3702         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3703         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3704         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3705         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3706   #endif
3707         ret = fib6_init();
3708         if (ret)
3709                 goto out_register_subsys;
3710
3711         ret = xfrm6_init();
3712         if (ret)
3713                 goto out_fib6_init;
3714
3715         ret = fib6_rules_init();
3716         if (ret)
3717                 goto xfrm6_init;
3718
3719         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3720         if (ret)
3721                 goto fib6_rules_init;
3722
3723         ret = -ENOBUFS;
3724         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3725             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3726             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3727                 goto out_register_late_subsys;
3728
3729         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3730         if (ret)
3731                 goto out_register_late_subsys;
3732
3733         for_each_possible_cpu(cpu) {
3734                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3735
3736                 INIT_LIST_HEAD(&ul->head);
3737                 spin_lock_init(&ul->lock);
3738         }
3739
3740 out:
3741         return ret;
3742
3743 out_register_late_subsys:
3744         unregister_pernet_subsys(&ip6_route_net_late_ops);
3745 fib6_rules_init:
3746         fib6_rules_cleanup();
3747 xfrm6_init:
3748         xfrm6_fini();
3749 out_fib6_init:
3750         fib6_gc_cleanup();
3751 out_register_subsys:
3752         unregister_pernet_subsys(&ip6_route_net_ops);
3753 out_register_inetpeer:
3754         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3755 out_dst_entries:
3756         dst_entries_destroy(&ip6_dst_blackhole_ops);
3757 out_kmem_cache:
3758         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3759         goto out;
3760 }
3761
3762 void ip6_route_cleanup(void)
3763 {
3764         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3765         unregister_pernet_subsys(&ip6_route_net_late_ops);
3766         fib6_rules_cleanup();
3767         xfrm6_fini();
3768         fib6_gc_cleanup();
3769         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3770         unregister_pernet_subsys(&ip6_route_net_ops);
3771         dst_entries_destroy(&ip6_dst_blackhole_ops);
3772         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3773 }