Merge branch 'linux-linaro-lsk-v4.4' into linux-linaro-lsk-v4.4-android
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
/*
 * Result of the next-hop neighbour check used while scoring a route.
 * Every failure code is negative so that callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		=  1,	/* next hop usable */
	RT6_NUD_FAIL_DO_RR	= -1,	/* usable, but ask for round-robin */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in a failed state */
	RT6_NUD_FAIL_HARD	= -3,	/* route must be skipped */
};
78
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int      ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void             ip6_dst_destroy(struct dst_entry *);
85 static void             ip6_dst_ifdown(struct dst_entry *,
86                                        struct net_device *dev, int how);
87 static int               ip6_dst_gc(struct dst_ops *ops);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int              ip6_pkt_prohibit(struct sk_buff *skb);
92 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95                                            struct sk_buff *skb, u32 mtu);
96 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97                                         struct sk_buff *skb);
98 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Route Information option helpers, defined later in the file. */
static struct rt6_info *rt6_get_route_info(struct net_device *dev,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr);
static struct rt6_info *rt6_add_route_info(struct net_device *dev,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   unsigned int pref);
108 #endif
109
110 struct uncached_list {
111         spinlock_t              lock;
112         struct list_head        head;
113 };
114
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121         rt->dst.flags |= DST_NOCACHE;
122         rt->rt6i_uncached_list = ul;
123
124         spin_lock_bh(&ul->lock);
125         list_add_tail(&rt->rt6i_uncached, &ul->head);
126         spin_unlock_bh(&ul->lock);
127 }
128
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131         if (!list_empty(&rt->rt6i_uncached)) {
132                 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134                 spin_lock_bh(&ul->lock);
135                 list_del(&rt->rt6i_uncached);
136                 spin_unlock_bh(&ul->lock);
137         }
138 }
139
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142         struct net_device *loopback_dev = net->loopback_dev;
143         int cpu;
144
145         if (dev == loopback_dev)
146                 return;
147
148         for_each_possible_cpu(cpu) {
149                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150                 struct rt6_info *rt;
151
152                 spin_lock_bh(&ul->lock);
153                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154                         struct inet6_dev *rt_idev = rt->rt6i_idev;
155                         struct net_device *rt_dev = rt->dst.dev;
156
157                         if (rt_idev->dev == dev) {
158                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
159                                 in6_dev_put(rt_idev);
160                         }
161
162                         if (rt_dev == dev) {
163                                 rt->dst.dev = loopback_dev;
164                                 dev_hold(rt->dst.dev);
165                                 dev_put(rt_dev);
166                         }
167                 }
168                 spin_unlock_bh(&ul->lock);
169         }
170 }
171
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174         return dst_metrics_write_ptr(rt->dst.from);
175 }
176
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179         struct rt6_info *rt = (struct rt6_info *)dst;
180
181         if (rt->rt6i_flags & RTF_PCPU)
182                 return rt6_pcpu_cow_metrics(rt);
183         else if (rt->rt6i_flags & RTF_CACHE)
184                 return NULL;
185         else
186                 return dst_cow_metrics_generic(dst, old);
187 }
188
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190                                              struct sk_buff *skb,
191                                              const void *daddr)
192 {
193         struct in6_addr *p = &rt->rt6i_gateway;
194
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203                                           struct sk_buff *skb,
204                                           const void *daddr)
205 {
206         struct rt6_info *rt = (struct rt6_info *) dst;
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(rt, skb, daddr);
210         n = __ipv6_neigh_lookup(dst->dev, daddr);
211         if (n)
212                 return n;
213         return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215
216 static struct dst_ops ip6_dst_ops_template = {
217         .family                 =       AF_INET6,
218         .gc                     =       ip6_dst_gc,
219         .gc_thresh              =       1024,
220         .check                  =       ip6_dst_check,
221         .default_advmss         =       ip6_default_advmss,
222         .mtu                    =       ip6_mtu,
223         .cow_metrics            =       ipv6_cow_metrics,
224         .destroy                =       ip6_dst_destroy,
225         .ifdown                 =       ip6_dst_ifdown,
226         .negative_advice        =       ip6_negative_advice,
227         .link_failure           =       ip6_link_failure,
228         .update_pmtu            =       ip6_rt_update_pmtu,
229         .redirect               =       rt6_do_redirect,
230         .local_out              =       __ip6_local_out,
231         .neigh_lookup           =       ip6_neigh_lookup,
232 };
233
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238         return mtu ? : dst->dev->mtu;
239 }
240
241 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242                                          struct sk_buff *skb, u32 mtu)
243 {
244 }
245
/* dst_ops->redirect for blackhole routes: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
250
251 static struct dst_ops ip6_dst_blackhole_ops = {
252         .family                 =       AF_INET6,
253         .destroy                =       ip6_dst_destroy,
254         .check                  =       ip6_dst_check,
255         .mtu                    =       ip6_blackhole_mtu,
256         .default_advmss         =       ip6_default_advmss,
257         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
258         .redirect               =       ip6_rt_blackhole_redirect,
259         .cow_metrics            =       dst_cow_metrics_generic,
260         .neigh_lookup           =       ip6_neigh_lookup,
261 };
262
263 static const u32 ip6_template_metrics[RTAX_MAX] = {
264         [RTAX_HOPLIMIT - 1] = 0,
265 };
266
267 static const struct rt6_info ip6_null_entry_template = {
268         .dst = {
269                 .__refcnt       = ATOMIC_INIT(1),
270                 .__use          = 1,
271                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
272                 .error          = -ENETUNREACH,
273                 .input          = ip6_pkt_discard,
274                 .output         = ip6_pkt_discard_out,
275         },
276         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
277         .rt6i_protocol  = RTPROT_KERNEL,
278         .rt6i_metric    = ~(u32) 0,
279         .rt6i_ref       = ATOMIC_INIT(1),
280 };
281
282 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
283
284 static const struct rt6_info ip6_prohibit_entry_template = {
285         .dst = {
286                 .__refcnt       = ATOMIC_INIT(1),
287                 .__use          = 1,
288                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
289                 .error          = -EACCES,
290                 .input          = ip6_pkt_prohibit,
291                 .output         = ip6_pkt_prohibit_out,
292         },
293         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
294         .rt6i_protocol  = RTPROT_KERNEL,
295         .rt6i_metric    = ~(u32) 0,
296         .rt6i_ref       = ATOMIC_INIT(1),
297 };
298
299 static const struct rt6_info ip6_blk_hole_entry_template = {
300         .dst = {
301                 .__refcnt       = ATOMIC_INIT(1),
302                 .__use          = 1,
303                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
304                 .error          = -EINVAL,
305                 .input          = dst_discard,
306                 .output         = dst_discard_out,
307         },
308         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
309         .rt6i_protocol  = RTPROT_KERNEL,
310         .rt6i_metric    = ~(u32) 0,
311         .rt6i_ref       = ATOMIC_INIT(1),
312 };
313
314 #endif
315
316 static void rt6_info_init(struct rt6_info *rt)
317 {
318         struct dst_entry *dst = &rt->dst;
319
320         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
321         INIT_LIST_HEAD(&rt->rt6i_siblings);
322         INIT_LIST_HEAD(&rt->rt6i_uncached);
323 }
324
325 /* allocate dst with ip6_dst_ops */
326 static struct rt6_info *__ip6_dst_alloc(struct net *net,
327                                         struct net_device *dev,
328                                         int flags)
329 {
330         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
331                                         0, DST_OBSOLETE_FORCE_CHK, flags);
332
333         if (rt)
334                 rt6_info_init(rt);
335
336         return rt;
337 }
338
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340                                       struct net_device *dev,
341                                       int flags)
342 {
343         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344
345         if (rt) {
346                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347                 if (rt->rt6i_pcpu) {
348                         int cpu;
349
350                         for_each_possible_cpu(cpu) {
351                                 struct rt6_info **p;
352
353                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354                                 /* no one shares rt */
355                                 *p =  NULL;
356                         }
357                 } else {
358                         dst_destroy((struct dst_entry *)rt);
359                         return NULL;
360                 }
361         }
362
363         return rt;
364 }
365
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368         struct rt6_info *rt = (struct rt6_info *)dst;
369         struct dst_entry *from = dst->from;
370         struct inet6_dev *idev;
371
372         dst_destroy_metrics_generic(dst);
373         free_percpu(rt->rt6i_pcpu);
374         rt6_uncached_list_del(rt);
375
376         idev = rt->rt6i_idev;
377         if (idev) {
378                 rt->rt6i_idev = NULL;
379                 in6_dev_put(idev);
380         }
381
382         dst->from = NULL;
383         dst_release(from);
384 }
385
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387                            int how)
388 {
389         struct rt6_info *rt = (struct rt6_info *)dst;
390         struct inet6_dev *idev = rt->rt6i_idev;
391         struct net_device *loopback_dev =
392                 dev_net(dev)->loopback_dev;
393
394         if (dev != loopback_dev) {
395                 if (idev && idev->dev == dev) {
396                         struct inet6_dev *loopback_idev =
397                                 in6_dev_get(loopback_dev);
398                         if (loopback_idev) {
399                                 rt->rt6i_idev = loopback_idev;
400                                 in6_dev_put(idev);
401                         }
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         if (rt->rt6i_flags & RTF_EXPIRES) {
417                 if (time_after(jiffies, rt->dst.expires))
418                         return true;
419         } else if (rt->dst.from) {
420                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
421         }
422         return false;
423 }
424
425 /* Multipath route selection:
426  *   Hash based function using packet header and flowlabel.
427  * Adapted from fib_info_hashfn()
428  */
429 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
430                                const struct flowi6 *fl6)
431 {
432         return get_hash_from_flowi6(fl6) % candidate_count;
433 }
434
435 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
436                                              struct flowi6 *fl6, int oif,
437                                              int strict)
438 {
439         struct rt6_info *sibling, *next_sibling;
440         int route_choosen;
441
442         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
443         /* Don't change the route, if route_choosen == 0
444          * (siblings does not include ourself)
445          */
446         if (route_choosen)
447                 list_for_each_entry_safe(sibling, next_sibling,
448                                 &match->rt6i_siblings, rt6i_siblings) {
449                         route_choosen--;
450                         if (route_choosen == 0) {
451                                 if (rt6_score_route(sibling, oif, strict) < 0)
452                                         break;
453                                 match = sibling;
454                                 break;
455                         }
456                 }
457         return match;
458 }
459
460 /*
461  *      Route lookup. Any table->tb6_lock is implied.
462  */
463
464 static inline struct rt6_info *rt6_device_match(struct net *net,
465                                                     struct rt6_info *rt,
466                                                     const struct in6_addr *saddr,
467                                                     int oif,
468                                                     int flags)
469 {
470         struct rt6_info *local = NULL;
471         struct rt6_info *sprt;
472
473         if (!oif && ipv6_addr_any(saddr))
474                 goto out;
475
476         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
477                 struct net_device *dev = sprt->dst.dev;
478
479                 if (oif) {
480                         if (dev->ifindex == oif)
481                                 return sprt;
482                         if (dev->flags & IFF_LOOPBACK) {
483                                 if (!sprt->rt6i_idev ||
484                                     sprt->rt6i_idev->dev->ifindex != oif) {
485                                         if (flags & RT6_LOOKUP_F_IFACE)
486                                                 continue;
487                                         if (local &&
488                                             local->rt6i_idev->dev->ifindex == oif)
489                                                 continue;
490                                 }
491                                 local = sprt;
492                         }
493                 } else {
494                         if (ipv6_chk_addr(net, saddr, dev,
495                                           flags & RT6_LOOKUP_F_IFACE))
496                                 return sprt;
497                 }
498         }
499
500         if (oif) {
501                 if (local)
502                         return local;
503
504                 if (flags & RT6_LOOKUP_F_IFACE)
505                         return net->ipv6.ip6_null_entry;
506         }
507 out:
508         return rt;
509 }
510
511 #ifdef CONFIG_IPV6_ROUTER_PREF
512 struct __rt6_probe_work {
513         struct work_struct work;
514         struct in6_addr target;
515         struct net_device *dev;
516 };
517
518 static void rt6_probe_deferred(struct work_struct *w)
519 {
520         struct in6_addr mcaddr;
521         struct __rt6_probe_work *work =
522                 container_of(w, struct __rt6_probe_work, work);
523
524         addrconf_addr_solict_mult(&work->target, &mcaddr);
525         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
526         dev_put(work->dev);
527         kfree(work);
528 }
529
530 static void rt6_probe(struct rt6_info *rt)
531 {
532         struct __rt6_probe_work *work;
533         struct neighbour *neigh;
534         /*
535          * Okay, this does not seem to be appropriate
536          * for now, however, we need to check if it
537          * is really so; aka Router Reachability Probing.
538          *
539          * Router Reachability Probe MUST be rate-limited
540          * to no more than one per minute.
541          */
542         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
543                 return;
544         rcu_read_lock_bh();
545         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
546         if (neigh) {
547                 if (neigh->nud_state & NUD_VALID)
548                         goto out;
549
550                 work = NULL;
551                 write_lock(&neigh->lock);
552                 if (!(neigh->nud_state & NUD_VALID) &&
553                     time_after(jiffies,
554                                neigh->updated +
555                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
556                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
557                         if (work)
558                                 __neigh_set_probe_once(neigh);
559                 }
560                 write_unlock(&neigh->lock);
561         } else {
562                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
563         }
564
565         if (work) {
566                 INIT_WORK(&work->work, rt6_probe_deferred);
567                 work->target = rt->rt6i_gateway;
568                 dev_hold(rt->dst.dev);
569                 work->dev = rt->dst.dev;
570                 schedule_work(&work->work);
571         }
572
573 out:
574         rcu_read_unlock_bh();
575 }
576 #else
/* Router probing is compiled out in this configuration. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
580 #endif
581
582 /*
583  * Default Router Selection (RFC 2461 6.3.6)
584  */
585 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
586 {
587         struct net_device *dev = rt->dst.dev;
588         if (!oif || dev->ifindex == oif)
589                 return 2;
590         if ((dev->flags & IFF_LOOPBACK) &&
591             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
592                 return 1;
593         return 0;
594 }
595
596 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
597 {
598         struct neighbour *neigh;
599         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
600
601         if (rt->rt6i_flags & RTF_NONEXTHOP ||
602             !(rt->rt6i_flags & RTF_GATEWAY))
603                 return RT6_NUD_SUCCEED;
604
605         rcu_read_lock_bh();
606         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
607         if (neigh) {
608                 read_lock(&neigh->lock);
609                 if (neigh->nud_state & NUD_VALID)
610                         ret = RT6_NUD_SUCCEED;
611 #ifdef CONFIG_IPV6_ROUTER_PREF
612                 else if (!(neigh->nud_state & NUD_FAILED))
613                         ret = RT6_NUD_SUCCEED;
614                 else
615                         ret = RT6_NUD_FAIL_PROBE;
616 #endif
617                 read_unlock(&neigh->lock);
618         } else {
619                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
620                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
621         }
622         rcu_read_unlock_bh();
623
624         return ret;
625 }
626
627 static int rt6_score_route(struct rt6_info *rt, int oif,
628                            int strict)
629 {
630         int m;
631
632         m = rt6_check_dev(rt, oif);
633         if (!m && (strict & RT6_LOOKUP_F_IFACE))
634                 return RT6_NUD_FAIL_HARD;
635 #ifdef CONFIG_IPV6_ROUTER_PREF
636         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
637 #endif
638         if (strict & RT6_LOOKUP_F_REACHABLE) {
639                 int n = rt6_check_neigh(rt);
640                 if (n < 0)
641                         return n;
642         }
643         return m;
644 }
645
646 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
647                                    int *mpri, struct rt6_info *match,
648                                    bool *do_rr)
649 {
650         int m;
651         bool match_do_rr = false;
652         struct inet6_dev *idev = rt->rt6i_idev;
653         struct net_device *dev = rt->dst.dev;
654
655         if (dev && !netif_carrier_ok(dev) &&
656             idev->cnf.ignore_routes_with_linkdown)
657                 goto out;
658
659         if (rt6_check_expired(rt))
660                 goto out;
661
662         m = rt6_score_route(rt, oif, strict);
663         if (m == RT6_NUD_FAIL_DO_RR) {
664                 match_do_rr = true;
665                 m = 0; /* lowest valid score */
666         } else if (m == RT6_NUD_FAIL_HARD) {
667                 goto out;
668         }
669
670         if (strict & RT6_LOOKUP_F_REACHABLE)
671                 rt6_probe(rt);
672
673         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
674         if (m > *mpri) {
675                 *do_rr = match_do_rr;
676                 *mpri = m;
677                 match = rt;
678         }
679 out:
680         return match;
681 }
682
683 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
684                                      struct rt6_info *rr_head,
685                                      u32 metric, int oif, int strict,
686                                      bool *do_rr)
687 {
688         struct rt6_info *rt, *match, *cont;
689         int mpri = -1;
690
691         match = NULL;
692         cont = NULL;
693         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
694                 if (rt->rt6i_metric != metric) {
695                         cont = rt;
696                         break;
697                 }
698
699                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
700         }
701
702         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
703                 if (rt->rt6i_metric != metric) {
704                         cont = rt;
705                         break;
706                 }
707
708                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
709         }
710
711         if (match || !cont)
712                 return match;
713
714         for (rt = cont; rt; rt = rt->dst.rt6_next)
715                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716
717         return match;
718 }
719
720 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
721 {
722         struct rt6_info *match, *rt0;
723         struct net *net;
724         bool do_rr = false;
725
726         rt0 = fn->rr_ptr;
727         if (!rt0)
728                 fn->rr_ptr = rt0 = fn->leaf;
729
730         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
731                              &do_rr);
732
733         if (do_rr) {
734                 struct rt6_info *next = rt0->dst.rt6_next;
735
736                 /* no entries matched; do round-robin */
737                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
738                         next = fn->leaf;
739
740                 if (next != rt0)
741                         fn->rr_ptr = next;
742         }
743
744         net = dev_net(rt0->dst.dev);
745         return match ? match : net->ipv6.ip6_null_entry;
746 }
747
748 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
749 {
750         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
751 }
752
753 #ifdef CONFIG_IPV6_ROUTE_INFO
754 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
755                   const struct in6_addr *gwaddr)
756 {
757         struct route_info *rinfo = (struct route_info *) opt;
758         struct in6_addr prefix_buf, *prefix;
759         unsigned int pref;
760         unsigned long lifetime;
761         struct rt6_info *rt;
762
763         if (len < sizeof(struct route_info)) {
764                 return -EINVAL;
765         }
766
767         /* Sanity check for prefix_len and length */
768         if (rinfo->length > 3) {
769                 return -EINVAL;
770         } else if (rinfo->prefix_len > 128) {
771                 return -EINVAL;
772         } else if (rinfo->prefix_len > 64) {
773                 if (rinfo->length < 2) {
774                         return -EINVAL;
775                 }
776         } else if (rinfo->prefix_len > 0) {
777                 if (rinfo->length < 1) {
778                         return -EINVAL;
779                 }
780         }
781
782         pref = rinfo->route_pref;
783         if (pref == ICMPV6_ROUTER_PREF_INVALID)
784                 return -EINVAL;
785
786         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
787
788         if (rinfo->length == 3)
789                 prefix = (struct in6_addr *)rinfo->prefix;
790         else {
791                 /* this function is safe */
792                 ipv6_addr_prefix(&prefix_buf,
793                                  (struct in6_addr *)rinfo->prefix,
794                                  rinfo->prefix_len);
795                 prefix = &prefix_buf;
796         }
797
798         if (rinfo->prefix_len == 0)
799                 rt = rt6_get_dflt_router(gwaddr, dev);
800         else
801                 rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
802
803         if (rt && !lifetime) {
804                 ip6_del_rt(rt);
805                 rt = NULL;
806         }
807
808         if (!rt && lifetime)
809                 rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
810         else if (rt)
811                 rt->rt6i_flags = RTF_ROUTEINFO |
812                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
813
814         if (rt) {
815                 if (!addrconf_finite_timeout(lifetime))
816                         rt6_clean_expires(rt);
817                 else
818                         rt6_set_expires(rt, jiffies + HZ * lifetime);
819
820                 ip6_rt_put(rt);
821         }
822         return 0;
823 }
824 #endif
825
826 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
827                                         struct in6_addr *saddr)
828 {
829         struct fib6_node *pn;
830         while (1) {
831                 if (fn->fn_flags & RTN_TL_ROOT)
832                         return NULL;
833                 pn = fn->parent;
834                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
835                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
836                 else
837                         fn = pn;
838                 if (fn->fn_flags & RTN_RTINFO)
839                         return fn;
840         }
841 }
842
843 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
844                                              struct fib6_table *table,
845                                              struct flowi6 *fl6, int flags)
846 {
847         struct fib6_node *fn;
848         struct rt6_info *rt;
849
850         read_lock_bh(&table->tb6_lock);
851         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
852 restart:
853         rt = fn->leaf;
854         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
855         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
856                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
857         if (rt == net->ipv6.ip6_null_entry) {
858                 fn = fib6_backtrack(fn, &fl6->saddr);
859                 if (fn)
860                         goto restart;
861         }
862         dst_use(&rt->dst, jiffies);
863         read_unlock_bh(&table->tb6_lock);
864         return rt;
865
866 }
867
868 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
869                                     int flags)
870 {
871         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
872 }
873 EXPORT_SYMBOL_GPL(ip6_route_lookup);
874
875 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
876                             const struct in6_addr *saddr, int oif, int strict)
877 {
878         struct flowi6 fl6 = {
879                 .flowi6_oif = oif,
880                 .daddr = *daddr,
881         };
882         struct dst_entry *dst;
883         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
884
885         if (saddr) {
886                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
887                 flags |= RT6_LOOKUP_F_HAS_SADDR;
888         }
889
890         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
891         if (dst->error == 0)
892                 return (struct rt6_info *) dst;
893
894         dst_release(dst);
895
896         return NULL;
897 }
898 EXPORT_SYMBOL(rt6_lookup);
899
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
905
906 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
907                         struct mx6_config *mxc)
908 {
909         int err;
910         struct fib6_table *table;
911
912         table = rt->rt6i_table;
913         write_lock_bh(&table->tb6_lock);
914         err = fib6_add(&table->tb6_root, rt, info, mxc);
915         write_unlock_bh(&table->tb6_lock);
916
917         return err;
918 }
919
920 int ip6_ins_rt(struct rt6_info *rt)
921 {
922         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
923         struct mx6_config mxc = { .mx = NULL, };
924
925         return __ip6_ins_rt(rt, &info, &mxc);
926 }
927
928 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
929                                            const struct in6_addr *daddr,
930                                            const struct in6_addr *saddr)
931 {
932         struct rt6_info *rt;
933
934         /*
935          *      Clone the route.
936          */
937
938         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
939                 ort = (struct rt6_info *)ort->dst.from;
940
941         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
942
943         if (!rt)
944                 return NULL;
945
946         ip6_rt_copy_init(rt, ort);
947         rt->rt6i_flags |= RTF_CACHE;
948         rt->rt6i_metric = 0;
949         rt->dst.flags |= DST_HOST;
950         rt->rt6i_dst.addr = *daddr;
951         rt->rt6i_dst.plen = 128;
952
953         if (!rt6_is_gw_or_nonexthop(ort)) {
954                 if (ort->rt6i_dst.plen != 128 &&
955                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
956                         rt->rt6i_flags |= RTF_ANYCAST;
957 #ifdef CONFIG_IPV6_SUBTREES
958                 if (rt->rt6i_src.plen && saddr) {
959                         rt->rt6i_src.addr = *saddr;
960                         rt->rt6i_src.plen = 128;
961                 }
962 #endif
963         }
964
965         return rt;
966 }
967
968 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
969 {
970         struct rt6_info *pcpu_rt;
971
972         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
973                                   rt->dst.dev, rt->dst.flags);
974
975         if (!pcpu_rt)
976                 return NULL;
977         ip6_rt_copy_init(pcpu_rt, rt);
978         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
979         pcpu_rt->rt6i_flags |= RTF_PCPU;
980         return pcpu_rt;
981 }
982
983 /* It should be called with read_lock_bh(&tb6_lock) acquired */
984 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
985 {
986         struct rt6_info *pcpu_rt, **p;
987
988         p = this_cpu_ptr(rt->rt6i_pcpu);
989         pcpu_rt = *p;
990
991         if (pcpu_rt) {
992                 dst_hold(&pcpu_rt->dst);
993                 rt6_dst_from_metrics_check(pcpu_rt);
994         }
995         return pcpu_rt;
996 }
997
998 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
999 {
1000         struct fib6_table *table = rt->rt6i_table;
1001         struct rt6_info *pcpu_rt, *prev, **p;
1002
1003         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1004         if (!pcpu_rt) {
1005                 struct net *net = dev_net(rt->dst.dev);
1006
1007                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1008                 return net->ipv6.ip6_null_entry;
1009         }
1010
1011         read_lock_bh(&table->tb6_lock);
1012         if (rt->rt6i_pcpu) {
1013                 p = this_cpu_ptr(rt->rt6i_pcpu);
1014                 prev = cmpxchg(p, NULL, pcpu_rt);
1015                 if (prev) {
1016                         /* If someone did it before us, return prev instead */
1017                         dst_destroy(&pcpu_rt->dst);
1018                         pcpu_rt = prev;
1019                 }
1020         } else {
1021                 /* rt has been removed from the fib6 tree
1022                  * before we have a chance to acquire the read_lock.
1023                  * In this case, don't brother to create a pcpu rt
1024                  * since rt is going away anyway.  The next
1025                  * dst_check() will trigger a re-lookup.
1026                  */
1027                 dst_destroy(&pcpu_rt->dst);
1028                 pcpu_rt = rt;
1029         }
1030         dst_hold(&pcpu_rt->dst);
1031         rt6_dst_from_metrics_check(pcpu_rt);
1032         read_unlock_bh(&table->tb6_lock);
1033         return pcpu_rt;
1034 }
1035
1036 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1037                                       struct flowi6 *fl6, int flags)
1038 {
1039         struct fib6_node *fn, *saved_fn;
1040         struct rt6_info *rt;
1041         int strict = 0;
1042
1043         strict |= flags & RT6_LOOKUP_F_IFACE;
1044         if (net->ipv6.devconf_all->forwarding == 0)
1045                 strict |= RT6_LOOKUP_F_REACHABLE;
1046
1047         read_lock_bh(&table->tb6_lock);
1048
1049         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1050         saved_fn = fn;
1051
1052         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1053                 oif = 0;
1054
1055 redo_rt6_select:
1056         rt = rt6_select(fn, oif, strict);
1057         if (rt->rt6i_nsiblings)
1058                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1059         if (rt == net->ipv6.ip6_null_entry) {
1060                 fn = fib6_backtrack(fn, &fl6->saddr);
1061                 if (fn)
1062                         goto redo_rt6_select;
1063                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1064                         /* also consider unreachable route */
1065                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1066                         fn = saved_fn;
1067                         goto redo_rt6_select;
1068                 }
1069         }
1070
1071
1072         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1073                 dst_use(&rt->dst, jiffies);
1074                 read_unlock_bh(&table->tb6_lock);
1075
1076                 rt6_dst_from_metrics_check(rt);
1077                 return rt;
1078         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1079                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1080                 /* Create a RTF_CACHE clone which will not be
1081                  * owned by the fib6 tree.  It is for the special case where
1082                  * the daddr in the skb during the neighbor look-up is different
1083                  * from the fl6->daddr used to look-up route here.
1084                  */
1085
1086                 struct rt6_info *uncached_rt;
1087
1088                 dst_use(&rt->dst, jiffies);
1089                 read_unlock_bh(&table->tb6_lock);
1090
1091                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1092                 dst_release(&rt->dst);
1093
1094                 if (uncached_rt)
1095                         rt6_uncached_list_add(uncached_rt);
1096                 else
1097                         uncached_rt = net->ipv6.ip6_null_entry;
1098
1099                 dst_hold(&uncached_rt->dst);
1100                 return uncached_rt;
1101
1102         } else {
1103                 /* Get a percpu copy */
1104
1105                 struct rt6_info *pcpu_rt;
1106
1107                 rt->dst.lastuse = jiffies;
1108                 rt->dst.__use++;
1109                 pcpu_rt = rt6_get_pcpu_route(rt);
1110
1111                 if (pcpu_rt) {
1112                         read_unlock_bh(&table->tb6_lock);
1113                 } else {
1114                         /* We have to do the read_unlock first
1115                          * because rt6_make_pcpu_route() may trigger
1116                          * ip6_dst_gc() which will take the write_lock.
1117                          */
1118                         dst_hold(&rt->dst);
1119                         read_unlock_bh(&table->tb6_lock);
1120                         pcpu_rt = rt6_make_pcpu_route(rt);
1121                         dst_release(&rt->dst);
1122                 }
1123
1124                 return pcpu_rt;
1125
1126         }
1127 }
1128
1129 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1130                                             struct flowi6 *fl6, int flags)
1131 {
1132         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1133 }
1134
1135 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1136                                                 struct net_device *dev,
1137                                                 struct flowi6 *fl6, int flags)
1138 {
1139         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1140                 flags |= RT6_LOOKUP_F_IFACE;
1141
1142         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1143 }
1144
1145 void ip6_route_input(struct sk_buff *skb)
1146 {
1147         const struct ipv6hdr *iph = ipv6_hdr(skb);
1148         struct net *net = dev_net(skb->dev);
1149         int flags = RT6_LOOKUP_F_HAS_SADDR;
1150         struct ip_tunnel_info *tun_info;
1151         struct flowi6 fl6 = {
1152                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1153                 .daddr = iph->daddr,
1154                 .saddr = iph->saddr,
1155                 .flowlabel = ip6_flowinfo(iph),
1156                 .flowi6_mark = skb->mark,
1157                 .flowi6_proto = iph->nexthdr,
1158         };
1159
1160         tun_info = skb_tunnel_info(skb);
1161         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1162                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1163         skb_dst_drop(skb);
1164         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1165 }
1166
1167 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1168                                              struct flowi6 *fl6, int flags)
1169 {
1170         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1171 }
1172
1173 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1174                                          struct flowi6 *fl6, int flags)
1175 {
1176         struct dst_entry *dst;
1177         bool any_src;
1178
1179         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1180         if (dst)
1181                 return dst;
1182
1183         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1184
1185         any_src = ipv6_addr_any(&fl6->saddr);
1186         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1187             (fl6->flowi6_oif && any_src))
1188                 flags |= RT6_LOOKUP_F_IFACE;
1189
1190         if (!any_src)
1191                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1192         else if (sk)
1193                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1194
1195         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1198
1199 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1200 {
1201         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1202         struct dst_entry *new = NULL;
1203
1204         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1205         if (rt) {
1206                 rt6_info_init(rt);
1207
1208                 new = &rt->dst;
1209                 new->__use = 1;
1210                 new->input = dst_discard;
1211                 new->output = dst_discard_out;
1212
1213                 dst_copy_metrics(new, &ort->dst);
1214                 rt->rt6i_idev = ort->rt6i_idev;
1215                 if (rt->rt6i_idev)
1216                         in6_dev_hold(rt->rt6i_idev);
1217
1218                 rt->rt6i_gateway = ort->rt6i_gateway;
1219                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1220                 rt->rt6i_metric = 0;
1221
1222                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1223 #ifdef CONFIG_IPV6_SUBTREES
1224                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1225 #endif
1226
1227                 dst_free(new);
1228         }
1229
1230         dst_release(dst_orig);
1231         return new ? new : ERR_PTR(-ENOMEM);
1232 }
1233
1234 /*
1235  *      Destination cache support functions
1236  */
1237
1238 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1239 {
1240         if (rt->dst.from &&
1241             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1242                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1243 }
1244
1245 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1246 {
1247         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1248                 return NULL;
1249
1250         if (rt6_check_expired(rt))
1251                 return NULL;
1252
1253         return &rt->dst;
1254 }
1255
1256 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1257 {
1258         if (!__rt6_check_expired(rt) &&
1259             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1260             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1261                 return &rt->dst;
1262         else
1263                 return NULL;
1264 }
1265
1266 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1267 {
1268         struct rt6_info *rt;
1269
1270         rt = (struct rt6_info *) dst;
1271
1272         /* All IPV6 dsts are created with ->obsolete set to the value
1273          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1274          * into this function always.
1275          */
1276
1277         rt6_dst_from_metrics_check(rt);
1278
1279         if (rt->rt6i_flags & RTF_PCPU ||
1280             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1281                 return rt6_dst_from_check(rt, cookie);
1282         else
1283                 return rt6_check(rt, cookie);
1284 }
1285
1286 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1287 {
1288         struct rt6_info *rt = (struct rt6_info *) dst;
1289
1290         if (rt) {
1291                 if (rt->rt6i_flags & RTF_CACHE) {
1292                         if (rt6_check_expired(rt)) {
1293                                 ip6_del_rt(rt);
1294                                 dst = NULL;
1295                         }
1296                 } else {
1297                         dst_release(dst);
1298                         dst = NULL;
1299                 }
1300         }
1301         return dst;
1302 }
1303
1304 static void ip6_link_failure(struct sk_buff *skb)
1305 {
1306         struct rt6_info *rt;
1307
1308         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1309
1310         rt = (struct rt6_info *) skb_dst(skb);
1311         if (rt) {
1312                 if (rt->rt6i_flags & RTF_CACHE) {
1313                         dst_hold(&rt->dst);
1314                         ip6_del_rt(rt);
1315                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1316                         rt->rt6i_node->fn_sernum = -1;
1317                 }
1318         }
1319 }
1320
1321 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 {
1323         struct net *net = dev_net(rt->dst.dev);
1324
1325         rt->rt6i_flags |= RTF_MODIFIED;
1326         rt->rt6i_pmtu = mtu;
1327         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1328 }
1329
1330 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1331 {
1332         return !(rt->rt6i_flags & RTF_CACHE) &&
1333                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1334 }
1335
1336 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1337                                  const struct ipv6hdr *iph, u32 mtu)
1338 {
1339         struct rt6_info *rt6 = (struct rt6_info *)dst;
1340
1341         if (rt6->rt6i_flags & RTF_LOCAL)
1342                 return;
1343
1344         dst_confirm(dst);
1345         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1346         if (mtu >= dst_mtu(dst))
1347                 return;
1348
1349         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1350                 rt6_do_update_pmtu(rt6, mtu);
1351         } else {
1352                 const struct in6_addr *daddr, *saddr;
1353                 struct rt6_info *nrt6;
1354
1355                 if (iph) {
1356                         daddr = &iph->daddr;
1357                         saddr = &iph->saddr;
1358                 } else if (sk) {
1359                         daddr = &sk->sk_v6_daddr;
1360                         saddr = &inet6_sk(sk)->saddr;
1361                 } else {
1362                         return;
1363                 }
1364                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1365                 if (nrt6) {
1366                         rt6_do_update_pmtu(nrt6, mtu);
1367
1368                         /* ip6_ins_rt(nrt6) will bump the
1369                          * rt6->rt6i_node->fn_sernum
1370                          * which will fail the next rt6_check() and
1371                          * invalidate the sk->sk_dst_cache.
1372                          */
1373                         ip6_ins_rt(nrt6);
1374                 }
1375         }
1376 }
1377
1378 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1379                                struct sk_buff *skb, u32 mtu)
1380 {
1381         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1382 }
1383
1384 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1385                      int oif, u32 mark, kuid_t uid)
1386 {
1387         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1388         struct dst_entry *dst;
1389         struct flowi6 fl6;
1390
1391         memset(&fl6, 0, sizeof(fl6));
1392         fl6.flowi6_oif = oif;
1393         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1394         fl6.daddr = iph->daddr;
1395         fl6.saddr = iph->saddr;
1396         fl6.flowlabel = ip6_flowinfo(iph);
1397         fl6.flowi6_uid = uid;
1398
1399         dst = ip6_route_output(net, NULL, &fl6);
1400         if (!dst->error)
1401                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1402         dst_release(dst);
1403 }
1404 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1405
1406 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1407 {
1408         ip6_update_pmtu(skb, sock_net(sk), mtu,
1409                         sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1410 }
1411 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1412
1413 /* Handle redirects */
1414 struct ip6rd_flowi {
1415         struct flowi6 fl6;
1416         struct in6_addr gateway;
1417 };
1418
1419 static struct rt6_info *__ip6_route_redirect(struct net *net,
1420                                              struct fib6_table *table,
1421                                              struct flowi6 *fl6,
1422                                              int flags)
1423 {
1424         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1425         struct rt6_info *rt;
1426         struct fib6_node *fn;
1427
1428         /* Get the "current" route for this destination and
1429          * check if the redirect has come from approriate router.
1430          *
1431          * RFC 4861 specifies that redirects should only be
1432          * accepted if they come from the nexthop to the target.
1433          * Due to the way the routes are chosen, this notion
1434          * is a bit fuzzy and one might need to check all possible
1435          * routes.
1436          */
1437
1438         read_lock_bh(&table->tb6_lock);
1439         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1440 restart:
1441         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1442                 if (rt6_check_expired(rt))
1443                         continue;
1444                 if (rt->dst.error)
1445                         break;
1446                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447                         continue;
1448                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1449                         continue;
1450                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1451                         continue;
1452                 break;
1453         }
1454
1455         if (!rt)
1456                 rt = net->ipv6.ip6_null_entry;
1457         else if (rt->dst.error) {
1458                 rt = net->ipv6.ip6_null_entry;
1459                 goto out;
1460         }
1461
1462         if (rt == net->ipv6.ip6_null_entry) {
1463                 fn = fib6_backtrack(fn, &fl6->saddr);
1464                 if (fn)
1465                         goto restart;
1466         }
1467
1468 out:
1469         dst_hold(&rt->dst);
1470
1471         read_unlock_bh(&table->tb6_lock);
1472
1473         return rt;
1474 };
1475
1476 static struct dst_entry *ip6_route_redirect(struct net *net,
1477                                         const struct flowi6 *fl6,
1478                                         const struct in6_addr *gateway)
1479 {
1480         int flags = RT6_LOOKUP_F_HAS_SADDR;
1481         struct ip6rd_flowi rdfl;
1482
1483         rdfl.fl6 = *fl6;
1484         rdfl.gateway = *gateway;
1485
1486         return fib6_rule_lookup(net, &rdfl.fl6,
1487                                 flags, __ip6_route_redirect);
1488 }
1489
1490 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1491                   kuid_t uid)
1492 {
1493         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1494         struct dst_entry *dst;
1495         struct flowi6 fl6;
1496
1497         memset(&fl6, 0, sizeof(fl6));
1498         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1499         fl6.flowi6_oif = oif;
1500         fl6.flowi6_mark = mark;
1501         fl6.daddr = iph->daddr;
1502         fl6.saddr = iph->saddr;
1503         fl6.flowlabel = ip6_flowinfo(iph);
1504         fl6.flowi6_uid = uid;
1505
1506         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1507         rt6_do_redirect(dst, NULL, skb);
1508         dst_release(dst);
1509 }
1510 EXPORT_SYMBOL_GPL(ip6_redirect);
1511
1512 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1513                             u32 mark)
1514 {
1515         const struct ipv6hdr *iph = ipv6_hdr(skb);
1516         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1517         struct dst_entry *dst;
1518         struct flowi6 fl6;
1519
1520         memset(&fl6, 0, sizeof(fl6));
1521         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1522         fl6.flowi6_oif = oif;
1523         fl6.flowi6_mark = mark;
1524         fl6.daddr = msg->dest;
1525         fl6.saddr = iph->daddr;
1526         fl6.flowi6_uid = sock_net_uid(net, NULL);
1527
1528         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1529         rt6_do_redirect(dst, NULL, skb);
1530         dst_release(dst);
1531 }
1532
1533 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1534 {
1535         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1536                      sk->sk_uid);
1537 }
1538 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1539
1540 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1541 {
1542         struct net_device *dev = dst->dev;
1543         unsigned int mtu = dst_mtu(dst);
1544         struct net *net = dev_net(dev);
1545
1546         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1547
1548         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1549                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1550
1551         /*
1552          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1553          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1554          * IPV6_MAXPLEN is also valid and means: "any MSS,
1555          * rely only on pmtu discovery"
1556          */
1557         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1558                 mtu = IPV6_MAXPLEN;
1559         return mtu;
1560 }
1561
1562 static unsigned int ip6_mtu(const struct dst_entry *dst)
1563 {
1564         const struct rt6_info *rt = (const struct rt6_info *)dst;
1565         unsigned int mtu = rt->rt6i_pmtu;
1566         struct inet6_dev *idev;
1567
1568         if (mtu)
1569                 goto out;
1570
1571         mtu = dst_metric_raw(dst, RTAX_MTU);
1572         if (mtu)
1573                 goto out;
1574
1575         mtu = IPV6_MIN_MTU;
1576
1577         rcu_read_lock();
1578         idev = __in6_dev_get(dst->dev);
1579         if (idev)
1580                 mtu = idev->cnf.mtu6;
1581         rcu_read_unlock();
1582
1583 out:
1584         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1585 }
1586
1587 static struct dst_entry *icmp6_dst_gc_list;
1588 static DEFINE_SPINLOCK(icmp6_dst_lock);
1589
1590 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1591                                   struct flowi6 *fl6)
1592 {
1593         struct dst_entry *dst;
1594         struct rt6_info *rt;
1595         struct inet6_dev *idev = in6_dev_get(dev);
1596         struct net *net = dev_net(dev);
1597
1598         if (unlikely(!idev))
1599                 return ERR_PTR(-ENODEV);
1600
1601         rt = ip6_dst_alloc(net, dev, 0);
1602         if (unlikely(!rt)) {
1603                 in6_dev_put(idev);
1604                 dst = ERR_PTR(-ENOMEM);
1605                 goto out;
1606         }
1607
1608         rt->dst.flags |= DST_HOST;
1609         rt->dst.output  = ip6_output;
1610         atomic_set(&rt->dst.__refcnt, 1);
1611         rt->rt6i_gateway  = fl6->daddr;
1612         rt->rt6i_dst.addr = fl6->daddr;
1613         rt->rt6i_dst.plen = 128;
1614         rt->rt6i_idev     = idev;
1615         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1616
1617         spin_lock_bh(&icmp6_dst_lock);
1618         rt->dst.next = icmp6_dst_gc_list;
1619         icmp6_dst_gc_list = &rt->dst;
1620         spin_unlock_bh(&icmp6_dst_lock);
1621
1622         fib6_force_start_gc(net);
1623
1624         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1625
1626 out:
1627         return dst;
1628 }
1629
1630 int icmp6_dst_gc(void)
1631 {
1632         struct dst_entry *dst, **pprev;
1633         int more = 0;
1634
1635         spin_lock_bh(&icmp6_dst_lock);
1636         pprev = &icmp6_dst_gc_list;
1637
1638         while ((dst = *pprev) != NULL) {
1639                 if (!atomic_read(&dst->__refcnt)) {
1640                         *pprev = dst->next;
1641                         dst_free(dst);
1642                 } else {
1643                         pprev = &dst->next;
1644                         ++more;
1645                 }
1646         }
1647
1648         spin_unlock_bh(&icmp6_dst_lock);
1649
1650         return more;
1651 }
1652
1653 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1654                             void *arg)
1655 {
1656         struct dst_entry *dst, **pprev;
1657
1658         spin_lock_bh(&icmp6_dst_lock);
1659         pprev = &icmp6_dst_gc_list;
1660         while ((dst = *pprev) != NULL) {
1661                 struct rt6_info *rt = (struct rt6_info *) dst;
1662                 if (func(rt, arg)) {
1663                         *pprev = dst->next;
1664                         dst_free(dst);
1665                 } else {
1666                         pprev = &dst->next;
1667                 }
1668         }
1669         spin_unlock_bh(&icmp6_dst_lock);
1670 }
1671
1672 static int ip6_dst_gc(struct dst_ops *ops)
1673 {
1674         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1675         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1676         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1677         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1678         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1679         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1680         int entries;
1681
1682         entries = dst_entries_get_fast(ops);
1683         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1684             entries <= rt_max_size)
1685                 goto out;
1686
1687         net->ipv6.ip6_rt_gc_expire++;
1688         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1689         entries = dst_entries_get_slow(ops);
1690         if (entries < ops->gc_thresh)
1691                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1692 out:
1693         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1694         return entries > rt_max_size;
1695 }
1696
1697 static int ip6_convert_metrics(struct mx6_config *mxc,
1698                                const struct fib6_config *cfg)
1699 {
1700         bool ecn_ca = false;
1701         struct nlattr *nla;
1702         int remaining;
1703         u32 *mp;
1704
1705         if (!cfg->fc_mx)
1706                 return 0;
1707
1708         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1709         if (unlikely(!mp))
1710                 return -ENOMEM;
1711
1712         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1713                 int type = nla_type(nla);
1714                 u32 val;
1715
1716                 if (!type)
1717                         continue;
1718                 if (unlikely(type > RTAX_MAX))
1719                         goto err;
1720
1721                 if (type == RTAX_CC_ALGO) {
1722                         char tmp[TCP_CA_NAME_MAX];
1723
1724                         nla_strlcpy(tmp, nla, sizeof(tmp));
1725                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1726                         if (val == TCP_CA_UNSPEC)
1727                                 goto err;
1728                 } else {
1729                         val = nla_get_u32(nla);
1730                 }
1731                 if (type == RTAX_HOPLIMIT && val > 255)
1732                         val = 255;
1733                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1734                         goto err;
1735
1736                 mp[type - 1] = val;
1737                 __set_bit(type - 1, mxc->mx_valid);
1738         }
1739
1740         if (ecn_ca) {
1741                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1742                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1743         }
1744
1745         mxc->mx = mp;
1746         return 0;
1747  err:
1748         kfree(mp);
1749         return -EINVAL;
1750 }
1751
/*
 * Build a new rt6_info from the route configuration in @cfg without
 * inserting it into the FIB.
 *
 * On success the returned route has dst.dev, rt6i_idev and rt6i_table set
 * and owns the references taken on the device and its inet6_dev.  On failure
 * an ERR_PTR() is returned after dropping any device/inet6_dev references
 * taken here and freeing the partially built route.
 *
 * Side effects on @cfg: fc_metric is defaulted to IP6_RT_PRIO_USER when 0,
 * fc_protocol is defaulted to RTPROT_BOOT when RTPROT_UNSPEC, and
 * fc_nlinfo.nl_net is overwritten with dev_net() of the chosen device.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
        struct net *net = cfg->fc_nlinfo.nl_net;
        struct rt6_info *rt = NULL;
        struct net_device *dev = NULL;
        struct inet6_dev *idev = NULL;
        struct fib6_table *table;
        int addr_type;
        int err = -EINVAL;

        /* RTF_PCPU is an internal flag; can not be set by userspace */
        if (cfg->fc_flags & RTF_PCPU)
                goto out;

        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
                goto out;
#ifndef CONFIG_IPV6_SUBTREES
        /* Source-specific routes need subtree support. */
        if (cfg->fc_src_len)
                goto out;
#endif
        /* Take references on the requested device and its inet6_dev; they
         * are either handed over to the route or dropped at "out".
         */
        if (cfg->fc_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(net, cfg->fc_ifindex);
                if (!dev)
                        goto out;
                idev = in6_dev_get(dev);
                if (!idev)
                        goto out;
        }

        if (cfg->fc_metric == 0)
                cfg->fc_metric = IP6_RT_PRIO_USER;

        /* A netlink request without NLM_F_CREATE first tries an existing
         * table; if there is none, a warning is logged and the table is
         * created anyway.
         */
        err = -ENOBUFS;
        if (cfg->fc_nlinfo.nlh &&
            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
                table = fib6_get_table(net, cfg->fc_table);
                if (!table) {
                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
                        table = fib6_new_table(net, cfg->fc_table);
                }
        } else {
                table = fib6_new_table(net, cfg->fc_table);
        }

        if (!table)
                goto out;

        /* The device is attached later (install_route), hence NULL here. */
        rt = ip6_dst_alloc(net, NULL,
                           (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

        if (!rt) {
                err = -ENOMEM;
                goto out;
        }

        if (cfg->fc_flags & RTF_EXPIRES)
                rt6_set_expires(rt, jiffies +
                                clock_t_to_jiffies(cfg->fc_expires));
        else
                rt6_clean_expires(rt);

        if (cfg->fc_protocol == RTPROT_UNSPEC)
                cfg->fc_protocol = RTPROT_BOOT;
        rt->rt6i_protocol = cfg->fc_protocol;

        addr_type = ipv6_addr_type(&cfg->fc_dst);

        /* Pick the input handler from the destination type / route flags. */
        if (addr_type & IPV6_ADDR_MULTICAST)
                rt->dst.input = ip6_mc_input;
        else if (cfg->fc_flags & RTF_LOCAL)
                rt->dst.input = ip6_input;
        else
                rt->dst.input = ip6_forward;

        rt->dst.output = ip6_output;

        /* Optional lightweight-tunnel encapsulation: build its state and,
         * where the tunnel asks for it, interpose the lwtunnel input/output
         * handlers while remembering the original ones.
         */
        if (cfg->fc_encap) {
                struct lwtunnel_state *lwtstate;

                err = lwtunnel_build_state(dev, cfg->fc_encap_type,
                                           cfg->fc_encap, AF_INET6, cfg,
                                           &lwtstate);
                if (err)
                        goto out;
                rt->dst.lwtstate = lwtstate_get(lwtstate);
                if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
                        rt->dst.lwtstate->orig_output = rt->dst.output;
                        rt->dst.output = lwtunnel_output;
                }
                if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
                        rt->dst.lwtstate->orig_input = rt->dst.input;
                        rt->dst.input = lwtunnel_input;
                }
        }

        /* Store the destination masked to its prefix length. */
        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
        rt->rt6i_dst.plen = cfg->fc_dst_len;
        if (rt->rt6i_dst.plen == 128)
                rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
        rt->rt6i_src.plen = cfg->fc_src_len;
#endif

        rt->rt6i_metric = cfg->fc_metric;

        /* We cannot add true routes via loopback here,
           they would result in kernel looping; promote them to reject routes
         */
        if ((cfg->fc_flags & RTF_REJECT) ||
            (dev && (dev->flags & IFF_LOOPBACK) &&
             !(addr_type & IPV6_ADDR_LOOPBACK) &&
             !(cfg->fc_flags & RTF_LOCAL))) {
                /* hold loopback dev/idev if we haven't done so. */
                if (dev != net->loopback_dev) {
                        if (dev) {
                                dev_put(dev);
                                in6_dev_put(idev);
                        }
                        dev = net->loopback_dev;
                        dev_hold(dev);
                        idev = in6_dev_get(dev);
                        if (!idev) {
                                err = -ENODEV;
                                goto out;
                        }
                }
                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                /* The route type selects the error reported and the
                 * handlers used to dispose of matching packets.
                 */
                switch (cfg->fc_type) {
                case RTN_BLACKHOLE:
                        rt->dst.error = -EINVAL;
                        rt->dst.output = dst_discard_out;
                        rt->dst.input = dst_discard;
                        break;
                case RTN_PROHIBIT:
                        rt->dst.error = -EACCES;
                        rt->dst.output = ip6_pkt_prohibit_out;
                        rt->dst.input = ip6_pkt_prohibit;
                        break;
                case RTN_THROW:
                case RTN_UNREACHABLE:
                default:
                        rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
                                        : (cfg->fc_type == RTN_UNREACHABLE)
                                        ? -EHOSTUNREACH : -ENETUNREACH;
                        rt->dst.output = ip6_pkt_discard_out;
                        rt->dst.input = ip6_pkt_discard;
                        break;
                }
                /* Reject routes skip gateway and prefsrc validation. */
                goto install_route;
        }

        if (cfg->fc_flags & RTF_GATEWAY) {
                const struct in6_addr *gw_addr;
                int gwa_type;

                gw_addr = &cfg->fc_gateway;
                gwa_type = ipv6_addr_type(gw_addr);

                /* if gw_addr is local we will fail to detect this in case
                 * address is still TENTATIVE (DAD in progress). rt6_lookup()
                 * will return already-added prefix route via interface that
                 * prefix route was assigned to, which might be non-loopback.
                 */
                err = -EINVAL;
                if (ipv6_chk_addr_and_flags(net, gw_addr,
                                            gwa_type & IPV6_ADDR_LINKLOCAL ?
                                            dev : NULL, 0, 0))
                        goto out;

                rt->rt6i_gateway = *gw_addr;

                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
                        struct rt6_info *grt;

                        /* IPv6 strictly inhibits using not link-local
                           addresses as nexthop address.
                           Otherwise, router will not able to send redirects.
                           It is very good, but in some (rare!) circumstances
                           (SIT, PtP, NBMA NOARP links) it is handy to allow
                           some exceptions. --ANK
                         */
                        if (!(gwa_type & IPV6_ADDR_UNICAST))
                                goto out;

                        /* Resolve the gateway through the existing routes. */
                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

                        err = -EHOSTUNREACH;
                        if (!grt)
                                goto out;
                        if (dev) {
                                /* The gateway must be reached via the
                                 * device the caller asked for.
                                 */
                                if (dev != grt->dst.dev) {
                                        ip6_rt_put(grt);
                                        goto out;
                                }
                        } else {
                                /* No device given: inherit (and hold) the
                                 * one the gateway route uses.
                                 */
                                dev = grt->dst.dev;
                                idev = grt->rt6i_idev;
                                dev_hold(dev);
                                in6_dev_hold(grt->rt6i_idev);
                        }
                        /* Only accept a gateway that is itself reached
                         * without going through another gateway.
                         */
                        if (!(grt->rt6i_flags & RTF_GATEWAY))
                                err = 0;
                        ip6_rt_put(grt);

                        if (err)
                                goto out;
                }
                err = -EINVAL;
                if (!dev || (dev->flags & IFF_LOOPBACK))
                        goto out;
        }

        err = -ENODEV;
        if (!dev)
                goto out;

        /* A preferred source address, when given, must pass ipv6_chk_addr()
         * for the output device.
         */
        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
                        err = -EINVAL;
                        goto out;
                }
                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
                rt->rt6i_prefsrc.plen = 128;
        } else
                rt->rt6i_prefsrc.plen = 0;

        rt->rt6i_flags = cfg->fc_flags;

install_route:
        /* The dev/idev references held above now belong to the route. */
        rt->dst.dev = dev;
        rt->rt6i_idev = idev;
        rt->rt6i_table = table;

        cfg->fc_nlinfo.nl_net = dev_net(dev);

        return rt;
out:
        /* Undo whatever was acquired before the failure. */
        if (dev)
                dev_put(dev);
        if (idev)
                in6_dev_put(idev);
        if (rt)
                dst_free(&rt->dst);

        return ERR_PTR(err);
}
2001
2002 int ip6_route_add(struct fib6_config *cfg)
2003 {
2004         struct mx6_config mxc = { .mx = NULL, };
2005         struct rt6_info *rt;
2006         int err;
2007
2008         rt = ip6_route_info_create(cfg);
2009         if (IS_ERR(rt)) {
2010                 err = PTR_ERR(rt);
2011                 rt = NULL;
2012                 goto out;
2013         }
2014
2015         err = ip6_convert_metrics(&mxc, cfg);
2016         if (err)
2017                 goto out;
2018
2019         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2020
2021         kfree(mxc.mx);
2022
2023         return err;
2024 out:
2025         if (rt)
2026                 dst_free(&rt->dst);
2027
2028         return err;
2029 }
2030
2031 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2032 {
2033         int err;
2034         struct fib6_table *table;
2035         struct net *net = dev_net(rt->dst.dev);
2036
2037         if (rt == net->ipv6.ip6_null_entry ||
2038             rt->dst.flags & DST_NOCACHE) {
2039                 err = -ENOENT;
2040                 goto out;
2041         }
2042
2043         table = rt->rt6i_table;
2044         write_lock_bh(&table->tb6_lock);
2045         err = fib6_del(rt, info);
2046         write_unlock_bh(&table->tb6_lock);
2047
2048 out:
2049         ip6_rt_put(rt);
2050         return err;
2051 }
2052
2053 int ip6_del_rt(struct rt6_info *rt)
2054 {
2055         struct nl_info info = {
2056                 .nl_net = dev_net(rt->dst.dev),
2057         };
2058         return __ip6_del_rt(rt, &info);
2059 }
2060
2061 static int ip6_route_del(struct fib6_config *cfg)
2062 {
2063         struct fib6_table *table;
2064         struct fib6_node *fn;
2065         struct rt6_info *rt;
2066         int err = -ESRCH;
2067
2068         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2069         if (!table)
2070                 return err;
2071
2072         read_lock_bh(&table->tb6_lock);
2073
2074         fn = fib6_locate(&table->tb6_root,
2075                          &cfg->fc_dst, cfg->fc_dst_len,
2076                          &cfg->fc_src, cfg->fc_src_len);
2077
2078         if (fn) {
2079                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2080                         if ((rt->rt6i_flags & RTF_CACHE) &&
2081                             !(cfg->fc_flags & RTF_CACHE))
2082                                 continue;
2083                         if (cfg->fc_ifindex &&
2084                             (!rt->dst.dev ||
2085                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2086                                 continue;
2087                         if (cfg->fc_flags & RTF_GATEWAY &&
2088                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2089                                 continue;
2090                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2091                                 continue;
2092                         if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2093                                 continue;
2094                         dst_hold(&rt->dst);
2095                         read_unlock_bh(&table->tb6_lock);
2096
2097                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2098                 }
2099         }
2100         read_unlock_bh(&table->tb6_lock);
2101
2102         return err;
2103 }
2104
/*
 * Handle a redirect message found in @skb for the route @dst.
 *
 * After validating the message, the neighbour entry for the redirect target
 * is updated, a cache clone of the current route pointing at the new next
 * hop is inserted and NETEVENT_REDIRECT is raised.  If the route the
 * redirect applied to was itself a cache entry, it is deleted.
 * @sk is not used in this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
        struct netevent_redirect netevent;
        struct rt6_info *rt, *nrt = NULL;
        struct ndisc_options ndopts;
        struct inet6_dev *in6_dev;
        struct neighbour *neigh;
        struct rd_msg *msg;
        int optlen, on_link;
        u8 *lladdr;

        /* Bytes of options that follow the fixed redirect header. */
        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
        optlen -= sizeof(*msg);

        if (optlen < 0) {
                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
                return;
        }

        msg = (struct rd_msg *)icmp6_hdr(skb);

        if (ipv6_addr_is_multicast(&msg->dest)) {
                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
                return;
        }

        /* target == destination means the destination is on-link;
         * otherwise the target must be a link-local unicast address.
         */
        on_link = 0;
        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
                on_link = 1;
        } else if (ipv6_addr_type(&msg->target) !=
                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
                return;
        }

        /* Ignore redirects when forwarding is enabled on the receiving
         * interface or redirects are administratively disabled.
         */
        in6_dev = __in6_dev_get(skb->dev);
        if (!in6_dev)
                return;
        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
                return;

        /* RFC2461 8.1:
         *      The IP source address of the Redirect MUST be the same as the current
         *      first-hop router for the specified ICMP Destination Address.
         */

        if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
                return;
        }

        /* The target link-layer address option is optional; when present
         * it must be valid for the receiving device.
         */
        lladdr = NULL;
        if (ndopts.nd_opts_tgt_lladdr) {
                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
                                             skb->dev);
                if (!lladdr) {
                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
                        return;
                }
        }

        rt = (struct rt6_info *) dst;
        if (rt->rt6i_flags & RTF_REJECT) {
                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
                return;
        }

        /* Redirect received -> path was valid.
         * Look, redirects are sent only in response to data packets,
         * so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->dst);

        /* NOTE(review): the final argument (1) presumably requests creation
         * of a missing entry - confirm against __neigh_lookup().
         */
        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
        if (!neigh)
                return;

        /*
         *      We have finally decided to accept it.
         */

        /* The router flags are only forced when the target is a router,
         * i.e. the destination is not on-link.
         */
        neigh_update(neigh, lladdr, NUD_STALE,
                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
                     NEIGH_UPDATE_F_OVERRIDE|
                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
                                     NEIGH_UPDATE_F_ISROUTER))
                     );

        /* Cache clone of the current route for the redirected destination. */
        nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
        if (!nrt)
                goto out;

        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
        if (on_link)
                nrt->rt6i_flags &= ~RTF_GATEWAY;

        /* The new next hop is the address keyed in the neighbour entry. */
        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

        if (ip6_ins_rt(nrt))
                goto out;

        netevent.old = &rt->dst;
        netevent.new = &nrt->dst;
        netevent.daddr = &msg->dest;
        netevent.neigh = neigh;
        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

        /* The old route was a cache entry: remove it.  dst_clone() takes
         * the reference that ip6_del_rt() drops.
         */
        if (rt->rt6i_flags & RTF_CACHE) {
                rt = (struct rt6_info *) dst_clone(&rt->dst);
                ip6_del_rt(rt);
        }

out:
        neigh_release(neigh);
}
2220
2221 /*
2222  *      Misc support functions
2223  */
2224
2225 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2226 {
2227         BUG_ON(from->dst.from);
2228
2229         rt->rt6i_flags &= ~RTF_EXPIRES;
2230         dst_hold(&from->dst);
2231         rt->dst.from = &from->dst;
2232         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2233 }
2234
2235 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2236 {
2237         rt->dst.input = ort->dst.input;
2238         rt->dst.output = ort->dst.output;
2239         rt->rt6i_dst = ort->rt6i_dst;
2240         rt->dst.error = ort->dst.error;
2241         rt->rt6i_idev = ort->rt6i_idev;
2242         if (rt->rt6i_idev)
2243                 in6_dev_hold(rt->rt6i_idev);
2244         rt->dst.lastuse = jiffies;
2245         rt->rt6i_gateway = ort->rt6i_gateway;
2246         rt->rt6i_flags = ort->rt6i_flags;
2247         rt6_set_from(rt, ort);
2248         rt->rt6i_metric = ort->rt6i_metric;
2249 #ifdef CONFIG_IPV6_SUBTREES
2250         rt->rt6i_src = ort->rt6i_src;
2251 #endif
2252         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2253         rt->rt6i_table = ort->rt6i_table;
2254         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2255 }
2256
2257 #ifdef CONFIG_IPV6_ROUTE_INFO
2258 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
2259                                            const struct in6_addr *prefix, int prefixlen,
2260                                            const struct in6_addr *gwaddr)
2261 {
2262         struct fib6_node *fn;
2263         struct rt6_info *rt = NULL;
2264         struct fib6_table *table;
2265
2266         table = fib6_get_table(dev_net(dev),
2267                                addrconf_rt_table(dev, RT6_TABLE_INFO));
2268         if (!table)
2269                 return NULL;
2270
2271         read_lock_bh(&table->tb6_lock);
2272         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2273         if (!fn)
2274                 goto out;
2275
2276         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2277                 if (rt->dst.dev->ifindex != dev->ifindex)
2278                         continue;
2279                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2280                         continue;
2281                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2282                         continue;
2283                 dst_hold(&rt->dst);
2284                 break;
2285         }
2286 out:
2287         read_unlock_bh(&table->tb6_lock);
2288         return rt;
2289 }
2290
2291 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
2292                                            const struct in6_addr *prefix, int prefixlen,
2293                                            const struct in6_addr *gwaddr, unsigned int pref)
2294 {
2295         struct fib6_config cfg = {
2296                 .fc_metric      = IP6_RT_PRIO_USER,
2297                 .fc_ifindex     = dev->ifindex,
2298                 .fc_dst_len     = prefixlen,
2299                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2300                                   RTF_UP | RTF_PREF(pref),
2301                 .fc_nlinfo.portid = 0,
2302                 .fc_nlinfo.nlh = NULL,
2303                 .fc_nlinfo.nl_net = dev_net(dev),
2304         };
2305
2306         cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2307         cfg.fc_dst = *prefix;
2308         cfg.fc_gateway = *gwaddr;
2309
2310         /* We should treat it as a default route if prefix length is 0. */
2311         if (!prefixlen)
2312                 cfg.fc_flags |= RTF_DEFAULT;
2313
2314         ip6_route_add(&cfg);
2315
2316         return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
2317 }
2318 #endif
2319
2320 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2321 {
2322         struct rt6_info *rt;
2323         struct fib6_table *table;
2324
2325         table = fib6_get_table(dev_net(dev),
2326                                addrconf_rt_table(dev, RT6_TABLE_MAIN));
2327         if (!table)
2328                 return NULL;
2329
2330         read_lock_bh(&table->tb6_lock);
2331         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2332                 if (dev == rt->dst.dev &&
2333                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2334                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2335                         break;
2336         }
2337         if (rt)
2338                 dst_hold(&rt->dst);
2339         read_unlock_bh(&table->tb6_lock);
2340         return rt;
2341 }
2342
2343 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2344                                      struct net_device *dev,
2345                                      unsigned int pref)
2346 {
2347         struct fib6_config cfg = {
2348                 .fc_table       = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2349                 .fc_metric      = IP6_RT_PRIO_USER,
2350                 .fc_ifindex     = dev->ifindex,
2351                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2352                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2353                 .fc_nlinfo.portid = 0,
2354                 .fc_nlinfo.nlh = NULL,
2355                 .fc_nlinfo.nl_net = dev_net(dev),
2356         };
2357
2358         cfg.fc_gateway = *gwaddr;
2359
2360         ip6_route_add(&cfg);
2361
2362         return rt6_get_dflt_router(gwaddr, dev);
2363 }
2364
2365
2366 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2367         if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2368             (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2369                 return -1;
2370         return 0;
2371 }
2372
/*
 * Run rt6_addrconf_purge() over every route in @net via fib6_clean_all().
 */
void rt6_purge_dflt_routers(struct net *net)
{
        fib6_clean_all(net, rt6_addrconf_purge, NULL);
}
2377
2378 static void rtmsg_to_fib6_config(struct net *net,
2379                                  struct in6_rtmsg *rtmsg,
2380                                  struct fib6_config *cfg)
2381 {
2382         memset(cfg, 0, sizeof(*cfg));
2383
2384         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2385                          : RT6_TABLE_MAIN;
2386         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2387         cfg->fc_metric = rtmsg->rtmsg_metric;
2388         cfg->fc_expires = rtmsg->rtmsg_info;
2389         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2390         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2391         cfg->fc_flags = rtmsg->rtmsg_flags;
2392
2393         cfg->fc_nlinfo.nl_net = net;
2394
2395         cfg->fc_dst = rtmsg->rtmsg_dst;
2396         cfg->fc_src = rtmsg->rtmsg_src;
2397         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2398 }
2399
2400 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2401 {
2402         struct fib6_config cfg;
2403         struct in6_rtmsg rtmsg;
2404         int err;
2405
2406         switch (cmd) {
2407         case SIOCADDRT:         /* Add a route */
2408         case SIOCDELRT:         /* Delete a route */
2409                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2410                         return -EPERM;
2411                 err = copy_from_user(&rtmsg, arg,
2412                                      sizeof(struct in6_rtmsg));
2413                 if (err)
2414                         return -EFAULT;
2415
2416                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2417
2418                 rtnl_lock();
2419                 switch (cmd) {
2420                 case SIOCADDRT:
2421                         err = ip6_route_add(&cfg);
2422                         break;
2423                 case SIOCDELRT:
2424                         err = ip6_route_del(&cfg);
2425                         break;
2426                 default:
2427                         err = -EINVAL;
2428                 }
2429                 rtnl_unlock();
2430
2431                 return err;
2432         }
2433
2434         return -EINVAL;
2435 }
2436
2437 /*
2438  *      Drop the packet on the floor
2439  */
2440
2441 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2442 {
2443         int type;
2444         struct dst_entry *dst = skb_dst(skb);
2445         switch (ipstats_mib_noroutes) {
2446         case IPSTATS_MIB_INNOROUTES:
2447                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2448                 if (type == IPV6_ADDR_ANY) {
2449                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2450                                       IPSTATS_MIB_INADDRERRORS);
2451                         break;
2452                 }
2453                 /* FALLTHROUGH */
2454         case IPSTATS_MIB_OUTNOROUTES:
2455                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2456                               ipstats_mib_noroutes);
2457                 break;
2458         }
2459         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2460         kfree_skb(skb);
2461         return 0;
2462 }
2463
/*
 * Input handler: drop @skb through ip6_pkt_drop() with ICMPv6 code
 * ICMPV6_NOROUTE, accounted as IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_discard(struct sk_buff *skb)
{
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2468
/*
 * Output handler: drop @skb through ip6_pkt_drop() with ICMPv6 code
 * ICMPV6_NOROUTE, accounted as IPSTATS_MIB_OUTNOROUTES.  @net and @sk are
 * not used here.
 */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        /* Point skb->dev at the route's device before dropping. */
        skb->dev = skb_dst(skb)->dev;
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2474
/*
 * Input handler: drop @skb through ip6_pkt_drop() with ICMPv6 code
 * ICMPV6_ADM_PROHIBITED, accounted as IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2479
/*
 * Output handler: drop @skb through ip6_pkt_drop() with ICMPv6 code
 * ICMPV6_ADM_PROHIBITED, accounted as IPSTATS_MIB_OUTNOROUTES.  @net and
 * @sk are not used here.
 */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        /* Point skb->dev at the route's device before dropping. */
        skb->dev = skb_dst(skb)->dev;
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2485
2486 /*
2487  *      Allocate a dst for local (unicast / anycast) address.
2488  */
2489
2490 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2491                                     const struct in6_addr *addr,
2492                                     bool anycast)
2493 {
2494         u32 tb_id;
2495         struct net *net = dev_net(idev->dev);
2496         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2497                                             DST_NOCOUNT);
2498         if (!rt)
2499                 return ERR_PTR(-ENOMEM);
2500
2501         in6_dev_hold(idev);
2502
2503         rt->dst.flags |= DST_HOST;
2504         rt->dst.input = ip6_input;
2505         rt->dst.output = ip6_output;
2506         rt->rt6i_idev = idev;
2507
2508         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2509         if (anycast)
2510                 rt->rt6i_flags |= RTF_ANYCAST;
2511         else
2512                 rt->rt6i_flags |= RTF_LOCAL;
2513
2514         rt->rt6i_gateway  = *addr;
2515         rt->rt6i_dst.addr = *addr;
2516         rt->rt6i_dst.plen = 128;
2517         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2518         rt->rt6i_table = fib6_get_table(net, tb_id);
2519         rt->dst.flags |= DST_NOCACHE;
2520
2521         atomic_set(&rt->dst.__refcnt, 1);
2522
2523         return rt;
2524 }
2525
2526 int ip6_route_get_saddr(struct net *net,
2527                         struct rt6_info *rt,
2528                         const struct in6_addr *daddr,
2529                         unsigned int prefs,
2530                         struct in6_addr *saddr)
2531 {
2532         struct inet6_dev *idev =
2533                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2534         int err = 0;
2535         if (rt && rt->rt6i_prefsrc.plen)
2536                 *saddr = rt->rt6i_prefsrc.addr;
2537         else
2538                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2539                                          daddr, prefs, saddr);
2540         return err;
2541 }
2542
/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() via fib6_clean_all(). */
struct arg_dev_net_ip {
        struct net_device *dev;         /* device to match; NULL matches any */
        struct net *net;                /* namespace whose null entry is skipped */
        struct in6_addr *addr;          /* address compared with each prefsrc */
};
2549
2550 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2551 {
2552         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2553         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2554         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2555
2556         if (((void *)rt->dst.dev == dev || !dev) &&
2557             rt != net->ipv6.ip6_null_entry &&
2558             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2559                 /* remove prefsrc entry */
2560                 rt->rt6i_prefsrc.plen = 0;
2561         }
2562         return 0;
2563 }
2564
2565 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2566 {
2567         struct net *net = dev_net(ifp->idev->dev);
2568         struct arg_dev_net_ip adni = {
2569                 .dev = ifp->idev->dev,
2570                 .net = net,
2571                 .addr = &ifp->addr,
2572         };
2573         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2574 }
2575
2576 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2577 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2578
2579 /* Remove routers and update dst entries when gateway turn into host. */
2580 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2581 {
2582         struct in6_addr *gateway = (struct in6_addr *)arg;
2583
2584         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2585              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2586              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2587                 return -1;
2588         }
2589         return 0;
2590 }
2591
2592 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2593 {
2594         fib6_clean_all(net, fib6_clean_tohost, gateway);
2595 }
2596
2597 struct arg_dev_net {
2598         struct net_device *dev;
2599         struct net *net;
2600 };
2601
2602 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2603 {
2604         const struct arg_dev_net *adn = arg;
2605         const struct net_device *dev = adn->dev;
2606
2607         if ((rt->dst.dev == dev || !dev) &&
2608             rt != adn->net->ipv6.ip6_null_entry)
2609                 return -1;
2610
2611         return 0;
2612 }
2613
2614 void rt6_ifdown(struct net *net, struct net_device *dev)
2615 {
2616         struct arg_dev_net adn = {
2617                 .dev = dev,
2618                 .net = net,
2619         };
2620
2621         fib6_clean_all(net, fib6_ifdown, &adn);
2622         icmp6_clean_all(fib6_ifdown, &adn);
2623         if (dev)
2624                 rt6_uncached_list_flush_dev(net, dev);
2625 }
2626
2627 struct rt6_mtu_change_arg {
2628         struct net_device *dev;
2629         unsigned int mtu;
2630 };
2631
2632 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2633 {
2634         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2635         struct inet6_dev *idev;
2636
2637         /* In IPv6 pmtu discovery is not optional,
2638            so that RTAX_MTU lock cannot disable it.
2639            We still use this lock to block changes
2640            caused by addrconf/ndisc.
2641         */
2642
2643         idev = __in6_dev_get(arg->dev);
2644         if (!idev)
2645                 return 0;
2646
2647         /* For administrative MTU increase, there is no way to discover
2648            IPv6 PMTU increase, so PMTU increase should be updated here.
2649            Since RFC 1981 doesn't include administrative MTU increase
2650            update PMTU increase is a MUST. (i.e. jumbo frame)
2651          */
2652         /*
2653            If new MTU is less than route PMTU, this new MTU will be the
2654            lowest MTU in the path, update the route PMTU to reflect PMTU
2655            decreases; if new MTU is greater than route PMTU, and the
2656            old MTU is the lowest MTU in the path, update the route PMTU
2657            to reflect the increase. In this case if the other nodes' MTU
2658            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2659            PMTU discouvery.
2660          */
2661         if (rt->dst.dev == arg->dev &&
2662             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2663                 if (rt->rt6i_flags & RTF_CACHE) {
2664                         /* For RTF_CACHE with rt6i_pmtu == 0
2665                          * (i.e. a redirected route),
2666                          * the metrics of its rt->dst.from has already
2667                          * been updated.
2668                          */
2669                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2670                                 rt->rt6i_pmtu = arg->mtu;
2671                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2672                            (dst_mtu(&rt->dst) < arg->mtu &&
2673                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2674                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2675                 }
2676         }
2677         return 0;
2678 }
2679
2680 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2681 {
2682         struct rt6_mtu_change_arg arg = {
2683                 .dev = dev,
2684                 .mtu = mtu,
2685         };
2686
2687         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2688 }
2689
2690 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2691         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2692         [RTA_OIF]               = { .type = NLA_U32 },
2693         [RTA_IIF]               = { .type = NLA_U32 },
2694         [RTA_PRIORITY]          = { .type = NLA_U32 },
2695         [RTA_METRICS]           = { .type = NLA_NESTED },
2696         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2697         [RTA_PREF]              = { .type = NLA_U8 },
2698         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2699         [RTA_ENCAP]             = { .type = NLA_NESTED },
2700         [RTA_UID]               = { .type = NLA_U32 },
2701 };
2702
2703 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2704                               struct fib6_config *cfg)
2705 {
2706         struct rtmsg *rtm;
2707         struct nlattr *tb[RTA_MAX+1];
2708         unsigned int pref;
2709         int err;
2710
2711         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2712         if (err < 0)
2713                 goto errout;
2714
2715         err = -EINVAL;
2716         rtm = nlmsg_data(nlh);
2717         memset(cfg, 0, sizeof(*cfg));
2718
2719         cfg->fc_table = rtm->rtm_table;
2720         cfg->fc_dst_len = rtm->rtm_dst_len;
2721         cfg->fc_src_len = rtm->rtm_src_len;
2722         cfg->fc_flags = RTF_UP;
2723         cfg->fc_protocol = rtm->rtm_protocol;
2724         cfg->fc_type = rtm->rtm_type;
2725
2726         if (rtm->rtm_type == RTN_UNREACHABLE ||
2727             rtm->rtm_type == RTN_BLACKHOLE ||
2728             rtm->rtm_type == RTN_PROHIBIT ||
2729             rtm->rtm_type == RTN_THROW)
2730                 cfg->fc_flags |= RTF_REJECT;
2731
2732         if (rtm->rtm_type == RTN_LOCAL)
2733                 cfg->fc_flags |= RTF_LOCAL;
2734
2735         if (rtm->rtm_flags & RTM_F_CLONED)
2736                 cfg->fc_flags |= RTF_CACHE;
2737
2738         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2739         cfg->fc_nlinfo.nlh = nlh;
2740         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2741
2742         if (tb[RTA_GATEWAY]) {
2743                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2744                 cfg->fc_flags |= RTF_GATEWAY;
2745         }
2746
2747         if (tb[RTA_DST]) {
2748                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2749
2750                 if (nla_len(tb[RTA_DST]) < plen)
2751                         goto errout;
2752
2753                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2754         }
2755
2756         if (tb[RTA_SRC]) {
2757                 int plen = (rtm->rtm_src_len + 7) >> 3;
2758
2759                 if (nla_len(tb[RTA_SRC]) < plen)
2760                         goto errout;
2761
2762                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2763         }
2764
2765         if (tb[RTA_PREFSRC])
2766                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2767
2768         if (tb[RTA_OIF])
2769                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2770
2771         if (tb[RTA_PRIORITY])
2772                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2773
2774         if (tb[RTA_METRICS]) {
2775                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2776                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2777         }
2778
2779         if (tb[RTA_TABLE])
2780                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2781
2782         if (tb[RTA_MULTIPATH]) {
2783                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2784                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2785         }
2786
2787         if (tb[RTA_PREF]) {
2788                 pref = nla_get_u8(tb[RTA_PREF]);
2789                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2790                     pref != ICMPV6_ROUTER_PREF_HIGH)
2791                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2792                 cfg->fc_flags |= RTF_PREF(pref);
2793         }
2794
2795         if (tb[RTA_ENCAP])
2796                 cfg->fc_encap = tb[RTA_ENCAP];
2797
2798         if (tb[RTA_ENCAP_TYPE])
2799                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2800
2801         err = 0;
2802 errout:
2803         return err;
2804 }
2805
2806 struct rt6_nh {
2807         struct rt6_info *rt6_info;
2808         struct fib6_config r_cfg;
2809         struct mx6_config mxc;
2810         struct list_head next;
2811 };
2812
2813 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2814 {
2815         struct rt6_nh *nh;
2816
2817         list_for_each_entry(nh, rt6_nh_list, next) {
2818                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2819                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2820                         nh->r_cfg.fc_ifindex);
2821         }
2822 }
2823
2824 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2825                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2826 {
2827         struct rt6_nh *nh;
2828         struct rt6_info *rtnh;
2829         int err = -EEXIST;
2830
2831         list_for_each_entry(nh, rt6_nh_list, next) {
2832                 /* check if rt6_info already exists */
2833                 rtnh = nh->rt6_info;
2834
2835                 if (rtnh->dst.dev == rt->dst.dev &&
2836                     rtnh->rt6i_idev == rt->rt6i_idev &&
2837                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2838                                     &rt->rt6i_gateway))
2839                         return err;
2840         }
2841
2842         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2843         if (!nh)
2844                 return -ENOMEM;
2845         nh->rt6_info = rt;
2846         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2847         if (err) {
2848                 kfree(nh);
2849                 return err;
2850         }
2851         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2852         list_add_tail(&nh->next, rt6_nh_list);
2853
2854         return 0;
2855 }
2856
2857 static int ip6_route_multipath_add(struct fib6_config *cfg)
2858 {
2859         struct fib6_config r_cfg;
2860         struct rtnexthop *rtnh;
2861         struct rt6_info *rt;
2862         struct rt6_nh *err_nh;
2863         struct rt6_nh *nh, *nh_safe;
2864         int remaining;
2865         int attrlen;
2866         int err = 1;
2867         int nhn = 0;
2868         int replace = (cfg->fc_nlinfo.nlh &&
2869                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2870         LIST_HEAD(rt6_nh_list);
2871
2872         remaining = cfg->fc_mp_len;
2873         rtnh = (struct rtnexthop *)cfg->fc_mp;
2874
2875         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2876          * rt6_info structs per nexthop
2877          */
2878         while (rtnh_ok(rtnh, remaining)) {
2879                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2880                 if (rtnh->rtnh_ifindex)
2881                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2882
2883                 attrlen = rtnh_attrlen(rtnh);
2884                 if (attrlen > 0) {
2885                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2886
2887                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2888                         if (nla) {
2889                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2890                                 r_cfg.fc_flags |= RTF_GATEWAY;
2891                         }
2892                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2893                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2894                         if (nla)
2895                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2896                 }
2897
2898                 rt = ip6_route_info_create(&r_cfg);
2899                 if (IS_ERR(rt)) {
2900                         err = PTR_ERR(rt);
2901                         rt = NULL;
2902                         goto cleanup;
2903                 }
2904
2905                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2906                 if (err) {
2907                         dst_free(&rt->dst);
2908                         goto cleanup;
2909                 }
2910
2911                 rtnh = rtnh_next(rtnh, &remaining);
2912         }
2913
2914         err_nh = NULL;
2915         list_for_each_entry(nh, &rt6_nh_list, next) {
2916                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2917                 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2918                 nh->rt6_info = NULL;
2919                 if (err) {
2920                         if (replace && nhn)
2921                                 ip6_print_replace_route_err(&rt6_nh_list);
2922                         err_nh = nh;
2923                         goto add_errout;
2924                 }
2925
2926                 /* Because each route is added like a single route we remove
2927                  * these flags after the first nexthop: if there is a collision,
2928                  * we have already failed to add the first nexthop:
2929                  * fib6_add_rt2node() has rejected it; when replacing, old
2930                  * nexthops have been replaced by first new, the rest should
2931                  * be added to it.
2932                  */
2933                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2934                                                      NLM_F_REPLACE);
2935                 nhn++;
2936         }
2937
2938         goto cleanup;
2939
2940 add_errout:
2941         /* Delete routes that were already added */
2942         list_for_each_entry(nh, &rt6_nh_list, next) {
2943                 if (err_nh == nh)
2944                         break;
2945                 ip6_route_del(&nh->r_cfg);
2946         }
2947
2948 cleanup:
2949         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2950                 if (nh->rt6_info)
2951                         dst_free(&nh->rt6_info->dst);
2952                 kfree(nh->mxc.mx);
2953                 list_del(&nh->next);
2954                 kfree(nh);
2955         }
2956
2957         return err;
2958 }
2959
2960 static int ip6_route_multipath_del(struct fib6_config *cfg)
2961 {
2962         struct fib6_config r_cfg;
2963         struct rtnexthop *rtnh;
2964         int remaining;
2965         int attrlen;
2966         int err = 1, last_err = 0;
2967
2968         remaining = cfg->fc_mp_len;
2969         rtnh = (struct rtnexthop *)cfg->fc_mp;
2970
2971         /* Parse a Multipath Entry */
2972         while (rtnh_ok(rtnh, remaining)) {
2973                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2974                 if (rtnh->rtnh_ifindex)
2975                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2976
2977                 attrlen = rtnh_attrlen(rtnh);
2978                 if (attrlen > 0) {
2979                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2980
2981                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2982                         if (nla) {
2983                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2984                                 r_cfg.fc_flags |= RTF_GATEWAY;
2985                         }
2986                 }
2987                 err = ip6_route_del(&r_cfg);
2988                 if (err)
2989                         last_err = err;
2990
2991                 rtnh = rtnh_next(rtnh, &remaining);
2992         }
2993
2994         return last_err;
2995 }
2996
2997 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2998 {
2999         struct fib6_config cfg;
3000         int err;
3001
3002         err = rtm_to_fib6_config(skb, nlh, &cfg);
3003         if (err < 0)
3004                 return err;
3005
3006         if (cfg.fc_mp)
3007                 return ip6_route_multipath_del(&cfg);
3008         else
3009                 return ip6_route_del(&cfg);
3010 }
3011
3012 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3013 {
3014         struct fib6_config cfg;
3015         int err;
3016
3017         err = rtm_to_fib6_config(skb, nlh, &cfg);
3018         if (err < 0)
3019                 return err;
3020
3021         if (cfg.fc_mp)
3022                 return ip6_route_multipath_add(&cfg);
3023         else
3024                 return ip6_route_add(&cfg);
3025 }
3026
3027 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3028 {
3029         return NLMSG_ALIGN(sizeof(struct rtmsg))
3030                + nla_total_size(16) /* RTA_SRC */
3031                + nla_total_size(16) /* RTA_DST */
3032                + nla_total_size(16) /* RTA_GATEWAY */
3033                + nla_total_size(16) /* RTA_PREFSRC */
3034                + nla_total_size(4) /* RTA_TABLE */
3035                + nla_total_size(4) /* RTA_IIF */
3036                + nla_total_size(4) /* RTA_OIF */
3037                + nla_total_size(4) /* RTA_PRIORITY */
3038                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3039                + nla_total_size(sizeof(struct rta_cacheinfo))
3040                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3041                + nla_total_size(1) /* RTA_PREF */
3042                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3043 }
3044
3045 static int rt6_fill_node(struct net *net,
3046                          struct sk_buff *skb, struct rt6_info *rt,
3047                          struct in6_addr *dst, struct in6_addr *src,
3048                          int iif, int type, u32 portid, u32 seq,
3049                          int prefix, int nowait, unsigned int flags)
3050 {
3051         u32 metrics[RTAX_MAX];
3052         struct rtmsg *rtm;
3053         struct nlmsghdr *nlh;
3054         long expires;
3055         u32 table;
3056
3057         if (prefix) {   /* user wants prefix routes only */
3058                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3059                         /* success since this is not a prefix route */
3060                         return 1;
3061                 }
3062         }
3063
3064         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3065         if (!nlh)
3066                 return -EMSGSIZE;
3067
3068         rtm = nlmsg_data(nlh);
3069         rtm->rtm_family = AF_INET6;
3070         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3071         rtm->rtm_src_len = rt->rt6i_src.plen;
3072         rtm->rtm_tos = 0;
3073         if (rt->rt6i_table)
3074                 table = rt->rt6i_table->tb6_id;
3075         else
3076                 table = RT6_TABLE_UNSPEC;
3077         rtm->rtm_table = table;
3078         if (nla_put_u32(skb, RTA_TABLE, table))
3079                 goto nla_put_failure;
3080         if (rt->rt6i_flags & RTF_REJECT) {
3081                 switch (rt->dst.error) {
3082                 case -EINVAL:
3083                         rtm->rtm_type = RTN_BLACKHOLE;
3084                         break;
3085                 case -EACCES:
3086                         rtm->rtm_type = RTN_PROHIBIT;
3087                         break;
3088                 case -EAGAIN:
3089                         rtm->rtm_type = RTN_THROW;
3090                         break;
3091                 default:
3092                         rtm->rtm_type = RTN_UNREACHABLE;
3093                         break;
3094                 }
3095         }
3096         else if (rt->rt6i_flags & RTF_LOCAL)
3097                 rtm->rtm_type = RTN_LOCAL;
3098         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3099                 rtm->rtm_type = RTN_LOCAL;
3100         else
3101                 rtm->rtm_type = RTN_UNICAST;
3102         rtm->rtm_flags = 0;
3103         if (!netif_carrier_ok(rt->dst.dev)) {
3104                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3105                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3106                         rtm->rtm_flags |= RTNH_F_DEAD;
3107         }
3108         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3109         rtm->rtm_protocol = rt->rt6i_protocol;
3110         if (rt->rt6i_flags & RTF_DYNAMIC)
3111                 rtm->rtm_protocol = RTPROT_REDIRECT;
3112         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3113                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3114                         rtm->rtm_protocol = RTPROT_RA;
3115                 else
3116                         rtm->rtm_protocol = RTPROT_KERNEL;
3117         }
3118
3119         if (rt->rt6i_flags & RTF_CACHE)
3120                 rtm->rtm_flags |= RTM_F_CLONED;
3121
3122         if (dst) {
3123                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3124                         goto nla_put_failure;
3125                 rtm->rtm_dst_len = 128;
3126         } else if (rtm->rtm_dst_len)
3127                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3128                         goto nla_put_failure;
3129 #ifdef CONFIG_IPV6_SUBTREES
3130         if (src) {
3131                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3132                         goto nla_put_failure;
3133                 rtm->rtm_src_len = 128;
3134         } else if (rtm->rtm_src_len &&
3135                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3136                 goto nla_put_failure;
3137 #endif
3138         if (iif) {
3139 #ifdef CONFIG_IPV6_MROUTE
3140                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3141                         int err = ip6mr_get_route(net, skb, rtm, nowait,
3142                                                   portid);
3143
3144                         if (err <= 0) {
3145                                 if (!nowait) {
3146                                         if (err == 0)
3147                                                 return 0;
3148                                         goto nla_put_failure;
3149                                 } else {
3150                                         if (err == -EMSGSIZE)
3151                                                 goto nla_put_failure;
3152                                 }
3153                         }
3154                 } else
3155 #endif
3156                         if (nla_put_u32(skb, RTA_IIF, iif))
3157                                 goto nla_put_failure;
3158         } else if (dst) {
3159                 struct in6_addr saddr_buf;
3160                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3161                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3162                         goto nla_put_failure;
3163         }
3164
3165         if (rt->rt6i_prefsrc.plen) {
3166                 struct in6_addr saddr_buf;
3167                 saddr_buf = rt->rt6i_prefsrc.addr;
3168                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3169                         goto nla_put_failure;
3170         }
3171
3172         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3173         if (rt->rt6i_pmtu)
3174                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3175         if (rtnetlink_put_metrics(skb, metrics) < 0)
3176                 goto nla_put_failure;
3177
3178         if (rt->rt6i_flags & RTF_GATEWAY) {
3179                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3180                         goto nla_put_failure;
3181         }
3182
3183         if (rt->dst.dev &&
3184             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3185                 goto nla_put_failure;
3186         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3187                 goto nla_put_failure;
3188
3189         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3190
3191         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3192                 goto nla_put_failure;
3193
3194         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3195                 goto nla_put_failure;
3196
3197         if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3198                 goto nla_put_failure;
3199
3200         nlmsg_end(skb, nlh);
3201         return 0;
3202
3203 nla_put_failure:
3204         nlmsg_cancel(skb, nlh);
3205         return -EMSGSIZE;
3206 }
3207
3208 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3209 {
3210         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3211         int prefix;
3212
3213         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3214                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3215                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3216         } else
3217                 prefix = 0;
3218
3219         return rt6_fill_node(arg->net,
3220                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3221                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3222                      prefix, 0, NLM_F_MULTI);
3223 }
3224
3225 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3226 {
3227         struct net *net = sock_net(in_skb->sk);
3228         struct nlattr *tb[RTA_MAX+1];
3229         struct rt6_info *rt;
3230         struct sk_buff *skb;
3231         struct rtmsg *rtm;
3232         struct flowi6 fl6;
3233         int err, iif = 0, oif = 0;
3234
3235         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3236         if (err < 0)
3237                 goto errout;
3238
3239         err = -EINVAL;
3240         memset(&fl6, 0, sizeof(fl6));
3241
3242         if (tb[RTA_SRC]) {
3243                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3244                         goto errout;
3245
3246                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3247         }
3248
3249         if (tb[RTA_DST]) {
3250                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3251                         goto errout;
3252
3253                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3254         }
3255
3256         if (tb[RTA_IIF])
3257                 iif = nla_get_u32(tb[RTA_IIF]);
3258
3259         if (tb[RTA_OIF])
3260                 oif = nla_get_u32(tb[RTA_OIF]);
3261
3262         if (tb[RTA_MARK])
3263                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3264
3265         if (tb[RTA_UID])
3266                 fl6.flowi6_uid = make_kuid(current_user_ns(),
3267                                            nla_get_u32(tb[RTA_UID]));
3268         else
3269                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3270
3271         if (iif) {
3272                 struct net_device *dev;
3273                 int flags = 0;
3274
3275                 dev = __dev_get_by_index(net, iif);
3276                 if (!dev) {
3277                         err = -ENODEV;
3278                         goto errout;
3279                 }
3280
3281                 fl6.flowi6_iif = iif;
3282
3283                 if (!ipv6_addr_any(&fl6.saddr))
3284                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3285
3286                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3287                                                                flags);
3288         } else {
3289                 fl6.flowi6_oif = oif;
3290
3291                 if (netif_index_is_l3_master(net, oif)) {
3292                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3293                                            FLOWI_FLAG_SKIP_NH_OIF;
3294                 }
3295
3296                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3297         }
3298
3299         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3300         if (!skb) {
3301                 ip6_rt_put(rt);
3302                 err = -ENOBUFS;
3303                 goto errout;
3304         }
3305
3306         /* Reserve room for dummy headers, this skb can pass
3307            through good chunk of routing engine.
3308          */
3309         skb_reset_mac_header(skb);
3310         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3311
3312         skb_dst_set(skb, &rt->dst);
3313
3314         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3315                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3316                             nlh->nlmsg_seq, 0, 0, 0);
3317         if (err < 0) {
3318                 kfree_skb(skb);
3319                 goto errout;
3320         }
3321
3322         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3323 errout:
3324         return err;
3325 }
3326
3327 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3328                      unsigned int nlm_flags)
3329 {
3330         struct sk_buff *skb;
3331         struct net *net = info->nl_net;
3332         u32 seq;
3333         int err;
3334
3335         err = -ENOBUFS;
3336         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3337
3338         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3339         if (!skb)
3340                 goto errout;
3341
3342         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3343                                 event, info->portid, seq, 0, 0, nlm_flags);
3344         if (err < 0) {
3345                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3346                 WARN_ON(err == -EMSGSIZE);
3347                 kfree_skb(skb);
3348                 goto errout;
3349         }
3350         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3351                     info->nlh, gfp_any());
3352         return;
3353 errout:
3354         if (err < 0)
3355                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3356 }
3357
3358 static int ip6_route_dev_notify(struct notifier_block *this,
3359                                 unsigned long event, void *ptr)
3360 {
3361         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3362         struct net *net = dev_net(dev);
3363
3364         if (!(dev->flags & IFF_LOOPBACK))
3365                 return NOTIFY_OK;
3366
3367         if (event == NETDEV_REGISTER) {
3368                 net->ipv6.ip6_null_entry->dst.dev = dev;
3369                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3370 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3371                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3372                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3373                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3374                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3375 #endif
3376          } else if (event == NETDEV_UNREGISTER) {
3377                 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3378 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3379                 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3380                 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3381 #endif
3382         }
3383
3384         return NOTIFY_OK;
3385 }
3386
3387 /*
3388  *      /proc
3389  */
3390
3391 #ifdef CONFIG_PROC_FS
3392
3393 static const struct file_operations ipv6_route_proc_fops = {
3394         .owner          = THIS_MODULE,
3395         .open           = ipv6_route_open,
3396         .read           = seq_read,
3397         .llseek         = seq_lseek,
3398         .release        = seq_release_net,
3399 };
3400
3401 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3402 {
3403         struct net *net = (struct net *)seq->private;
3404         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3405                    net->ipv6.rt6_stats->fib_nodes,
3406                    net->ipv6.rt6_stats->fib_route_nodes,
3407                    net->ipv6.rt6_stats->fib_rt_alloc,
3408                    net->ipv6.rt6_stats->fib_rt_entries,
3409                    net->ipv6.rt6_stats->fib_rt_cache,
3410                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3411                    net->ipv6.rt6_stats->fib_discarded_routes);
3412
3413         return 0;
3414 }
3415
3416 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3417 {
3418         return single_open_net(inode, file, rt6_stats_seq_show);
3419 }
3420
3421 static const struct file_operations rt6_stats_seq_fops = {
3422         .owner   = THIS_MODULE,
3423         .open    = rt6_stats_seq_open,
3424         .read    = seq_read,
3425         .llseek  = seq_lseek,
3426         .release = single_release_net,
3427 };
3428 #endif  /* CONFIG_PROC_FS */
3429
3430 #ifdef CONFIG_SYSCTL
3431
3432 static
3433 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3434                               void __user *buffer, size_t *lenp, loff_t *ppos)
3435 {
3436         struct net *net;
3437         int delay;
3438         if (!write)
3439                 return -EINVAL;
3440
3441         net = (struct net *)ctl->extra1;
3442         delay = net->ipv6.sysctl.flush_delay;
3443         proc_dointvec(ctl, write, buffer, lenp, ppos);
3444         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3445         return 0;
3446 }
3447
3448 struct ctl_table ipv6_route_table_template[] = {
3449         {
3450                 .procname       =       "flush",
3451                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3452                 .maxlen         =       sizeof(int),
3453                 .mode           =       0200,
3454                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3455         },
3456         {
3457                 .procname       =       "gc_thresh",
3458                 .data           =       &ip6_dst_ops_template.gc_thresh,
3459                 .maxlen         =       sizeof(int),
3460                 .mode           =       0644,
3461                 .proc_handler   =       proc_dointvec,
3462         },
3463         {
3464                 .procname       =       "max_size",
3465                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3466                 .maxlen         =       sizeof(int),
3467                 .mode           =       0644,
3468                 .proc_handler   =       proc_dointvec,
3469         },
3470         {
3471                 .procname       =       "gc_min_interval",
3472                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3473                 .maxlen         =       sizeof(int),
3474                 .mode           =       0644,
3475                 .proc_handler   =       proc_dointvec_jiffies,
3476         },
3477         {
3478                 .procname       =       "gc_timeout",
3479                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3480                 .maxlen         =       sizeof(int),
3481                 .mode           =       0644,
3482                 .proc_handler   =       proc_dointvec_jiffies,
3483         },
3484         {
3485                 .procname       =       "gc_interval",
3486                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3487                 .maxlen         =       sizeof(int),
3488                 .mode           =       0644,
3489                 .proc_handler   =       proc_dointvec_jiffies,
3490         },
3491         {
3492                 .procname       =       "gc_elasticity",
3493                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3494                 .maxlen         =       sizeof(int),
3495                 .mode           =       0644,
3496                 .proc_handler   =       proc_dointvec,
3497         },
3498         {
3499                 .procname       =       "mtu_expires",
3500                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3501                 .maxlen         =       sizeof(int),
3502                 .mode           =       0644,
3503                 .proc_handler   =       proc_dointvec_jiffies,
3504         },
3505         {
3506                 .procname       =       "min_adv_mss",
3507                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3508                 .maxlen         =       sizeof(int),
3509                 .mode           =       0644,
3510                 .proc_handler   =       proc_dointvec,
3511         },
3512         {
3513                 .procname       =       "gc_min_interval_ms",
3514                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3515                 .maxlen         =       sizeof(int),
3516                 .mode           =       0644,
3517                 .proc_handler   =       proc_dointvec_ms_jiffies,
3518         },
3519         { }
3520 };
3521
3522 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3523 {
3524         struct ctl_table *table;
3525
3526         table = kmemdup(ipv6_route_table_template,
3527                         sizeof(ipv6_route_table_template),
3528                         GFP_KERNEL);
3529
3530         if (table) {
3531                 table[0].data = &net->ipv6.sysctl.flush_delay;
3532                 table[0].extra1 = net;
3533                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3534                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3535                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3536                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3537                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3538                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3539                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3540                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3541                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3542
3543                 /* Don't export sysctls to unprivileged users */
3544                 if (net->user_ns != &init_user_ns)
3545                         table[0].procname = NULL;
3546         }
3547
3548         return table;
3549 }
3550 #endif
3551
3552 static int __net_init ip6_route_net_init(struct net *net)
3553 {
3554         int ret = -ENOMEM;
3555
3556         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3557                sizeof(net->ipv6.ip6_dst_ops));
3558
3559         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3560                 goto out_ip6_dst_ops;
3561
3562         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3563                                            sizeof(*net->ipv6.ip6_null_entry),
3564                                            GFP_KERNEL);
3565         if (!net->ipv6.ip6_null_entry)
3566                 goto out_ip6_dst_entries;
3567         net->ipv6.ip6_null_entry->dst.path =
3568                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3569         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3570         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3571                          ip6_template_metrics, true);
3572
3573 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3574         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3575                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3576                                                GFP_KERNEL);
3577         if (!net->ipv6.ip6_prohibit_entry)
3578                 goto out_ip6_null_entry;
3579         net->ipv6.ip6_prohibit_entry->dst.path =
3580                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3581         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3582         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3583                          ip6_template_metrics, true);
3584
3585         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3586                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3587                                                GFP_KERNEL);
3588         if (!net->ipv6.ip6_blk_hole_entry)
3589                 goto out_ip6_prohibit_entry;
3590         net->ipv6.ip6_blk_hole_entry->dst.path =
3591                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3592         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3593         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3594                          ip6_template_metrics, true);
3595 #endif
3596
3597         net->ipv6.sysctl.flush_delay = 0;
3598         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3599         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3600         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3601         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3602         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3603         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3604         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3605
3606         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3607
3608         ret = 0;
3609 out:
3610         return ret;
3611
3612 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3613 out_ip6_prohibit_entry:
3614         kfree(net->ipv6.ip6_prohibit_entry);
3615 out_ip6_null_entry:
3616         kfree(net->ipv6.ip6_null_entry);
3617 #endif
3618 out_ip6_dst_entries:
3619         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3620 out_ip6_dst_ops:
3621         goto out;
3622 }
3623
3624 static void __net_exit ip6_route_net_exit(struct net *net)
3625 {
3626         kfree(net->ipv6.ip6_null_entry);
3627 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3628         kfree(net->ipv6.ip6_prohibit_entry);
3629         kfree(net->ipv6.ip6_blk_hole_entry);
3630 #endif
3631         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3632 }
3633
3634 static int __net_init ip6_route_net_init_late(struct net *net)
3635 {
3636 #ifdef CONFIG_PROC_FS
3637         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3638         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3639 #endif
3640         return 0;
3641 }
3642
3643 static void __net_exit ip6_route_net_exit_late(struct net *net)
3644 {
3645 #ifdef CONFIG_PROC_FS
3646         remove_proc_entry("ipv6_route", net->proc_net);
3647         remove_proc_entry("rt6_stats", net->proc_net);
3648 #endif
3649 }
3650
3651 static struct pernet_operations ip6_route_net_ops = {
3652         .init = ip6_route_net_init,
3653         .exit = ip6_route_net_exit,
3654 };
3655
3656 static int __net_init ipv6_inetpeer_init(struct net *net)
3657 {
3658         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3659
3660         if (!bp)
3661                 return -ENOMEM;
3662         inet_peer_base_init(bp);
3663         net->ipv6.peers = bp;
3664         return 0;
3665 }
3666
3667 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3668 {
3669         struct inet_peer_base *bp = net->ipv6.peers;
3670
3671         net->ipv6.peers = NULL;
3672         inetpeer_invalidate_tree(bp);
3673         kfree(bp);
3674 }
3675
3676 static struct pernet_operations ipv6_inetpeer_ops = {
3677         .init   =       ipv6_inetpeer_init,
3678         .exit   =       ipv6_inetpeer_exit,
3679 };
3680
3681 static struct pernet_operations ip6_route_net_late_ops = {
3682         .init = ip6_route_net_init_late,
3683         .exit = ip6_route_net_exit_late,
3684 };
3685
3686 static struct notifier_block ip6_route_dev_notifier = {
3687         .notifier_call = ip6_route_dev_notify,
3688         .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
3689 };
3690
3691 void __init ip6_route_init_special_entries(void)
3692 {
3693         /* Registering of the loopback is done before this portion of code,
3694          * the loopback reference in rt6_info will not be taken, do it
3695          * manually for init_net */
3696         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3697         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3698   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3699         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3700         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3701         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3702         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3703   #endif
3704 }
3705
3706 int __init ip6_route_init(void)
3707 {
3708         int ret;
3709         int cpu;
3710
3711         ret = -ENOMEM;
3712         ip6_dst_ops_template.kmem_cachep =
3713                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3714                                   SLAB_HWCACHE_ALIGN, NULL);
3715         if (!ip6_dst_ops_template.kmem_cachep)
3716                 goto out;
3717
3718         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3719         if (ret)
3720                 goto out_kmem_cache;
3721
3722         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3723         if (ret)
3724                 goto out_dst_entries;
3725
3726         ret = register_pernet_subsys(&ip6_route_net_ops);
3727         if (ret)
3728                 goto out_register_inetpeer;
3729
3730         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3731
3732         ret = fib6_init();
3733         if (ret)
3734                 goto out_register_subsys;
3735
3736         ret = xfrm6_init();
3737         if (ret)
3738                 goto out_fib6_init;
3739
3740         ret = fib6_rules_init();
3741         if (ret)
3742                 goto xfrm6_init;
3743
3744         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3745         if (ret)
3746                 goto fib6_rules_init;
3747
3748         ret = -ENOBUFS;
3749         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3750             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3751             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3752                 goto out_register_late_subsys;
3753
3754         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3755         if (ret)
3756                 goto out_register_late_subsys;
3757
3758         for_each_possible_cpu(cpu) {
3759                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3760
3761                 INIT_LIST_HEAD(&ul->head);
3762                 spin_lock_init(&ul->lock);
3763         }
3764
3765 out:
3766         return ret;
3767
3768 out_register_late_subsys:
3769         unregister_pernet_subsys(&ip6_route_net_late_ops);
3770 fib6_rules_init:
3771         fib6_rules_cleanup();
3772 xfrm6_init:
3773         xfrm6_fini();
3774 out_fib6_init:
3775         fib6_gc_cleanup();
3776 out_register_subsys:
3777         unregister_pernet_subsys(&ip6_route_net_ops);
3778 out_register_inetpeer:
3779         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3780 out_dst_entries:
3781         dst_entries_destroy(&ip6_dst_blackhole_ops);
3782 out_kmem_cache:
3783         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3784         goto out;
3785 }
3786
3787 void ip6_route_cleanup(void)
3788 {
3789         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3790         unregister_pernet_subsys(&ip6_route_net_late_ops);
3791         fib6_rules_cleanup();
3792         xfrm6_fini();
3793         fib6_gc_cleanup();
3794         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3795         unregister_pernet_subsys(&ip6_route_net_ops);
3796         dst_entries_destroy(&ip6_dst_blackhole_ops);
3797         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3798 }