ipv6: Only create RTF_CACHE routes after encountering pmtu exception
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/* Result of the next-hop neighbour check used while scoring a route.
 * Negative values are failures; rt6_score_route() returns them unchanged.
 */
enum rt6_nud_state {
	/* find_match() drops the route from consideration */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour is in NUD_FAILED (only with CONFIG_IPV6_ROUTER_PREF) */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
97
98 #ifdef CONFIG_IPV6_ROUTE_INFO
99 static struct rt6_info *rt6_add_route_info(struct net *net,
100                                            const struct in6_addr *prefix, int prefixlen,
101                                            const struct in6_addr *gwaddr, int ifindex,
102                                            unsigned int pref);
103 static struct rt6_info *rt6_get_route_info(struct net *net,
104                                            const struct in6_addr *prefix, int prefixlen,
105                                            const struct in6_addr *gwaddr, int ifindex);
106 #endif
107
108 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
109 {
110         struct rt6_info *rt = (struct rt6_info *)dst;
111
112         if (rt->rt6i_flags & RTF_CACHE)
113                 return NULL;
114         else
115                 return dst_cow_metrics_generic(dst, old);
116 }
117
118 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
119                                              struct sk_buff *skb,
120                                              const void *daddr)
121 {
122         struct in6_addr *p = &rt->rt6i_gateway;
123
124         if (!ipv6_addr_any(p))
125                 return (const void *) p;
126         else if (skb)
127                 return &ipv6_hdr(skb)->daddr;
128         return daddr;
129 }
130
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
132                                           struct sk_buff *skb,
133                                           const void *daddr)
134 {
135         struct rt6_info *rt = (struct rt6_info *) dst;
136         struct neighbour *n;
137
138         daddr = choose_neigh_daddr(rt, skb, daddr);
139         n = __ipv6_neigh_lookup(dst->dev, daddr);
140         if (n)
141                 return n;
142         return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144
145 static struct dst_ops ip6_dst_ops_template = {
146         .family                 =       AF_INET6,
147         .gc                     =       ip6_dst_gc,
148         .gc_thresh              =       1024,
149         .check                  =       ip6_dst_check,
150         .default_advmss         =       ip6_default_advmss,
151         .mtu                    =       ip6_mtu,
152         .cow_metrics            =       ipv6_cow_metrics,
153         .destroy                =       ip6_dst_destroy,
154         .ifdown                 =       ip6_dst_ifdown,
155         .negative_advice        =       ip6_negative_advice,
156         .link_failure           =       ip6_link_failure,
157         .update_pmtu            =       ip6_rt_update_pmtu,
158         .redirect               =       rt6_do_redirect,
159         .local_out              =       __ip6_local_out,
160         .neigh_lookup           =       ip6_neigh_lookup,
161 };
162
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
164 {
165         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167         return mtu ? : dst->dev->mtu;
168 }
169
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171                                          struct sk_buff *skb, u32 mtu)
172 {
173 }
174
/* dst_ops->redirect hook for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
179
180 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
181                                          unsigned long old)
182 {
183         return NULL;
184 }
185
186 static struct dst_ops ip6_dst_blackhole_ops = {
187         .family                 =       AF_INET6,
188         .destroy                =       ip6_dst_destroy,
189         .check                  =       ip6_dst_check,
190         .mtu                    =       ip6_blackhole_mtu,
191         .default_advmss         =       ip6_default_advmss,
192         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
193         .redirect               =       ip6_rt_blackhole_redirect,
194         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
195         .neigh_lookup           =       ip6_neigh_lookup,
196 };
197
198 static const u32 ip6_template_metrics[RTAX_MAX] = {
199         [RTAX_HOPLIMIT - 1] = 0,
200 };
201
202 static const struct rt6_info ip6_null_entry_template = {
203         .dst = {
204                 .__refcnt       = ATOMIC_INIT(1),
205                 .__use          = 1,
206                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
207                 .error          = -ENETUNREACH,
208                 .input          = ip6_pkt_discard,
209                 .output         = ip6_pkt_discard_out,
210         },
211         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
212         .rt6i_protocol  = RTPROT_KERNEL,
213         .rt6i_metric    = ~(u32) 0,
214         .rt6i_ref       = ATOMIC_INIT(1),
215 };
216
217 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
218
219 static const struct rt6_info ip6_prohibit_entry_template = {
220         .dst = {
221                 .__refcnt       = ATOMIC_INIT(1),
222                 .__use          = 1,
223                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
224                 .error          = -EACCES,
225                 .input          = ip6_pkt_prohibit,
226                 .output         = ip6_pkt_prohibit_out,
227         },
228         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
229         .rt6i_protocol  = RTPROT_KERNEL,
230         .rt6i_metric    = ~(u32) 0,
231         .rt6i_ref       = ATOMIC_INIT(1),
232 };
233
234 static const struct rt6_info ip6_blk_hole_entry_template = {
235         .dst = {
236                 .__refcnt       = ATOMIC_INIT(1),
237                 .__use          = 1,
238                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
239                 .error          = -EINVAL,
240                 .input          = dst_discard,
241                 .output         = dst_discard_sk,
242         },
243         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
244         .rt6i_protocol  = RTPROT_KERNEL,
245         .rt6i_metric    = ~(u32) 0,
246         .rt6i_ref       = ATOMIC_INIT(1),
247 };
248
249 #endif
250
251 /* allocate dst with ip6_dst_ops */
252 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
253                                              struct net_device *dev,
254                                              int flags,
255                                              struct fib6_table *table)
256 {
257         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
258                                         0, DST_OBSOLETE_FORCE_CHK, flags);
259
260         if (rt) {
261                 struct dst_entry *dst = &rt->dst;
262
263                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
264                 INIT_LIST_HEAD(&rt->rt6i_siblings);
265         }
266         return rt;
267 }
268
269 static void ip6_dst_destroy(struct dst_entry *dst)
270 {
271         struct rt6_info *rt = (struct rt6_info *)dst;
272         struct inet6_dev *idev = rt->rt6i_idev;
273         struct dst_entry *from = dst->from;
274
275         dst_destroy_metrics_generic(dst);
276
277         if (idev) {
278                 rt->rt6i_idev = NULL;
279                 in6_dev_put(idev);
280         }
281
282         dst->from = NULL;
283         dst_release(from);
284 }
285
286 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
287                            int how)
288 {
289         struct rt6_info *rt = (struct rt6_info *)dst;
290         struct inet6_dev *idev = rt->rt6i_idev;
291         struct net_device *loopback_dev =
292                 dev_net(dev)->loopback_dev;
293
294         if (dev != loopback_dev) {
295                 if (idev && idev->dev == dev) {
296                         struct inet6_dev *loopback_idev =
297                                 in6_dev_get(loopback_dev);
298                         if (loopback_idev) {
299                                 rt->rt6i_idev = loopback_idev;
300                                 in6_dev_put(idev);
301                         }
302                 }
303         }
304 }
305
306 static bool rt6_check_expired(const struct rt6_info *rt)
307 {
308         if (rt->rt6i_flags & RTF_EXPIRES) {
309                 if (time_after(jiffies, rt->dst.expires))
310                         return true;
311         } else if (rt->dst.from) {
312                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
313         }
314         return false;
315 }
316
317 /* Multipath route selection:
318  *   Hash based function using packet header and flowlabel.
319  * Adapted from fib_info_hashfn()
320  */
321 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322                                const struct flowi6 *fl6)
323 {
324         unsigned int val = fl6->flowi6_proto;
325
326         val ^= ipv6_addr_hash(&fl6->daddr);
327         val ^= ipv6_addr_hash(&fl6->saddr);
328
329         /* Work only if this not encapsulated */
330         switch (fl6->flowi6_proto) {
331         case IPPROTO_UDP:
332         case IPPROTO_TCP:
333         case IPPROTO_SCTP:
334                 val ^= (__force u16)fl6->fl6_sport;
335                 val ^= (__force u16)fl6->fl6_dport;
336                 break;
337
338         case IPPROTO_ICMPV6:
339                 val ^= (__force u16)fl6->fl6_icmp_type;
340                 val ^= (__force u16)fl6->fl6_icmp_code;
341                 break;
342         }
343         /* RFC6438 recommands to use flowlabel */
344         val ^= (__force u32)fl6->flowlabel;
345
346         /* Perhaps, we need to tune, this function? */
347         val = val ^ (val >> 7) ^ (val >> 12);
348         return val % candidate_count;
349 }
350
351 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
352                                              struct flowi6 *fl6, int oif,
353                                              int strict)
354 {
355         struct rt6_info *sibling, *next_sibling;
356         int route_choosen;
357
358         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359         /* Don't change the route, if route_choosen == 0
360          * (siblings does not include ourself)
361          */
362         if (route_choosen)
363                 list_for_each_entry_safe(sibling, next_sibling,
364                                 &match->rt6i_siblings, rt6i_siblings) {
365                         route_choosen--;
366                         if (route_choosen == 0) {
367                                 if (rt6_score_route(sibling, oif, strict) < 0)
368                                         break;
369                                 match = sibling;
370                                 break;
371                         }
372                 }
373         return match;
374 }
375
376 /*
377  *      Route lookup. Any table->tb6_lock is implied.
378  */
379
380 static inline struct rt6_info *rt6_device_match(struct net *net,
381                                                     struct rt6_info *rt,
382                                                     const struct in6_addr *saddr,
383                                                     int oif,
384                                                     int flags)
385 {
386         struct rt6_info *local = NULL;
387         struct rt6_info *sprt;
388
389         if (!oif && ipv6_addr_any(saddr))
390                 goto out;
391
392         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
393                 struct net_device *dev = sprt->dst.dev;
394
395                 if (oif) {
396                         if (dev->ifindex == oif)
397                                 return sprt;
398                         if (dev->flags & IFF_LOOPBACK) {
399                                 if (!sprt->rt6i_idev ||
400                                     sprt->rt6i_idev->dev->ifindex != oif) {
401                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
402                                                 continue;
403                                         if (local && (!oif ||
404                                                       local->rt6i_idev->dev->ifindex == oif))
405                                                 continue;
406                                 }
407                                 local = sprt;
408                         }
409                 } else {
410                         if (ipv6_chk_addr(net, saddr, dev,
411                                           flags & RT6_LOOKUP_F_IFACE))
412                                 return sprt;
413                 }
414         }
415
416         if (oif) {
417                 if (local)
418                         return local;
419
420                 if (flags & RT6_LOOKUP_F_IFACE)
421                         return net->ipv6.ip6_null_entry;
422         }
423 out:
424         return rt;
425 }
426
427 #ifdef CONFIG_IPV6_ROUTER_PREF
428 struct __rt6_probe_work {
429         struct work_struct work;
430         struct in6_addr target;
431         struct net_device *dev;
432 };
433
434 static void rt6_probe_deferred(struct work_struct *w)
435 {
436         struct in6_addr mcaddr;
437         struct __rt6_probe_work *work =
438                 container_of(w, struct __rt6_probe_work, work);
439
440         addrconf_addr_solict_mult(&work->target, &mcaddr);
441         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
442         dev_put(work->dev);
443         kfree(work);
444 }
445
446 static void rt6_probe(struct rt6_info *rt)
447 {
448         struct neighbour *neigh;
449         /*
450          * Okay, this does not seem to be appropriate
451          * for now, however, we need to check if it
452          * is really so; aka Router Reachability Probing.
453          *
454          * Router Reachability Probe MUST be rate-limited
455          * to no more than one per minute.
456          */
457         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
458                 return;
459         rcu_read_lock_bh();
460         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
461         if (neigh) {
462                 write_lock(&neigh->lock);
463                 if (neigh->nud_state & NUD_VALID)
464                         goto out;
465         }
466
467         if (!neigh ||
468             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
469                 struct __rt6_probe_work *work;
470
471                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
472
473                 if (neigh && work)
474                         __neigh_set_probe_once(neigh);
475
476                 if (neigh)
477                         write_unlock(&neigh->lock);
478
479                 if (work) {
480                         INIT_WORK(&work->work, rt6_probe_deferred);
481                         work->target = rt->rt6i_gateway;
482                         dev_hold(rt->dst.dev);
483                         work->dev = rt->dst.dev;
484                         schedule_work(&work->work);
485                 }
486         } else {
487 out:
488                 write_unlock(&neigh->lock);
489         }
490         rcu_read_unlock_bh();
491 }
492 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
496 #endif
497
498 /*
499  * Default Router Selection (RFC 2461 6.3.6)
500  */
501 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
502 {
503         struct net_device *dev = rt->dst.dev;
504         if (!oif || dev->ifindex == oif)
505                 return 2;
506         if ((dev->flags & IFF_LOOPBACK) &&
507             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
508                 return 1;
509         return 0;
510 }
511
512 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
513 {
514         struct neighbour *neigh;
515         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
516
517         if (rt->rt6i_flags & RTF_NONEXTHOP ||
518             !(rt->rt6i_flags & RTF_GATEWAY))
519                 return RT6_NUD_SUCCEED;
520
521         rcu_read_lock_bh();
522         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
523         if (neigh) {
524                 read_lock(&neigh->lock);
525                 if (neigh->nud_state & NUD_VALID)
526                         ret = RT6_NUD_SUCCEED;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528                 else if (!(neigh->nud_state & NUD_FAILED))
529                         ret = RT6_NUD_SUCCEED;
530                 else
531                         ret = RT6_NUD_FAIL_PROBE;
532 #endif
533                 read_unlock(&neigh->lock);
534         } else {
535                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
536                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
537         }
538         rcu_read_unlock_bh();
539
540         return ret;
541 }
542
543 static int rt6_score_route(struct rt6_info *rt, int oif,
544                            int strict)
545 {
546         int m;
547
548         m = rt6_check_dev(rt, oif);
549         if (!m && (strict & RT6_LOOKUP_F_IFACE))
550                 return RT6_NUD_FAIL_HARD;
551 #ifdef CONFIG_IPV6_ROUTER_PREF
552         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
553 #endif
554         if (strict & RT6_LOOKUP_F_REACHABLE) {
555                 int n = rt6_check_neigh(rt);
556                 if (n < 0)
557                         return n;
558         }
559         return m;
560 }
561
562 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
563                                    int *mpri, struct rt6_info *match,
564                                    bool *do_rr)
565 {
566         int m;
567         bool match_do_rr = false;
568
569         if (rt6_check_expired(rt))
570                 goto out;
571
572         m = rt6_score_route(rt, oif, strict);
573         if (m == RT6_NUD_FAIL_DO_RR) {
574                 match_do_rr = true;
575                 m = 0; /* lowest valid score */
576         } else if (m == RT6_NUD_FAIL_HARD) {
577                 goto out;
578         }
579
580         if (strict & RT6_LOOKUP_F_REACHABLE)
581                 rt6_probe(rt);
582
583         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
584         if (m > *mpri) {
585                 *do_rr = match_do_rr;
586                 *mpri = m;
587                 match = rt;
588         }
589 out:
590         return match;
591 }
592
593 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594                                      struct rt6_info *rr_head,
595                                      u32 metric, int oif, int strict,
596                                      bool *do_rr)
597 {
598         struct rt6_info *rt, *match, *cont;
599         int mpri = -1;
600
601         match = NULL;
602         cont = NULL;
603         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604                 if (rt->rt6i_metric != metric) {
605                         cont = rt;
606                         break;
607                 }
608
609                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
610         }
611
612         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613                 if (rt->rt6i_metric != metric) {
614                         cont = rt;
615                         break;
616                 }
617
618                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
619         }
620
621         if (match || !cont)
622                 return match;
623
624         for (rt = cont; rt; rt = rt->dst.rt6_next)
625                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
626
627         return match;
628 }
629
630 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
631 {
632         struct rt6_info *match, *rt0;
633         struct net *net;
634         bool do_rr = false;
635
636         rt0 = fn->rr_ptr;
637         if (!rt0)
638                 fn->rr_ptr = rt0 = fn->leaf;
639
640         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
641                              &do_rr);
642
643         if (do_rr) {
644                 struct rt6_info *next = rt0->dst.rt6_next;
645
646                 /* no entries matched; do round-robin */
647                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
648                         next = fn->leaf;
649
650                 if (next != rt0)
651                         fn->rr_ptr = next;
652         }
653
654         net = dev_net(rt0->dst.dev);
655         return match ? match : net->ipv6.ip6_null_entry;
656 }
657
658 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
659 {
660         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
661 }
662
663 #ifdef CONFIG_IPV6_ROUTE_INFO
664 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
665                   const struct in6_addr *gwaddr)
666 {
667         struct net *net = dev_net(dev);
668         struct route_info *rinfo = (struct route_info *) opt;
669         struct in6_addr prefix_buf, *prefix;
670         unsigned int pref;
671         unsigned long lifetime;
672         struct rt6_info *rt;
673
674         if (len < sizeof(struct route_info)) {
675                 return -EINVAL;
676         }
677
678         /* Sanity check for prefix_len and length */
679         if (rinfo->length > 3) {
680                 return -EINVAL;
681         } else if (rinfo->prefix_len > 128) {
682                 return -EINVAL;
683         } else if (rinfo->prefix_len > 64) {
684                 if (rinfo->length < 2) {
685                         return -EINVAL;
686                 }
687         } else if (rinfo->prefix_len > 0) {
688                 if (rinfo->length < 1) {
689                         return -EINVAL;
690                 }
691         }
692
693         pref = rinfo->route_pref;
694         if (pref == ICMPV6_ROUTER_PREF_INVALID)
695                 return -EINVAL;
696
697         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
698
699         if (rinfo->length == 3)
700                 prefix = (struct in6_addr *)rinfo->prefix;
701         else {
702                 /* this function is safe */
703                 ipv6_addr_prefix(&prefix_buf,
704                                  (struct in6_addr *)rinfo->prefix,
705                                  rinfo->prefix_len);
706                 prefix = &prefix_buf;
707         }
708
709         if (rinfo->prefix_len == 0)
710                 rt = rt6_get_dflt_router(gwaddr, dev);
711         else
712                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
713                                         gwaddr, dev->ifindex);
714
715         if (rt && !lifetime) {
716                 ip6_del_rt(rt);
717                 rt = NULL;
718         }
719
720         if (!rt && lifetime)
721                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
722                                         pref);
723         else if (rt)
724                 rt->rt6i_flags = RTF_ROUTEINFO |
725                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
726
727         if (rt) {
728                 if (!addrconf_finite_timeout(lifetime))
729                         rt6_clean_expires(rt);
730                 else
731                         rt6_set_expires(rt, jiffies + HZ * lifetime);
732
733                 ip6_rt_put(rt);
734         }
735         return 0;
736 }
737 #endif
738
739 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
740                                         struct in6_addr *saddr)
741 {
742         struct fib6_node *pn;
743         while (1) {
744                 if (fn->fn_flags & RTN_TL_ROOT)
745                         return NULL;
746                 pn = fn->parent;
747                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
748                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
749                 else
750                         fn = pn;
751                 if (fn->fn_flags & RTN_RTINFO)
752                         return fn;
753         }
754 }
755
756 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
757                                              struct fib6_table *table,
758                                              struct flowi6 *fl6, int flags)
759 {
760         struct fib6_node *fn;
761         struct rt6_info *rt;
762
763         read_lock_bh(&table->tb6_lock);
764         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
765 restart:
766         rt = fn->leaf;
767         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
768         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
769                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
770         if (rt == net->ipv6.ip6_null_entry) {
771                 fn = fib6_backtrack(fn, &fl6->saddr);
772                 if (fn)
773                         goto restart;
774         }
775         dst_use(&rt->dst, jiffies);
776         read_unlock_bh(&table->tb6_lock);
777         return rt;
778
779 }
780
781 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
782                                     int flags)
783 {
784         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
785 }
786 EXPORT_SYMBOL_GPL(ip6_route_lookup);
787
788 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789                             const struct in6_addr *saddr, int oif, int strict)
790 {
791         struct flowi6 fl6 = {
792                 .flowi6_oif = oif,
793                 .daddr = *daddr,
794         };
795         struct dst_entry *dst;
796         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
797
798         if (saddr) {
799                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
800                 flags |= RT6_LOOKUP_F_HAS_SADDR;
801         }
802
803         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
804         if (dst->error == 0)
805                 return (struct rt6_info *) dst;
806
807         dst_release(dst);
808
809         return NULL;
810 }
811 EXPORT_SYMBOL(rt6_lookup);
812
/* ip6_ins_rt() must be called WITHOUT table->tb6_lock held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed. In either case, unless the caller holds its own
 * reference, the route may already have been destroyed on return.
 */
818
819 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
820                         struct mx6_config *mxc)
821 {
822         int err;
823         struct fib6_table *table;
824
825         table = rt->rt6i_table;
826         write_lock_bh(&table->tb6_lock);
827         err = fib6_add(&table->tb6_root, rt, info, mxc);
828         write_unlock_bh(&table->tb6_lock);
829
830         return err;
831 }
832
833 int ip6_ins_rt(struct rt6_info *rt)
834 {
835         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
836         struct mx6_config mxc = { .mx = NULL, };
837
838         return __ip6_ins_rt(rt, &info, &mxc);
839 }
840
841 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
842                                            const struct in6_addr *daddr,
843                                            const struct in6_addr *saddr)
844 {
845         struct rt6_info *rt;
846
847         /*
848          *      Clone the route.
849          */
850
851         rt = ip6_rt_copy(ort, daddr);
852
853         if (rt) {
854                 rt->rt6i_flags |= RTF_CACHE;
855
856                 if (!rt6_is_gw_or_nonexthop(ort)) {
857                         if (ort->rt6i_dst.plen != 128 &&
858                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859                                 rt->rt6i_flags |= RTF_ANYCAST;
860 #ifdef CONFIG_IPV6_SUBTREES
861                         if (rt->rt6i_src.plen && saddr) {
862                                 rt->rt6i_src.addr = *saddr;
863                                 rt->rt6i_src.plen = 128;
864                         }
865 #endif
866                 }
867         }
868
869         return rt;
870 }
871
872 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
873                                       struct flowi6 *fl6, int flags)
874 {
875         struct fib6_node *fn, *saved_fn;
876         struct rt6_info *rt;
877         int strict = 0;
878
879         strict |= flags & RT6_LOOKUP_F_IFACE;
880         if (net->ipv6.devconf_all->forwarding == 0)
881                 strict |= RT6_LOOKUP_F_REACHABLE;
882
883         read_lock_bh(&table->tb6_lock);
884
885         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
886         saved_fn = fn;
887
888 redo_rt6_select:
889         rt = rt6_select(fn, oif, strict);
890         if (rt->rt6i_nsiblings)
891                 rt = rt6_multipath_select(rt, fl6, oif, strict);
892         if (rt == net->ipv6.ip6_null_entry) {
893                 fn = fib6_backtrack(fn, &fl6->saddr);
894                 if (fn)
895                         goto redo_rt6_select;
896                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
897                         /* also consider unreachable route */
898                         strict &= ~RT6_LOOKUP_F_REACHABLE;
899                         fn = saved_fn;
900                         goto redo_rt6_select;
901                 }
902         }
903
904         dst_hold(&rt->dst);
905         read_unlock_bh(&table->tb6_lock);
906
907         rt6_dst_from_metrics_check(rt);
908         rt->dst.lastuse = jiffies;
909         rt->dst.__use++;
910
911         return rt;
912 }
913
914 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
915                                             struct flowi6 *fl6, int flags)
916 {
917         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
918 }
919
920 static struct dst_entry *ip6_route_input_lookup(struct net *net,
921                                                 struct net_device *dev,
922                                                 struct flowi6 *fl6, int flags)
923 {
924         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
925                 flags |= RT6_LOOKUP_F_IFACE;
926
927         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
928 }
929
930 void ip6_route_input(struct sk_buff *skb)
931 {
932         const struct ipv6hdr *iph = ipv6_hdr(skb);
933         struct net *net = dev_net(skb->dev);
934         int flags = RT6_LOOKUP_F_HAS_SADDR;
935         struct flowi6 fl6 = {
936                 .flowi6_iif = skb->dev->ifindex,
937                 .daddr = iph->daddr,
938                 .saddr = iph->saddr,
939                 .flowlabel = ip6_flowinfo(iph),
940                 .flowi6_mark = skb->mark,
941                 .flowi6_proto = iph->nexthdr,
942         };
943
944         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
945 }
946
947 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
948                                              struct flowi6 *fl6, int flags)
949 {
950         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
951 }
952
953 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
954                                     struct flowi6 *fl6)
955 {
956         int flags = 0;
957
958         fl6->flowi6_iif = LOOPBACK_IFINDEX;
959
960         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
961                 flags |= RT6_LOOKUP_F_IFACE;
962
963         if (!ipv6_addr_any(&fl6->saddr))
964                 flags |= RT6_LOOKUP_F_HAS_SADDR;
965         else if (sk)
966                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
967
968         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
969 }
970 EXPORT_SYMBOL(ip6_route_output);
971
972 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
973 {
974         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
975         struct dst_entry *new = NULL;
976
977         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
978         if (rt) {
979                 new = &rt->dst;
980
981                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
982
983                 new->__use = 1;
984                 new->input = dst_discard;
985                 new->output = dst_discard_sk;
986
987                 if (dst_metrics_read_only(&ort->dst))
988                         new->_metrics = ort->dst._metrics;
989                 else
990                         dst_copy_metrics(new, &ort->dst);
991                 rt->rt6i_idev = ort->rt6i_idev;
992                 if (rt->rt6i_idev)
993                         in6_dev_hold(rt->rt6i_idev);
994
995                 rt->rt6i_gateway = ort->rt6i_gateway;
996                 rt->rt6i_flags = ort->rt6i_flags;
997                 rt->rt6i_metric = 0;
998
999                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1000 #ifdef CONFIG_IPV6_SUBTREES
1001                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1002 #endif
1003
1004                 dst_free(new);
1005         }
1006
1007         dst_release(dst_orig);
1008         return new ? new : ERR_PTR(-ENOMEM);
1009 }
1010
1011 /*
1012  *      Destination cache support functions
1013  */
1014
1015 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1016 {
1017         if (rt->dst.from &&
1018             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1019                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1020 }
1021
1022 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1023 {
1024         struct rt6_info *rt;
1025
1026         rt = (struct rt6_info *) dst;
1027
1028         /* All IPV6 dsts are created with ->obsolete set to the value
1029          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1030          * into this function always.
1031          */
1032         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1033                 return NULL;
1034
1035         if (rt6_check_expired(rt))
1036                 return NULL;
1037
1038         rt6_dst_from_metrics_check(rt);
1039
1040         return dst;
1041 }
1042
1043 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1044 {
1045         struct rt6_info *rt = (struct rt6_info *) dst;
1046
1047         if (rt) {
1048                 if (rt->rt6i_flags & RTF_CACHE) {
1049                         if (rt6_check_expired(rt)) {
1050                                 ip6_del_rt(rt);
1051                                 dst = NULL;
1052                         }
1053                 } else {
1054                         dst_release(dst);
1055                         dst = NULL;
1056                 }
1057         }
1058         return dst;
1059 }
1060
1061 static void ip6_link_failure(struct sk_buff *skb)
1062 {
1063         struct rt6_info *rt;
1064
1065         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1066
1067         rt = (struct rt6_info *) skb_dst(skb);
1068         if (rt) {
1069                 if (rt->rt6i_flags & RTF_CACHE) {
1070                         dst_hold(&rt->dst);
1071                         if (ip6_del_rt(rt))
1072                                 dst_free(&rt->dst);
1073                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1074                         rt->rt6i_node->fn_sernum = -1;
1075                 }
1076         }
1077 }
1078
1079 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1080 {
1081         struct net *net = dev_net(rt->dst.dev);
1082
1083         rt->rt6i_flags |= RTF_MODIFIED;
1084         rt->rt6i_pmtu = mtu;
1085         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1086 }
1087
1088 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1089                                  const struct ipv6hdr *iph, u32 mtu)
1090 {
1091         struct rt6_info *rt6 = (struct rt6_info *)dst;
1092
1093         if (rt6->rt6i_flags & RTF_LOCAL)
1094                 return;
1095
1096         dst_confirm(dst);
1097         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1098         if (mtu >= dst_mtu(dst))
1099                 return;
1100
1101         if (rt6->rt6i_flags & RTF_CACHE) {
1102                 rt6_do_update_pmtu(rt6, mtu);
1103         } else {
1104                 const struct in6_addr *daddr, *saddr;
1105                 struct rt6_info *nrt6;
1106
1107                 if (iph) {
1108                         daddr = &iph->daddr;
1109                         saddr = &iph->saddr;
1110                 } else if (sk) {
1111                         daddr = &sk->sk_v6_daddr;
1112                         saddr = &inet6_sk(sk)->saddr;
1113                 } else {
1114                         return;
1115                 }
1116                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1117                 if (nrt6) {
1118                         rt6_do_update_pmtu(nrt6, mtu);
1119
1120                         /* ip6_ins_rt(nrt6) will bump the
1121                          * rt6->rt6i_node->fn_sernum
1122                          * which will fail the next rt6_check() and
1123                          * invalidate the sk->sk_dst_cache.
1124                          */
1125                         ip6_ins_rt(nrt6);
1126                 }
1127         }
1128 }
1129
1130 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1131                                struct sk_buff *skb, u32 mtu)
1132 {
1133         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1134 }
1135
1136 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1137                      int oif, u32 mark)
1138 {
1139         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1140         struct dst_entry *dst;
1141         struct flowi6 fl6;
1142
1143         memset(&fl6, 0, sizeof(fl6));
1144         fl6.flowi6_oif = oif;
1145         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1146         fl6.daddr = iph->daddr;
1147         fl6.saddr = iph->saddr;
1148         fl6.flowlabel = ip6_flowinfo(iph);
1149
1150         dst = ip6_route_output(net, NULL, &fl6);
1151         if (!dst->error)
1152                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1153         dst_release(dst);
1154 }
1155 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1156
1157 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1158 {
1159         ip6_update_pmtu(skb, sock_net(sk), mtu,
1160                         sk->sk_bound_dev_if, sk->sk_mark);
1161 }
1162 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1163
1164 /* Handle redirects */
1165 struct ip6rd_flowi {
1166         struct flowi6 fl6;
1167         struct in6_addr gateway;
1168 };
1169
1170 static struct rt6_info *__ip6_route_redirect(struct net *net,
1171                                              struct fib6_table *table,
1172                                              struct flowi6 *fl6,
1173                                              int flags)
1174 {
1175         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1176         struct rt6_info *rt;
1177         struct fib6_node *fn;
1178
1179         /* Get the "current" route for this destination and
1180          * check if the redirect has come from approriate router.
1181          *
1182          * RFC 4861 specifies that redirects should only be
1183          * accepted if they come from the nexthop to the target.
1184          * Due to the way the routes are chosen, this notion
1185          * is a bit fuzzy and one might need to check all possible
1186          * routes.
1187          */
1188
1189         read_lock_bh(&table->tb6_lock);
1190         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1191 restart:
1192         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1193                 if (rt6_check_expired(rt))
1194                         continue;
1195                 if (rt->dst.error)
1196                         break;
1197                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1198                         continue;
1199                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1200                         continue;
1201                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1202                         continue;
1203                 break;
1204         }
1205
1206         if (!rt)
1207                 rt = net->ipv6.ip6_null_entry;
1208         else if (rt->dst.error) {
1209                 rt = net->ipv6.ip6_null_entry;
1210                 goto out;
1211         }
1212
1213         if (rt == net->ipv6.ip6_null_entry) {
1214                 fn = fib6_backtrack(fn, &fl6->saddr);
1215                 if (fn)
1216                         goto restart;
1217         }
1218
1219 out:
1220         dst_hold(&rt->dst);
1221
1222         read_unlock_bh(&table->tb6_lock);
1223
1224         return rt;
1225 };
1226
1227 static struct dst_entry *ip6_route_redirect(struct net *net,
1228                                         const struct flowi6 *fl6,
1229                                         const struct in6_addr *gateway)
1230 {
1231         int flags = RT6_LOOKUP_F_HAS_SADDR;
1232         struct ip6rd_flowi rdfl;
1233
1234         rdfl.fl6 = *fl6;
1235         rdfl.gateway = *gateway;
1236
1237         return fib6_rule_lookup(net, &rdfl.fl6,
1238                                 flags, __ip6_route_redirect);
1239 }
1240
1241 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1242 {
1243         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1244         struct dst_entry *dst;
1245         struct flowi6 fl6;
1246
1247         memset(&fl6, 0, sizeof(fl6));
1248         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1249         fl6.flowi6_oif = oif;
1250         fl6.flowi6_mark = mark;
1251         fl6.daddr = iph->daddr;
1252         fl6.saddr = iph->saddr;
1253         fl6.flowlabel = ip6_flowinfo(iph);
1254
1255         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1256         rt6_do_redirect(dst, NULL, skb);
1257         dst_release(dst);
1258 }
1259 EXPORT_SYMBOL_GPL(ip6_redirect);
1260
1261 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1262                             u32 mark)
1263 {
1264         const struct ipv6hdr *iph = ipv6_hdr(skb);
1265         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1266         struct dst_entry *dst;
1267         struct flowi6 fl6;
1268
1269         memset(&fl6, 0, sizeof(fl6));
1270         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1271         fl6.flowi6_oif = oif;
1272         fl6.flowi6_mark = mark;
1273         fl6.daddr = msg->dest;
1274         fl6.saddr = iph->daddr;
1275
1276         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1277         rt6_do_redirect(dst, NULL, skb);
1278         dst_release(dst);
1279 }
1280
1281 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1282 {
1283         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1284 }
1285 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1286
1287 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1288 {
1289         struct net_device *dev = dst->dev;
1290         unsigned int mtu = dst_mtu(dst);
1291         struct net *net = dev_net(dev);
1292
1293         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1294
1295         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1296                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1297
1298         /*
1299          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1300          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1301          * IPV6_MAXPLEN is also valid and means: "any MSS,
1302          * rely only on pmtu discovery"
1303          */
1304         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1305                 mtu = IPV6_MAXPLEN;
1306         return mtu;
1307 }
1308
1309 static unsigned int ip6_mtu(const struct dst_entry *dst)
1310 {
1311         const struct rt6_info *rt = (const struct rt6_info *)dst;
1312         unsigned int mtu = rt->rt6i_pmtu;
1313         struct inet6_dev *idev;
1314
1315         if (mtu)
1316                 goto out;
1317
1318         mtu = dst_metric_raw(dst, RTAX_MTU);
1319         if (mtu)
1320                 goto out;
1321
1322         mtu = IPV6_MIN_MTU;
1323
1324         rcu_read_lock();
1325         idev = __in6_dev_get(dst->dev);
1326         if (idev)
1327                 mtu = idev->cnf.mtu6;
1328         rcu_read_unlock();
1329
1330 out:
1331         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1332 }
1333
1334 static struct dst_entry *icmp6_dst_gc_list;
1335 static DEFINE_SPINLOCK(icmp6_dst_lock);
1336
1337 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1338                                   struct flowi6 *fl6)
1339 {
1340         struct dst_entry *dst;
1341         struct rt6_info *rt;
1342         struct inet6_dev *idev = in6_dev_get(dev);
1343         struct net *net = dev_net(dev);
1344
1345         if (unlikely(!idev))
1346                 return ERR_PTR(-ENODEV);
1347
1348         rt = ip6_dst_alloc(net, dev, 0, NULL);
1349         if (unlikely(!rt)) {
1350                 in6_dev_put(idev);
1351                 dst = ERR_PTR(-ENOMEM);
1352                 goto out;
1353         }
1354
1355         rt->dst.flags |= DST_HOST;
1356         rt->dst.output  = ip6_output;
1357         atomic_set(&rt->dst.__refcnt, 1);
1358         rt->rt6i_gateway  = fl6->daddr;
1359         rt->rt6i_dst.addr = fl6->daddr;
1360         rt->rt6i_dst.plen = 128;
1361         rt->rt6i_idev     = idev;
1362         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1363
1364         spin_lock_bh(&icmp6_dst_lock);
1365         rt->dst.next = icmp6_dst_gc_list;
1366         icmp6_dst_gc_list = &rt->dst;
1367         spin_unlock_bh(&icmp6_dst_lock);
1368
1369         fib6_force_start_gc(net);
1370
1371         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1372
1373 out:
1374         return dst;
1375 }
1376
1377 int icmp6_dst_gc(void)
1378 {
1379         struct dst_entry *dst, **pprev;
1380         int more = 0;
1381
1382         spin_lock_bh(&icmp6_dst_lock);
1383         pprev = &icmp6_dst_gc_list;
1384
1385         while ((dst = *pprev) != NULL) {
1386                 if (!atomic_read(&dst->__refcnt)) {
1387                         *pprev = dst->next;
1388                         dst_free(dst);
1389                 } else {
1390                         pprev = &dst->next;
1391                         ++more;
1392                 }
1393         }
1394
1395         spin_unlock_bh(&icmp6_dst_lock);
1396
1397         return more;
1398 }
1399
1400 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1401                             void *arg)
1402 {
1403         struct dst_entry *dst, **pprev;
1404
1405         spin_lock_bh(&icmp6_dst_lock);
1406         pprev = &icmp6_dst_gc_list;
1407         while ((dst = *pprev) != NULL) {
1408                 struct rt6_info *rt = (struct rt6_info *) dst;
1409                 if (func(rt, arg)) {
1410                         *pprev = dst->next;
1411                         dst_free(dst);
1412                 } else {
1413                         pprev = &dst->next;
1414                 }
1415         }
1416         spin_unlock_bh(&icmp6_dst_lock);
1417 }
1418
1419 static int ip6_dst_gc(struct dst_ops *ops)
1420 {
1421         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1422         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1423         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1424         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1425         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1426         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1427         int entries;
1428
1429         entries = dst_entries_get_fast(ops);
1430         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1431             entries <= rt_max_size)
1432                 goto out;
1433
1434         net->ipv6.ip6_rt_gc_expire++;
1435         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1436         entries = dst_entries_get_slow(ops);
1437         if (entries < ops->gc_thresh)
1438                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1439 out:
1440         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1441         return entries > rt_max_size;
1442 }
1443
1444 static int ip6_convert_metrics(struct mx6_config *mxc,
1445                                const struct fib6_config *cfg)
1446 {
1447         struct nlattr *nla;
1448         int remaining;
1449         u32 *mp;
1450
1451         if (!cfg->fc_mx)
1452                 return 0;
1453
1454         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1455         if (unlikely(!mp))
1456                 return -ENOMEM;
1457
1458         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1459                 int type = nla_type(nla);
1460
1461                 if (type) {
1462                         u32 val;
1463
1464                         if (unlikely(type > RTAX_MAX))
1465                                 goto err;
1466                         if (type == RTAX_CC_ALGO) {
1467                                 char tmp[TCP_CA_NAME_MAX];
1468
1469                                 nla_strlcpy(tmp, nla, sizeof(tmp));
1470                                 val = tcp_ca_get_key_by_name(tmp);
1471                                 if (val == TCP_CA_UNSPEC)
1472                                         goto err;
1473                         } else {
1474                                 val = nla_get_u32(nla);
1475                         }
1476
1477                         mp[type - 1] = val;
1478                         __set_bit(type - 1, mxc->mx_valid);
1479                 }
1480         }
1481
1482         mxc->mx = mp;
1483
1484         return 0;
1485  err:
1486         kfree(mp);
1487         return -EINVAL;
1488 }
1489
1490 int ip6_route_add(struct fib6_config *cfg)
1491 {
1492         int err;
1493         struct net *net = cfg->fc_nlinfo.nl_net;
1494         struct rt6_info *rt = NULL;
1495         struct net_device *dev = NULL;
1496         struct inet6_dev *idev = NULL;
1497         struct fib6_table *table;
1498         struct mx6_config mxc = { .mx = NULL, };
1499         int addr_type;
1500
1501         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1502                 return -EINVAL;
1503 #ifndef CONFIG_IPV6_SUBTREES
1504         if (cfg->fc_src_len)
1505                 return -EINVAL;
1506 #endif
1507         if (cfg->fc_ifindex) {
1508                 err = -ENODEV;
1509                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1510                 if (!dev)
1511                         goto out;
1512                 idev = in6_dev_get(dev);
1513                 if (!idev)
1514                         goto out;
1515         }
1516
1517         if (cfg->fc_metric == 0)
1518                 cfg->fc_metric = IP6_RT_PRIO_USER;
1519
1520         err = -ENOBUFS;
1521         if (cfg->fc_nlinfo.nlh &&
1522             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1523                 table = fib6_get_table(net, cfg->fc_table);
1524                 if (!table) {
1525                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1526                         table = fib6_new_table(net, cfg->fc_table);
1527                 }
1528         } else {
1529                 table = fib6_new_table(net, cfg->fc_table);
1530         }
1531
1532         if (!table)
1533                 goto out;
1534
1535         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1536
1537         if (!rt) {
1538                 err = -ENOMEM;
1539                 goto out;
1540         }
1541
1542         if (cfg->fc_flags & RTF_EXPIRES)
1543                 rt6_set_expires(rt, jiffies +
1544                                 clock_t_to_jiffies(cfg->fc_expires));
1545         else
1546                 rt6_clean_expires(rt);
1547
1548         if (cfg->fc_protocol == RTPROT_UNSPEC)
1549                 cfg->fc_protocol = RTPROT_BOOT;
1550         rt->rt6i_protocol = cfg->fc_protocol;
1551
1552         addr_type = ipv6_addr_type(&cfg->fc_dst);
1553
1554         if (addr_type & IPV6_ADDR_MULTICAST)
1555                 rt->dst.input = ip6_mc_input;
1556         else if (cfg->fc_flags & RTF_LOCAL)
1557                 rt->dst.input = ip6_input;
1558         else
1559                 rt->dst.input = ip6_forward;
1560
1561         rt->dst.output = ip6_output;
1562
1563         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1564         rt->rt6i_dst.plen = cfg->fc_dst_len;
1565         if (rt->rt6i_dst.plen == 128)
1566                 rt->dst.flags |= DST_HOST;
1567
1568 #ifdef CONFIG_IPV6_SUBTREES
1569         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1570         rt->rt6i_src.plen = cfg->fc_src_len;
1571 #endif
1572
1573         rt->rt6i_metric = cfg->fc_metric;
1574
1575         /* We cannot add true routes via loopback here,
1576            they would result in kernel looping; promote them to reject routes
1577          */
1578         if ((cfg->fc_flags & RTF_REJECT) ||
1579             (dev && (dev->flags & IFF_LOOPBACK) &&
1580              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1581              !(cfg->fc_flags & RTF_LOCAL))) {
1582                 /* hold loopback dev/idev if we haven't done so. */
1583                 if (dev != net->loopback_dev) {
1584                         if (dev) {
1585                                 dev_put(dev);
1586                                 in6_dev_put(idev);
1587                         }
1588                         dev = net->loopback_dev;
1589                         dev_hold(dev);
1590                         idev = in6_dev_get(dev);
1591                         if (!idev) {
1592                                 err = -ENODEV;
1593                                 goto out;
1594                         }
1595                 }
1596                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1597                 switch (cfg->fc_type) {
1598                 case RTN_BLACKHOLE:
1599                         rt->dst.error = -EINVAL;
1600                         rt->dst.output = dst_discard_sk;
1601                         rt->dst.input = dst_discard;
1602                         break;
1603                 case RTN_PROHIBIT:
1604                         rt->dst.error = -EACCES;
1605                         rt->dst.output = ip6_pkt_prohibit_out;
1606                         rt->dst.input = ip6_pkt_prohibit;
1607                         break;
1608                 case RTN_THROW:
1609                 default:
1610                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1611                                         : -ENETUNREACH;
1612                         rt->dst.output = ip6_pkt_discard_out;
1613                         rt->dst.input = ip6_pkt_discard;
1614                         break;
1615                 }
1616                 goto install_route;
1617         }
1618
1619         if (cfg->fc_flags & RTF_GATEWAY) {
1620                 const struct in6_addr *gw_addr;
1621                 int gwa_type;
1622
1623                 gw_addr = &cfg->fc_gateway;
1624
1625                 /* if gw_addr is local we will fail to detect this in case
1626                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1627                  * will return already-added prefix route via interface that
1628                  * prefix route was assigned to, which might be non-loopback.
1629                  */
1630                 err = -EINVAL;
1631                 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1632                         goto out;
1633
1634                 rt->rt6i_gateway = *gw_addr;
1635                 gwa_type = ipv6_addr_type(gw_addr);
1636
1637                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1638                         struct rt6_info *grt;
1639
1640                         /* IPv6 strictly inhibits using not link-local
1641                            addresses as nexthop address.
1642                            Otherwise, router will not able to send redirects.
1643                            It is very good, but in some (rare!) circumstances
1644                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1645                            some exceptions. --ANK
1646                          */
1647                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1648                                 goto out;
1649
1650                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1651
1652                         err = -EHOSTUNREACH;
1653                         if (!grt)
1654                                 goto out;
1655                         if (dev) {
1656                                 if (dev != grt->dst.dev) {
1657                                         ip6_rt_put(grt);
1658                                         goto out;
1659                                 }
1660                         } else {
1661                                 dev = grt->dst.dev;
1662                                 idev = grt->rt6i_idev;
1663                                 dev_hold(dev);
1664                                 in6_dev_hold(grt->rt6i_idev);
1665                         }
1666                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1667                                 err = 0;
1668                         ip6_rt_put(grt);
1669
1670                         if (err)
1671                                 goto out;
1672                 }
1673                 err = -EINVAL;
1674                 if (!dev || (dev->flags & IFF_LOOPBACK))
1675                         goto out;
1676         }
1677
1678         err = -ENODEV;
1679         if (!dev)
1680                 goto out;
1681
1682         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1683                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1684                         err = -EINVAL;
1685                         goto out;
1686                 }
1687                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1688                 rt->rt6i_prefsrc.plen = 128;
1689         } else
1690                 rt->rt6i_prefsrc.plen = 0;
1691
1692         rt->rt6i_flags = cfg->fc_flags;
1693
1694 install_route:
1695         rt->dst.dev = dev;
1696         rt->rt6i_idev = idev;
1697         rt->rt6i_table = table;
1698
1699         cfg->fc_nlinfo.nl_net = dev_net(dev);
1700
1701         err = ip6_convert_metrics(&mxc, cfg);
1702         if (err)
1703                 goto out;
1704
1705         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1706
1707         kfree(mxc.mx);
1708         return err;
1709 out:
1710         if (dev)
1711                 dev_put(dev);
1712         if (idev)
1713                 in6_dev_put(idev);
1714         if (rt)
1715                 dst_free(&rt->dst);
1716         return err;
1717 }
1718
1719 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1720 {
1721         int err;
1722         struct fib6_table *table;
1723         struct net *net = dev_net(rt->dst.dev);
1724
1725         if (rt == net->ipv6.ip6_null_entry) {
1726                 err = -ENOENT;
1727                 goto out;
1728         }
1729
1730         table = rt->rt6i_table;
1731         write_lock_bh(&table->tb6_lock);
1732         err = fib6_del(rt, info);
1733         write_unlock_bh(&table->tb6_lock);
1734
1735 out:
1736         ip6_rt_put(rt);
1737         return err;
1738 }
1739
1740 int ip6_del_rt(struct rt6_info *rt)
1741 {
1742         struct nl_info info = {
1743                 .nl_net = dev_net(rt->dst.dev),
1744         };
1745         return __ip6_del_rt(rt, &info);
1746 }
1747
1748 static int ip6_route_del(struct fib6_config *cfg)
1749 {
1750         struct fib6_table *table;
1751         struct fib6_node *fn;
1752         struct rt6_info *rt;
1753         int err = -ESRCH;
1754
1755         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1756         if (!table)
1757                 return err;
1758
1759         read_lock_bh(&table->tb6_lock);
1760
1761         fn = fib6_locate(&table->tb6_root,
1762                          &cfg->fc_dst, cfg->fc_dst_len,
1763                          &cfg->fc_src, cfg->fc_src_len);
1764
1765         if (fn) {
1766                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1767                         if ((rt->rt6i_flags & RTF_CACHE) &&
1768                             !(cfg->fc_flags & RTF_CACHE))
1769                                 continue;
1770                         if (cfg->fc_ifindex &&
1771                             (!rt->dst.dev ||
1772                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1773                                 continue;
1774                         if (cfg->fc_flags & RTF_GATEWAY &&
1775                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1776                                 continue;
1777                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1778                                 continue;
1779                         dst_hold(&rt->dst);
1780                         read_unlock_bh(&table->tb6_lock);
1781
1782                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1783                 }
1784         }
1785         read_unlock_bh(&table->tb6_lock);
1786
1787         return err;
1788 }
1789
1790 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1791 {
1792         struct net *net = dev_net(skb->dev);
1793         struct netevent_redirect netevent;
1794         struct rt6_info *rt, *nrt = NULL;
1795         struct ndisc_options ndopts;
1796         struct inet6_dev *in6_dev;
1797         struct neighbour *neigh;
1798         struct rd_msg *msg;
1799         int optlen, on_link;
1800         u8 *lladdr;
1801
1802         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1803         optlen -= sizeof(*msg);
1804
1805         if (optlen < 0) {
1806                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1807                 return;
1808         }
1809
1810         msg = (struct rd_msg *)icmp6_hdr(skb);
1811
1812         if (ipv6_addr_is_multicast(&msg->dest)) {
1813                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1814                 return;
1815         }
1816
1817         on_link = 0;
1818         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1819                 on_link = 1;
1820         } else if (ipv6_addr_type(&msg->target) !=
1821                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1822                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1823                 return;
1824         }
1825
1826         in6_dev = __in6_dev_get(skb->dev);
1827         if (!in6_dev)
1828                 return;
1829         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1830                 return;
1831
1832         /* RFC2461 8.1:
1833          *      The IP source address of the Redirect MUST be the same as the current
1834          *      first-hop router for the specified ICMP Destination Address.
1835          */
1836
1837         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1838                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1839                 return;
1840         }
1841
1842         lladdr = NULL;
1843         if (ndopts.nd_opts_tgt_lladdr) {
1844                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1845                                              skb->dev);
1846                 if (!lladdr) {
1847                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1848                         return;
1849                 }
1850         }
1851
1852         rt = (struct rt6_info *) dst;
1853         if (rt == net->ipv6.ip6_null_entry) {
1854                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1855                 return;
1856         }
1857
1858         /* Redirect received -> path was valid.
1859          * Look, redirects are sent only in response to data packets,
1860          * so that this nexthop apparently is reachable. --ANK
1861          */
1862         dst_confirm(&rt->dst);
1863
1864         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1865         if (!neigh)
1866                 return;
1867
1868         /*
1869          *      We have finally decided to accept it.
1870          */
1871
1872         neigh_update(neigh, lladdr, NUD_STALE,
1873                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1874                      NEIGH_UPDATE_F_OVERRIDE|
1875                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1876                                      NEIGH_UPDATE_F_ISROUTER))
1877                      );
1878
1879         nrt = ip6_rt_copy(rt, &msg->dest);
1880         if (!nrt)
1881                 goto out;
1882
1883         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1884         if (on_link)
1885                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1886
1887         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1888
1889         if (ip6_ins_rt(nrt))
1890                 goto out;
1891
1892         netevent.old = &rt->dst;
1893         netevent.new = &nrt->dst;
1894         netevent.daddr = &msg->dest;
1895         netevent.neigh = neigh;
1896         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1897
1898         if (rt->rt6i_flags & RTF_CACHE) {
1899                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1900                 ip6_del_rt(rt);
1901         }
1902
1903 out:
1904         neigh_release(neigh);
1905 }
1906
1907 /*
1908  *      Misc support functions
1909  */
1910
1911 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1912 {
1913         BUG_ON(from->dst.from);
1914
1915         rt->rt6i_flags &= ~RTF_EXPIRES;
1916         dst_hold(&from->dst);
1917         rt->dst.from = &from->dst;
1918         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1919 }
1920
1921 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1922                                     const struct in6_addr *dest)
1923 {
1924         struct net *net = dev_net(ort->dst.dev);
1925         struct rt6_info *rt;
1926
1927         if (ort->rt6i_flags & RTF_CACHE)
1928                 ort = (struct rt6_info *)ort->dst.from;
1929
1930         rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1931                            ort->rt6i_table);
1932
1933         if (rt) {
1934                 rt->dst.input = ort->dst.input;
1935                 rt->dst.output = ort->dst.output;
1936                 rt->dst.flags |= DST_HOST;
1937
1938                 rt->rt6i_dst.addr = *dest;
1939                 rt->rt6i_dst.plen = 128;
1940                 rt->dst.error = ort->dst.error;
1941                 rt->rt6i_idev = ort->rt6i_idev;
1942                 if (rt->rt6i_idev)
1943                         in6_dev_hold(rt->rt6i_idev);
1944                 rt->dst.lastuse = jiffies;
1945                 rt->rt6i_gateway = ort->rt6i_gateway;
1946                 rt->rt6i_flags = ort->rt6i_flags;
1947                 rt6_set_from(rt, ort);
1948                 rt->rt6i_metric = 0;
1949
1950 #ifdef CONFIG_IPV6_SUBTREES
1951                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1952 #endif
1953                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1954                 rt->rt6i_table = ort->rt6i_table;
1955         }
1956         return rt;
1957 }
1958
1959 #ifdef CONFIG_IPV6_ROUTE_INFO
1960 static struct rt6_info *rt6_get_route_info(struct net *net,
1961                                            const struct in6_addr *prefix, int prefixlen,
1962                                            const struct in6_addr *gwaddr, int ifindex)
1963 {
1964         struct fib6_node *fn;
1965         struct rt6_info *rt = NULL;
1966         struct fib6_table *table;
1967
1968         table = fib6_get_table(net, RT6_TABLE_INFO);
1969         if (!table)
1970                 return NULL;
1971
1972         read_lock_bh(&table->tb6_lock);
1973         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1974         if (!fn)
1975                 goto out;
1976
1977         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1978                 if (rt->dst.dev->ifindex != ifindex)
1979                         continue;
1980                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1981                         continue;
1982                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1983                         continue;
1984                 dst_hold(&rt->dst);
1985                 break;
1986         }
1987 out:
1988         read_unlock_bh(&table->tb6_lock);
1989         return rt;
1990 }
1991
1992 static struct rt6_info *rt6_add_route_info(struct net *net,
1993                                            const struct in6_addr *prefix, int prefixlen,
1994                                            const struct in6_addr *gwaddr, int ifindex,
1995                                            unsigned int pref)
1996 {
1997         struct fib6_config cfg = {
1998                 .fc_table       = RT6_TABLE_INFO,
1999                 .fc_metric      = IP6_RT_PRIO_USER,
2000                 .fc_ifindex     = ifindex,
2001                 .fc_dst_len     = prefixlen,
2002                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2003                                   RTF_UP | RTF_PREF(pref),
2004                 .fc_nlinfo.portid = 0,
2005                 .fc_nlinfo.nlh = NULL,
2006                 .fc_nlinfo.nl_net = net,
2007         };
2008
2009         cfg.fc_dst = *prefix;
2010         cfg.fc_gateway = *gwaddr;
2011
2012         /* We should treat it as a default route if prefix length is 0. */
2013         if (!prefixlen)
2014                 cfg.fc_flags |= RTF_DEFAULT;
2015
2016         ip6_route_add(&cfg);
2017
2018         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2019 }
2020 #endif
2021
2022 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2023 {
2024         struct rt6_info *rt;
2025         struct fib6_table *table;
2026
2027         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2028         if (!table)
2029                 return NULL;
2030
2031         read_lock_bh(&table->tb6_lock);
2032         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2033                 if (dev == rt->dst.dev &&
2034                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2035                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2036                         break;
2037         }
2038         if (rt)
2039                 dst_hold(&rt->dst);
2040         read_unlock_bh(&table->tb6_lock);
2041         return rt;
2042 }
2043
2044 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2045                                      struct net_device *dev,
2046                                      unsigned int pref)
2047 {
2048         struct fib6_config cfg = {
2049                 .fc_table       = RT6_TABLE_DFLT,
2050                 .fc_metric      = IP6_RT_PRIO_USER,
2051                 .fc_ifindex     = dev->ifindex,
2052                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2053                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2054                 .fc_nlinfo.portid = 0,
2055                 .fc_nlinfo.nlh = NULL,
2056                 .fc_nlinfo.nl_net = dev_net(dev),
2057         };
2058
2059         cfg.fc_gateway = *gwaddr;
2060
2061         ip6_route_add(&cfg);
2062
2063         return rt6_get_dflt_router(gwaddr, dev);
2064 }
2065
2066 void rt6_purge_dflt_routers(struct net *net)
2067 {
2068         struct rt6_info *rt;
2069         struct fib6_table *table;
2070
2071         /* NOTE: Keep consistent with rt6_get_dflt_router */
2072         table = fib6_get_table(net, RT6_TABLE_DFLT);
2073         if (!table)
2074                 return;
2075
2076 restart:
2077         read_lock_bh(&table->tb6_lock);
2078         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2079                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2080                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2081                         dst_hold(&rt->dst);
2082                         read_unlock_bh(&table->tb6_lock);
2083                         ip6_del_rt(rt);
2084                         goto restart;
2085                 }
2086         }
2087         read_unlock_bh(&table->tb6_lock);
2088 }
2089
2090 static void rtmsg_to_fib6_config(struct net *net,
2091                                  struct in6_rtmsg *rtmsg,
2092                                  struct fib6_config *cfg)
2093 {
2094         memset(cfg, 0, sizeof(*cfg));
2095
2096         cfg->fc_table = RT6_TABLE_MAIN;
2097         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2098         cfg->fc_metric = rtmsg->rtmsg_metric;
2099         cfg->fc_expires = rtmsg->rtmsg_info;
2100         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2101         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2102         cfg->fc_flags = rtmsg->rtmsg_flags;
2103
2104         cfg->fc_nlinfo.nl_net = net;
2105
2106         cfg->fc_dst = rtmsg->rtmsg_dst;
2107         cfg->fc_src = rtmsg->rtmsg_src;
2108         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2109 }
2110
2111 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2112 {
2113         struct fib6_config cfg;
2114         struct in6_rtmsg rtmsg;
2115         int err;
2116
2117         switch (cmd) {
2118         case SIOCADDRT:         /* Add a route */
2119         case SIOCDELRT:         /* Delete a route */
2120                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2121                         return -EPERM;
2122                 err = copy_from_user(&rtmsg, arg,
2123                                      sizeof(struct in6_rtmsg));
2124                 if (err)
2125                         return -EFAULT;
2126
2127                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2128
2129                 rtnl_lock();
2130                 switch (cmd) {
2131                 case SIOCADDRT:
2132                         err = ip6_route_add(&cfg);
2133                         break;
2134                 case SIOCDELRT:
2135                         err = ip6_route_del(&cfg);
2136                         break;
2137                 default:
2138                         err = -EINVAL;
2139                 }
2140                 rtnl_unlock();
2141
2142                 return err;
2143         }
2144
2145         return -EINVAL;
2146 }
2147
2148 /*
2149  *      Drop the packet on the floor
2150  */
2151
2152 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2153 {
2154         int type;
2155         struct dst_entry *dst = skb_dst(skb);
2156         switch (ipstats_mib_noroutes) {
2157         case IPSTATS_MIB_INNOROUTES:
2158                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2159                 if (type == IPV6_ADDR_ANY) {
2160                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2161                                       IPSTATS_MIB_INADDRERRORS);
2162                         break;
2163                 }
2164                 /* FALLTHROUGH */
2165         case IPSTATS_MIB_OUTNOROUTES:
2166                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2167                               ipstats_mib_noroutes);
2168                 break;
2169         }
2170         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2171         kfree_skb(skb);
2172         return 0;
2173 }
2174
2175 static int ip6_pkt_discard(struct sk_buff *skb)
2176 {
2177         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2178 }
2179
2180 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2181 {
2182         skb->dev = skb_dst(skb)->dev;
2183         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2184 }
2185
2186 static int ip6_pkt_prohibit(struct sk_buff *skb)
2187 {
2188         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2189 }
2190
2191 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2192 {
2193         skb->dev = skb_dst(skb)->dev;
2194         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2195 }
2196
2197 /*
2198  *      Allocate a dst for local (unicast / anycast) address.
2199  */
2200
2201 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2202                                     const struct in6_addr *addr,
2203                                     bool anycast)
2204 {
2205         struct net *net = dev_net(idev->dev);
2206         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2207                                             DST_NOCOUNT, NULL);
2208         if (!rt)
2209                 return ERR_PTR(-ENOMEM);
2210
2211         in6_dev_hold(idev);
2212
2213         rt->dst.flags |= DST_HOST;
2214         rt->dst.input = ip6_input;
2215         rt->dst.output = ip6_output;
2216         rt->rt6i_idev = idev;
2217
2218         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2219         if (anycast)
2220                 rt->rt6i_flags |= RTF_ANYCAST;
2221         else
2222                 rt->rt6i_flags |= RTF_LOCAL;
2223
2224         rt->rt6i_gateway  = *addr;
2225         rt->rt6i_dst.addr = *addr;
2226         rt->rt6i_dst.plen = 128;
2227         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2228
2229         atomic_set(&rt->dst.__refcnt, 1);
2230
2231         return rt;
2232 }
2233
2234 int ip6_route_get_saddr(struct net *net,
2235                         struct rt6_info *rt,
2236                         const struct in6_addr *daddr,
2237                         unsigned int prefs,
2238                         struct in6_addr *saddr)
2239 {
2240         struct inet6_dev *idev =
2241                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2242         int err = 0;
2243         if (rt && rt->rt6i_prefsrc.plen)
2244                 *saddr = rt->rt6i_prefsrc.addr;
2245         else
2246                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2247                                          daddr, prefs, saddr);
2248         return err;
2249 }
2250
2251 /* remove deleted ip from prefsrc entries */
2252 struct arg_dev_net_ip {
2253         struct net_device *dev;
2254         struct net *net;
2255         struct in6_addr *addr;
2256 };
2257
2258 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2259 {
2260         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2261         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2262         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2263
2264         if (((void *)rt->dst.dev == dev || !dev) &&
2265             rt != net->ipv6.ip6_null_entry &&
2266             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2267                 /* remove prefsrc entry */
2268                 rt->rt6i_prefsrc.plen = 0;
2269         }
2270         return 0;
2271 }
2272
2273 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2274 {
2275         struct net *net = dev_net(ifp->idev->dev);
2276         struct arg_dev_net_ip adni = {
2277                 .dev = ifp->idev->dev,
2278                 .net = net,
2279                 .addr = &ifp->addr,
2280         };
2281         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2282 }
2283
2284 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2285 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2286
2287 /* Remove routers and update dst entries when gateway turn into host. */
2288 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2289 {
2290         struct in6_addr *gateway = (struct in6_addr *)arg;
2291
2292         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2293              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2294              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2295                 return -1;
2296         }
2297         return 0;
2298 }
2299
2300 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2301 {
2302         fib6_clean_all(net, fib6_clean_tohost, gateway);
2303 }
2304
2305 struct arg_dev_net {
2306         struct net_device *dev;
2307         struct net *net;
2308 };
2309
2310 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2311 {
2312         const struct arg_dev_net *adn = arg;
2313         const struct net_device *dev = adn->dev;
2314
2315         if ((rt->dst.dev == dev || !dev) &&
2316             rt != adn->net->ipv6.ip6_null_entry)
2317                 return -1;
2318
2319         return 0;
2320 }
2321
2322 void rt6_ifdown(struct net *net, struct net_device *dev)
2323 {
2324         struct arg_dev_net adn = {
2325                 .dev = dev,
2326                 .net = net,
2327         };
2328
2329         fib6_clean_all(net, fib6_ifdown, &adn);
2330         icmp6_clean_all(fib6_ifdown, &adn);
2331 }
2332
2333 struct rt6_mtu_change_arg {
2334         struct net_device *dev;
2335         unsigned int mtu;
2336 };
2337
2338 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2339 {
2340         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2341         struct inet6_dev *idev;
2342
2343         /* In IPv6 pmtu discovery is not optional,
2344            so that RTAX_MTU lock cannot disable it.
2345            We still use this lock to block changes
2346            caused by addrconf/ndisc.
2347         */
2348
2349         idev = __in6_dev_get(arg->dev);
2350         if (!idev)
2351                 return 0;
2352
2353         /* For administrative MTU increase, there is no way to discover
2354            IPv6 PMTU increase, so PMTU increase should be updated here.
2355            Since RFC 1981 doesn't include administrative MTU increase
2356            update PMTU increase is a MUST. (i.e. jumbo frame)
2357          */
2358         /*
2359            If new MTU is less than route PMTU, this new MTU will be the
2360            lowest MTU in the path, update the route PMTU to reflect PMTU
2361            decreases; if new MTU is greater than route PMTU, and the
2362            old MTU is the lowest MTU in the path, update the route PMTU
2363            to reflect the increase. In this case if the other nodes' MTU
2364            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2365            PMTU discouvery.
2366          */
2367         if (rt->dst.dev == arg->dev &&
2368             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2369                 if (rt->rt6i_flags & RTF_CACHE) {
2370                         /* For RTF_CACHE with rt6i_pmtu == 0
2371                          * (i.e. a redirected route),
2372                          * the metrics of its rt->dst.from has already
2373                          * been updated.
2374                          */
2375                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2376                                 rt->rt6i_pmtu = arg->mtu;
2377                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2378                            (dst_mtu(&rt->dst) < arg->mtu &&
2379                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2380                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2381                 }
2382         }
2383         return 0;
2384 }
2385
2386 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2387 {
2388         struct rt6_mtu_change_arg arg = {
2389                 .dev = dev,
2390                 .mtu = mtu,
2391         };
2392
2393         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2394 }
2395
2396 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2397         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2398         [RTA_OIF]               = { .type = NLA_U32 },
2399         [RTA_IIF]               = { .type = NLA_U32 },
2400         [RTA_PRIORITY]          = { .type = NLA_U32 },
2401         [RTA_METRICS]           = { .type = NLA_NESTED },
2402         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2403         [RTA_PREF]              = { .type = NLA_U8 },
2404 };
2405
2406 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2407                               struct fib6_config *cfg)
2408 {
2409         struct rtmsg *rtm;
2410         struct nlattr *tb[RTA_MAX+1];
2411         unsigned int pref;
2412         int err;
2413
2414         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2415         if (err < 0)
2416                 goto errout;
2417
2418         err = -EINVAL;
2419         rtm = nlmsg_data(nlh);
2420         memset(cfg, 0, sizeof(*cfg));
2421
2422         cfg->fc_table = rtm->rtm_table;
2423         cfg->fc_dst_len = rtm->rtm_dst_len;
2424         cfg->fc_src_len = rtm->rtm_src_len;
2425         cfg->fc_flags = RTF_UP;
2426         cfg->fc_protocol = rtm->rtm_protocol;
2427         cfg->fc_type = rtm->rtm_type;
2428
2429         if (rtm->rtm_type == RTN_UNREACHABLE ||
2430             rtm->rtm_type == RTN_BLACKHOLE ||
2431             rtm->rtm_type == RTN_PROHIBIT ||
2432             rtm->rtm_type == RTN_THROW)
2433                 cfg->fc_flags |= RTF_REJECT;
2434
2435         if (rtm->rtm_type == RTN_LOCAL)
2436                 cfg->fc_flags |= RTF_LOCAL;
2437
2438         if (rtm->rtm_flags & RTM_F_CLONED)
2439                 cfg->fc_flags |= RTF_CACHE;
2440
2441         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2442         cfg->fc_nlinfo.nlh = nlh;
2443         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2444
2445         if (tb[RTA_GATEWAY]) {
2446                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2447                 cfg->fc_flags |= RTF_GATEWAY;
2448         }
2449
2450         if (tb[RTA_DST]) {
2451                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2452
2453                 if (nla_len(tb[RTA_DST]) < plen)
2454                         goto errout;
2455
2456                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2457         }
2458
2459         if (tb[RTA_SRC]) {
2460                 int plen = (rtm->rtm_src_len + 7) >> 3;
2461
2462                 if (nla_len(tb[RTA_SRC]) < plen)
2463                         goto errout;
2464
2465                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2466         }
2467
2468         if (tb[RTA_PREFSRC])
2469                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2470
2471         if (tb[RTA_OIF])
2472                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2473
2474         if (tb[RTA_PRIORITY])
2475                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2476
2477         if (tb[RTA_METRICS]) {
2478                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2479                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2480         }
2481
2482         if (tb[RTA_TABLE])
2483                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2484
2485         if (tb[RTA_MULTIPATH]) {
2486                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2487                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2488         }
2489
2490         if (tb[RTA_PREF]) {
2491                 pref = nla_get_u8(tb[RTA_PREF]);
2492                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2493                     pref != ICMPV6_ROUTER_PREF_HIGH)
2494                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2495                 cfg->fc_flags |= RTF_PREF(pref);
2496         }
2497
2498         err = 0;
2499 errout:
2500         return err;
2501 }
2502
2503 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2504 {
2505         struct fib6_config r_cfg;
2506         struct rtnexthop *rtnh;
2507         int remaining;
2508         int attrlen;
2509         int err = 0, last_err = 0;
2510
2511         remaining = cfg->fc_mp_len;
2512 beginning:
2513         rtnh = (struct rtnexthop *)cfg->fc_mp;
2514
2515         /* Parse a Multipath Entry */
2516         while (rtnh_ok(rtnh, remaining)) {
2517                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2518                 if (rtnh->rtnh_ifindex)
2519                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2520
2521                 attrlen = rtnh_attrlen(rtnh);
2522                 if (attrlen > 0) {
2523                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2524
2525                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2526                         if (nla) {
2527                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2528                                 r_cfg.fc_flags |= RTF_GATEWAY;
2529                         }
2530                 }
2531                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2532                 if (err) {
2533                         last_err = err;
2534                         /* If we are trying to remove a route, do not stop the
2535                          * loop when ip6_route_del() fails (because next hop is
2536                          * already gone), we should try to remove all next hops.
2537                          */
2538                         if (add) {
2539                                 /* If add fails, we should try to delete all
2540                                  * next hops that have been already added.
2541                                  */
2542                                 add = 0;
2543                                 remaining = cfg->fc_mp_len - remaining;
2544                                 goto beginning;
2545                         }
2546                 }
2547                 /* Because each route is added like a single route we remove
2548                  * these flags after the first nexthop: if there is a collision,
2549                  * we have already failed to add the first nexthop:
2550                  * fib6_add_rt2node() has rejected it; when replacing, old
2551                  * nexthops have been replaced by first new, the rest should
2552                  * be added to it.
2553                  */
2554                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2555                                                      NLM_F_REPLACE);
2556                 rtnh = rtnh_next(rtnh, &remaining);
2557         }
2558
2559         return last_err;
2560 }
2561
2562 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2563 {
2564         struct fib6_config cfg;
2565         int err;
2566
2567         err = rtm_to_fib6_config(skb, nlh, &cfg);
2568         if (err < 0)
2569                 return err;
2570
2571         if (cfg.fc_mp)
2572                 return ip6_route_multipath(&cfg, 0);
2573         else
2574                 return ip6_route_del(&cfg);
2575 }
2576
2577 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2578 {
2579         struct fib6_config cfg;
2580         int err;
2581
2582         err = rtm_to_fib6_config(skb, nlh, &cfg);
2583         if (err < 0)
2584                 return err;
2585
2586         if (cfg.fc_mp)
2587                 return ip6_route_multipath(&cfg, 1);
2588         else
2589                 return ip6_route_add(&cfg);
2590 }
2591
2592 static inline size_t rt6_nlmsg_size(void)
2593 {
2594         return NLMSG_ALIGN(sizeof(struct rtmsg))
2595                + nla_total_size(16) /* RTA_SRC */
2596                + nla_total_size(16) /* RTA_DST */
2597                + nla_total_size(16) /* RTA_GATEWAY */
2598                + nla_total_size(16) /* RTA_PREFSRC */
2599                + nla_total_size(4) /* RTA_TABLE */
2600                + nla_total_size(4) /* RTA_IIF */
2601                + nla_total_size(4) /* RTA_OIF */
2602                + nla_total_size(4) /* RTA_PRIORITY */
2603                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2604                + nla_total_size(sizeof(struct rta_cacheinfo))
2605                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2606                + nla_total_size(1); /* RTA_PREF */
2607 }
2608
2609 static int rt6_fill_node(struct net *net,
2610                          struct sk_buff *skb, struct rt6_info *rt,
2611                          struct in6_addr *dst, struct in6_addr *src,
2612                          int iif, int type, u32 portid, u32 seq,
2613                          int prefix, int nowait, unsigned int flags)
2614 {
2615         u32 metrics[RTAX_MAX];
2616         struct rtmsg *rtm;
2617         struct nlmsghdr *nlh;
2618         long expires;
2619         u32 table;
2620
2621         if (prefix) {   /* user wants prefix routes only */
2622                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2623                         /* success since this is not a prefix route */
2624                         return 1;
2625                 }
2626         }
2627
2628         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2629         if (!nlh)
2630                 return -EMSGSIZE;
2631
2632         rtm = nlmsg_data(nlh);
2633         rtm->rtm_family = AF_INET6;
2634         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2635         rtm->rtm_src_len = rt->rt6i_src.plen;
2636         rtm->rtm_tos = 0;
2637         if (rt->rt6i_table)
2638                 table = rt->rt6i_table->tb6_id;
2639         else
2640                 table = RT6_TABLE_UNSPEC;
2641         rtm->rtm_table = table;
2642         if (nla_put_u32(skb, RTA_TABLE, table))
2643                 goto nla_put_failure;
2644         if (rt->rt6i_flags & RTF_REJECT) {
2645                 switch (rt->dst.error) {
2646                 case -EINVAL:
2647                         rtm->rtm_type = RTN_BLACKHOLE;
2648                         break;
2649                 case -EACCES:
2650                         rtm->rtm_type = RTN_PROHIBIT;
2651                         break;
2652                 case -EAGAIN:
2653                         rtm->rtm_type = RTN_THROW;
2654                         break;
2655                 default:
2656                         rtm->rtm_type = RTN_UNREACHABLE;
2657                         break;
2658                 }
2659         }
2660         else if (rt->rt6i_flags & RTF_LOCAL)
2661                 rtm->rtm_type = RTN_LOCAL;
2662         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2663                 rtm->rtm_type = RTN_LOCAL;
2664         else
2665                 rtm->rtm_type = RTN_UNICAST;
2666         rtm->rtm_flags = 0;
2667         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2668         rtm->rtm_protocol = rt->rt6i_protocol;
2669         if (rt->rt6i_flags & RTF_DYNAMIC)
2670                 rtm->rtm_protocol = RTPROT_REDIRECT;
2671         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2672                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2673                         rtm->rtm_protocol = RTPROT_RA;
2674                 else
2675                         rtm->rtm_protocol = RTPROT_KERNEL;
2676         }
2677
2678         if (rt->rt6i_flags & RTF_CACHE)
2679                 rtm->rtm_flags |= RTM_F_CLONED;
2680
2681         if (dst) {
2682                 if (nla_put_in6_addr(skb, RTA_DST, dst))
2683                         goto nla_put_failure;
2684                 rtm->rtm_dst_len = 128;
2685         } else if (rtm->rtm_dst_len)
2686                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2687                         goto nla_put_failure;
2688 #ifdef CONFIG_IPV6_SUBTREES
2689         if (src) {
2690                 if (nla_put_in6_addr(skb, RTA_SRC, src))
2691                         goto nla_put_failure;
2692                 rtm->rtm_src_len = 128;
2693         } else if (rtm->rtm_src_len &&
2694                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2695                 goto nla_put_failure;
2696 #endif
2697         if (iif) {
2698 #ifdef CONFIG_IPV6_MROUTE
2699                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2700                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2701                         if (err <= 0) {
2702                                 if (!nowait) {
2703                                         if (err == 0)
2704                                                 return 0;
2705                                         goto nla_put_failure;
2706                                 } else {
2707                                         if (err == -EMSGSIZE)
2708                                                 goto nla_put_failure;
2709                                 }
2710                         }
2711                 } else
2712 #endif
2713                         if (nla_put_u32(skb, RTA_IIF, iif))
2714                                 goto nla_put_failure;
2715         } else if (dst) {
2716                 struct in6_addr saddr_buf;
2717                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2718                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2719                         goto nla_put_failure;
2720         }
2721
2722         if (rt->rt6i_prefsrc.plen) {
2723                 struct in6_addr saddr_buf;
2724                 saddr_buf = rt->rt6i_prefsrc.addr;
2725                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2726                         goto nla_put_failure;
2727         }
2728
2729         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2730         if (rt->rt6i_pmtu)
2731                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2732         if (rtnetlink_put_metrics(skb, metrics) < 0)
2733                 goto nla_put_failure;
2734
2735         if (rt->rt6i_flags & RTF_GATEWAY) {
2736                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2737                         goto nla_put_failure;
2738         }
2739
2740         if (rt->dst.dev &&
2741             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2742                 goto nla_put_failure;
2743         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2744                 goto nla_put_failure;
2745
2746         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2747
2748         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2749                 goto nla_put_failure;
2750
2751         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2752                 goto nla_put_failure;
2753
2754         nlmsg_end(skb, nlh);
2755         return 0;
2756
2757 nla_put_failure:
2758         nlmsg_cancel(skb, nlh);
2759         return -EMSGSIZE;
2760 }
2761
2762 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2763 {
2764         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2765         int prefix;
2766
2767         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2768                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2769                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2770         } else
2771                 prefix = 0;
2772
2773         return rt6_fill_node(arg->net,
2774                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2775                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2776                      prefix, 0, NLM_F_MULTI);
2777 }
2778
2779 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2780 {
2781         struct net *net = sock_net(in_skb->sk);
2782         struct nlattr *tb[RTA_MAX+1];
2783         struct rt6_info *rt;
2784         struct sk_buff *skb;
2785         struct rtmsg *rtm;
2786         struct flowi6 fl6;
2787         int err, iif = 0, oif = 0;
2788
2789         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2790         if (err < 0)
2791                 goto errout;
2792
2793         err = -EINVAL;
2794         memset(&fl6, 0, sizeof(fl6));
2795
2796         if (tb[RTA_SRC]) {
2797                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2798                         goto errout;
2799
2800                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2801         }
2802
2803         if (tb[RTA_DST]) {
2804                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2805                         goto errout;
2806
2807                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2808         }
2809
2810         if (tb[RTA_IIF])
2811                 iif = nla_get_u32(tb[RTA_IIF]);
2812
2813         if (tb[RTA_OIF])
2814                 oif = nla_get_u32(tb[RTA_OIF]);
2815
2816         if (tb[RTA_MARK])
2817                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2818
2819         if (iif) {
2820                 struct net_device *dev;
2821                 int flags = 0;
2822
2823                 dev = __dev_get_by_index(net, iif);
2824                 if (!dev) {
2825                         err = -ENODEV;
2826                         goto errout;
2827                 }
2828
2829                 fl6.flowi6_iif = iif;
2830
2831                 if (!ipv6_addr_any(&fl6.saddr))
2832                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2833
2834                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2835                                                                flags);
2836         } else {
2837                 fl6.flowi6_oif = oif;
2838
2839                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2840         }
2841
2842         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2843         if (!skb) {
2844                 ip6_rt_put(rt);
2845                 err = -ENOBUFS;
2846                 goto errout;
2847         }
2848
2849         /* Reserve room for dummy headers, this skb can pass
2850            through good chunk of routing engine.
2851          */
2852         skb_reset_mac_header(skb);
2853         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2854
2855         skb_dst_set(skb, &rt->dst);
2856
2857         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2858                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2859                             nlh->nlmsg_seq, 0, 0, 0);
2860         if (err < 0) {
2861                 kfree_skb(skb);
2862                 goto errout;
2863         }
2864
2865         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2866 errout:
2867         return err;
2868 }
2869
2870 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2871 {
2872         struct sk_buff *skb;
2873         struct net *net = info->nl_net;
2874         u32 seq;
2875         int err;
2876
2877         err = -ENOBUFS;
2878         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2879
2880         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2881         if (!skb)
2882                 goto errout;
2883
2884         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2885                                 event, info->portid, seq, 0, 0, 0);
2886         if (err < 0) {
2887                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2888                 WARN_ON(err == -EMSGSIZE);
2889                 kfree_skb(skb);
2890                 goto errout;
2891         }
2892         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2893                     info->nlh, gfp_any());
2894         return;
2895 errout:
2896         if (err < 0)
2897                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2898 }
2899
2900 static int ip6_route_dev_notify(struct notifier_block *this,
2901                                 unsigned long event, void *ptr)
2902 {
2903         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2904         struct net *net = dev_net(dev);
2905
2906         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2907                 net->ipv6.ip6_null_entry->dst.dev = dev;
2908                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2909 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2911                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2912                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2913                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2914 #endif
2915         }
2916
2917         return NOTIFY_OK;
2918 }
2919
2920 /*
2921  *      /proc
2922  */
2923
2924 #ifdef CONFIG_PROC_FS
2925
2926 static const struct file_operations ipv6_route_proc_fops = {
2927         .owner          = THIS_MODULE,
2928         .open           = ipv6_route_open,
2929         .read           = seq_read,
2930         .llseek         = seq_lseek,
2931         .release        = seq_release_net,
2932 };
2933
2934 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2935 {
2936         struct net *net = (struct net *)seq->private;
2937         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2938                    net->ipv6.rt6_stats->fib_nodes,
2939                    net->ipv6.rt6_stats->fib_route_nodes,
2940                    net->ipv6.rt6_stats->fib_rt_alloc,
2941                    net->ipv6.rt6_stats->fib_rt_entries,
2942                    net->ipv6.rt6_stats->fib_rt_cache,
2943                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2944                    net->ipv6.rt6_stats->fib_discarded_routes);
2945
2946         return 0;
2947 }
2948
2949 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2950 {
2951         return single_open_net(inode, file, rt6_stats_seq_show);
2952 }
2953
2954 static const struct file_operations rt6_stats_seq_fops = {
2955         .owner   = THIS_MODULE,
2956         .open    = rt6_stats_seq_open,
2957         .read    = seq_read,
2958         .llseek  = seq_lseek,
2959         .release = single_release_net,
2960 };
2961 #endif  /* CONFIG_PROC_FS */
2962
2963 #ifdef CONFIG_SYSCTL
2964
2965 static
2966 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2967                               void __user *buffer, size_t *lenp, loff_t *ppos)
2968 {
2969         struct net *net;
2970         int delay;
2971         if (!write)
2972                 return -EINVAL;
2973
2974         net = (struct net *)ctl->extra1;
2975         delay = net->ipv6.sysctl.flush_delay;
2976         proc_dointvec(ctl, write, buffer, lenp, ppos);
2977         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2978         return 0;
2979 }
2980
2981 struct ctl_table ipv6_route_table_template[] = {
2982         {
2983                 .procname       =       "flush",
2984                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2985                 .maxlen         =       sizeof(int),
2986                 .mode           =       0200,
2987                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2988         },
2989         {
2990                 .procname       =       "gc_thresh",
2991                 .data           =       &ip6_dst_ops_template.gc_thresh,
2992                 .maxlen         =       sizeof(int),
2993                 .mode           =       0644,
2994                 .proc_handler   =       proc_dointvec,
2995         },
2996         {
2997                 .procname       =       "max_size",
2998                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2999                 .maxlen         =       sizeof(int),
3000                 .mode           =       0644,
3001                 .proc_handler   =       proc_dointvec,
3002         },
3003         {
3004                 .procname       =       "gc_min_interval",
3005                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3006                 .maxlen         =       sizeof(int),
3007                 .mode           =       0644,
3008                 .proc_handler   =       proc_dointvec_jiffies,
3009         },
3010         {
3011                 .procname       =       "gc_timeout",
3012                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3013                 .maxlen         =       sizeof(int),
3014                 .mode           =       0644,
3015                 .proc_handler   =       proc_dointvec_jiffies,
3016         },
3017         {
3018                 .procname       =       "gc_interval",
3019                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3020                 .maxlen         =       sizeof(int),
3021                 .mode           =       0644,
3022                 .proc_handler   =       proc_dointvec_jiffies,
3023         },
3024         {
3025                 .procname       =       "gc_elasticity",
3026                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3027                 .maxlen         =       sizeof(int),
3028                 .mode           =       0644,
3029                 .proc_handler   =       proc_dointvec,
3030         },
3031         {
3032                 .procname       =       "mtu_expires",
3033                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3034                 .maxlen         =       sizeof(int),
3035                 .mode           =       0644,
3036                 .proc_handler   =       proc_dointvec_jiffies,
3037         },
3038         {
3039                 .procname       =       "min_adv_mss",
3040                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3041                 .maxlen         =       sizeof(int),
3042                 .mode           =       0644,
3043                 .proc_handler   =       proc_dointvec,
3044         },
3045         {
3046                 .procname       =       "gc_min_interval_ms",
3047                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3048                 .maxlen         =       sizeof(int),
3049                 .mode           =       0644,
3050                 .proc_handler   =       proc_dointvec_ms_jiffies,
3051         },
3052         { }
3053 };
3054
3055 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3056 {
3057         struct ctl_table *table;
3058
3059         table = kmemdup(ipv6_route_table_template,
3060                         sizeof(ipv6_route_table_template),
3061                         GFP_KERNEL);
3062
3063         if (table) {
3064                 table[0].data = &net->ipv6.sysctl.flush_delay;
3065                 table[0].extra1 = net;
3066                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3067                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3068                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3069                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3070                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3071                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3072                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3073                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3074                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3075
3076                 /* Don't export sysctls to unprivileged users */
3077                 if (net->user_ns != &init_user_ns)
3078                         table[0].procname = NULL;
3079         }
3080
3081         return table;
3082 }
3083 #endif
3084
3085 static int __net_init ip6_route_net_init(struct net *net)
3086 {
3087         int ret = -ENOMEM;
3088
3089         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3090                sizeof(net->ipv6.ip6_dst_ops));
3091
3092         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3093                 goto out_ip6_dst_ops;
3094
3095         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3096                                            sizeof(*net->ipv6.ip6_null_entry),
3097                                            GFP_KERNEL);
3098         if (!net->ipv6.ip6_null_entry)
3099                 goto out_ip6_dst_entries;
3100         net->ipv6.ip6_null_entry->dst.path =
3101                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3102         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3103         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3104                          ip6_template_metrics, true);
3105
3106 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3107         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3108                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3109                                                GFP_KERNEL);
3110         if (!net->ipv6.ip6_prohibit_entry)
3111                 goto out_ip6_null_entry;
3112         net->ipv6.ip6_prohibit_entry->dst.path =
3113                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3114         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3115         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3116                          ip6_template_metrics, true);
3117
3118         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3119                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3120                                                GFP_KERNEL);
3121         if (!net->ipv6.ip6_blk_hole_entry)
3122                 goto out_ip6_prohibit_entry;
3123         net->ipv6.ip6_blk_hole_entry->dst.path =
3124                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3125         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3126         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3127                          ip6_template_metrics, true);
3128 #endif
3129
3130         net->ipv6.sysctl.flush_delay = 0;
3131         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3132         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3133         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3134         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3135         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3136         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3137         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3138
3139         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3140
3141         ret = 0;
3142 out:
3143         return ret;
3144
3145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3146 out_ip6_prohibit_entry:
3147         kfree(net->ipv6.ip6_prohibit_entry);
3148 out_ip6_null_entry:
3149         kfree(net->ipv6.ip6_null_entry);
3150 #endif
3151 out_ip6_dst_entries:
3152         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3153 out_ip6_dst_ops:
3154         goto out;
3155 }
3156
3157 static void __net_exit ip6_route_net_exit(struct net *net)
3158 {
3159         kfree(net->ipv6.ip6_null_entry);
3160 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3161         kfree(net->ipv6.ip6_prohibit_entry);
3162         kfree(net->ipv6.ip6_blk_hole_entry);
3163 #endif
3164         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3165 }
3166
3167 static int __net_init ip6_route_net_init_late(struct net *net)
3168 {
3169 #ifdef CONFIG_PROC_FS
3170         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3171         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3172 #endif
3173         return 0;
3174 }
3175
3176 static void __net_exit ip6_route_net_exit_late(struct net *net)
3177 {
3178 #ifdef CONFIG_PROC_FS
3179         remove_proc_entry("ipv6_route", net->proc_net);
3180         remove_proc_entry("rt6_stats", net->proc_net);
3181 #endif
3182 }
3183
3184 static struct pernet_operations ip6_route_net_ops = {
3185         .init = ip6_route_net_init,
3186         .exit = ip6_route_net_exit,
3187 };
3188
3189 static int __net_init ipv6_inetpeer_init(struct net *net)
3190 {
3191         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3192
3193         if (!bp)
3194                 return -ENOMEM;
3195         inet_peer_base_init(bp);
3196         net->ipv6.peers = bp;
3197         return 0;
3198 }
3199
3200 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3201 {
3202         struct inet_peer_base *bp = net->ipv6.peers;
3203
3204         net->ipv6.peers = NULL;
3205         inetpeer_invalidate_tree(bp);
3206         kfree(bp);
3207 }
3208
3209 static struct pernet_operations ipv6_inetpeer_ops = {
3210         .init   =       ipv6_inetpeer_init,
3211         .exit   =       ipv6_inetpeer_exit,
3212 };
3213
3214 static struct pernet_operations ip6_route_net_late_ops = {
3215         .init = ip6_route_net_init_late,
3216         .exit = ip6_route_net_exit_late,
3217 };
3218
3219 static struct notifier_block ip6_route_dev_notifier = {
3220         .notifier_call = ip6_route_dev_notify,
3221         .priority = 0,
3222 };
3223
3224 int __init ip6_route_init(void)
3225 {
3226         int ret;
3227
3228         ret = -ENOMEM;
3229         ip6_dst_ops_template.kmem_cachep =
3230                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3231                                   SLAB_HWCACHE_ALIGN, NULL);
3232         if (!ip6_dst_ops_template.kmem_cachep)
3233                 goto out;
3234
3235         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3236         if (ret)
3237                 goto out_kmem_cache;
3238
3239         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3240         if (ret)
3241                 goto out_dst_entries;
3242
3243         ret = register_pernet_subsys(&ip6_route_net_ops);
3244         if (ret)
3245                 goto out_register_inetpeer;
3246
3247         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3248
3249         /* Registering of the loopback is done before this portion of code,
3250          * the loopback reference in rt6_info will not be taken, do it
3251          * manually for init_net */
3252         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3253         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3254   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3255         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3256         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3257         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3258         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3259   #endif
3260         ret = fib6_init();
3261         if (ret)
3262                 goto out_register_subsys;
3263
3264         ret = xfrm6_init();
3265         if (ret)
3266                 goto out_fib6_init;
3267
3268         ret = fib6_rules_init();
3269         if (ret)
3270                 goto xfrm6_init;
3271
3272         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3273         if (ret)
3274                 goto fib6_rules_init;
3275
3276         ret = -ENOBUFS;
3277         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3278             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3279             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3280                 goto out_register_late_subsys;
3281
3282         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3283         if (ret)
3284                 goto out_register_late_subsys;
3285
3286 out:
3287         return ret;
3288
3289 out_register_late_subsys:
3290         unregister_pernet_subsys(&ip6_route_net_late_ops);
3291 fib6_rules_init:
3292         fib6_rules_cleanup();
3293 xfrm6_init:
3294         xfrm6_fini();
3295 out_fib6_init:
3296         fib6_gc_cleanup();
3297 out_register_subsys:
3298         unregister_pernet_subsys(&ip6_route_net_ops);
3299 out_register_inetpeer:
3300         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3301 out_dst_entries:
3302         dst_entries_destroy(&ip6_dst_blackhole_ops);
3303 out_kmem_cache:
3304         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3305         goto out;
3306 }
3307
3308 void ip6_route_cleanup(void)
3309 {
3310         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3311         unregister_pernet_subsys(&ip6_route_net_late_ops);
3312         fib6_rules_cleanup();
3313         xfrm6_fini();
3314         fib6_gc_cleanup();
3315         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3316         unregister_pernet_subsys(&ip6_route_net_ops);
3317         dst_entries_destroy(&ip6_dst_blackhole_ops);
3318         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3319 }