ipv6: Initialize the struct rt6_info behind the dst_enty field
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82 static void             rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
83
84 #ifdef CONFIG_IPV6_ROUTE_INFO
85 static struct rt6_info *rt6_add_route_info(struct net *net,
86                                            const struct in6_addr *prefix, int prefixlen,
87                                            const struct in6_addr *gwaddr, int ifindex,
88                                            unsigned int pref);
89 static struct rt6_info *rt6_get_route_info(struct net *net,
90                                            const struct in6_addr *prefix, int prefixlen,
91                                            const struct in6_addr *gwaddr, int ifindex);
92 #endif
93
94 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
95 {
96         struct rt6_info *rt = (struct rt6_info *) dst;
97         struct inet_peer *peer;
98         u32 *p = NULL;
99
100         if (!(rt->dst.flags & DST_HOST))
101                 return NULL;
102
103         peer = rt6_get_peer_create(rt);
104         if (peer) {
105                 u32 *old_p = __DST_METRICS_PTR(old);
106                 unsigned long prev, new;
107
108                 p = peer->metrics;
109                 if (inet_metrics_new(peer))
110                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112                 new = (unsigned long) p;
113                 prev = cmpxchg(&dst->_metrics, old, new);
114
115                 if (prev != old) {
116                         p = __DST_METRICS_PTR(prev);
117                         if (prev & DST_METRICS_READ_ONLY)
118                                 p = NULL;
119                 }
120         }
121         return p;
122 }
123
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
125                                              struct sk_buff *skb,
126                                              const void *daddr)
127 {
128         struct in6_addr *p = &rt->rt6i_gateway;
129
130         if (!ipv6_addr_any(p))
131                 return (const void *) p;
132         else if (skb)
133                 return &ipv6_hdr(skb)->daddr;
134         return daddr;
135 }
136
137 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
138                                           struct sk_buff *skb,
139                                           const void *daddr)
140 {
141         struct rt6_info *rt = (struct rt6_info *) dst;
142         struct neighbour *n;
143
144         daddr = choose_neigh_daddr(rt, skb, daddr);
145         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
146         if (n)
147                 return n;
148         return neigh_create(&nd_tbl, daddr, dst->dev);
149 }
150
151 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
152 {
153         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
154         if (!n) {
155                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
156                 if (IS_ERR(n))
157                         return PTR_ERR(n);
158         }
159         rt->n = n;
160
161         return 0;
162 }
163
164 static struct dst_ops ip6_dst_ops_template = {
165         .family                 =       AF_INET6,
166         .protocol               =       cpu_to_be16(ETH_P_IPV6),
167         .gc                     =       ip6_dst_gc,
168         .gc_thresh              =       1024,
169         .check                  =       ip6_dst_check,
170         .default_advmss         =       ip6_default_advmss,
171         .mtu                    =       ip6_mtu,
172         .cow_metrics            =       ipv6_cow_metrics,
173         .destroy                =       ip6_dst_destroy,
174         .ifdown                 =       ip6_dst_ifdown,
175         .negative_advice        =       ip6_negative_advice,
176         .link_failure           =       ip6_link_failure,
177         .update_pmtu            =       ip6_rt_update_pmtu,
178         .redirect               =       rt6_do_redirect,
179         .local_out              =       __ip6_local_out,
180         .neigh_lookup           =       ip6_neigh_lookup,
181 };
182
183 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
184 {
185         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
186
187         return mtu ? : dst->dev->mtu;
188 }
189
190 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
191 {
192 }
193
/* dst_ops->redirect for blackhole routes: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
197
198 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
199                                          unsigned long old)
200 {
201         return NULL;
202 }
203
204 static struct dst_ops ip6_dst_blackhole_ops = {
205         .family                 =       AF_INET6,
206         .protocol               =       cpu_to_be16(ETH_P_IPV6),
207         .destroy                =       ip6_dst_destroy,
208         .check                  =       ip6_dst_check,
209         .mtu                    =       ip6_blackhole_mtu,
210         .default_advmss         =       ip6_default_advmss,
211         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
212         .redirect               =       ip6_rt_blackhole_redirect,
213         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
214         .neigh_lookup           =       ip6_neigh_lookup,
215 };
216
217 static const u32 ip6_template_metrics[RTAX_MAX] = {
218         [RTAX_HOPLIMIT - 1] = 255,
219 };
220
221 static struct rt6_info ip6_null_entry_template = {
222         .dst = {
223                 .__refcnt       = ATOMIC_INIT(1),
224                 .__use          = 1,
225                 .obsolete       = -1,
226                 .error          = -ENETUNREACH,
227                 .input          = ip6_pkt_discard,
228                 .output         = ip6_pkt_discard_out,
229         },
230         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
231         .rt6i_protocol  = RTPROT_KERNEL,
232         .rt6i_metric    = ~(u32) 0,
233         .rt6i_ref       = ATOMIC_INIT(1),
234 };
235
236 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
237
238 static int ip6_pkt_prohibit(struct sk_buff *skb);
239 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
240
241 static struct rt6_info ip6_prohibit_entry_template = {
242         .dst = {
243                 .__refcnt       = ATOMIC_INIT(1),
244                 .__use          = 1,
245                 .obsolete       = -1,
246                 .error          = -EACCES,
247                 .input          = ip6_pkt_prohibit,
248                 .output         = ip6_pkt_prohibit_out,
249         },
250         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
251         .rt6i_protocol  = RTPROT_KERNEL,
252         .rt6i_metric    = ~(u32) 0,
253         .rt6i_ref       = ATOMIC_INIT(1),
254 };
255
256 static struct rt6_info ip6_blk_hole_entry_template = {
257         .dst = {
258                 .__refcnt       = ATOMIC_INIT(1),
259                 .__use          = 1,
260                 .obsolete       = -1,
261                 .error          = -EINVAL,
262                 .input          = dst_discard,
263                 .output         = dst_discard,
264         },
265         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
266         .rt6i_protocol  = RTPROT_KERNEL,
267         .rt6i_metric    = ~(u32) 0,
268         .rt6i_ref       = ATOMIC_INIT(1),
269 };
270
271 #endif
272
273 /* allocate dst with ip6_dst_ops */
274 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
275                                              struct net_device *dev,
276                                              int flags,
277                                              struct fib6_table *table)
278 {
279         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
280                                         0, 0, flags);
281
282         if (rt) {
283                 struct dst_entry *dst = &rt->dst;
284
285                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
286                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
287         }
288         return rt;
289 }
290
291 static void ip6_dst_destroy(struct dst_entry *dst)
292 {
293         struct rt6_info *rt = (struct rt6_info *)dst;
294         struct inet6_dev *idev = rt->rt6i_idev;
295
296         if (rt->n)
297                 neigh_release(rt->n);
298
299         if (!(rt->dst.flags & DST_HOST))
300                 dst_destroy_metrics_generic(dst);
301
302         if (idev) {
303                 rt->rt6i_idev = NULL;
304                 in6_dev_put(idev);
305         }
306
307         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
308                 dst_release(dst->from);
309
310         if (rt6_has_peer(rt)) {
311                 struct inet_peer *peer = rt6_peer_ptr(rt);
312                 inet_putpeer(peer);
313         }
314 }
315
316 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
317
318 static u32 rt6_peer_genid(void)
319 {
320         return atomic_read(&__rt6_peer_genid);
321 }
322
323 void rt6_bind_peer(struct rt6_info *rt, int create)
324 {
325         struct inet_peer_base *base;
326         struct inet_peer *peer;
327
328         base = inetpeer_base_ptr(rt->_rt6i_peer);
329         if (!base)
330                 return;
331
332         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
333         if (peer) {
334                 if (!rt6_set_peer(rt, peer))
335                         inet_putpeer(peer);
336                 else
337                         rt->rt6i_peer_genid = rt6_peer_genid();
338         }
339 }
340
341 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
342                            int how)
343 {
344         struct rt6_info *rt = (struct rt6_info *)dst;
345         struct inet6_dev *idev = rt->rt6i_idev;
346         struct net_device *loopback_dev =
347                 dev_net(dev)->loopback_dev;
348
349         if (dev != loopback_dev) {
350                 if (idev && idev->dev == dev) {
351                         struct inet6_dev *loopback_idev =
352                                 in6_dev_get(loopback_dev);
353                         if (loopback_idev) {
354                                 rt->rt6i_idev = loopback_idev;
355                                 in6_dev_put(idev);
356                         }
357                 }
358                 if (rt->n && rt->n->dev == dev) {
359                         rt->n->dev = loopback_dev;
360                         dev_hold(loopback_dev);
361                         dev_put(dev);
362                 }
363         }
364 }
365
366 static bool rt6_check_expired(const struct rt6_info *rt)
367 {
368         struct rt6_info *ort = NULL;
369
370         if (rt->rt6i_flags & RTF_EXPIRES) {
371                 if (time_after(jiffies, rt->dst.expires))
372                         return true;
373         } else if (rt->dst.from) {
374                 ort = (struct rt6_info *) rt->dst.from;
375                 return (ort->rt6i_flags & RTF_EXPIRES) &&
376                         time_after(jiffies, ort->dst.expires);
377         }
378         return false;
379 }
380
381 static bool rt6_need_strict(const struct in6_addr *daddr)
382 {
383         return ipv6_addr_type(daddr) &
384                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
385 }
386
387 /*
388  *      Route lookup. Any table->tb6_lock is implied.
389  */
390
391 static inline struct rt6_info *rt6_device_match(struct net *net,
392                                                     struct rt6_info *rt,
393                                                     const struct in6_addr *saddr,
394                                                     int oif,
395                                                     int flags)
396 {
397         struct rt6_info *local = NULL;
398         struct rt6_info *sprt;
399
400         if (!oif && ipv6_addr_any(saddr))
401                 goto out;
402
403         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
404                 struct net_device *dev = sprt->dst.dev;
405
406                 if (oif) {
407                         if (dev->ifindex == oif)
408                                 return sprt;
409                         if (dev->flags & IFF_LOOPBACK) {
410                                 if (!sprt->rt6i_idev ||
411                                     sprt->rt6i_idev->dev->ifindex != oif) {
412                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
413                                                 continue;
414                                         if (local && (!oif ||
415                                                       local->rt6i_idev->dev->ifindex == oif))
416                                                 continue;
417                                 }
418                                 local = sprt;
419                         }
420                 } else {
421                         if (ipv6_chk_addr(net, saddr, dev,
422                                           flags & RT6_LOOKUP_F_IFACE))
423                                 return sprt;
424                 }
425         }
426
427         if (oif) {
428                 if (local)
429                         return local;
430
431                 if (flags & RT6_LOOKUP_F_IFACE)
432                         return net->ipv6.ip6_null_entry;
433         }
434 out:
435         return rt;
436 }
437
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * neighbour bound to @rt when that neighbour is not in a NUD_VALID state.
 *
 * Probes MUST be rate-limited (to no more than one per minute); the
 * limit applied here is the interface's cnf.rtr_probe_interval, measured
 * from neigh->updated.
 *
 * A NULL @rt, or a route without a bound neighbour, is ignored.
 *
 * neigh->updated is modified here, so the neighbour's lock is taken for
 * writing: this keeps the "interval elapsed?" check and the timestamp
 * update atomic with respect to other CPUs probing the same neighbour,
 * and keeps readers of ->updated from racing with the store.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	/* Lockless fast path: nothing to do for valid neighbours. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before building and sending the solicitation. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
477
478 /*
479  * Default Router Selection (RFC 2461 6.3.6)
480  */
481 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
482 {
483         struct net_device *dev = rt->dst.dev;
484         if (!oif || dev->ifindex == oif)
485                 return 2;
486         if ((dev->flags & IFF_LOOPBACK) &&
487             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
488                 return 1;
489         return 0;
490 }
491
492 static inline int rt6_check_neigh(struct rt6_info *rt)
493 {
494         struct neighbour *neigh;
495         int m;
496
497         rcu_read_lock();
498         neigh = rt->n;
499         if (rt->rt6i_flags & RTF_NONEXTHOP ||
500             !(rt->rt6i_flags & RTF_GATEWAY))
501                 m = 1;
502         else if (neigh) {
503                 read_lock_bh(&neigh->lock);
504                 if (neigh->nud_state & NUD_VALID)
505                         m = 2;
506 #ifdef CONFIG_IPV6_ROUTER_PREF
507                 else if (neigh->nud_state & NUD_FAILED)
508                         m = 0;
509 #endif
510                 else
511                         m = 1;
512                 read_unlock_bh(&neigh->lock);
513         } else
514                 m = 0;
515         rcu_read_unlock();
516         return m;
517 }
518
519 static int rt6_score_route(struct rt6_info *rt, int oif,
520                            int strict)
521 {
522         int m, n;
523
524         m = rt6_check_dev(rt, oif);
525         if (!m && (strict & RT6_LOOKUP_F_IFACE))
526                 return -1;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
529 #endif
530         n = rt6_check_neigh(rt);
531         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
532                 return -1;
533         return m;
534 }
535
536 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
537                                    int *mpri, struct rt6_info *match)
538 {
539         int m;
540
541         if (rt6_check_expired(rt))
542                 goto out;
543
544         m = rt6_score_route(rt, oif, strict);
545         if (m < 0)
546                 goto out;
547
548         if (m > *mpri) {
549                 if (strict & RT6_LOOKUP_F_REACHABLE)
550                         rt6_probe(match);
551                 *mpri = m;
552                 match = rt;
553         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
554                 rt6_probe(rt);
555         }
556
557 out:
558         return match;
559 }
560
561 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
562                                      struct rt6_info *rr_head,
563                                      u32 metric, int oif, int strict)
564 {
565         struct rt6_info *rt, *match;
566         int mpri = -1;
567
568         match = NULL;
569         for (rt = rr_head; rt && rt->rt6i_metric == metric;
570              rt = rt->dst.rt6_next)
571                 match = find_match(rt, oif, strict, &mpri, match);
572         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
573              rt = rt->dst.rt6_next)
574                 match = find_match(rt, oif, strict, &mpri, match);
575
576         return match;
577 }
578
579 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
580 {
581         struct rt6_info *match, *rt0;
582         struct net *net;
583
584         rt0 = fn->rr_ptr;
585         if (!rt0)
586                 fn->rr_ptr = rt0 = fn->leaf;
587
588         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
589
590         if (!match &&
591             (strict & RT6_LOOKUP_F_REACHABLE)) {
592                 struct rt6_info *next = rt0->dst.rt6_next;
593
594                 /* no entries matched; do round-robin */
595                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
596                         next = fn->leaf;
597
598                 if (next != rt0)
599                         fn->rr_ptr = next;
600         }
601
602         net = dev_net(rt0->dst.dev);
603         return match ? match : net->ipv6.ip6_null_entry;
604 }
605
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    length of the option in bytes
 * @gwaddr: gateway address the route is bound to
 *
 * After validating the option, the matching route is looked up and then
 * deleted (lifetime 0), created (no route yet, non-zero lifetime) or
 * has its preference bits updated.  A surviving route then gets its
 * expiry cleared or set according to the lifetime.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option's prefix field is used in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (!lifetime) {
			/* Zero lifetime withdraws an existing route. */
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) |
					 RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
678
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended on the null entry.
 *
 * Must be expanded inside a function that has locals named `rt` (the
 * current result) and `fn` (the current node) and labels `out` and
 * `restart`.  When `rt` is the null entry of @__net, `fn` is moved to
 * its parent -- or, if the parent has a subtree that is not `fn`, to the
 * result of looking @saddr up in that subtree -- until either a node
 * flagged RTN_RTINFO is reached (jumps to `restart`) or the current node
 * is flagged RTN_TL_ROOT (jumps to `out`).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
696
697 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
698                                              struct fib6_table *table,
699                                              struct flowi6 *fl6, int flags)
700 {
701         struct fib6_node *fn;
702         struct rt6_info *rt;
703
704         read_lock_bh(&table->tb6_lock);
705         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
706 restart:
707         rt = fn->leaf;
708         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
709         BACKTRACK(net, &fl6->saddr);
710 out:
711         dst_use(&rt->dst, jiffies);
712         read_unlock_bh(&table->tb6_lock);
713         return rt;
714
715 }
716
717 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
718                                     int flags)
719 {
720         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
721 }
722 EXPORT_SYMBOL_GPL(ip6_route_lookup);
723
724 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
725                             const struct in6_addr *saddr, int oif, int strict)
726 {
727         struct flowi6 fl6 = {
728                 .flowi6_oif = oif,
729                 .daddr = *daddr,
730         };
731         struct dst_entry *dst;
732         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
733
734         if (saddr) {
735                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
736                 flags |= RT6_LOOKUP_F_HAS_SADDR;
737         }
738
739         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
740         if (dst->error == 0)
741                 return (struct rt6_info *) dst;
742
743         dst_release(dst);
744
745         return NULL;
746 }
747
748 EXPORT_SYMBOL(rt6_lookup);
749
750 /* ip6_ins_rt is called with FREE table->tb6_lock.
751    It takes new route entry, the addition fails by any reason the
752    route is freed. In any case, if caller does not hold it, it may
753    be destroyed.
754  */
755
756 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
757 {
758         int err;
759         struct fib6_table *table;
760
761         table = rt->rt6i_table;
762         write_lock_bh(&table->tb6_lock);
763         err = fib6_add(&table->tb6_root, rt, info);
764         write_unlock_bh(&table->tb6_lock);
765
766         return err;
767 }
768
769 int ip6_ins_rt(struct rt6_info *rt)
770 {
771         struct nl_info info = {
772                 .nl_net = dev_net(rt->dst.dev),
773         };
774         return __ip6_ins_rt(rt, &info);
775 }
776
777 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
778                                       const struct in6_addr *daddr,
779                                       const struct in6_addr *saddr)
780 {
781         struct rt6_info *rt;
782
783         /*
784          *      Clone the route.
785          */
786
787         rt = ip6_rt_copy(ort, daddr);
788
789         if (rt) {
790                 int attempts = !in_softirq();
791
792                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
793                         if (ort->rt6i_dst.plen != 128 &&
794                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
795                                 rt->rt6i_flags |= RTF_ANYCAST;
796                         rt->rt6i_gateway = *daddr;
797                 }
798
799                 rt->rt6i_flags |= RTF_CACHE;
800
801 #ifdef CONFIG_IPV6_SUBTREES
802                 if (rt->rt6i_src.plen && saddr) {
803                         rt->rt6i_src.addr = *saddr;
804                         rt->rt6i_src.plen = 128;
805                 }
806 #endif
807
808         retry:
809                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
810                         struct net *net = dev_net(rt->dst.dev);
811                         int saved_rt_min_interval =
812                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
813                         int saved_rt_elasticity =
814                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
815
816                         if (attempts-- > 0) {
817                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
818                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
819
820                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
821
822                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
823                                         saved_rt_elasticity;
824                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
825                                         saved_rt_min_interval;
826                                 goto retry;
827                         }
828
829                         net_warn_ratelimited("Neighbour table overflow\n");
830                         dst_free(&rt->dst);
831                         return NULL;
832                 }
833         }
834
835         return rt;
836 }
837
838 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
839                                         const struct in6_addr *daddr)
840 {
841         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
842
843         if (rt) {
844                 rt->rt6i_flags |= RTF_CACHE;
845                 rt->n = neigh_clone(ort->n);
846         }
847         return rt;
848 }
849
/*
 * Look up a route for @fl6 in @table, restricted to interface @oif when
 * RT6_LOOKUP_F_IFACE is set in @flags.
 *
 * The lookup runs under table->tb6_lock (read side).  Unless forwarding is
 * enabled in devconf_all, the first pass asks rt6_select() for a reachable
 * router (RT6_LOOKUP_F_REACHABLE); if that pass ends at the "out" label the
 * flag is cleared and the lookup is repeated without it.
 *
 * When the selected route is neither the null entry nor an RTF_CACHE entry,
 * the lock is dropped and a per-destination copy is created with
 * rt6_alloc_cow() or rt6_alloc_clone() and inserted with ip6_ins_rt().  If
 * the insertion fails the whole lookup is retried, at most three attempts
 * in total.
 *
 * Returns a route with a reference taken via dst_hold(); lastuse and __use
 * are updated before returning.  The result may be net->ipv6.ip6_null_entry.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this function; it
 * presumably relies on the local names fn/rt and the labels restart/out --
 * confirm against its definition before renaming anything here.
 */
850 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
851                                       struct flowi6 *fl6, int flags)
852 {
853         struct fib6_node *fn;
854         struct rt6_info *rt, *nrt;
855         int strict = 0;
856         int attempts = 3;
857         int err;
            /* Only insist on a reachable router when this node is not forwarding. */
858         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
859
860         strict |= flags & RT6_LOOKUP_F_IFACE;
861
862 relookup:
863         read_lock_bh(&table->tb6_lock);
864
865 restart_2:
866         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
867
868 restart:
869         rt = rt6_select(fn, oif, strict | reachable);
870
871         BACKTRACK(net, &fl6->saddr);
            /* Null entry and already-cached routes need no copy. */
872         if (rt == net->ipv6.ip6_null_entry ||
873             rt->rt6i_flags & RTF_CACHE)
874                 goto out;
875
            /* Pin the route, then drop the table lock before allocating a copy. */
876         dst_hold(&rt->dst);
877         read_unlock_bh(&table->tb6_lock);
878
            /*
             * No neighbour and not RTF_NONEXTHOP: copy via rt6_alloc_cow().
             * Otherwise a non-host route is cloned; a host route is returned
             * as it is (reference already held).
             */
879         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
880                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
881         else if (!(rt->dst.flags & DST_HOST))
882                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
883         else
884                 goto out2;
885
            /* Switch from the original route to the copy, or to the null entry if allocation failed. */
886         dst_release(&rt->dst);
887         rt = nrt ? : net->ipv6.ip6_null_entry;
888
889         dst_hold(&rt->dst);
890         if (nrt) {
891                 err = ip6_ins_rt(nrt);
892                 if (!err)
893                         goto out2;
894         }
895
            /* Insertion or allocation failed: give up once the attempts are used up. */
896         if (--attempts <= 0)
897                 goto out2;
898
899         /*
900          * Race condition! In the gap, when table->tb6_lock was
901          * released someone could insert this route.  Relookup.
902          */
903         dst_release(&rt->dst);
904         goto relookup;
905
906 out:
            /* First pass required reachability: retry once without that requirement. */
907         if (reachable) {
908                 reachable = 0;
909                 goto restart_2;
910         }
911         dst_hold(&rt->dst);
912         read_unlock_bh(&table->tb6_lock);
913 out2:
914         rt->dst.lastuse = jiffies;
915         rt->dst.__use++;
916
917         return rt;
918 }
919
920 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
921                                             struct flowi6 *fl6, int flags)
922 {
923         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
924 }
925
926 static struct dst_entry *ip6_route_input_lookup(struct net *net,
927                                                 struct net_device *dev,
928                                                 struct flowi6 *fl6, int flags)
929 {
930         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
931                 flags |= RT6_LOOKUP_F_IFACE;
932
933         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
934 }
935
936 void ip6_route_input(struct sk_buff *skb)
937 {
938         const struct ipv6hdr *iph = ipv6_hdr(skb);
939         struct net *net = dev_net(skb->dev);
940         int flags = RT6_LOOKUP_F_HAS_SADDR;
941         struct flowi6 fl6 = {
942                 .flowi6_iif = skb->dev->ifindex,
943                 .daddr = iph->daddr,
944                 .saddr = iph->saddr,
945                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
946                 .flowi6_mark = skb->mark,
947                 .flowi6_proto = iph->nexthdr,
948         };
949
950         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
951 }
952
953 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
954                                              struct flowi6 *fl6, int flags)
955 {
956         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
957 }
958
959 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
960                                     struct flowi6 *fl6)
961 {
962         int flags = 0;
963
964         fl6->flowi6_iif = net->loopback_dev->ifindex;
965
966         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
967                 flags |= RT6_LOOKUP_F_IFACE;
968
969         if (!ipv6_addr_any(&fl6->saddr))
970                 flags |= RT6_LOOKUP_F_HAS_SADDR;
971         else if (sk)
972                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
973
974         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
975 }
976
977 EXPORT_SYMBOL(ip6_route_output);
978
979 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
980 {
981         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
982         struct dst_entry *new = NULL;
983
984         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
985         if (rt) {
986                 new = &rt->dst;
987
988                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
989                 rt6_init_peer(rt, net->ipv6.peers);
990
991                 new->__use = 1;
992                 new->input = dst_discard;
993                 new->output = dst_discard;
994
995                 if (dst_metrics_read_only(&ort->dst))
996                         new->_metrics = ort->dst._metrics;
997                 else
998                         dst_copy_metrics(new, &ort->dst);
999                 rt->rt6i_idev = ort->rt6i_idev;
1000                 if (rt->rt6i_idev)
1001                         in6_dev_hold(rt->rt6i_idev);
1002
1003                 rt->rt6i_gateway = ort->rt6i_gateway;
1004                 rt->rt6i_flags = ort->rt6i_flags;
1005                 rt6_clean_expires(rt);
1006                 rt->rt6i_metric = 0;
1007
1008                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1009 #ifdef CONFIG_IPV6_SUBTREES
1010                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1011 #endif
1012
1013                 dst_free(new);
1014         }
1015
1016         dst_release(dst_orig);
1017         return new ? new : ERR_PTR(-ENOMEM);
1018 }
1019
1020 /*
1021  *      Destination cache support functions
1022  */
1023
1024 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1025 {
1026         struct rt6_info *rt;
1027
1028         rt = (struct rt6_info *) dst;
1029
1030         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1031                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1032                         if (!rt6_has_peer(rt))
1033                                 rt6_bind_peer(rt, 0);
1034                         rt->rt6i_peer_genid = rt6_peer_genid();
1035                 }
1036                 return dst;
1037         }
1038         return NULL;
1039 }
1040
1041 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1042 {
1043         struct rt6_info *rt = (struct rt6_info *) dst;
1044
1045         if (rt) {
1046                 if (rt->rt6i_flags & RTF_CACHE) {
1047                         if (rt6_check_expired(rt)) {
1048                                 ip6_del_rt(rt);
1049                                 dst = NULL;
1050                         }
1051                 } else {
1052                         dst_release(dst);
1053                         dst = NULL;
1054                 }
1055         }
1056         return dst;
1057 }
1058
1059 static void ip6_link_failure(struct sk_buff *skb)
1060 {
1061         struct rt6_info *rt;
1062
1063         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1064
1065         rt = (struct rt6_info *) skb_dst(skb);
1066         if (rt) {
1067                 if (rt->rt6i_flags & RTF_CACHE)
1068                         rt6_update_expires(rt, 0);
1069                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1070                         rt->rt6i_node->fn_sernum = -1;
1071         }
1072 }
1073
1074 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1075 {
1076         struct rt6_info *rt6 = (struct rt6_info*)dst;
1077
1078         dst_confirm(dst);
1079         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1080                 struct net *net = dev_net(dst->dev);
1081
1082                 rt6->rt6i_flags |= RTF_MODIFIED;
1083                 if (mtu < IPV6_MIN_MTU) {
1084                         u32 features = dst_metric(dst, RTAX_FEATURES);
1085                         mtu = IPV6_MIN_MTU;
1086                         features |= RTAX_FEATURE_ALLFRAG;
1087                         dst_metric_set(dst, RTAX_FEATURES, features);
1088                 }
1089                 dst_metric_set(dst, RTAX_MTU, mtu);
1090                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1091         }
1092 }
1093
1094 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1095                      int oif, u32 mark)
1096 {
1097         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1098         struct dst_entry *dst;
1099         struct flowi6 fl6;
1100
1101         memset(&fl6, 0, sizeof(fl6));
1102         fl6.flowi6_oif = oif;
1103         fl6.flowi6_mark = mark;
1104         fl6.flowi6_flags = 0;
1105         fl6.daddr = iph->daddr;
1106         fl6.saddr = iph->saddr;
1107         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1108
1109         dst = ip6_route_output(net, NULL, &fl6);
1110         if (!dst->error)
1111                 ip6_rt_update_pmtu(dst, ntohl(mtu));
1112         dst_release(dst);
1113 }
1114 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1115
1116 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1117 {
1118         ip6_update_pmtu(skb, sock_net(sk), mtu,
1119                         sk->sk_bound_dev_if, sk->sk_mark);
1120 }
1121 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1122
1123 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1124 {
1125         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1126         struct dst_entry *dst;
1127         struct flowi6 fl6;
1128
1129         memset(&fl6, 0, sizeof(fl6));
1130         fl6.flowi6_oif = oif;
1131         fl6.flowi6_mark = mark;
1132         fl6.flowi6_flags = 0;
1133         fl6.daddr = iph->daddr;
1134         fl6.saddr = iph->saddr;
1135         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1136
1137         dst = ip6_route_output(net, NULL, &fl6);
1138         if (!dst->error)
1139                 rt6_do_redirect(dst, skb);
1140         dst_release(dst);
1141 }
1142 EXPORT_SYMBOL_GPL(ip6_redirect);
1143
1144 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1145 {
1146         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1147 }
1148 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1149
1150 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1151 {
1152         struct net_device *dev = dst->dev;
1153         unsigned int mtu = dst_mtu(dst);
1154         struct net *net = dev_net(dev);
1155
1156         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1157
1158         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1159                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1160
1161         /*
1162          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1163          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1164          * IPV6_MAXPLEN is also valid and means: "any MSS,
1165          * rely only on pmtu discovery"
1166          */
1167         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1168                 mtu = IPV6_MAXPLEN;
1169         return mtu;
1170 }
1171
1172 static unsigned int ip6_mtu(const struct dst_entry *dst)
1173 {
1174         struct inet6_dev *idev;
1175         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1176
1177         if (mtu)
1178                 return mtu;
1179
1180         mtu = IPV6_MIN_MTU;
1181
1182         rcu_read_lock();
1183         idev = __in6_dev_get(dst->dev);
1184         if (idev)
1185                 mtu = idev->cnf.mtu6;
1186         rcu_read_unlock();
1187
1188         return mtu;
1189 }
1190
1191 static struct dst_entry *icmp6_dst_gc_list;
1192 static DEFINE_SPINLOCK(icmp6_dst_lock);
1193
1194 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1195                                   struct neighbour *neigh,
1196                                   struct flowi6 *fl6)
1197 {
1198         struct dst_entry *dst;
1199         struct rt6_info *rt;
1200         struct inet6_dev *idev = in6_dev_get(dev);
1201         struct net *net = dev_net(dev);
1202
1203         if (unlikely(!idev))
1204                 return ERR_PTR(-ENODEV);
1205
1206         rt = ip6_dst_alloc(net, dev, 0, NULL);
1207         if (unlikely(!rt)) {
1208                 in6_dev_put(idev);
1209                 dst = ERR_PTR(-ENOMEM);
1210                 goto out;
1211         }
1212
1213         if (neigh)
1214                 neigh_hold(neigh);
1215         else {
1216                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1217                 if (IS_ERR(neigh)) {
1218                         in6_dev_put(idev);
1219                         dst_free(&rt->dst);
1220                         return ERR_CAST(neigh);
1221                 }
1222         }
1223
1224         rt->dst.flags |= DST_HOST;
1225         rt->dst.output  = ip6_output;
1226         rt->n = neigh;
1227         atomic_set(&rt->dst.__refcnt, 1);
1228         rt->rt6i_dst.addr = fl6->daddr;
1229         rt->rt6i_dst.plen = 128;
1230         rt->rt6i_idev     = idev;
1231         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1232
1233         spin_lock_bh(&icmp6_dst_lock);
1234         rt->dst.next = icmp6_dst_gc_list;
1235         icmp6_dst_gc_list = &rt->dst;
1236         spin_unlock_bh(&icmp6_dst_lock);
1237
1238         fib6_force_start_gc(net);
1239
1240         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1241
1242 out:
1243         return dst;
1244 }
1245
1246 int icmp6_dst_gc(void)
1247 {
1248         struct dst_entry *dst, **pprev;
1249         int more = 0;
1250
1251         spin_lock_bh(&icmp6_dst_lock);
1252         pprev = &icmp6_dst_gc_list;
1253
1254         while ((dst = *pprev) != NULL) {
1255                 if (!atomic_read(&dst->__refcnt)) {
1256                         *pprev = dst->next;
1257                         dst_free(dst);
1258                 } else {
1259                         pprev = &dst->next;
1260                         ++more;
1261                 }
1262         }
1263
1264         spin_unlock_bh(&icmp6_dst_lock);
1265
1266         return more;
1267 }
1268
1269 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1270                             void *arg)
1271 {
1272         struct dst_entry *dst, **pprev;
1273
1274         spin_lock_bh(&icmp6_dst_lock);
1275         pprev = &icmp6_dst_gc_list;
1276         while ((dst = *pprev) != NULL) {
1277                 struct rt6_info *rt = (struct rt6_info *) dst;
1278                 if (func(rt, arg)) {
1279                         *pprev = dst->next;
1280                         dst_free(dst);
1281                 } else {
1282                         pprev = &dst->next;
1283                 }
1284         }
1285         spin_unlock_bh(&icmp6_dst_lock);
1286 }
1287
1288 static int ip6_dst_gc(struct dst_ops *ops)
1289 {
1290         unsigned long now = jiffies;
1291         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1292         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1293         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1294         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1295         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1296         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1297         int entries;
1298
1299         entries = dst_entries_get_fast(ops);
1300         if (time_after(rt_last_gc + rt_min_interval, now) &&
1301             entries <= rt_max_size)
1302                 goto out;
1303
1304         net->ipv6.ip6_rt_gc_expire++;
1305         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1306         net->ipv6.ip6_rt_last_gc = now;
1307         entries = dst_entries_get_slow(ops);
1308         if (entries < ops->gc_thresh)
1309                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1310 out:
1311         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1312         return entries > rt_max_size;
1313 }
1314
1315 /* Clean host part of a prefix. Not necessary in radix tree,
1316    but results in cleaner routing tables.
1317
1318    Remove it only when all the things will work!
1319  */
1320
1321 int ip6_dst_hoplimit(struct dst_entry *dst)
1322 {
1323         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1324         if (hoplimit == 0) {
1325                 struct net_device *dev = dst->dev;
1326                 struct inet6_dev *idev;
1327
1328                 rcu_read_lock();
1329                 idev = __in6_dev_get(dev);
1330                 if (idev)
1331                         hoplimit = idev->cnf.hop_limit;
1332                 else
1333                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1334                 rcu_read_unlock();
1335         }
1336         return hoplimit;
1337 }
1338 EXPORT_SYMBOL(ip6_dst_hoplimit);
1339
1340 /*
1341  *
1342  */
1343
/*
 * Create a route from the configuration in @cfg and insert it into the
 * FIB table selected by cfg->fc_table.
 *
 * Steps, in order:
 *  - reject prefix lengths above 128 (and any source prefix when
 *    CONFIG_IPV6_SUBTREES is off);
 *  - take references on the device and its inet6 device if fc_ifindex is set;
 *  - default the metric to IP6_RT_PRIO_USER and find or create the table;
 *  - allocate the rt6_info and fill in expiry, protocol, input/output
 *    handlers, destination (and source) prefix and metric;
 *  - turn RTF_REJECT routes, and non-local routes via a loopback device to
 *    a non-loopback address, into reject routes bound to the loopback device;
 *  - validate the gateway, the preferred source address and bind a neighbour;
 *  - apply the metrics from fc_mx and hand the route to __ip6_ins_rt().
 *
 * cfg->fc_metric, cfg->fc_protocol and cfg->fc_nlinfo.nl_net may be
 * modified.
 *
 * Returns the result of __ip6_ins_rt() on the success path, otherwise a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH or the
 * error from rt6_bind_neighbour()).  On the error path the device, inet6
 * device and route acquired so far are released.
 */
1344 int ip6_route_add(struct fib6_config *cfg)
1345 {
1346         int err;
1347         struct net *net = cfg->fc_nlinfo.nl_net;
1348         struct rt6_info *rt = NULL;
1349         struct net_device *dev = NULL;
1350         struct inet6_dev *idev = NULL;
1351         struct fib6_table *table;
1352         int addr_type;
1353
1354         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1355                 return -EINVAL;
1356 #ifndef CONFIG_IPV6_SUBTREES
1357         if (cfg->fc_src_len)
1358                 return -EINVAL;
1359 #endif
             /* Both lookups take a reference; they are dropped at "out" on failure. */
1360         if (cfg->fc_ifindex) {
1361                 err = -ENODEV;
1362                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1363                 if (!dev)
1364                         goto out;
1365                 idev = in6_dev_get(dev);
1366                 if (!idev)
1367                         goto out;
1368         }
1369
1370         if (cfg->fc_metric == 0)
1371                 cfg->fc_metric = IP6_RT_PRIO_USER;
1372
             /*
              * A netlink request without NLM_F_CREATE first tries an existing
              * table; if there is none, a warning is logged and the table is
              * created anyway.
              */
1373         err = -ENOBUFS;
1374         if (cfg->fc_nlinfo.nlh &&
1375             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1376                 table = fib6_get_table(net, cfg->fc_table);
1377                 if (!table) {
1378                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1379                         table = fib6_new_table(net, cfg->fc_table);
1380                 }
1381         } else {
1382                 table = fib6_new_table(net, cfg->fc_table);
1383         }
1384
1385         if (!table)
1386                 goto out;
1387
1388         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1389
1390         if (!rt) {
1391                 err = -ENOMEM;
1392                 goto out;
1393         }
1394
1395         rt->dst.obsolete = -1;
1396
1397         if (cfg->fc_flags & RTF_EXPIRES)
1398                 rt6_set_expires(rt, jiffies +
1399                                 clock_t_to_jiffies(cfg->fc_expires));
1400         else
1401                 rt6_clean_expires(rt);
1402
1403         if (cfg->fc_protocol == RTPROT_UNSPEC)
1404                 cfg->fc_protocol = RTPROT_BOOT;
1405         rt->rt6i_protocol = cfg->fc_protocol;
1406
1407         addr_type = ipv6_addr_type(&cfg->fc_dst);
1408
             /* Input handler: multicast, local delivery, or forwarding. */
1409         if (addr_type & IPV6_ADDR_MULTICAST)
1410                 rt->dst.input = ip6_mc_input;
1411         else if (cfg->fc_flags & RTF_LOCAL)
1412                 rt->dst.input = ip6_input;
1413         else
1414                 rt->dst.input = ip6_forward;
1415
1416         rt->dst.output = ip6_output;
1417
1418         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1419         rt->rt6i_dst.plen = cfg->fc_dst_len;
1420         if (rt->rt6i_dst.plen == 128)
1421                rt->dst.flags |= DST_HOST;
1422
             /* Non-host routes that carry metrics get their own zeroed metrics array. */
1423         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1424                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1425                 if (!metrics) {
1426                         err = -ENOMEM;
1427                         goto out;
1428                 }
1429                 dst_init_metrics(&rt->dst, metrics, 0);
1430         }
1431 #ifdef CONFIG_IPV6_SUBTREES
1432         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1433         rt->rt6i_src.plen = cfg->fc_src_len;
1434 #endif
1435
1436         rt->rt6i_metric = cfg->fc_metric;
1437
1438         /* We cannot add true routes via loopback here,
1439            they would result in kernel looping; promote them to reject routes
1440          */
1441         if ((cfg->fc_flags & RTF_REJECT) ||
1442             (dev && (dev->flags & IFF_LOOPBACK) &&
1443              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1444              !(cfg->fc_flags & RTF_LOCAL))) {
1445                 /* hold loopback dev/idev if we haven't done so. */
1446                 if (dev != net->loopback_dev) {
1447                         if (dev) {
1448                                 dev_put(dev);
1449                                 in6_dev_put(idev);
1450                         }
1451                         dev = net->loopback_dev;
1452                         dev_hold(dev);
1453                         idev = in6_dev_get(dev);
1454                         if (!idev) {
1455                                 err = -ENODEV;
1456                                 goto out;
1457                         }
1458                 }
1459                 rt->dst.output = ip6_pkt_discard_out;
1460                 rt->dst.input = ip6_pkt_discard;
1461                 rt->dst.error = -ENETUNREACH;
1462                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1463                 goto install_route;
1464         }
1465
1466         if (cfg->fc_flags & RTF_GATEWAY) {
1467                 const struct in6_addr *gw_addr;
1468                 int gwa_type;
1469
1470                 gw_addr = &cfg->fc_gateway;
1471                 rt->rt6i_gateway = *gw_addr;
1472                 gwa_type = ipv6_addr_type(gw_addr);
1473
1474                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1475                         struct rt6_info *grt;
1476
1477                         /* IPv6 strictly inhibits using not link-local
1478                            addresses as nexthop address.
1479                            Otherwise, router will not able to send redirects.
1480                            It is very good, but in some (rare!) circumstances
1481                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1482                            some exceptions. --ANK
1483                          */
1484                         err = -EINVAL;
1485                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1486                                 goto out;
1487
1488                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1489
1490                         err = -EHOSTUNREACH;
1491                         if (!grt)
1492                                 goto out;
                             /*
                              * The gateway's own route must use the requested
                              * device; with no device requested, adopt the one
                              * from that route (taking references).
                              */
1493                         if (dev) {
1494                                 if (dev != grt->dst.dev) {
1495                                         dst_release(&grt->dst);
1496                                         goto out;
1497                                 }
1498                         } else {
1499                                 dev = grt->dst.dev;
1500                                 idev = grt->rt6i_idev;
1501                                 dev_hold(dev);
1502                                 in6_dev_hold(grt->rt6i_idev);
1503                         }
                             /* Accepted only if the gateway's route is not itself via a gateway. */
1504                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1505                                 err = 0;
1506                         dst_release(&grt->dst);
1507
1508                         if (err)
1509                                 goto out;
1510                 }
1511                 err = -EINVAL;
1512                 if (!dev || (dev->flags & IFF_LOOPBACK))
1513                         goto out;
1514         }
1515
1516         err = -ENODEV;
1517         if (!dev)
1518                 goto out;
1519
             /* A preferred source address must pass ipv6_chk_addr() for dev. */
1520         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1521                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1522                         err = -EINVAL;
1523                         goto out;
1524                 }
1525                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1526                 rt->rt6i_prefsrc.plen = 128;
1527         } else
1528                 rt->rt6i_prefsrc.plen = 0;
1529
1530         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1531                 err = rt6_bind_neighbour(rt, dev);
1532                 if (err)
1533                         goto out;
1534         }
1535
1536         rt->rt6i_flags = cfg->fc_flags;
1537
1538 install_route:
             /* Copy each metric attribute; type 0 is skipped, types above RTAX_MAX are rejected. */
1539         if (cfg->fc_mx) {
1540                 struct nlattr *nla;
1541                 int remaining;
1542
1543                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1544                         int type = nla_type(nla);
1545
1546                         if (type) {
1547                                 if (type > RTAX_MAX) {
1548                                         err = -EINVAL;
1549                                         goto out;
1550                                 }
1551
1552                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1553                         }
1554                 }
1555         }
1556
             /* The dev/idev references taken above are stored in the route from here on. */
1557         rt->dst.dev = dev;
1558         rt->rt6i_idev = idev;
1559         rt->rt6i_table = table;
1560
1561         cfg->fc_nlinfo.nl_net = dev_net(dev);
1562
1563         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1564
1565 out:
1566         if (dev)
1567                 dev_put(dev);
1568         if (idev)
1569                 in6_dev_put(idev);
1570         if (rt)
1571                 dst_free(&rt->dst);
1572         return err;
1573 }
1574
1575 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1576 {
1577         int err;
1578         struct fib6_table *table;
1579         struct net *net = dev_net(rt->dst.dev);
1580
1581         if (rt == net->ipv6.ip6_null_entry)
1582                 return -ENOENT;
1583
1584         table = rt->rt6i_table;
1585         write_lock_bh(&table->tb6_lock);
1586
1587         err = fib6_del(rt, info);
1588         dst_release(&rt->dst);
1589
1590         write_unlock_bh(&table->tb6_lock);
1591
1592         return err;
1593 }
1594
1595 int ip6_del_rt(struct rt6_info *rt)
1596 {
1597         struct nl_info info = {
1598                 .nl_net = dev_net(rt->dst.dev),
1599         };
1600         return __ip6_del_rt(rt, &info);
1601 }
1602
1603 static int ip6_route_del(struct fib6_config *cfg)
1604 {
1605         struct fib6_table *table;
1606         struct fib6_node *fn;
1607         struct rt6_info *rt;
1608         int err = -ESRCH;
1609
1610         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1611         if (!table)
1612                 return err;
1613
1614         read_lock_bh(&table->tb6_lock);
1615
1616         fn = fib6_locate(&table->tb6_root,
1617                          &cfg->fc_dst, cfg->fc_dst_len,
1618                          &cfg->fc_src, cfg->fc_src_len);
1619
1620         if (fn) {
1621                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1622                         if (cfg->fc_ifindex &&
1623                             (!rt->dst.dev ||
1624                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1625                                 continue;
1626                         if (cfg->fc_flags & RTF_GATEWAY &&
1627                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1628                                 continue;
1629                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1630                                 continue;
1631                         dst_hold(&rt->dst);
1632                         read_unlock_bh(&table->tb6_lock);
1633
1634                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1635                 }
1636         }
1637         read_unlock_bh(&table->tb6_lock);
1638
1639         return err;
1640 }
1641
/*
 * Process the ICMPv6 Redirect carried in @skb against the route @dst.
 *
 * The message is dropped (with a rate-limited debug message in most cases)
 * when it is too short, the destination is multicast, the target is neither
 * the destination itself nor a link-local unicast address, the receiving
 * device has no inet6 state, forwards packets or has accept_redirects off,
 * the ND options are invalid, or @dst is the namespace's null entry.
 *
 * Otherwise the target's neighbour entry is looked up (created if missing)
 * and updated, a RTF_CACHE copy of the route for the redirected destination
 * is created with the target as gateway and inserted, netevent notifiers
 * are called with NETEVENT_REDIRECT, and the old route is deleted if it was
 * itself a cache entry.
 */
1642 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
1643 {
1644         struct net *net = dev_net(skb->dev);
1645         struct netevent_redirect netevent;
1646         struct rt6_info *rt, *nrt = NULL;
1647         const struct in6_addr *target;
1648         struct ndisc_options ndopts;
1649         const struct in6_addr *dest;
1650         struct neighbour *old_neigh;
1651         struct inet6_dev *in6_dev;
1652         struct neighbour *neigh;
1653         struct icmp6hdr *icmph;
1654         int optlen, on_link;
1655         u8 *lladdr;
1656
             /* Bytes left for options after the ICMPv6 header and the two addresses. */
1657         optlen = skb->tail - skb->transport_header;
1658         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1659
1660         if (optlen < 0) {
1661                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1662                 return;
1663         }
1664
             /* The target address follows the ICMPv6 header; the destination follows the target. */
1665         icmph = icmp6_hdr(skb);
1666         target = (const struct in6_addr *) (icmph + 1);
1667         dest = target + 1;
1668
1669         if (ipv6_addr_is_multicast(dest)) {
1670                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1671                 return;
1672         }
1673
             /* target == dest means the destination itself is on-link. */
1674         on_link = 0;
1675         if (ipv6_addr_equal(dest, target)) {
1676                 on_link = 1;
1677         } else if (ipv6_addr_type(target) !=
1678                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1679                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1680                 return;
1681         }
1682
1683         in6_dev = __in6_dev_get(skb->dev);
1684         if (!in6_dev)
1685                 return;
1686         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1687                 return;
1688
1689         /* RFC2461 8.1:
1690          *      The IP source address of the Redirect MUST be the same as the current
1691          *      first-hop router for the specified ICMP Destination Address.
1692          */
1693
1694         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1695                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1696                 return;
1697         }
1698
             /* The target link-layer address option is optional; lladdr stays NULL without it. */
1699         lladdr = NULL;
1700         if (ndopts.nd_opts_tgt_lladdr) {
1701                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1702                                              skb->dev);
1703                 if (!lladdr) {
1704                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1705                         return;
1706                 }
1707         }
1708
1709         rt = (struct rt6_info *) dst;
1710         if (rt == net->ipv6.ip6_null_entry) {
1711                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1712                 return;
1713         }
1714
1715         /* Redirect received -> path was valid.
1716          * Look, redirects are sent only in response to data packets,
1717          * so that this nexthop apparently is reachable. --ANK
1718          */
1719         dst_confirm(&rt->dst);
1720
             /* Released at "out" on every path below. */
1721         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1722         if (!neigh)
1723                 return;
1724
1725         /* Duplicate redirect: silently ignore. */
1726         old_neigh = rt->n;
1727         if (neigh == old_neigh)
1728                 goto out;
1729
1730         /*
1731          *      We have finally decided to accept it.
1732          */
1733
1734         neigh_update(neigh, lladdr, NUD_STALE,
1735                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1736                      NEIGH_UPDATE_F_OVERRIDE|
1737                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1738                                      NEIGH_UPDATE_F_ISROUTER))
1739                      );
1740
1741         nrt = ip6_rt_copy(rt, dest);
1742         if (!nrt)
1743                 goto out;
1744
             /* New cache route via the target; without RTF_GATEWAY when the destination is on-link. */
1745         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1746         if (on_link)
1747                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1748
1749         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1750         nrt->n = neigh_clone(neigh);
1751
1752         if (ip6_ins_rt(nrt))
1753                 goto out;
1754
1755         netevent.old = &rt->dst;
1756         netevent.old_neigh = old_neigh;
1757         netevent.new = &nrt->dst;
1758         netevent.new_neigh = neigh;
1759         netevent.daddr = dest;
1760         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1761
             /* An old cache entry is replaced: take an extra reference and delete it. */
1762         if (rt->rt6i_flags & RTF_CACHE) {
1763                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1764                 ip6_del_rt(rt);
1765         }
1766
1767 out:
1768         neigh_release(neigh);
1769 }
1770
1771 /*
1772  *      Misc support functions
1773  */
1774
1775 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1776                                     const struct in6_addr *dest)
1777 {
1778         struct net *net = dev_net(ort->dst.dev);
1779         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1780                                             ort->rt6i_table);
1781
1782         if (rt) {
1783                 rt->dst.input = ort->dst.input;
1784                 rt->dst.output = ort->dst.output;
1785                 rt->dst.flags |= DST_HOST;
1786
1787                 rt->rt6i_dst.addr = *dest;
1788                 rt->rt6i_dst.plen = 128;
1789                 dst_copy_metrics(&rt->dst, &ort->dst);
1790                 rt->dst.error = ort->dst.error;
1791                 rt->rt6i_idev = ort->rt6i_idev;
1792                 if (rt->rt6i_idev)
1793                         in6_dev_hold(rt->rt6i_idev);
1794                 rt->dst.lastuse = jiffies;
1795
1796                 rt->rt6i_gateway = ort->rt6i_gateway;
1797                 rt->rt6i_flags = ort->rt6i_flags;
1798                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1799                     (RTF_DEFAULT | RTF_ADDRCONF))
1800                         rt6_set_from(rt, ort);
1801                 else
1802                         rt6_clean_expires(rt);
1803                 rt->rt6i_metric = 0;
1804
1805 #ifdef CONFIG_IPV6_SUBTREES
1806                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1807 #endif
1808                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1809                 rt->rt6i_table = ort->rt6i_table;
1810         }
1811         return rt;
1812 }
1813
1814 #ifdef CONFIG_IPV6_ROUTE_INFO
1815 static struct rt6_info *rt6_get_route_info(struct net *net,
1816                                            const struct in6_addr *prefix, int prefixlen,
1817                                            const struct in6_addr *gwaddr, int ifindex)
1818 {
1819         struct fib6_node *fn;
1820         struct rt6_info *rt = NULL;
1821         struct fib6_table *table;
1822
1823         table = fib6_get_table(net, RT6_TABLE_INFO);
1824         if (!table)
1825                 return NULL;
1826
1827         write_lock_bh(&table->tb6_lock);
1828         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1829         if (!fn)
1830                 goto out;
1831
1832         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1833                 if (rt->dst.dev->ifindex != ifindex)
1834                         continue;
1835                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1836                         continue;
1837                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1838                         continue;
1839                 dst_hold(&rt->dst);
1840                 break;
1841         }
1842 out:
1843         write_unlock_bh(&table->tb6_lock);
1844         return rt;
1845 }
1846
1847 static struct rt6_info *rt6_add_route_info(struct net *net,
1848                                            const struct in6_addr *prefix, int prefixlen,
1849                                            const struct in6_addr *gwaddr, int ifindex,
1850                                            unsigned int pref)
1851 {
1852         struct fib6_config cfg = {
1853                 .fc_table       = RT6_TABLE_INFO,
1854                 .fc_metric      = IP6_RT_PRIO_USER,
1855                 .fc_ifindex     = ifindex,
1856                 .fc_dst_len     = prefixlen,
1857                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1858                                   RTF_UP | RTF_PREF(pref),
1859                 .fc_nlinfo.pid = 0,
1860                 .fc_nlinfo.nlh = NULL,
1861                 .fc_nlinfo.nl_net = net,
1862         };
1863
1864         cfg.fc_dst = *prefix;
1865         cfg.fc_gateway = *gwaddr;
1866
1867         /* We should treat it as a default route if prefix length is 0. */
1868         if (!prefixlen)
1869                 cfg.fc_flags |= RTF_DEFAULT;
1870
1871         ip6_route_add(&cfg);
1872
1873         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1874 }
1875 #endif
1876
1877 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1878 {
1879         struct rt6_info *rt;
1880         struct fib6_table *table;
1881
1882         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1883         if (!table)
1884                 return NULL;
1885
1886         write_lock_bh(&table->tb6_lock);
1887         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1888                 if (dev == rt->dst.dev &&
1889                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1890                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1891                         break;
1892         }
1893         if (rt)
1894                 dst_hold(&rt->dst);
1895         write_unlock_bh(&table->tb6_lock);
1896         return rt;
1897 }
1898
1899 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1900                                      struct net_device *dev,
1901                                      unsigned int pref)
1902 {
1903         struct fib6_config cfg = {
1904                 .fc_table       = RT6_TABLE_DFLT,
1905                 .fc_metric      = IP6_RT_PRIO_USER,
1906                 .fc_ifindex     = dev->ifindex,
1907                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1908                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1909                 .fc_nlinfo.pid = 0,
1910                 .fc_nlinfo.nlh = NULL,
1911                 .fc_nlinfo.nl_net = dev_net(dev),
1912         };
1913
1914         cfg.fc_gateway = *gwaddr;
1915
1916         ip6_route_add(&cfg);
1917
1918         return rt6_get_dflt_router(gwaddr, dev);
1919 }
1920
1921 void rt6_purge_dflt_routers(struct net *net)
1922 {
1923         struct rt6_info *rt;
1924         struct fib6_table *table;
1925
1926         /* NOTE: Keep consistent with rt6_get_dflt_router */
1927         table = fib6_get_table(net, RT6_TABLE_DFLT);
1928         if (!table)
1929                 return;
1930
1931 restart:
1932         read_lock_bh(&table->tb6_lock);
1933         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1934                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1935                         dst_hold(&rt->dst);
1936                         read_unlock_bh(&table->tb6_lock);
1937                         ip6_del_rt(rt);
1938                         goto restart;
1939                 }
1940         }
1941         read_unlock_bh(&table->tb6_lock);
1942 }
1943
1944 static void rtmsg_to_fib6_config(struct net *net,
1945                                  struct in6_rtmsg *rtmsg,
1946                                  struct fib6_config *cfg)
1947 {
1948         memset(cfg, 0, sizeof(*cfg));
1949
1950         cfg->fc_table = RT6_TABLE_MAIN;
1951         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1952         cfg->fc_metric = rtmsg->rtmsg_metric;
1953         cfg->fc_expires = rtmsg->rtmsg_info;
1954         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1955         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1956         cfg->fc_flags = rtmsg->rtmsg_flags;
1957
1958         cfg->fc_nlinfo.nl_net = net;
1959
1960         cfg->fc_dst = rtmsg->rtmsg_dst;
1961         cfg->fc_src = rtmsg->rtmsg_src;
1962         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1963 }
1964
1965 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1966 {
1967         struct fib6_config cfg;
1968         struct in6_rtmsg rtmsg;
1969         int err;
1970
1971         switch(cmd) {
1972         case SIOCADDRT:         /* Add a route */
1973         case SIOCDELRT:         /* Delete a route */
1974                 if (!capable(CAP_NET_ADMIN))
1975                         return -EPERM;
1976                 err = copy_from_user(&rtmsg, arg,
1977                                      sizeof(struct in6_rtmsg));
1978                 if (err)
1979                         return -EFAULT;
1980
1981                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1982
1983                 rtnl_lock();
1984                 switch (cmd) {
1985                 case SIOCADDRT:
1986                         err = ip6_route_add(&cfg);
1987                         break;
1988                 case SIOCDELRT:
1989                         err = ip6_route_del(&cfg);
1990                         break;
1991                 default:
1992                         err = -EINVAL;
1993                 }
1994                 rtnl_unlock();
1995
1996                 return err;
1997         }
1998
1999         return -EINVAL;
2000 }
2001
2002 /*
2003  *      Drop the packet on the floor
2004  */
2005
2006 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2007 {
2008         int type;
2009         struct dst_entry *dst = skb_dst(skb);
2010         switch (ipstats_mib_noroutes) {
2011         case IPSTATS_MIB_INNOROUTES:
2012                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2013                 if (type == IPV6_ADDR_ANY) {
2014                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2015                                       IPSTATS_MIB_INADDRERRORS);
2016                         break;
2017                 }
2018                 /* FALLTHROUGH */
2019         case IPSTATS_MIB_OUTNOROUTES:
2020                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2021                               ipstats_mib_noroutes);
2022                 break;
2023         }
2024         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2025         kfree_skb(skb);
2026         return 0;
2027 }
2028
2029 static int ip6_pkt_discard(struct sk_buff *skb)
2030 {
2031         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2032 }
2033
2034 static int ip6_pkt_discard_out(struct sk_buff *skb)
2035 {
2036         skb->dev = skb_dst(skb)->dev;
2037         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2038 }
2039
2040 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2041
2042 static int ip6_pkt_prohibit(struct sk_buff *skb)
2043 {
2044         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2045 }
2046
2047 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2048 {
2049         skb->dev = skb_dst(skb)->dev;
2050         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2051 }
2052
2053 #endif
2054
2055 /*
2056  *      Allocate a dst for local (unicast / anycast) address.
2057  */
2058
2059 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2060                                     const struct in6_addr *addr,
2061                                     bool anycast)
2062 {
2063         struct net *net = dev_net(idev->dev);
2064         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2065         int err;
2066
2067         if (!rt) {
2068                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2069                 return ERR_PTR(-ENOMEM);
2070         }
2071
2072         in6_dev_hold(idev);
2073
2074         rt->dst.flags |= DST_HOST;
2075         rt->dst.input = ip6_input;
2076         rt->dst.output = ip6_output;
2077         rt->rt6i_idev = idev;
2078         rt->dst.obsolete = -1;
2079
2080         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2081         if (anycast)
2082                 rt->rt6i_flags |= RTF_ANYCAST;
2083         else
2084                 rt->rt6i_flags |= RTF_LOCAL;
2085         err = rt6_bind_neighbour(rt, rt->dst.dev);
2086         if (err) {
2087                 dst_free(&rt->dst);
2088                 return ERR_PTR(err);
2089         }
2090
2091         rt->rt6i_dst.addr = *addr;
2092         rt->rt6i_dst.plen = 128;
2093         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2094
2095         atomic_set(&rt->dst.__refcnt, 1);
2096
2097         return rt;
2098 }
2099
2100 int ip6_route_get_saddr(struct net *net,
2101                         struct rt6_info *rt,
2102                         const struct in6_addr *daddr,
2103                         unsigned int prefs,
2104                         struct in6_addr *saddr)
2105 {
2106         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2107         int err = 0;
2108         if (rt->rt6i_prefsrc.plen)
2109                 *saddr = rt->rt6i_prefsrc.addr;
2110         else
2111                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2112                                          daddr, prefs, saddr);
2113         return err;
2114 }
2115
2116 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2122
2123 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2124 {
2125         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2126         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2127         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2128
2129         if (((void *)rt->dst.dev == dev || !dev) &&
2130             rt != net->ipv6.ip6_null_entry &&
2131             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2132                 /* remove prefsrc entry */
2133                 rt->rt6i_prefsrc.plen = 0;
2134         }
2135         return 0;
2136 }
2137
2138 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2139 {
2140         struct net *net = dev_net(ifp->idev->dev);
2141         struct arg_dev_net_ip adni = {
2142                 .dev = ifp->idev->dev,
2143                 .net = net,
2144                 .addr = &ifp->addr,
2145         };
2146         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2147 }
2148
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2153
2154 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2155 {
2156         const struct arg_dev_net *adn = arg;
2157         const struct net_device *dev = adn->dev;
2158
2159         if ((rt->dst.dev == dev || !dev) &&
2160             rt != adn->net->ipv6.ip6_null_entry)
2161                 return -1;
2162
2163         return 0;
2164 }
2165
2166 void rt6_ifdown(struct net *net, struct net_device *dev)
2167 {
2168         struct arg_dev_net adn = {
2169                 .dev = dev,
2170                 .net = net,
2171         };
2172
2173         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2174         icmp6_clean_all(fib6_ifdown, &adn);
2175 }
2176
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2181
2182 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2183 {
2184         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2185         struct inet6_dev *idev;
2186
2187         /* In IPv6 pmtu discovery is not optional,
2188            so that RTAX_MTU lock cannot disable it.
2189            We still use this lock to block changes
2190            caused by addrconf/ndisc.
2191         */
2192
2193         idev = __in6_dev_get(arg->dev);
2194         if (!idev)
2195                 return 0;
2196
2197         /* For administrative MTU increase, there is no way to discover
2198            IPv6 PMTU increase, so PMTU increase should be updated here.
2199            Since RFC 1981 doesn't include administrative MTU increase
2200            update PMTU increase is a MUST. (i.e. jumbo frame)
2201          */
2202         /*
2203            If new MTU is less than route PMTU, this new MTU will be the
2204            lowest MTU in the path, update the route PMTU to reflect PMTU
2205            decreases; if new MTU is greater than route PMTU, and the
2206            old MTU is the lowest MTU in the path, update the route PMTU
2207            to reflect the increase. In this case if the other nodes' MTU
2208            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2209            PMTU discouvery.
2210          */
2211         if (rt->dst.dev == arg->dev &&
2212             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2213             (dst_mtu(&rt->dst) >= arg->mtu ||
2214              (dst_mtu(&rt->dst) < arg->mtu &&
2215               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2216                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2217         }
2218         return 0;
2219 }
2220
2221 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2222 {
2223         struct rt6_mtu_change_arg arg = {
2224                 .dev = dev,
2225                 .mtu = mtu,
2226         };
2227
2228         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2229 }
2230
2231 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2232         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2233         [RTA_OIF]               = { .type = NLA_U32 },
2234         [RTA_IIF]               = { .type = NLA_U32 },
2235         [RTA_PRIORITY]          = { .type = NLA_U32 },
2236         [RTA_METRICS]           = { .type = NLA_NESTED },
2237 };
2238
2239 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2240                               struct fib6_config *cfg)
2241 {
2242         struct rtmsg *rtm;
2243         struct nlattr *tb[RTA_MAX+1];
2244         int err;
2245
2246         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2247         if (err < 0)
2248                 goto errout;
2249
2250         err = -EINVAL;
2251         rtm = nlmsg_data(nlh);
2252         memset(cfg, 0, sizeof(*cfg));
2253
2254         cfg->fc_table = rtm->rtm_table;
2255         cfg->fc_dst_len = rtm->rtm_dst_len;
2256         cfg->fc_src_len = rtm->rtm_src_len;
2257         cfg->fc_flags = RTF_UP;
2258         cfg->fc_protocol = rtm->rtm_protocol;
2259
2260         if (rtm->rtm_type == RTN_UNREACHABLE)
2261                 cfg->fc_flags |= RTF_REJECT;
2262
2263         if (rtm->rtm_type == RTN_LOCAL)
2264                 cfg->fc_flags |= RTF_LOCAL;
2265
2266         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2267         cfg->fc_nlinfo.nlh = nlh;
2268         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2269
2270         if (tb[RTA_GATEWAY]) {
2271                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2272                 cfg->fc_flags |= RTF_GATEWAY;
2273         }
2274
2275         if (tb[RTA_DST]) {
2276                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2277
2278                 if (nla_len(tb[RTA_DST]) < plen)
2279                         goto errout;
2280
2281                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2282         }
2283
2284         if (tb[RTA_SRC]) {
2285                 int plen = (rtm->rtm_src_len + 7) >> 3;
2286
2287                 if (nla_len(tb[RTA_SRC]) < plen)
2288                         goto errout;
2289
2290                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2291         }
2292
2293         if (tb[RTA_PREFSRC])
2294                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2295
2296         if (tb[RTA_OIF])
2297                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2298
2299         if (tb[RTA_PRIORITY])
2300                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2301
2302         if (tb[RTA_METRICS]) {
2303                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2304                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2305         }
2306
2307         if (tb[RTA_TABLE])
2308                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2309
2310         err = 0;
2311 errout:
2312         return err;
2313 }
2314
2315 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2316 {
2317         struct fib6_config cfg;
2318         int err;
2319
2320         err = rtm_to_fib6_config(skb, nlh, &cfg);
2321         if (err < 0)
2322                 return err;
2323
2324         return ip6_route_del(&cfg);
2325 }
2326
2327 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2328 {
2329         struct fib6_config cfg;
2330         int err;
2331
2332         err = rtm_to_fib6_config(skb, nlh, &cfg);
2333         if (err < 0)
2334                 return err;
2335
2336         return ip6_route_add(&cfg);
2337 }
2338
2339 static inline size_t rt6_nlmsg_size(void)
2340 {
2341         return NLMSG_ALIGN(sizeof(struct rtmsg))
2342                + nla_total_size(16) /* RTA_SRC */
2343                + nla_total_size(16) /* RTA_DST */
2344                + nla_total_size(16) /* RTA_GATEWAY */
2345                + nla_total_size(16) /* RTA_PREFSRC */
2346                + nla_total_size(4) /* RTA_TABLE */
2347                + nla_total_size(4) /* RTA_IIF */
2348                + nla_total_size(4) /* RTA_OIF */
2349                + nla_total_size(4) /* RTA_PRIORITY */
2350                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2351                + nla_total_size(sizeof(struct rta_cacheinfo));
2352 }
2353
2354 static int rt6_fill_node(struct net *net,
2355                          struct sk_buff *skb, struct rt6_info *rt,
2356                          struct in6_addr *dst, struct in6_addr *src,
2357                          int iif, int type, u32 pid, u32 seq,
2358                          int prefix, int nowait, unsigned int flags)
2359 {
2360         struct rtmsg *rtm;
2361         struct nlmsghdr *nlh;
2362         long expires;
2363         u32 table;
2364         struct neighbour *n;
2365
2366         if (prefix) {   /* user wants prefix routes only */
2367                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2368                         /* success since this is not a prefix route */
2369                         return 1;
2370                 }
2371         }
2372
2373         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2374         if (!nlh)
2375                 return -EMSGSIZE;
2376
2377         rtm = nlmsg_data(nlh);
2378         rtm->rtm_family = AF_INET6;
2379         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2380         rtm->rtm_src_len = rt->rt6i_src.plen;
2381         rtm->rtm_tos = 0;
2382         if (rt->rt6i_table)
2383                 table = rt->rt6i_table->tb6_id;
2384         else
2385                 table = RT6_TABLE_UNSPEC;
2386         rtm->rtm_table = table;
2387         if (nla_put_u32(skb, RTA_TABLE, table))
2388                 goto nla_put_failure;
2389         if (rt->rt6i_flags & RTF_REJECT)
2390                 rtm->rtm_type = RTN_UNREACHABLE;
2391         else if (rt->rt6i_flags & RTF_LOCAL)
2392                 rtm->rtm_type = RTN_LOCAL;
2393         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2394                 rtm->rtm_type = RTN_LOCAL;
2395         else
2396                 rtm->rtm_type = RTN_UNICAST;
2397         rtm->rtm_flags = 0;
2398         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2399         rtm->rtm_protocol = rt->rt6i_protocol;
2400         if (rt->rt6i_flags & RTF_DYNAMIC)
2401                 rtm->rtm_protocol = RTPROT_REDIRECT;
2402         else if (rt->rt6i_flags & RTF_ADDRCONF)
2403                 rtm->rtm_protocol = RTPROT_KERNEL;
2404         else if (rt->rt6i_flags & RTF_DEFAULT)
2405                 rtm->rtm_protocol = RTPROT_RA;
2406
2407         if (rt->rt6i_flags & RTF_CACHE)
2408                 rtm->rtm_flags |= RTM_F_CLONED;
2409
2410         if (dst) {
2411                 if (nla_put(skb, RTA_DST, 16, dst))
2412                         goto nla_put_failure;
2413                 rtm->rtm_dst_len = 128;
2414         } else if (rtm->rtm_dst_len)
2415                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2416                         goto nla_put_failure;
2417 #ifdef CONFIG_IPV6_SUBTREES
2418         if (src) {
2419                 if (nla_put(skb, RTA_SRC, 16, src))
2420                         goto nla_put_failure;
2421                 rtm->rtm_src_len = 128;
2422         } else if (rtm->rtm_src_len &&
2423                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2424                 goto nla_put_failure;
2425 #endif
2426         if (iif) {
2427 #ifdef CONFIG_IPV6_MROUTE
2428                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2429                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2430                         if (err <= 0) {
2431                                 if (!nowait) {
2432                                         if (err == 0)
2433                                                 return 0;
2434                                         goto nla_put_failure;
2435                                 } else {
2436                                         if (err == -EMSGSIZE)
2437                                                 goto nla_put_failure;
2438                                 }
2439                         }
2440                 } else
2441 #endif
2442                         if (nla_put_u32(skb, RTA_IIF, iif))
2443                                 goto nla_put_failure;
2444         } else if (dst) {
2445                 struct in6_addr saddr_buf;
2446                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2447                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2448                         goto nla_put_failure;
2449         }
2450
2451         if (rt->rt6i_prefsrc.plen) {
2452                 struct in6_addr saddr_buf;
2453                 saddr_buf = rt->rt6i_prefsrc.addr;
2454                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2455                         goto nla_put_failure;
2456         }
2457
2458         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2459                 goto nla_put_failure;
2460
2461         rcu_read_lock();
2462         n = rt->n;
2463         if (n) {
2464                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2465                         rcu_read_unlock();
2466                         goto nla_put_failure;
2467                 }
2468         }
2469         rcu_read_unlock();
2470
2471         if (rt->dst.dev &&
2472             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2473                 goto nla_put_failure;
2474         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2475                 goto nla_put_failure;
2476         if (!(rt->rt6i_flags & RTF_EXPIRES))
2477                 expires = 0;
2478         else if (rt->dst.expires - jiffies < INT_MAX)
2479                 expires = rt->dst.expires - jiffies;
2480         else
2481                 expires = INT_MAX;
2482
2483         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2484                 goto nla_put_failure;
2485
2486         return nlmsg_end(skb, nlh);
2487
2488 nla_put_failure:
2489         nlmsg_cancel(skb, nlh);
2490         return -EMSGSIZE;
2491 }
2492
2493 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2494 {
2495         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2496         int prefix;
2497
2498         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2499                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2500                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2501         } else
2502                 prefix = 0;
2503
2504         return rt6_fill_node(arg->net,
2505                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2506                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2507                      prefix, 0, NLM_F_MULTI);
2508 }
2509
2510 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2511 {
2512         struct net *net = sock_net(in_skb->sk);
2513         struct nlattr *tb[RTA_MAX+1];
2514         struct rt6_info *rt;
2515         struct sk_buff *skb;
2516         struct rtmsg *rtm;
2517         struct flowi6 fl6;
2518         int err, iif = 0, oif = 0;
2519
2520         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2521         if (err < 0)
2522                 goto errout;
2523
2524         err = -EINVAL;
2525         memset(&fl6, 0, sizeof(fl6));
2526
2527         if (tb[RTA_SRC]) {
2528                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2529                         goto errout;
2530
2531                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2532         }
2533
2534         if (tb[RTA_DST]) {
2535                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2536                         goto errout;
2537
2538                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2539         }
2540
2541         if (tb[RTA_IIF])
2542                 iif = nla_get_u32(tb[RTA_IIF]);
2543
2544         if (tb[RTA_OIF])
2545                 oif = nla_get_u32(tb[RTA_OIF]);
2546
2547         if (iif) {
2548                 struct net_device *dev;
2549                 int flags = 0;
2550
2551                 dev = __dev_get_by_index(net, iif);
2552                 if (!dev) {
2553                         err = -ENODEV;
2554                         goto errout;
2555                 }
2556
2557                 fl6.flowi6_iif = iif;
2558
2559                 if (!ipv6_addr_any(&fl6.saddr))
2560                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2561
2562                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2563                                                                flags);
2564         } else {
2565                 fl6.flowi6_oif = oif;
2566
2567                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2568         }
2569
2570         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2571         if (!skb) {
2572                 dst_release(&rt->dst);
2573                 err = -ENOBUFS;
2574                 goto errout;
2575         }
2576
2577         /* Reserve room for dummy headers, this skb can pass
2578            through good chunk of routing engine.
2579          */
2580         skb_reset_mac_header(skb);
2581         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2582
2583         skb_dst_set(skb, &rt->dst);
2584
2585         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2586                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2587                             nlh->nlmsg_seq, 0, 0, 0);
2588         if (err < 0) {
2589                 kfree_skb(skb);
2590                 goto errout;
2591         }
2592
2593         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2594 errout:
2595         return err;
2596 }
2597
2598 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2599 {
2600         struct sk_buff *skb;
2601         struct net *net = info->nl_net;
2602         u32 seq;
2603         int err;
2604
2605         err = -ENOBUFS;
2606         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2607
2608         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2609         if (!skb)
2610                 goto errout;
2611
2612         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2613                                 event, info->pid, seq, 0, 0, 0);
2614         if (err < 0) {
2615                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2616                 WARN_ON(err == -EMSGSIZE);
2617                 kfree_skb(skb);
2618                 goto errout;
2619         }
2620         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2621                     info->nlh, gfp_any());
2622         return;
2623 errout:
2624         if (err < 0)
2625                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2626 }
2627
2628 static int ip6_route_dev_notify(struct notifier_block *this,
2629                                 unsigned long event, void *data)
2630 {
2631         struct net_device *dev = (struct net_device *)data;
2632         struct net *net = dev_net(dev);
2633
2634         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2635                 net->ipv6.ip6_null_entry->dst.dev = dev;
2636                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2637 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2638                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2639                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2640                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2641                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2642 #endif
2643         }
2644
2645         return NOTIFY_OK;
2646 }
2647
2648 /*
2649  *      /proc
2650  */
2651
2652 #ifdef CONFIG_PROC_FS
2653
/*
 * Bookkeeping for a buffer-based /proc read.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field meanings below are inferred from their names only.
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the output buffer */
	int offset;	/* presumably the requested read offset */
	int length;	/* presumably the size of the request */
	int skip;
	int len;
};
2662
2663 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2664 {
2665         struct seq_file *m = p_arg;
2666         struct neighbour *n;
2667
2668         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2669
2670 #ifdef CONFIG_IPV6_SUBTREES
2671         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2672 #else
2673         seq_puts(m, "00000000000000000000000000000000 00 ");
2674 #endif
2675         rcu_read_lock();
2676         n = rt->n;
2677         if (n) {
2678                 seq_printf(m, "%pi6", n->primary_key);
2679         } else {
2680                 seq_puts(m, "00000000000000000000000000000000");
2681         }
2682         rcu_read_unlock();
2683         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2684                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2685                    rt->dst.__use, rt->rt6i_flags,
2686                    rt->dst.dev ? rt->dst.dev->name : "");
2687         return 0;
2688 }
2689
2690 static int ipv6_route_show(struct seq_file *m, void *v)
2691 {
2692         struct net *net = (struct net *)m->private;
2693         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2694         return 0;
2695 }
2696
2697 static int ipv6_route_open(struct inode *inode, struct file *file)
2698 {
2699         return single_open_net(inode, file, ipv6_route_show);
2700 }
2701
2702 static const struct file_operations ipv6_route_proc_fops = {
2703         .owner          = THIS_MODULE,
2704         .open           = ipv6_route_open,
2705         .read           = seq_read,
2706         .llseek         = seq_lseek,
2707         .release        = single_release_net,
2708 };
2709
2710 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2711 {
2712         struct net *net = (struct net *)seq->private;
2713         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2714                    net->ipv6.rt6_stats->fib_nodes,
2715                    net->ipv6.rt6_stats->fib_route_nodes,
2716                    net->ipv6.rt6_stats->fib_rt_alloc,
2717                    net->ipv6.rt6_stats->fib_rt_entries,
2718                    net->ipv6.rt6_stats->fib_rt_cache,
2719                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2720                    net->ipv6.rt6_stats->fib_discarded_routes);
2721
2722         return 0;
2723 }
2724
2725 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2726 {
2727         return single_open_net(inode, file, rt6_stats_seq_show);
2728 }
2729
2730 static const struct file_operations rt6_stats_seq_fops = {
2731         .owner   = THIS_MODULE,
2732         .open    = rt6_stats_seq_open,
2733         .read    = seq_read,
2734         .llseek  = seq_lseek,
2735         .release = single_release_net,
2736 };
2737 #endif  /* CONFIG_PROC_FS */
2738
2739 #ifdef CONFIG_SYSCTL
2740
2741 static
2742 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2743                               void __user *buffer, size_t *lenp, loff_t *ppos)
2744 {
2745         struct net *net;
2746         int delay;
2747         if (!write)
2748                 return -EINVAL;
2749
2750         net = (struct net *)ctl->extra1;
2751         delay = net->ipv6.sysctl.flush_delay;
2752         proc_dointvec(ctl, write, buffer, lenp, ppos);
2753         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2754         return 0;
2755 }
2756
2757 ctl_table ipv6_route_table_template[] = {
2758         {
2759                 .procname       =       "flush",
2760                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2761                 .maxlen         =       sizeof(int),
2762                 .mode           =       0200,
2763                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2764         },
2765         {
2766                 .procname       =       "gc_thresh",
2767                 .data           =       &ip6_dst_ops_template.gc_thresh,
2768                 .maxlen         =       sizeof(int),
2769                 .mode           =       0644,
2770                 .proc_handler   =       proc_dointvec,
2771         },
2772         {
2773                 .procname       =       "max_size",
2774                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2775                 .maxlen         =       sizeof(int),
2776                 .mode           =       0644,
2777                 .proc_handler   =       proc_dointvec,
2778         },
2779         {
2780                 .procname       =       "gc_min_interval",
2781                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2782                 .maxlen         =       sizeof(int),
2783                 .mode           =       0644,
2784                 .proc_handler   =       proc_dointvec_jiffies,
2785         },
2786         {
2787                 .procname       =       "gc_timeout",
2788                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2789                 .maxlen         =       sizeof(int),
2790                 .mode           =       0644,
2791                 .proc_handler   =       proc_dointvec_jiffies,
2792         },
2793         {
2794                 .procname       =       "gc_interval",
2795                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2796                 .maxlen         =       sizeof(int),
2797                 .mode           =       0644,
2798                 .proc_handler   =       proc_dointvec_jiffies,
2799         },
2800         {
2801                 .procname       =       "gc_elasticity",
2802                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2803                 .maxlen         =       sizeof(int),
2804                 .mode           =       0644,
2805                 .proc_handler   =       proc_dointvec,
2806         },
2807         {
2808                 .procname       =       "mtu_expires",
2809                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2810                 .maxlen         =       sizeof(int),
2811                 .mode           =       0644,
2812                 .proc_handler   =       proc_dointvec_jiffies,
2813         },
2814         {
2815                 .procname       =       "min_adv_mss",
2816                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2817                 .maxlen         =       sizeof(int),
2818                 .mode           =       0644,
2819                 .proc_handler   =       proc_dointvec,
2820         },
2821         {
2822                 .procname       =       "gc_min_interval_ms",
2823                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2824                 .maxlen         =       sizeof(int),
2825                 .mode           =       0644,
2826                 .proc_handler   =       proc_dointvec_ms_jiffies,
2827         },
2828         { }
2829 };
2830
2831 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2832 {
2833         struct ctl_table *table;
2834
2835         table = kmemdup(ipv6_route_table_template,
2836                         sizeof(ipv6_route_table_template),
2837                         GFP_KERNEL);
2838
2839         if (table) {
2840                 table[0].data = &net->ipv6.sysctl.flush_delay;
2841                 table[0].extra1 = net;
2842                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2843                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2844                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2845                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2846                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2847                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2848                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2849                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2850                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2851         }
2852
2853         return table;
2854 }
2855 #endif
2856
2857 static int __net_init ip6_route_net_init(struct net *net)
2858 {
2859         int ret = -ENOMEM;
2860
2861         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2862                sizeof(net->ipv6.ip6_dst_ops));
2863
2864         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2865                 goto out_ip6_dst_ops;
2866
2867         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2868                                            sizeof(*net->ipv6.ip6_null_entry),
2869                                            GFP_KERNEL);
2870         if (!net->ipv6.ip6_null_entry)
2871                 goto out_ip6_dst_entries;
2872         net->ipv6.ip6_null_entry->dst.path =
2873                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2874         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2875         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2876                          ip6_template_metrics, true);
2877
2878 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2879         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2880                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2881                                                GFP_KERNEL);
2882         if (!net->ipv6.ip6_prohibit_entry)
2883                 goto out_ip6_null_entry;
2884         net->ipv6.ip6_prohibit_entry->dst.path =
2885                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2886         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2887         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2888                          ip6_template_metrics, true);
2889
2890         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2891                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2892                                                GFP_KERNEL);
2893         if (!net->ipv6.ip6_blk_hole_entry)
2894                 goto out_ip6_prohibit_entry;
2895         net->ipv6.ip6_blk_hole_entry->dst.path =
2896                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2897         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2898         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2899                          ip6_template_metrics, true);
2900 #endif
2901
2902         net->ipv6.sysctl.flush_delay = 0;
2903         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2904         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2905         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2906         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2907         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2908         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2909         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2910
2911         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2912
2913         ret = 0;
2914 out:
2915         return ret;
2916
2917 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2918 out_ip6_prohibit_entry:
2919         kfree(net->ipv6.ip6_prohibit_entry);
2920 out_ip6_null_entry:
2921         kfree(net->ipv6.ip6_null_entry);
2922 #endif
2923 out_ip6_dst_entries:
2924         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2925 out_ip6_dst_ops:
2926         goto out;
2927 }
2928
2929 static void __net_exit ip6_route_net_exit(struct net *net)
2930 {
2931         kfree(net->ipv6.ip6_null_entry);
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2933         kfree(net->ipv6.ip6_prohibit_entry);
2934         kfree(net->ipv6.ip6_blk_hole_entry);
2935 #endif
2936         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2937 }
2938
2939 static int __net_init ip6_route_net_init_late(struct net *net)
2940 {
2941 #ifdef CONFIG_PROC_FS
2942         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2943         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2944 #endif
2945         return 0;
2946 }
2947
2948 static void __net_exit ip6_route_net_exit_late(struct net *net)
2949 {
2950 #ifdef CONFIG_PROC_FS
2951         proc_net_remove(net, "ipv6_route");
2952         proc_net_remove(net, "rt6_stats");
2953 #endif
2954 }
2955
2956 static struct pernet_operations ip6_route_net_ops = {
2957         .init = ip6_route_net_init,
2958         .exit = ip6_route_net_exit,
2959 };
2960
2961 static int __net_init ipv6_inetpeer_init(struct net *net)
2962 {
2963         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2964
2965         if (!bp)
2966                 return -ENOMEM;
2967         inet_peer_base_init(bp);
2968         net->ipv6.peers = bp;
2969         return 0;
2970 }
2971
2972 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2973 {
2974         struct inet_peer_base *bp = net->ipv6.peers;
2975
2976         net->ipv6.peers = NULL;
2977         inetpeer_invalidate_tree(bp);
2978         kfree(bp);
2979 }
2980
2981 static struct pernet_operations ipv6_inetpeer_ops = {
2982         .init   =       ipv6_inetpeer_init,
2983         .exit   =       ipv6_inetpeer_exit,
2984 };
2985
2986 static struct pernet_operations ip6_route_net_late_ops = {
2987         .init = ip6_route_net_init_late,
2988         .exit = ip6_route_net_exit_late,
2989 };
2990
2991 static struct notifier_block ip6_route_dev_notifier = {
2992         .notifier_call = ip6_route_dev_notify,
2993         .priority = 0,
2994 };
2995
2996 int __init ip6_route_init(void)
2997 {
2998         int ret;
2999
3000         ret = -ENOMEM;
3001         ip6_dst_ops_template.kmem_cachep =
3002                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3003                                   SLAB_HWCACHE_ALIGN, NULL);
3004         if (!ip6_dst_ops_template.kmem_cachep)
3005                 goto out;
3006
3007         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3008         if (ret)
3009                 goto out_kmem_cache;
3010
3011         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3012         if (ret)
3013                 goto out_dst_entries;
3014
3015         ret = register_pernet_subsys(&ip6_route_net_ops);
3016         if (ret)
3017                 goto out_register_inetpeer;
3018
3019         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3020
3021         /* Registering of the loopback is done before this portion of code,
3022          * the loopback reference in rt6_info will not be taken, do it
3023          * manually for init_net */
3024         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3025         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3026   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3027         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3028         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3029         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3030         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3031   #endif
3032         ret = fib6_init();
3033         if (ret)
3034                 goto out_register_subsys;
3035
3036         ret = xfrm6_init();
3037         if (ret)
3038                 goto out_fib6_init;
3039
3040         ret = fib6_rules_init();
3041         if (ret)
3042                 goto xfrm6_init;
3043
3044         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3045         if (ret)
3046                 goto fib6_rules_init;
3047
3048         ret = -ENOBUFS;
3049         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3050             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3051             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3052                 goto out_register_late_subsys;
3053
3054         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3055         if (ret)
3056                 goto out_register_late_subsys;
3057
3058 out:
3059         return ret;
3060
3061 out_register_late_subsys:
3062         unregister_pernet_subsys(&ip6_route_net_late_ops);
3063 fib6_rules_init:
3064         fib6_rules_cleanup();
3065 xfrm6_init:
3066         xfrm6_fini();
3067 out_fib6_init:
3068         fib6_gc_cleanup();
3069 out_register_subsys:
3070         unregister_pernet_subsys(&ip6_route_net_ops);
3071 out_register_inetpeer:
3072         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3073 out_dst_entries:
3074         dst_entries_destroy(&ip6_dst_blackhole_ops);
3075 out_kmem_cache:
3076         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3077         goto out;
3078 }
3079
3080 void ip6_route_cleanup(void)
3081 {
3082         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3083         unregister_pernet_subsys(&ip6_route_net_late_ops);
3084         fib6_rules_cleanup();
3085         xfrm6_fini();
3086         fib6_gc_cleanup();
3087         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3088         unregister_pernet_subsys(&ip6_route_net_ops);
3089         dst_entries_destroy(&ip6_dst_blackhole_ops);
3090         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3091 }