2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
61 #include <asm/uaccess.h>
64 #include <linux/sysctl.h>
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
84 #ifdef CONFIG_IPV6_ROUTE_INFO
85 static struct rt6_info *rt6_add_route_info(struct net *net,
86 const struct in6_addr *prefix, int prefixlen,
87 const struct in6_addr *gwaddr, int ifindex,
89 static struct rt6_info *rt6_get_route_info(struct net *net,
90 const struct in6_addr *prefix, int prefixlen,
91 const struct in6_addr *gwaddr, int ifindex);
/*
 * ipv6_cow_metrics - copy-on-write the dst's metrics array into storage
 * tied to the route's inet_peer, swapping the pointer in with cmpxchg().
 * NOTE(review): this dump is elided (original line numbers jump), so
 * several statements are missing; code is left byte-identical.
 */
94 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
96 struct rt6_info *rt = (struct rt6_info *) dst;
97 struct inet_peer *peer;
/* Only host routes get writable per-peer metrics here. */
100 if (!(rt->dst.flags & DST_HOST))
103 peer = rt6_get_peer_create(rt);
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
/* First user of this peer's metrics: seed from the previous array. */
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
/* Publish atomically; if we lost the race, use the winner's pointer. */
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
/*
 * choose_neigh_daddr - key for neighbour lookup: the route's gateway if
 * one is set, otherwise the packet's own destination address.
 */
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct in6_addr *p = &rt->rt6i_gateway;
130 if (!ipv6_addr_any(p))
131 return (const void *) p;
133 return &ipv6_hdr(skb)->daddr;
/*
 * ip6_neigh_lookup - dst_ops hook: find the ND neighbour entry for this
 * dst, creating one in nd_tbl if the lookup misses.
 */
137 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct rt6_info *rt = (struct rt6_info *) dst;
144 daddr = choose_neigh_daddr(rt, skb, daddr);
145 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 return neigh_create(&nd_tbl, daddr, dst->dev);
/*
 * rt6_bind_neighbour - attach a neighbour entry for rt's gateway on @dev,
 * creating it if absent.  (Elided dump: error path not fully visible.)
 */
151 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
153 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
155 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
/*
 * Template dst_ops for normal IPv6 routes; copied per-netns into
 * net->ipv6.ip6_dst_ops (see ip6_dst_alloc/ip6_dst_gc users below).
 */
164 static struct dst_ops ip6_dst_ops_template = {
166 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .check = ip6_dst_check,
170 .default_advmss = ip6_default_advmss,
172 .cow_metrics = ipv6_cow_metrics,
173 .destroy = ip6_dst_destroy,
174 .ifdown = ip6_dst_ifdown,
175 .negative_advice = ip6_negative_advice,
176 .link_failure = ip6_link_failure,
177 .update_pmtu = ip6_rt_update_pmtu,
178 .redirect = rt6_do_redirect,
179 .local_out = __ip6_local_out,
180 .neigh_lookup = ip6_neigh_lookup,
/* Blackhole dst: report the raw metric MTU, falling back to the device MTU. */
183 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
185 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
187 return mtu ? : dst->dev->mtu;
/* Blackhole routes ignore PMTU updates (empty body in full source). */
190 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
194 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
/* dst_ops for the xfrm-style blackhole copies made by ip6_blackhole_route(). */
200 static struct dst_ops ip6_dst_blackhole_ops = {
202 .protocol = cpu_to_be16(ETH_P_IPV6),
203 .destroy = ip6_dst_destroy,
204 .check = ip6_dst_check,
205 .mtu = ip6_blackhole_mtu,
206 .default_advmss = ip6_default_advmss,
207 .update_pmtu = ip6_rt_blackhole_update_pmtu,
208 .cow_metrics = ip6_rt_blackhole_cow_metrics,
209 .neigh_lookup = ip6_neigh_lookup,
/* Initial metrics for the template routes below: hop limit pinned to 255. */
212 static const u32 ip6_template_metrics[RTAX_MAX] = {
213 [RTAX_HOPLIMIT - 1] = 255,
/*
 * The "null" route: returned when lookup fails; drops packets and reports
 * -ENETUNREACH.  Metric is the maximum so it never wins a comparison.
 */
216 static struct rt6_info ip6_null_entry_template = {
218 .__refcnt = ATOMIC_INIT(1),
221 .error = -ENETUNREACH,
222 .input = ip6_pkt_discard,
223 .output = ip6_pkt_discard_out,
225 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
226 .rt6i_protocol = RTPROT_KERNEL,
227 .rt6i_metric = ~(u32) 0,
228 .rt6i_ref = ATOMIC_INIT(1),
231 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
233 static int ip6_pkt_prohibit(struct sk_buff *skb);
234 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
/* Policy-routing "prohibit" action: like null but via ip6_pkt_prohibit*. */
236 static struct rt6_info ip6_prohibit_entry_template = {
238 .__refcnt = ATOMIC_INIT(1),
242 .input = ip6_pkt_prohibit,
243 .output = ip6_pkt_prohibit_out,
245 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
246 .rt6i_protocol = RTPROT_KERNEL,
247 .rt6i_metric = ~(u32) 0,
248 .rt6i_ref = ATOMIC_INIT(1),
/* Policy-routing "blackhole" action: silently discards in both directions. */
251 static struct rt6_info ip6_blk_hole_entry_template = {
253 .__refcnt = ATOMIC_INIT(1),
257 .input = dst_discard,
258 .output = dst_discard,
260 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
261 .rt6i_protocol = RTPROT_KERNEL,
262 .rt6i_metric = ~(u32) 0,
263 .rt6i_ref = ATOMIC_INIT(1),
268 /* allocate dst with ip6_dst_ops */
269 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
270 struct net_device *dev,
272 struct fib6_table *table)
274 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
/* Zero everything after the embedded dst_entry, then init the peer ref. */
279 sizeof(*rt) - sizeof(struct dst_entry));
280 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
/*
 * ip6_dst_destroy - dst_ops .destroy: drop the neighbour, metrics,
 * idev, "from" route and inet_peer references held by this rt6_info.
 */
285 static void ip6_dst_destroy(struct dst_entry *dst)
287 struct rt6_info *rt = (struct rt6_info *)dst;
288 struct inet6_dev *idev = rt->rt6i_idev;
291 neigh_release(rt->n);
/* Non-host routes own their metrics array; free it generically. */
293 if (!(rt->dst.flags & DST_HOST))
294 dst_destroy_metrics_generic(dst);
297 rt->rt6i_idev = NULL;
/* Routes without RTF_EXPIRES may instead hold a ref on their parent. */
301 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
302 dst_release(dst->from);
304 if (rt6_has_peer(rt)) {
305 struct inet_peer *peer = rt6_peer_ptr(rt);
/* Generation counter bumped when peer state changes; cached per-route. */
310 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
312 static u32 rt6_peer_genid(void)
314 return atomic_read(&__rt6_peer_genid);
/*
 * rt6_bind_peer - resolve and attach the inet_peer for this route's
 * destination; @create requests allocation on miss.
 */
317 void rt6_bind_peer(struct rt6_info *rt, int create)
319 struct inet_peer_base *base;
320 struct inet_peer *peer;
322 base = inetpeer_base_ptr(rt->_rt6i_peer);
326 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
/* Lost the race to another binder: (elided) drop our peer ref. */
328 if (!rt6_set_peer(rt, peer))
331 rt->rt6i_peer_genid = rt6_peer_genid();
/*
 * ip6_dst_ifdown - dst_ops .ifdown: when @dev goes away, repoint the
 * route's idev and neighbour at the netns loopback device.
 */
335 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
338 struct rt6_info *rt = (struct rt6_info *)dst;
339 struct inet6_dev *idev = rt->rt6i_idev;
340 struct net_device *loopback_dev =
341 dev_net(dev)->loopback_dev;
343 if (dev != loopback_dev) {
344 if (idev && idev->dev == dev) {
345 struct inet6_dev *loopback_idev =
346 in6_dev_get(loopback_dev);
348 rt->rt6i_idev = loopback_idev;
/* Migrate the cached neighbour too, taking a ref on loopback. */
352 if (rt->n && rt->n->dev == dev) {
353 rt->n->dev = loopback_dev;
354 dev_hold(loopback_dev);
/*
 * rt6_check_expired - true if this route (or, for cached clones, the
 * parent it was copied "from") has passed its expiry time.
 */
360 static bool rt6_check_expired(const struct rt6_info *rt)
362 struct rt6_info *ort = NULL;
364 if (rt->rt6i_flags & RTF_EXPIRES) {
365 if (time_after(jiffies, rt->dst.expires))
367 } else if (rt->dst.from) {
368 ort = (struct rt6_info *) rt->dst.from;
369 return (ort->rt6i_flags & RTF_EXPIRES) &&
370 time_after(jiffies, ort->dst.expires);
/* Multicast/link-local/loopback destinations require a strict oif match. */
375 static bool rt6_need_strict(const struct in6_addr *daddr)
377 return ipv6_addr_type(daddr) &
378 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
382 * Route lookup. Any table->tb6_lock is implied.
/*
 * rt6_device_match - walk the sibling list and pick the route whose
 * device matches @oif (or @saddr); loopback entries are remembered as a
 * fallback.  Returns ip6_null_entry when a strict iface match fails.
 */
385 static inline struct rt6_info *rt6_device_match(struct net *net,
387 const struct in6_addr *saddr,
391 struct rt6_info *local = NULL;
392 struct rt6_info *sprt;
/* Nothing to constrain on: (elided) return the head route as-is. */
394 if (!oif && ipv6_addr_any(saddr))
397 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
398 struct net_device *dev = sprt->dst.dev;
401 if (dev->ifindex == oif)
403 if (dev->flags & IFF_LOOPBACK) {
404 if (!sprt->rt6i_idev ||
405 sprt->rt6i_idev->dev->ifindex != oif) {
406 if (flags & RT6_LOOKUP_F_IFACE && oif)
408 if (local && (!oif ||
409 local->rt6i_idev->dev->ifindex == oif))
/* No oif given: fall back to matching the source address on the device. */
415 if (ipv6_chk_addr(net, saddr, dev,
416 flags & RT6_LOOKUP_F_IFACE))
425 if (flags & RT6_LOOKUP_F_IFACE)
426 return net->ipv6.ip6_null_entry;
432 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing (RFC 4191 style): solicit a
 * router whose neighbour entry is not NUD_VALID, rate-limited by
 * rtr_probe_interval via neigh->updated.
 */
433 static void rt6_probe(struct rt6_info *rt)
435 struct neighbour *neigh;
437 * Okay, this does not seem to be appropriate
438 * for now, however, we need to check if it
439 * is really so; aka Router Reachability Probing.
441 * Router Reachability Probe MUST be rate-limited
442 * to no more than one per minute.
445 neigh = rt ? rt->n : NULL;
446 if (!neigh || (neigh->nud_state & NUD_VALID))
448 read_lock_bh(&neigh->lock);
449 if (!(neigh->nud_state & NUD_VALID) &&
450 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
451 struct in6_addr mcaddr;
452 struct in6_addr *target;
/* Stamp first so concurrent probers back off, then drop the lock to send. */
454 neigh->updated = jiffies;
455 read_unlock_bh(&neigh->lock);
457 target = (struct in6_addr *)&neigh->primary_key;
458 addrconf_addr_solict_mult(target, &mcaddr);
459 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
461 read_unlock_bh(&neigh->lock);
/* !CONFIG_IPV6_ROUTER_PREF stub: probing compiled out. */
467 static inline void rt6_probe(struct rt6_info *rt)
473 * Default Router Selection (RFC 2461 6.3.6)
/* rt6_check_dev - device-match score: exact oif, or loopback proxying it. */
475 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
477 struct net_device *dev = rt->dst.dev;
478 if (!oif || dev->ifindex == oif)
480 if ((dev->flags & IFF_LOOPBACK) &&
481 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * rt6_check_neigh - reachability score for the route's next hop;
 * non-gateway routes need no neighbour check.
 */
486 static inline int rt6_check_neigh(struct rt6_info *rt)
488 struct neighbour *neigh;
493 if (rt->rt6i_flags & RTF_NONEXTHOP ||
494 !(rt->rt6i_flags & RTF_GATEWAY))
497 read_lock_bh(&neigh->lock);
498 if (neigh->nud_state & NUD_VALID)
500 #ifdef CONFIG_IPV6_ROUTER_PREF
501 else if (neigh->nud_state & NUD_FAILED)
506 read_unlock_bh(&neigh->lock);
/*
 * rt6_score_route - combine device match, RFC 4191 preference bits and
 * neighbour reachability into a single comparable score (or a reject
 * value when a strict requirement is unmet — elided here).
 */
513 static int rt6_score_route(struct rt6_info *rt, int oif,
518 m = rt6_check_dev(rt, oif);
519 if (!m && (strict & RT6_LOOKUP_F_IFACE))
521 #ifdef CONFIG_IPV6_ROUTER_PREF
522 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
524 n = rt6_check_neigh(rt);
525 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/* find_match - fold one candidate into the running best (match, *mpri). */
530 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
531 int *mpri, struct rt6_info *match)
535 if (rt6_check_expired(rt))
538 m = rt6_score_route(rt, oif, strict);
543 if (strict & RT6_LOOKUP_F_REACHABLE)
547 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * find_rr_leaf - scan the equal-metric run starting at rr_head, then the
 * portion of the leaf list before it, scoring each candidate.
 */
555 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
556 struct rt6_info *rr_head,
557 u32 metric, int oif, int strict)
559 struct rt6_info *rt, *match;
563 for (rt = rr_head; rt && rt->rt6i_metric == metric;
564 rt = rt->dst.rt6_next)
565 match = find_match(rt, oif, strict, &mpri, match);
566 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
567 rt = rt->dst.rt6_next)
568 match = find_match(rt, oif, strict, &mpri, match);
/*
 * rt6_select - default router selection: prefer the current rr_ptr if it
 * scores; otherwise round-robin fn->rr_ptr through the equal-metric run
 * (RFC 2461 6.3.6).  Falls back to ip6_null_entry on no match.
 */
573 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
575 struct rt6_info *match, *rt0;
580 fn->rr_ptr = rt0 = fn->leaf;
582 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
585 (strict & RT6_LOOKUP_F_REACHABLE)) {
586 struct rt6_info *next = rt0->dst.rt6_next;
588 /* no entries matched; do round-robin */
589 if (!next || next->rt6i_metric != rt0->rt6i_metric)
596 net = dev_net(rt0->dst.dev);
597 return match ? match : net->ipv6.ip6_null_entry;
600 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a Route Information Option from a Router
 * Advertisement (RFC 4191): validate length/prefix_len, then add, update
 * or (on zero lifetime — elided) remove the corresponding route.
 */
601 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
602 const struct in6_addr *gwaddr)
604 struct net *net = dev_net(dev);
605 struct route_info *rinfo = (struct route_info *) opt;
606 struct in6_addr prefix_buf, *prefix;
608 unsigned long lifetime;
611 if (len < sizeof(struct route_info)) {
615 /* Sanity check for prefix_len and length */
616 if (rinfo->length > 3) {
618 } else if (rinfo->prefix_len > 128) {
620 } else if (rinfo->prefix_len > 64) {
621 if (rinfo->length < 2) {
624 } else if (rinfo->prefix_len > 0) {
625 if (rinfo->length < 1) {
630 pref = rinfo->route_pref;
631 if (pref == ICMPV6_ROUTER_PREF_INVALID)
634 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
/* length==3 means the full 128-bit prefix is present in the option. */
636 if (rinfo->length == 3)
637 prefix = (struct in6_addr *)rinfo->prefix;
639 /* this function is safe */
640 ipv6_addr_prefix(&prefix_buf,
641 (struct in6_addr *)rinfo->prefix,
643 prefix = &prefix_buf;
646 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
649 if (rt && !lifetime) {
655 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
/* Refresh the stored preference bits from this advertisement. */
658 rt->rt6i_flags = RTF_ROUTEINFO |
659 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
662 if (!addrconf_finite_timeout(lifetime))
663 rt6_clean_expires(rt);
665 rt6_set_expires(rt, jiffies + HZ * lifetime);
667 dst_release(&rt->dst);
/*
 * BACKTRACK - lookup helper macro: when the match is ip6_null_entry,
 * climb towards the tree root (consulting source-routing subtrees) and
 * retry at the first ancestor carrying route info.
 */
673 #define BACKTRACK(__net, saddr) \
675 if (rt == __net->ipv6.ip6_null_entry) { \
676 struct fib6_node *pn; \
678 if (fn->fn_flags & RTN_TL_ROOT) \
681 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
682 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
685 if (fn->fn_flags & RTN_RTINFO) \
/*
 * ip6_pol_route_lookup - simple (non-cloning) per-table lookup under
 * tb6_lock: fib6_lookup, device match, backtrack, then take a dst ref.
 */
691 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
692 struct fib6_table *table,
693 struct flowi6 *fl6, int flags)
695 struct fib6_node *fn;
698 read_lock_bh(&table->tb6_lock);
699 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
702 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
703 BACKTRACK(net, &fl6->saddr);
705 dst_use(&rt->dst, jiffies);
706 read_unlock_bh(&table->tb6_lock);
/* Public wrapper: dispatch through policy-routing rules. */
711 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
716 EXPORT_SYMBOL_GPL(ip6_route_lookup);
/*
 * rt6_lookup - convenience lookup by daddr/saddr/oif; @strict requests a
 * RT6_LOOKUP_F_IFACE match.  Caller owns the returned reference.
 */
718 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
719 const struct in6_addr *saddr, int oif, int strict)
721 struct flowi6 fl6 = {
725 struct dst_entry *dst;
726 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
729 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
730 flags |= RT6_LOOKUP_F_HAS_SADDR;
733 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
735 return (struct rt6_info *) dst;
742 EXPORT_SYMBOL(rt6_lookup);
744 /* ip6_ins_rt is called with FREE table->tb6_lock.
745 It takes new route entry, the addition fails by any reason the
746 route is freed. In any case, if caller does not hold it, it may
/* __ip6_ins_rt - insert @rt into its table under the table write lock. */
750 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
753 struct fib6_table *table;
755 table = rt->rt6i_table;
756 write_lock_bh(&table->tb6_lock);
757 err = fib6_add(&table->tb6_root, rt, info);
758 write_unlock_bh(&table->tb6_lock);
/* ip6_ins_rt - insert with default netlink info (no notifier portid). */
763 int ip6_ins_rt(struct rt6_info *rt)
765 struct nl_info info = {
766 .nl_net = dev_net(rt->dst.dev),
768 return __ip6_ins_rt(rt, &info);
/*
 * rt6_alloc_cow - clone @ort into a host (/128) RTF_CACHE route for
 * @daddr, bind its neighbour, and on neighbour-table overflow retry once
 * after forcing a GC pass with temporarily aggressive sysctl settings.
 */
771 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
772 const struct in6_addr *daddr,
773 const struct in6_addr *saddr)
781 rt = ip6_rt_copy(ort, daddr);
/* Retrying is only safe outside softirq context. */
784 int attempts = !in_softirq();
786 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
787 if (ort->rt6i_dst.plen != 128 &&
788 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
789 rt->rt6i_flags |= RTF_ANYCAST;
790 rt->rt6i_gateway = *daddr;
793 rt->rt6i_flags |= RTF_CACHE;
795 #ifdef CONFIG_IPV6_SUBTREES
796 if (rt->rt6i_src.plen && saddr) {
797 rt->rt6i_src.addr = *saddr;
798 rt->rt6i_src.plen = 128;
803 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
804 struct net *net = dev_net(rt->dst.dev);
805 int saved_rt_min_interval =
806 net->ipv6.sysctl.ip6_rt_gc_min_interval;
807 int saved_rt_elasticity =
808 net->ipv6.sysctl.ip6_rt_gc_elasticity;
/* Temporarily force aggressive GC, run it, then restore the sysctls. */
810 if (attempts-- > 0) {
811 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
812 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
814 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
816 net->ipv6.sysctl.ip6_rt_gc_elasticity =
818 net->ipv6.sysctl.ip6_rt_gc_min_interval =
819 saved_rt_min_interval;
823 net_warn_ratelimited("Neighbour table overflow\n");
/*
 * rt6_alloc_clone - cheaper clone for non-gateway cases: copy the route,
 * mark it cached, and share the parent's neighbour entry.
 */
832 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
833 const struct in6_addr *daddr)
835 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838 rt->rt6i_flags |= RTF_CACHE;
839 rt->n = neigh_clone(ort->n);
/*
 * ip6_pol_route - main per-table route resolution: select a route (with
 * reachability strictness unless forwarding), drop the lock, clone it
 * into the cache (cow/clone) if needed, insert, and relookup on races.
 */
844 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
845 struct flowi6 *fl6, int flags)
847 struct fib6_node *fn;
848 struct rt6_info *rt, *nrt;
/* Hosts (not routers) insist on a reachable next hop first. */
852 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
854 strict |= flags & RT6_LOOKUP_F_IFACE;
857 read_lock_bh(&table->tb6_lock);
860 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
863 rt = rt6_select(fn, oif, strict | reachable);
865 BACKTRACK(net, &fl6->saddr);
866 if (rt == net->ipv6.ip6_null_entry ||
867 rt->rt6i_flags & RTF_CACHE)
871 read_unlock_bh(&table->tb6_lock);
/* Not yet cached: make a host clone (with or without a new neighbour). */
873 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
874 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
875 else if (!(rt->dst.flags & DST_HOST))
876 nrt = rt6_alloc_clone(rt, &fl6->daddr);
880 dst_release(&rt->dst);
881 rt = nrt ? : net->ipv6.ip6_null_entry;
885 err = ip6_ins_rt(nrt);
894 * Race condition! In the gap, when table->tb6_lock was
895 * released someone could insert this route. Relookup.
897 dst_release(&rt->dst);
906 read_unlock_bh(&table->tb6_lock);
908 rt->dst.lastuse = jiffies;
/* Input-path wrapper: constrain by the incoming interface. */
914 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
915 struct flowi6 *fl6, int flags)
917 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
/* Strict-iface lookup for scoped daddrs, except on PIM register devices. */
920 static struct dst_entry *ip6_route_input_lookup(struct net *net,
921 struct net_device *dev,
922 struct flowi6 *fl6, int flags)
924 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
925 flags |= RT6_LOOKUP_F_IFACE;
927 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
/*
 * ip6_route_input - receive-path entry point: build a flowi6 from the
 * packet's IPv6 header and attach the resolved dst to the skb.
 */
930 void ip6_route_input(struct sk_buff *skb)
932 const struct ipv6hdr *iph = ipv6_hdr(skb);
933 struct net *net = dev_net(skb->dev);
934 int flags = RT6_LOOKUP_F_HAS_SADDR;
935 struct flowi6 fl6 = {
936 .flowi6_iif = skb->dev->ifindex,
939 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
940 .flowi6_mark = skb->mark,
941 .flowi6_proto = iph->nexthdr,
944 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
/* Output-path wrapper: constrain by the outgoing interface. */
947 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
948 struct flowi6 *fl6, int flags)
950 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
/*
 * ip6_route_output - transmit-path entry point; honours the socket's
 * bound device, source-address preferences and scoped destinations.
 */
953 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
958 fl6->flowi6_iif = net->loopback_dev->ifindex;
960 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
961 flags |= RT6_LOOKUP_F_IFACE;
963 if (!ipv6_addr_any(&fl6->saddr))
964 flags |= RT6_LOOKUP_F_HAS_SADDR;
966 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
968 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
971 EXPORT_SYMBOL(ip6_route_output);
/*
 * ip6_blackhole_route - duplicate @dst_orig as a non-forwarding
 * blackhole dst (used with xfrm); always consumes the dst_orig ref and
 * returns the copy or ERR_PTR(-ENOMEM).
 */
973 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
975 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
976 struct dst_entry *new = NULL;
978 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
980 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
981 rt6_init_peer(rt, net->ipv6.peers);
/* Blackhole: both directions silently discard. */
986 new->input = dst_discard;
987 new->output = dst_discard;
/* Read-only metrics can be shared by pointer; writable ones are copied. */
989 if (dst_metrics_read_only(&ort->dst))
990 new->_metrics = ort->dst._metrics;
992 dst_copy_metrics(new, &ort->dst);
993 rt->rt6i_idev = ort->rt6i_idev;
995 in6_dev_hold(rt->rt6i_idev);
997 rt->rt6i_gateway = ort->rt6i_gateway;
998 rt->rt6i_flags = ort->rt6i_flags;
999 rt6_clean_expires(rt);
1000 rt->rt6i_metric = 0;
1002 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1003 #ifdef CONFIG_IPV6_SUBTREES
1004 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1010 dst_release(dst_orig);
1011 return new ? new : ERR_PTR(-ENOMEM);
1015 * Destination cache support functions
/*
 * ip6_dst_check - dst_ops .check: the cached dst is valid only while the
 * fib node's serial number matches the cookie; also refresh the peer
 * binding when the peer generation has moved on.
 */
1018 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1020 struct rt6_info *rt;
1022 rt = (struct rt6_info *) dst;
1024 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1025 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1026 if (!rt6_has_peer(rt))
1027 rt6_bind_peer(rt, 0);
1028 rt->rt6i_peer_genid = rt6_peer_genid();
/* Drop expired cached clones; keep everything else (elided tail). */
1035 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1037 struct rt6_info *rt = (struct rt6_info *) dst;
1040 if (rt->rt6i_flags & RTF_CACHE) {
1041 if (rt6_check_expired(rt)) {
/*
 * ip6_link_failure - dst_ops .link_failure: report unreachability to the
 * sender and expire the cached route (or dirty the default route's node).
 */
1053 static void ip6_link_failure(struct sk_buff *skb)
1055 struct rt6_info *rt;
1057 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1059 rt = (struct rt6_info *) skb_dst(skb);
1061 if (rt->rt6i_flags & RTF_CACHE)
1062 rt6_update_expires(rt, 0);
1063 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1064 rt->rt6i_node->fn_sernum = -1;
/*
 * ip6_rt_update_pmtu - dst_ops .update_pmtu: record a smaller path MTU
 * on a host route, enabling ALLFRAG below IPV6_MIN_MTU, with an expiry
 * controlled by the ip6_rt_mtu_expires sysctl.
 */
1068 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1070 struct rt6_info *rt6 = (struct rt6_info*)dst;
1073 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1074 struct net *net = dev_net(dst->dev);
1076 rt6->rt6i_flags |= RTF_MODIFIED;
/* Below the IPv6 minimum MTU we must fragment everything instead. */
1077 if (mtu < IPV6_MIN_MTU) {
1078 u32 features = dst_metric(dst, RTAX_FEATURES);
1080 features |= RTAX_FEATURE_ALLFRAG;
1081 dst_metric_set(dst, RTAX_FEATURES, features);
1083 dst_metric_set(dst, RTAX_MTU, mtu);
1084 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
/*
 * ip6_update_pmtu - resolve the route for an offending packet's header
 * and apply the advertised MTU to it.
 */
1088 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1091 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1092 struct dst_entry *dst;
1095 memset(&fl6, 0, sizeof(fl6));
1096 fl6.flowi6_oif = oif;
1097 fl6.flowi6_mark = mark;
1098 fl6.flowi6_flags = 0;
1099 fl6.daddr = iph->daddr;
1100 fl6.saddr = iph->saddr;
1101 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1103 dst = ip6_route_output(net, NULL, &fl6);
1105 ip6_rt_update_pmtu(dst, ntohl(mtu));
1108 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
/* Socket convenience wrapper: use the socket's netns, device and mark. */
1110 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1112 ip6_update_pmtu(skb, sock_net(sk), mtu,
1113 sk->sk_bound_dev_if, sk->sk_mark);
1115 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
/*
 * ip6_default_advmss - advertised MSS: path MTU minus IPv6+TCP headers,
 * clamped below by the ip6_rt_min_advmss sysctl and above per the
 * IPV6_MAXPLEN convention explained inline.
 */
1117 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1119 struct net_device *dev = dst->dev;
1120 unsigned int mtu = dst_mtu(dst);
1121 struct net *net = dev_net(dev);
1123 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1125 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1126 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1129 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1130 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1131 * IPV6_MAXPLEN is also valid and means: "any MSS,
1132 * rely only on pmtu discovery"
1134 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * ip6_mtu - dst_ops .mtu: prefer the raw metric, else the device's
 * per-interface mtu6 setting (fallbacks elided in this dump).
 */
1139 static unsigned int ip6_mtu(const struct dst_entry *dst)
1141 struct inet6_dev *idev;
1142 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1150 idev = __in6_dev_get(dst->dev);
1152 mtu = idev->cnf.mtu6;
/* Singly-linked list of ICMPv6 dsts awaiting GC, guarded by a spinlock. */
1158 static struct dst_entry *icmp6_dst_gc_list;
1159 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * icmp6_dst_alloc - build a standalone host dst for sending an ICMPv6
 * packet (not inserted into any fib table); the dst is chained onto
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() when unreferenced.
 */
1161 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1162 struct neighbour *neigh,
1165 struct dst_entry *dst;
1166 struct rt6_info *rt;
1167 struct inet6_dev *idev = in6_dev_get(dev);
1168 struct net *net = dev_net(dev);
1170 if (unlikely(!idev))
1171 return ERR_PTR(-ENODEV);
1173 rt = ip6_dst_alloc(net, dev, 0, NULL);
1174 if (unlikely(!rt)) {
1176 dst = ERR_PTR(-ENOMEM);
1183 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1184 if (IS_ERR(neigh)) {
1187 return ERR_CAST(neigh);
1191 rt->dst.flags |= DST_HOST;
1192 rt->dst.output = ip6_output;
1194 atomic_set(&rt->dst.__refcnt, 1);
1195 rt->rt6i_dst.addr = fl6->daddr;
1196 rt->rt6i_dst.plen = 128;
1197 rt->rt6i_idev = idev;
1198 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
/* Chain onto the GC list so the periodic sweep can reclaim it. */
1200 spin_lock_bh(&icmp6_dst_lock);
1201 rt->dst.next = icmp6_dst_gc_list;
1202 icmp6_dst_gc_list = &rt->dst;
1203 spin_unlock_bh(&icmp6_dst_lock);
1205 fib6_force_start_gc(net);
1207 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/*
 * icmp6_dst_gc - sweep the list, releasing entries whose refcount has
 * dropped to zero (unlink/free bodies elided in this dump).
 */
1213 int icmp6_dst_gc(void)
1215 struct dst_entry *dst, **pprev;
1218 spin_lock_bh(&icmp6_dst_lock);
1219 pprev = &icmp6_dst_gc_list;
1221 while ((dst = *pprev) != NULL) {
1222 if (!atomic_read(&dst->__refcnt)) {
1231 spin_unlock_bh(&icmp6_dst_lock);
/* icmp6_clean_all - apply @func to every listed dst, releasing matches. */
1236 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1239 struct dst_entry *dst, **pprev;
1241 spin_lock_bh(&icmp6_dst_lock);
1242 pprev = &icmp6_dst_gc_list;
1243 while ((dst = *pprev) != NULL) {
1244 struct rt6_info *rt = (struct rt6_info *) dst;
1245 if (func(rt, arg)) {
1252 spin_unlock_bh(&icmp6_dst_lock);
/*
 * ip6_dst_gc - dst_ops .gc: skip if we ran recently and the cache is
 * small; otherwise run fib6_run_gc with an adaptive expiry that decays
 * by ip6_rt_gc_elasticity.  Returns nonzero while over rt_max_size.
 */
1255 static int ip6_dst_gc(struct dst_ops *ops)
1257 unsigned long now = jiffies;
1258 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1259 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1260 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1261 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1262 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1263 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1266 entries = dst_entries_get_fast(ops);
1267 if (time_after(rt_last_gc + rt_min_interval, now) &&
1268 entries <= rt_max_size)
/* Each consecutive pass gets more aggressive until we go below gc_thresh. */
1271 net->ipv6.ip6_rt_gc_expire++;
1272 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1273 net->ipv6.ip6_rt_last_gc = now;
1274 entries = dst_entries_get_slow(ops);
1275 if (entries < ops->gc_thresh)
1276 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1278 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1279 return entries > rt_max_size;
1282 /* Clean host part of a prefix. Not necessary in radix tree,
1283 but results in cleaner routing tables.
1285 Remove it only when all the things will work!
/*
 * ip6_dst_hoplimit - effective hop limit: route metric if set, else the
 * interface's configured hop_limit, else the netns-wide default.
 */
1288 int ip6_dst_hoplimit(struct dst_entry *dst)
1290 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1291 if (hoplimit == 0) {
1292 struct net_device *dev = dst->dev;
1293 struct inet6_dev *idev;
1296 idev = __in6_dev_get(dev);
1298 hoplimit = idev->cnf.hop_limit;
1300 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1305 EXPORT_SYMBOL(ip6_dst_hoplimit);
/*
 * ip6_route_add - build an rt6_info from a netlink/ioctl fib6_config,
 * validating prefixes, gateway reachability, metrics and prefsrc, then
 * insert it via __ip6_ins_rt.  Loopback "true routes" are promoted to
 * reject routes (see inline comment).  NOTE(review): error labels and
 * several statements are elided in this dump; code left byte-identical.
 */
1311 int ip6_route_add(struct fib6_config *cfg)
1314 struct net *net = cfg->fc_nlinfo.nl_net;
1315 struct rt6_info *rt = NULL;
1316 struct net_device *dev = NULL;
1317 struct inet6_dev *idev = NULL;
1318 struct fib6_table *table;
1321 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1323 #ifndef CONFIG_IPV6_SUBTREES
1324 if (cfg->fc_src_len)
1327 if (cfg->fc_ifindex) {
1329 dev = dev_get_by_index(net, cfg->fc_ifindex);
1332 idev = in6_dev_get(dev);
1337 if (cfg->fc_metric == 0)
1338 cfg->fc_metric = IP6_RT_PRIO_USER;
/* Without NLM_F_CREATE only pre-existing tables may be targeted. */
1341 if (cfg->fc_nlinfo.nlh &&
1342 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1343 table = fib6_get_table(net, cfg->fc_table);
1345 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1346 table = fib6_new_table(net, cfg->fc_table);
1349 table = fib6_new_table(net, cfg->fc_table);
1355 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1362 rt->dst.obsolete = -1;
1364 if (cfg->fc_flags & RTF_EXPIRES)
1365 rt6_set_expires(rt, jiffies +
1366 clock_t_to_jiffies(cfg->fc_expires));
1368 rt6_clean_expires(rt);
1370 if (cfg->fc_protocol == RTPROT_UNSPEC)
1371 cfg->fc_protocol = RTPROT_BOOT;
1372 rt->rt6i_protocol = cfg->fc_protocol;
/* Pick the input handler from the destination's address type. */
1374 addr_type = ipv6_addr_type(&cfg->fc_dst);
1376 if (addr_type & IPV6_ADDR_MULTICAST)
1377 rt->dst.input = ip6_mc_input;
1378 else if (cfg->fc_flags & RTF_LOCAL)
1379 rt->dst.input = ip6_input;
1381 rt->dst.input = ip6_forward;
1383 rt->dst.output = ip6_output;
1385 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1386 rt->rt6i_dst.plen = cfg->fc_dst_len;
1387 if (rt->rt6i_dst.plen == 128)
1388 rt->dst.flags |= DST_HOST;
/* Non-host routes with user metrics need their own metrics array. */
1390 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1391 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1396 dst_init_metrics(&rt->dst, metrics, 0);
1398 #ifdef CONFIG_IPV6_SUBTREES
1399 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1400 rt->rt6i_src.plen = cfg->fc_src_len;
1403 rt->rt6i_metric = cfg->fc_metric;
1405 /* We cannot add true routes via loopback here,
1406 they would result in kernel looping; promote them to reject routes
1408 if ((cfg->fc_flags & RTF_REJECT) ||
1409 (dev && (dev->flags & IFF_LOOPBACK) &&
1410 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1411 !(cfg->fc_flags & RTF_LOCAL))) {
1412 /* hold loopback dev/idev if we haven't done so. */
1413 if (dev != net->loopback_dev) {
1418 dev = net->loopback_dev;
1420 idev = in6_dev_get(dev);
1426 rt->dst.output = ip6_pkt_discard_out;
1427 rt->dst.input = ip6_pkt_discard;
1428 rt->dst.error = -ENETUNREACH;
1429 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1433 if (cfg->fc_flags & RTF_GATEWAY) {
1434 const struct in6_addr *gw_addr;
1437 gw_addr = &cfg->fc_gateway;
1438 rt->rt6i_gateway = *gw_addr;
1439 gwa_type = ipv6_addr_type(gw_addr);
1441 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1442 struct rt6_info *grt;
1444 /* IPv6 strictly inhibits using not link-local
1445 addresses as nexthop address.
1446 Otherwise, router will not able to send redirects.
1447 It is very good, but in some (rare!) circumstances
1448 (SIT, PtP, NBMA NOARP links) it is handy to allow
1449 some exceptions. --ANK
1452 if (!(gwa_type & IPV6_ADDR_UNICAST))
/* The gateway itself must be reachable through the given interface. */
1455 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1457 err = -EHOSTUNREACH;
1461 if (dev != grt->dst.dev) {
1462 dst_release(&grt->dst);
1467 idev = grt->rt6i_idev;
1469 in6_dev_hold(grt->rt6i_idev);
/* A gateway must not itself be reached through another gateway. */
1471 if (!(grt->rt6i_flags & RTF_GATEWAY))
1473 dst_release(&grt->dst);
1479 if (!dev || (dev->flags & IFF_LOOPBACK))
/* Validate the requested preferred source address belongs to @dev. */
1487 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1488 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1492 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1493 rt->rt6i_prefsrc.plen = 128;
1495 rt->rt6i_prefsrc.plen = 0;
1497 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1498 err = rt6_bind_neighbour(rt, dev);
1503 rt->rt6i_flags = cfg->fc_flags;
/* Apply any RTAX_* metrics supplied as netlink attributes. */
1510 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1511 int type = nla_type(nla);
1514 if (type > RTAX_MAX) {
1519 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1525 rt->rt6i_idev = idev;
1526 rt->rt6i_table = table;
1528 cfg->fc_nlinfo.nl_net = dev_net(dev);
1530 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/*
 * __ip6_del_rt - remove @rt from its table under the write lock; the
 * netns null route is never deletable.  Consumes the caller's dst ref.
 */
1542 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1545 struct fib6_table *table;
1546 struct net *net = dev_net(rt->dst.dev);
1548 if (rt == net->ipv6.ip6_null_entry)
1551 table = rt->rt6i_table;
1552 write_lock_bh(&table->tb6_lock);
1554 err = fib6_del(rt, info);
1555 dst_release(&rt->dst);
1557 write_unlock_bh(&table->tb6_lock);
/* ip6_del_rt - delete with default netlink info for the route's netns. */
1562 int ip6_del_rt(struct rt6_info *rt)
1564 struct nl_info info = {
1565 .nl_net = dev_net(rt->dst.dev),
1567 return __ip6_del_rt(rt, &info);
/*
 * ip6_route_del - locate the route described by @cfg (table, prefix,
 * oif, gateway, metric) and delete the first match.
 */
1570 static int ip6_route_del(struct fib6_config *cfg)
1572 struct fib6_table *table;
1573 struct fib6_node *fn;
1574 struct rt6_info *rt;
1577 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1581 read_lock_bh(&table->tb6_lock);
1583 fn = fib6_locate(&table->tb6_root,
1584 &cfg->fc_dst, cfg->fc_dst_len,
1585 &cfg->fc_src, cfg->fc_src_len);
1588 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1589 if (cfg->fc_ifindex &&
1591 rt->dst.dev->ifindex != cfg->fc_ifindex))
1593 if (cfg->fc_flags & RTF_GATEWAY &&
1594 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1596 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* Found it: (elided) take a ref, then drop the lock before deleting. */
1599 read_unlock_bh(&table->tb6_lock);
1601 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1604 read_unlock_bh(&table->tb6_lock);
/*
 * Flow descriptor for redirect lookups: a flowi6 extended with the
 * address of the router that sent the redirect.
 */
1612 struct ip6rd_flowi {
1614 struct in6_addr gateway;
/*
 * __ip6_route_redirect - per-table lookup callback used when processing
 * an ICMPv6 redirect.  Finds the route currently used for the
 * destination and verifies the redirect came from that route's next-hop
 * router on the expected interface; otherwise falls back to the null
 * entry (with subtree backtracking).
 */
1617 static struct rt6_info *__ip6_route_redirect(struct net *net,
1618 struct fib6_table *table,
1622 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1623 struct rt6_info *rt;
1624 struct fib6_node *fn;
1627 * Get the "current" route for this destination and
1628 * check if the redirect has come from appropriate router.
1630 * RFC 2461 specifies that redirects should only be
1631 * accepted if they come from the nexthop to the target.
1632 * Due to the way the routes are chosen, this notion
1633 * is a bit fuzzy and one might need to check all possible
1637 read_lock_bh(&table->tb6_lock);
1638 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1640 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1642 * Current route is on-link; redirect is always invalid.
1644 * Seems, previous statement is not true. It could
1645 * be node, which looks for us as on-link (f.e. proxy ndisc)
1646 * But then router serving it might decide, that we should
1647 * know truth 8)8) --ANK (980726).
1649 if (rt6_check_expired(rt))
1651 if (!(rt->rt6i_flags & RTF_GATEWAY))
1653 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1655 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
/* no acceptable route: answer with the null entry */
1661 rt = net->ipv6.ip6_null_entry;
1662 BACKTRACK(net, &fl6->saddr);
1666 read_unlock_bh(&table->tb6_lock);
/*
 * ip6_route_redirect - build an ip6rd_flowi for a received redirect and
 * run it through the policy-rule lookup with __ip6_route_redirect as
 * the per-table resolver.  Strict interface matching is forced for
 * link-local / scoped destinations.
 */
1671 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1672 const struct in6_addr *src,
1673 const struct in6_addr *gateway,
1674 struct net_device *dev)
1676 int flags = RT6_LOOKUP_F_HAS_SADDR;
1677 struct net *net = dev_net(dev);
1678 struct ip6rd_flowi rdfl = {
1680 .flowi6_oif = dev->ifindex,
1686 rdfl.gateway = *gateway;
1688 if (rt6_need_strict(dest))
1689 flags |= RT6_LOOKUP_F_IFACE;
1691 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1692 flags, __ip6_route_redirect);
/*
 * rt6_do_redirect - process a validated ICMPv6 Redirect against @dst.
 *
 * Sanity-checks the redirect message (length, multicast destination,
 * link-local unicast target, forwarding/accept_redirects config and ND
 * options), updates the neighbour cache entry for the new next hop, then
 * clones the current route with the new gateway (RTF_DYNAMIC|RTF_CACHE)
 * and inserts it, firing a NETEVENT_REDIRECT notification.
 */
1695 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
1697 struct net *net = dev_net(skb->dev);
1698 struct netevent_redirect netevent;
1699 struct rt6_info *rt, *nrt = NULL;
1700 const struct in6_addr *target;
1701 struct ndisc_options ndopts;
1702 const struct in6_addr *dest;
1703 struct neighbour *old_neigh;
1704 struct inet6_dev *in6_dev;
1705 struct neighbour *neigh;
1706 struct icmp6hdr *icmph;
1707 int optlen, on_link;
/* length of ND options after the fixed redirect header (tgt + dst) */
1710 optlen = skb->tail - skb->transport_header;
1711 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1714 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1718 icmph = icmp6_hdr(skb);
1719 target = (const struct in6_addr *) (icmph + 1);
1722 if (ipv6_addr_is_multicast(dest)) {
1723 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
/* target == dest means the destination is now on-link */
1728 if (ipv6_addr_equal(dest, target)) {
1730 } else if (ipv6_addr_type(target) !=
1731 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1732 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1736 in6_dev = __in6_dev_get(skb->dev);
/* routers must not honour redirects; nor hosts configured to ignore them */
1739 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1743 * The IP source address of the Redirect MUST be the same as the current
1744 * first-hop router for the specified ICMP Destination Address.
1747 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1748 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1753 if (ndopts.nd_opts_tgt_lladdr) {
1754 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1757 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1762 rt = (struct rt6_info *) dst;
1763 if (rt == net->ipv6.ip6_null_entry) {
1764 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1768 /* Redirect received -> path was valid.
1769 * Look, redirects are sent only in response to data packets,
1770 * so that this nexthop apparently is reachable. --ANK
1772 dst_confirm(&rt->dst);
1774 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1778 /* Duplicate redirect: silently ignore. */
1780 if (neigh == old_neigh)
1784 * We have finally decided to accept it.
/* mark the new next hop STALE; ISROUTER only when target != dest */
1787 neigh_update(neigh, lladdr, NUD_STALE,
1788 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1789 NEIGH_UPDATE_F_OVERRIDE|
1790 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1791 NEIGH_UPDATE_F_ISROUTER))
/* clone the current route and repoint it at the redirect target */
1794 nrt = ip6_rt_copy(rt, dest);
1798 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1800 nrt->rt6i_flags &= ~RTF_GATEWAY;
1802 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1803 nrt->n = neigh_clone(neigh);
1805 if (ip6_ins_rt(nrt))
1808 netevent.old = &rt->dst;
1809 netevent.old_neigh = old_neigh;
1810 netevent.new = &nrt->dst;
1811 netevent.new_neigh = neigh;
1812 netevent.daddr = dest;
1813 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* a superseded cache entry can be removed right away */
1815 if (rt->rt6i_flags & RTF_CACHE) {
1816 rt = (struct rt6_info *) dst_clone(&rt->dst);
1821 neigh_release(neigh);
/*
 * rt6_redirect - entry point for a received ICMPv6 redirect packet:
 * resolve the route the redirect applies to, process it, and release
 * the lookup reference.
 */
1824 void rt6_redirect(struct sk_buff *skb)
1826 const struct in6_addr *target;
1827 const struct in6_addr *dest;
1828 const struct in6_addr *src;
1829 const struct in6_addr *saddr;
1830 struct icmp6hdr *icmph;
1831 struct rt6_info *rt;
1833 icmph = icmp6_hdr(skb);
1834 target = (const struct in6_addr *) (icmph + 1);
/* the redirect's IPv6 source is the router claiming to be our next hop */
1837 src = &ipv6_hdr(skb)->daddr;
1838 saddr = &ipv6_hdr(skb)->saddr;
1840 rt = ip6_route_redirect(dest, src, saddr, skb->dev);
1841 rt6_do_redirect(&rt->dst, skb);
1842 dst_release(&rt->dst);
1846 * Misc support functions
/*
 * ip6_rt_copy - allocate a new rt6_info cloned from @ort but rewritten
 * as a host route (/128) to @dest.  Copies dst ops/metrics, idev,
 * gateway and flags; expiry state is reset and, for RA-learned default
 * routes, the clone records its origin via rt6_set_from().
 */
1849 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1850 const struct in6_addr *dest)
1852 struct net *net = dev_net(ort->dst.dev);
1853 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1857 rt->dst.input = ort->dst.input;
1858 rt->dst.output = ort->dst.output;
1859 rt->dst.flags |= DST_HOST;
1861 rt->rt6i_dst.addr = *dest;
1862 rt->rt6i_dst.plen = 128;
1863 dst_copy_metrics(&rt->dst, &ort->dst);
1864 rt->dst.error = ort->dst.error;
1865 rt->rt6i_idev = ort->rt6i_idev;
1867 in6_dev_hold(rt->rt6i_idev);
1868 rt->dst.lastuse = jiffies;
1870 rt->rt6i_gateway = ort->rt6i_gateway;
1871 rt->rt6i_flags = ort->rt6i_flags;
/* remember the parent only for addrconf'ed default routes */
1872 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1873 (RTF_DEFAULT | RTF_ADDRCONF))
1874 rt6_set_from(rt, ort);
1876 rt6_clean_expires(rt);
1877 rt->rt6i_metric = 0;
1879 #ifdef CONFIG_IPV6_SUBTREES
1880 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1882 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1883 rt->rt6i_table = ort->rt6i_table;
1888 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_get_route_info - look up an existing RA Route Information route
 * (RTF_ROUTEINFO|RTF_GATEWAY) in RT6_TABLE_INFO matching the given
 * prefix, gateway and interface.  Returns NULL if none is found.
 */
1889 static struct rt6_info *rt6_get_route_info(struct net *net,
1890 const struct in6_addr *prefix, int prefixlen,
1891 const struct in6_addr *gwaddr, int ifindex)
1893 struct fib6_node *fn;
1894 struct rt6_info *rt = NULL;
1895 struct fib6_table *table;
1897 table = fib6_get_table(net, RT6_TABLE_INFO);
1901 write_lock_bh(&table->tb6_lock);
1902 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1906 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1907 if (rt->dst.dev->ifindex != ifindex)
1909 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1911 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1917 write_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_route_info - install a route learnt from an RA Route
 * Information option into RT6_TABLE_INFO, then return the freshly
 * looked-up entry (ip6_route_add() does not hand the route back).
 */
1921 static struct rt6_info *rt6_add_route_info(struct net *net,
1922 const struct in6_addr *prefix, int prefixlen,
1923 const struct in6_addr *gwaddr, int ifindex,
1926 struct fib6_config cfg = {
1927 .fc_table = RT6_TABLE_INFO,
1928 .fc_metric = IP6_RT_PRIO_USER,
1929 .fc_ifindex = ifindex,
1930 .fc_dst_len = prefixlen,
1931 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1932 RTF_UP | RTF_PREF(pref),
1934 .fc_nlinfo.nlh = NULL,
1935 .fc_nlinfo.nl_net = net,
1938 cfg.fc_dst = *prefix;
1939 cfg.fc_gateway = *gwaddr;
1941 /* We should treat it as a default route if prefix length is 0. */
1943 cfg.fc_flags |= RTF_DEFAULT;
1945 ip6_route_add(&cfg);
1947 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * rt6_get_dflt_router - find the RA-learnt default route via @addr on
 * @dev in RT6_TABLE_DFLT, or NULL if it does not exist.
 */
1951 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1953 struct rt6_info *rt;
1954 struct fib6_table *table;
1956 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1960 write_lock_bh(&table->tb6_lock);
1961 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1962 if (dev == rt->dst.dev &&
1963 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1964 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1969 write_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_dflt_router - install an RA-learnt default router entry
 * (gateway @gwaddr on @dev) into RT6_TABLE_DFLT, then return the
 * freshly looked-up route.
 */
1973 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1974 struct net_device *dev,
1977 struct fib6_config cfg = {
1978 .fc_table = RT6_TABLE_DFLT,
1979 .fc_metric = IP6_RT_PRIO_USER,
1980 .fc_ifindex = dev->ifindex,
1981 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1982 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1984 .fc_nlinfo.nlh = NULL,
1985 .fc_nlinfo.nl_net = dev_net(dev),
1988 cfg.fc_gateway = *gwaddr;
1990 ip6_route_add(&cfg);
1992 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete every addrconf/default route from
 * RT6_TABLE_DFLT.  The read lock is dropped around each deletion, so
 * the walk restarts after removing an entry.
 */
1995 void rt6_purge_dflt_routers(struct net *net)
1997 struct rt6_info *rt;
1998 struct fib6_table *table;
2000 /* NOTE: Keep consistent with rt6_get_dflt_router */
2001 table = fib6_get_table(net, RT6_TABLE_DFLT);
2006 read_lock_bh(&table->tb6_lock);
2007 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2008 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* must unlock before deleting; deletion takes the write lock */
2010 read_unlock_bh(&table->tb6_lock);
2015 read_unlock_bh(&table->tb6_lock);
/*
 * rtmsg_to_fib6_config - translate the legacy ioctl in6_rtmsg request
 * into a fib6_config, targeting the main table.
 */
2018 static void rtmsg_to_fib6_config(struct net *net,
2019 struct in6_rtmsg *rtmsg,
2020 struct fib6_config *cfg)
2022 memset(cfg, 0, sizeof(*cfg));
2024 cfg->fc_table = RT6_TABLE_MAIN;
2025 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2026 cfg->fc_metric = rtmsg->rtmsg_metric;
2027 cfg->fc_expires = rtmsg->rtmsg_info;
2028 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2029 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2030 cfg->fc_flags = rtmsg->rtmsg_flags;
2032 cfg->fc_nlinfo.nl_net = net;
2034 cfg->fc_dst = rtmsg->rtmsg_dst;
2035 cfg->fc_src = rtmsg->rtmsg_src;
2036 cfg->fc_gateway = rtmsg->rtmsg_gateway;
/*
 * ipv6_route_ioctl - SIOCADDRT/SIOCDELRT handler.  Requires
 * CAP_NET_ADMIN, copies the in6_rtmsg from userspace, converts it to a
 * fib6_config and dispatches to ip6_route_add()/ip6_route_del().
 */
2039 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2041 struct fib6_config cfg;
2042 struct in6_rtmsg rtmsg;
2046 case SIOCADDRT: /* Add a route */
2047 case SIOCDELRT: /* Delete a route */
2048 if (!capable(CAP_NET_ADMIN))
2050 err = copy_from_user(&rtmsg, arg,
2051 sizeof(struct in6_rtmsg));
2055 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2060 err = ip6_route_add(&cfg);
2063 err = ip6_route_del(&cfg);
2077 * Drop the packet on the floor
/*
 * ip6_pkt_drop - common no-route drop path: bump the appropriate
 * per-netns SNMP counter (IN/OUT no-route, or INADDRERRORS for an
 * unspecified destination) and emit an ICMPv6 destination-unreachable.
 */
2080 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2083 struct dst_entry *dst = skb_dst(skb);
2084 switch (ipstats_mib_noroutes) {
2085 case IPSTATS_MIB_INNOROUTES:
2086 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2087 if (type == IPV6_ADDR_ANY) {
2088 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2089 IPSTATS_MIB_INADDRERRORS);
2093 case IPSTATS_MIB_OUTNOROUTES:
2094 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2095 ipstats_mib_noroutes);
2098 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
/* dst input handler for the null entry: drop with "no route". */
2103 static int ip6_pkt_discard(struct sk_buff *skb)
2105 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* dst output handler for the null entry: drop outbound with "no route". */
2108 static int ip6_pkt_discard_out(struct sk_buff *skb)
2110 skb->dev = skb_dst(skb)->dev;
2111 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2114 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* dst input handler for prohibit routes: drop with "admin prohibited". */
2116 static int ip6_pkt_prohibit(struct sk_buff *skb)
2118 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* dst output handler for prohibit routes: drop outbound as prohibited. */
2121 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2123 skb->dev = skb_dst(skb)->dev;
2124 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2130 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build a host route for a local address on the
 * loopback device, flagged RTF_LOCAL or RTF_ANYCAST as requested, bound
 * to a neighbour entry and inserted into RT6_TABLE_LOCAL.  Returns the
 * route (refcount 1) or an ERR_PTR on allocation/neighbour failure.
 */
2133 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2134 const struct in6_addr *addr,
2137 struct net *net = dev_net(idev->dev);
2138 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2142 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2143 return ERR_PTR(-ENOMEM);
2148 rt->dst.flags |= DST_HOST;
2149 rt->dst.input = ip6_input;
2150 rt->dst.output = ip6_output;
2151 rt->rt6i_idev = idev;
2152 rt->dst.obsolete = -1;
2154 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2156 rt->rt6i_flags |= RTF_ANYCAST;
2158 rt->rt6i_flags |= RTF_LOCAL;
2159 err = rt6_bind_neighbour(rt, rt->dst.dev);
2162 return ERR_PTR(err);
2165 rt->rt6i_dst.addr = *addr;
2166 rt->rt6i_dst.plen = 128;
2167 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2169 atomic_set(&rt->dst.__refcnt, 1);
/*
 * ip6_route_get_saddr - pick a source address for @daddr: use the
 * route's preferred source when one is set, otherwise fall back to
 * normal device source-address selection.
 */
2174 int ip6_route_get_saddr(struct net *net,
2175 struct rt6_info *rt,
2176 const struct in6_addr *daddr,
2178 struct in6_addr *saddr)
2180 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2182 if (rt->rt6i_prefsrc.plen)
2183 *saddr = rt->rt6i_prefsrc.addr;
2185 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2186 daddr, prefs, saddr);
2190 /* remove deleted ip from prefsrc entries */
/* walker argument: device + netns + the address being removed */
2191 struct arg_dev_net_ip {
2192 struct net_device *dev;
2194 struct in6_addr *addr;
/*
 * fib6_remove_prefsrc - fib6_clean_all() callback: clear the preferred
 * source of any route (except the null entry) whose prefsrc matches the
 * deleted address, optionally restricted to one device.
 */
2197 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2199 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2200 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2201 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2203 if (((void *)rt->dst.dev == dev || !dev) &&
2204 rt != net->ipv6.ip6_null_entry &&
2205 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2206 /* remove prefsrc entry */
2207 rt->rt6i_prefsrc.plen = 0;
/*
 * rt6_remove_prefsrc - purge a just-deleted interface address from all
 * routes' preferred-source fields via fib6_remove_prefsrc().
 */
2212 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2214 struct net *net = dev_net(ifp->idev->dev);
2215 struct arg_dev_net_ip adni = {
2216 .dev = ifp->idev->dev,
2220 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
/* walker argument for the ifdown sweep: device (NULL = all) + netns */
2223 struct arg_dev_net {
2224 struct net_device *dev;
/*
 * fib6_ifdown - fib6_clean_all() callback: select for deletion every
 * route on the departing device (or all devices when dev is NULL),
 * never the null entry.
 */
2228 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2230 const struct arg_dev_net *adn = arg;
2231 const struct net_device *dev = adn->dev;
2233 if ((rt->dst.dev == dev || !dev) &&
2234 rt != adn->net->ipv6.ip6_null_entry)
/*
 * rt6_ifdown - remove all routes (FIB and ICMP-cached) that reference
 * @dev when the device goes away.
 */
2240 void rt6_ifdown(struct net *net, struct net_device *dev)
2242 struct arg_dev_net adn = {
2247 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2248 icmp6_clean_all(fib6_ifdown, &adn);
/* walker argument for MTU updates: the device and its new MTU */
2251 struct rt6_mtu_change_arg {
2252 struct net_device *dev;
/*
 * rt6_mtu_change_route - fib6_clean_all() callback that propagates a
 * device MTU change into route PMTU metrics, for both decreases and
 * administrative increases (see the RFC 1981 discussion below).
 */
2256 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2258 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2259 struct inet6_dev *idev;
2261 /* In IPv6 pmtu discovery is not optional,
2262 so that RTAX_MTU lock cannot disable it.
2263 We still use this lock to block changes
2264 caused by addrconf/ndisc.
2267 idev = __in6_dev_get(arg->dev);
2271 /* For administrative MTU increase, there is no way to discover
2272 IPv6 PMTU increase, so PMTU increase should be updated here.
2273 Since RFC 1981 doesn't include administrative MTU increase
2274 update PMTU increase is a MUST. (i.e. jumbo frame)
2277 If new MTU is less than route PMTU, this new MTU will be the
2278 lowest MTU in the path, update the route PMTU to reflect PMTU
2279 decreases; if new MTU is greater than route PMTU, and the
2280 old MTU is the lowest MTU in the path, update the route PMTU
2281 to reflect the increase. In this case if the other nodes' MTU
2282 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2285 if (rt->dst.dev == arg->dev &&
2286 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2287 (dst_mtu(&rt->dst) >= arg->mtu ||
2288 (dst_mtu(&rt->dst) < arg->mtu &&
2289 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2290 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
/* rt6_mtu_change - apply a device MTU change to every affected route. */
2295 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2297 struct rt6_mtu_change_arg arg = {
2302 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/* netlink attribute validation policy for IPv6 RTM_* requests */
2305 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2306 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2307 [RTA_OIF] = { .type = NLA_U32 },
2308 [RTA_IIF] = { .type = NLA_U32 },
2309 [RTA_PRIORITY] = { .type = NLA_U32 },
2310 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * rtm_to_fib6_config - parse an RTM_NEWROUTE/RTM_DELROUTE netlink
 * message (header + attributes) into a fib6_config.  RTA_METRICS is
 * kept as a pointer into the message, so @cfg must not outlive @nlh.
 */
2313 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2314 struct fib6_config *cfg)
2317 struct nlattr *tb[RTA_MAX+1];
2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2325 rtm = nlmsg_data(nlh);
2326 memset(cfg, 0, sizeof(*cfg));
2328 cfg->fc_table = rtm->rtm_table;
2329 cfg->fc_dst_len = rtm->rtm_dst_len;
2330 cfg->fc_src_len = rtm->rtm_src_len;
2331 cfg->fc_flags = RTF_UP;
2332 cfg->fc_protocol = rtm->rtm_protocol;
2334 if (rtm->rtm_type == RTN_UNREACHABLE)
2335 cfg->fc_flags |= RTF_REJECT;
2337 if (rtm->rtm_type == RTN_LOCAL)
2338 cfg->fc_flags |= RTF_LOCAL;
2340 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2341 cfg->fc_nlinfo.nlh = nlh;
2342 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2344 if (tb[RTA_GATEWAY]) {
2345 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2346 cfg->fc_flags |= RTF_GATEWAY;
/* dst/src prefixes may be sent truncated to their prefix length */
2350 int plen = (rtm->rtm_dst_len + 7) >> 3;
2352 if (nla_len(tb[RTA_DST]) < plen)
2355 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2359 int plen = (rtm->rtm_src_len + 7) >> 3;
2361 if (nla_len(tb[RTA_SRC]) < plen)
2364 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2367 if (tb[RTA_PREFSRC])
2368 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2371 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2373 if (tb[RTA_PRIORITY])
2374 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2376 if (tb[RTA_METRICS]) {
2377 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2378 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
/* an explicit RTA_TABLE attribute overrides the header's table id */
2382 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/* RTM_DELROUTE handler: parse the request and delete the route. */
2389 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2391 struct fib6_config cfg;
2394 err = rtm_to_fib6_config(skb, nlh, &cfg);
2398 return ip6_route_del(&cfg);
/* RTM_NEWROUTE handler: parse the request and add the route. */
2401 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2403 struct fib6_config cfg;
2406 err = rtm_to_fib6_config(skb, nlh, &cfg);
2410 return ip6_route_add(&cfg);
/*
 * rt6_nlmsg_size - worst-case size of an RTM_NEWROUTE message, used to
 * size the notification skb.  Must stay in sync with rt6_fill_node()
 * (a mismatch shows up as -EMSGSIZE in inet6_rt_notify()).
 */
2413 static inline size_t rt6_nlmsg_size(void)
2415 return NLMSG_ALIGN(sizeof(struct rtmsg))
2416 + nla_total_size(16) /* RTA_SRC */
2417 + nla_total_size(16) /* RTA_DST */
2418 + nla_total_size(16) /* RTA_GATEWAY */
2419 + nla_total_size(16) /* RTA_PREFSRC */
2420 + nla_total_size(4) /* RTA_TABLE */
2421 + nla_total_size(4) /* RTA_IIF */
2422 + nla_total_size(4) /* RTA_OIF */
2423 + nla_total_size(4) /* RTA_PRIORITY */
2424 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2425 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * rt6_fill_node - serialize one rt6_info as an rtnetlink message.
 *
 * @dst/@src: when non-NULL (get-route replies), emit them as /128
 *            RTA_DST/RTA_SRC instead of the route's own prefixes.
 * @iif:      input interface to report, 0 for none.
 * @prefix:   dump filter — when set, skip non-RTF_PREFIX_RT routes.
 * @nowait:   passed through to ip6mr_get_route() for multicast dsts.
 * Returns nlmsg_end() length, or -EMSGSIZE-style failure after
 * cancelling the partially built message.
 */
2428 static int rt6_fill_node(struct net *net,
2429 struct sk_buff *skb, struct rt6_info *rt,
2430 struct in6_addr *dst, struct in6_addr *src,
2431 int iif, int type, u32 pid, u32 seq,
2432 int prefix, int nowait, unsigned int flags)
2435 struct nlmsghdr *nlh;
2438 struct neighbour *n;
2440 if (prefix) { /* user wants prefix routes only */
2441 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2442 /* success since this is not a prefix route */
2447 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2451 rtm = nlmsg_data(nlh);
2452 rtm->rtm_family = AF_INET6;
2453 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2454 rtm->rtm_src_len = rt->rt6i_src.plen;
2457 table = rt->rt6i_table->tb6_id;
2459 table = RT6_TABLE_UNSPEC;
2460 rtm->rtm_table = table;
2461 if (nla_put_u32(skb, RTA_TABLE, table))
2462 goto nla_put_failure;
/* map route flags onto the closest rtnetlink route type */
2463 if (rt->rt6i_flags & RTF_REJECT)
2464 rtm->rtm_type = RTN_UNREACHABLE;
2465 else if (rt->rt6i_flags & RTF_LOCAL)
2466 rtm->rtm_type = RTN_LOCAL;
2467 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2468 rtm->rtm_type = RTN_LOCAL;
2470 rtm->rtm_type = RTN_UNICAST;
2472 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2473 rtm->rtm_protocol = rt->rt6i_protocol;
2474 if (rt->rt6i_flags & RTF_DYNAMIC)
2475 rtm->rtm_protocol = RTPROT_REDIRECT;
2476 else if (rt->rt6i_flags & RTF_ADDRCONF)
2477 rtm->rtm_protocol = RTPROT_KERNEL;
2478 else if (rt->rt6i_flags & RTF_DEFAULT)
2479 rtm->rtm_protocol = RTPROT_RA;
2481 if (rt->rt6i_flags & RTF_CACHE)
2482 rtm->rtm_flags |= RTM_F_CLONED;
2485 if (nla_put(skb, RTA_DST, 16, dst))
2486 goto nla_put_failure;
2487 rtm->rtm_dst_len = 128;
2488 } else if (rtm->rtm_dst_len)
2489 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2490 goto nla_put_failure;
2491 #ifdef CONFIG_IPV6_SUBTREES
2493 if (nla_put(skb, RTA_SRC, 16, src))
2494 goto nla_put_failure;
2495 rtm->rtm_src_len = 128;
2496 } else if (rtm->rtm_src_len &&
2497 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2498 goto nla_put_failure;
2501 #ifdef CONFIG_IPV6_MROUTE
/* multicast destinations are resolved through the mroute engine */
2502 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2503 int err = ip6mr_get_route(net, skb, rtm, nowait);
2508 goto nla_put_failure;
2510 if (err == -EMSGSIZE)
2511 goto nla_put_failure;
2516 if (nla_put_u32(skb, RTA_IIF, iif))
2517 goto nla_put_failure;
2519 struct in6_addr saddr_buf;
2520 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2521 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2522 goto nla_put_failure;
2525 if (rt->rt6i_prefsrc.plen) {
2526 struct in6_addr saddr_buf;
2527 saddr_buf = rt->rt6i_prefsrc.addr;
2528 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2529 goto nla_put_failure;
2532 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2533 goto nla_put_failure;
2538 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2540 goto nla_put_failure;
2546 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2547 goto nla_put_failure;
2548 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2549 goto nla_put_failure;
/* report remaining lifetime, clamped to INT_MAX jiffies */
2550 if (!(rt->rt6i_flags & RTF_EXPIRES))
2552 else if (rt->dst.expires - jiffies < INT_MAX)
2553 expires = rt->dst.expires - jiffies;
2557 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2558 goto nla_put_failure;
2560 return nlmsg_end(skb, nlh);
2563 nlmsg_cancel(skb, nlh);
/*
 * rt6_dump_route - fib6 dump callback: emit one route into the dump
 * skb, honouring the RTM_F_PREFIX filter from the request header when
 * the caller supplied a full rtmsg.
 */
2567 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2569 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2572 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2573 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2574 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2578 return rt6_fill_node(arg->net,
2579 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2580 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2581 prefix, 0, NLM_F_MULTI);
/*
 * inet6_rtm_getroute - RTM_GETROUTE handler: build a flowi6 from the
 * request attributes, perform an input-path lookup when RTA_IIF is
 * given (otherwise an output lookup), and unicast the resulting route
 * back to the requester via rt6_fill_node().
 */
2584 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2586 struct net *net = sock_net(in_skb->sk);
2587 struct nlattr *tb[RTA_MAX+1];
2588 struct rt6_info *rt;
2589 struct sk_buff *skb;
2592 int err, iif = 0, oif = 0;
2594 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2599 memset(&fl6, 0, sizeof(fl6));
2602 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2605 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2609 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2612 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2616 iif = nla_get_u32(tb[RTA_IIF]);
2619 oif = nla_get_u32(tb[RTA_OIF]);
/* iif given: simulate reception on that device */
2622 struct net_device *dev;
2625 dev = __dev_get_by_index(net, iif);
2631 fl6.flowi6_iif = iif;
2633 if (!ipv6_addr_any(&fl6.saddr))
2634 flags |= RT6_LOOKUP_F_HAS_SADDR;
2636 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2639 fl6.flowi6_oif = oif;
2641 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2644 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2646 dst_release(&rt->dst);
2651 /* Reserve room for dummy headers, this skb can pass
2652 through good chunk of routing engine.
2654 skb_reset_mac_header(skb);
2655 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* skb takes over the route reference */
2657 skb_dst_set(skb, &rt->dst);
2659 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2660 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2661 nlh->nlmsg_seq, 0, 0, 0);
2667 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
/*
 * inet6_rt_notify - broadcast a route add/delete event to
 * RTNLGRP_IPV6_ROUTE listeners.  -EMSGSIZE from rt6_fill_node() means
 * rt6_nlmsg_size() is stale, hence the WARN_ON.
 */
2672 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2674 struct sk_buff *skb;
2675 struct net *net = info->nl_net;
2680 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2682 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2686 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2687 event, info->pid, seq, 0, 0, 0);
2689 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2690 WARN_ON(err == -EMSGSIZE);
2694 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2695 info->nlh, gfp_any());
2699 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * ip6_route_dev_notify - netdevice notifier: when the per-netns
 * loopback device registers, attach it (device + inet6_dev reference)
 * to the template null/prohibit/blackhole routes.
 */
2702 static int ip6_route_dev_notify(struct notifier_block *this,
2703 unsigned long event, void *data)
2705 struct net_device *dev = (struct net_device *)data;
2706 struct net *net = dev_net(dev);
2708 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2709 net->ipv6.ip6_null_entry->dst.dev = dev;
2710 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2711 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2712 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2713 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2714 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2715 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2726 #ifdef CONFIG_PROC_FS
/*
 * rt6_info_route - emit one /proc/net/ipv6_route line for a route:
 * dst, src (or zeros), gateway (or zeros), metric, refcnt, use count,
 * flags and device name.
 */
2737 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2739 struct seq_file *m = p_arg;
2740 struct neighbour *n;
2742 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2744 #ifdef CONFIG_IPV6_SUBTREES
2745 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2747 seq_puts(m, "00000000000000000000000000000000 00 ");
2752 seq_printf(m, "%pi6", n->primary_key);
2754 seq_puts(m, "00000000000000000000000000000000");
2757 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2758 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2759 rt->dst.__use, rt->rt6i_flags,
2760 rt->dst.dev ? rt->dst.dev->name : "");
/* seq_file show: walk all tables read-only, one line per route. */
2764 static int ipv6_route_show(struct seq_file *m, void *v)
2766 struct net *net = (struct net *)m->private;
2767 fib6_clean_all_ro(net, rt6_info_route, 0, m);
/* open handler for /proc/net/ipv6_route */
2771 static int ipv6_route_open(struct inode *inode, struct file *file)
2773 return single_open_net(inode, file, ipv6_route_show);
/* file operations for /proc/net/ipv6_route */
2776 static const struct file_operations ipv6_route_proc_fops = {
2777 .owner = THIS_MODULE,
2778 .open = ipv6_route_open,
2780 .llseek = seq_lseek,
2781 .release = single_release_net,
/* seq_file show for /proc/net/rt6_stats: per-netns fib6 counters. */
2784 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2786 struct net *net = (struct net *)seq->private;
2787 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2788 net->ipv6.rt6_stats->fib_nodes,
2789 net->ipv6.rt6_stats->fib_route_nodes,
2790 net->ipv6.rt6_stats->fib_rt_alloc,
2791 net->ipv6.rt6_stats->fib_rt_entries,
2792 net->ipv6.rt6_stats->fib_rt_cache,
2793 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2794 net->ipv6.rt6_stats->fib_discarded_routes);
/* open handler for /proc/net/rt6_stats */
2799 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2801 return single_open_net(inode, file, rt6_stats_seq_show);
/* file operations for /proc/net/rt6_stats */
2804 static const struct file_operations rt6_stats_seq_fops = {
2805 .owner = THIS_MODULE,
2806 .open = rt6_stats_seq_open,
2808 .llseek = seq_lseek,
2809 .release = single_release_net,
2813 #ifdef CONFIG_SYSCTL
/*
 * ipv6_sysctl_rtcache_flush - "flush" sysctl handler: read the delay,
 * let proc_dointvec record the written value, then force a fib6 GC run
 * (immediate when delay <= 0).
 */
2816 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2817 void __user *buffer, size_t *lenp, loff_t *ppos)
2824 net = (struct net *)ctl->extra1;
2825 delay = net->ipv6.sysctl.flush_delay;
2826 proc_dointvec(ctl, write, buffer, lenp, ppos);
2827 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template for the per-netns net.ipv6.route.* sysctl table; the .data
 * pointers here reference init_net and are rewritten per namespace in
 * ipv6_route_sysctl_init() (index order matters there).
 */
2831 ctl_table ipv6_route_table_template[] = {
2833 .procname = "flush",
2834 .data = &init_net.ipv6.sysctl.flush_delay,
2835 .maxlen = sizeof(int),
2837 .proc_handler = ipv6_sysctl_rtcache_flush
2840 .procname = "gc_thresh",
2841 .data = &ip6_dst_ops_template.gc_thresh,
2842 .maxlen = sizeof(int),
2844 .proc_handler = proc_dointvec,
2847 .procname = "max_size",
2848 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2849 .maxlen = sizeof(int),
2851 .proc_handler = proc_dointvec,
2854 .procname = "gc_min_interval",
2855 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2856 .maxlen = sizeof(int),
2858 .proc_handler = proc_dointvec_jiffies,
2861 .procname = "gc_timeout",
2862 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2863 .maxlen = sizeof(int),
2865 .proc_handler = proc_dointvec_jiffies,
2868 .procname = "gc_interval",
2869 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2870 .maxlen = sizeof(int),
2872 .proc_handler = proc_dointvec_jiffies,
2875 .procname = "gc_elasticity",
2876 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2877 .maxlen = sizeof(int),
2879 .proc_handler = proc_dointvec,
2882 .procname = "mtu_expires",
2883 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2884 .maxlen = sizeof(int),
2886 .proc_handler = proc_dointvec_jiffies,
2889 .procname = "min_adv_mss",
2890 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2891 .maxlen = sizeof(int),
2893 .proc_handler = proc_dointvec,
/* millisecond-granularity alias of gc_min_interval (same data) */
2896 .procname = "gc_min_interval_ms",
2897 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2898 .maxlen = sizeof(int),
2900 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * ipv6_route_sysctl_init - duplicate the sysctl template for a new
 * netns and repoint each entry's .data at that namespace's fields.
 * The indices must match the template's entry order exactly.
 */
2905 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2907 struct ctl_table *table;
2909 table = kmemdup(ipv6_route_table_template,
2910 sizeof(ipv6_route_table_template),
2914 table[0].data = &net->ipv6.sysctl.flush_delay;
2915 table[0].extra1 = net;
2916 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2917 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2918 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2919 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2920 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2921 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2922 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2923 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2924 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
/*
 * ip6_route_net_init - per-netns routing setup: clone the dst_ops
 * template, allocate the null (and, with multiple tables, prohibit and
 * blackhole) template routes, and seed the routing sysctl defaults.
 * Unwinds allocations in reverse order on failure.
 */
2931 static int __net_init ip6_route_net_init(struct net *net)
2935 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2936 sizeof(net->ipv6.ip6_dst_ops));
2938 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2939 goto out_ip6_dst_ops;
2941 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2942 sizeof(*net->ipv6.ip6_null_entry),
2944 if (!net->ipv6.ip6_null_entry)
2945 goto out_ip6_dst_entries;
2946 net->ipv6.ip6_null_entry->dst.path =
2947 (struct dst_entry *)net->ipv6.ip6_null_entry;
2948 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2949 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2950 ip6_template_metrics, true);
2952 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2953 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2954 sizeof(*net->ipv6.ip6_prohibit_entry),
2956 if (!net->ipv6.ip6_prohibit_entry)
2957 goto out_ip6_null_entry;
2958 net->ipv6.ip6_prohibit_entry->dst.path =
2959 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2960 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2961 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2962 ip6_template_metrics, true);
2964 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2965 sizeof(*net->ipv6.ip6_blk_hole_entry),
2967 if (!net->ipv6.ip6_blk_hole_entry)
2968 goto out_ip6_prohibit_entry;
2969 net->ipv6.ip6_blk_hole_entry->dst.path =
2970 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2971 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2972 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2973 ip6_template_metrics, true);
/* default tunables for the net.ipv6.route.* sysctls */
2976 net->ipv6.sysctl.flush_delay = 0;
2977 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2978 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2979 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2980 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2981 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2982 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2983 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2985 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2991 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2992 out_ip6_prohibit_entry:
2993 kfree(net->ipv6.ip6_prohibit_entry);
2995 kfree(net->ipv6.ip6_null_entry);
2997 out_ip6_dst_entries:
2998 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3003 static void __net_exit ip6_route_net_exit(struct net *net)
3005 kfree(net->ipv6.ip6_null_entry);
3006 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3007 kfree(net->ipv6.ip6_prohibit_entry);
3008 kfree(net->ipv6.ip6_blk_hole_entry);
3010 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3013 static int __net_init ip6_route_net_init_late(struct net *net)
3015 #ifdef CONFIG_PROC_FS
3016 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3017 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3022 static void __net_exit ip6_route_net_exit_late(struct net *net)
3024 #ifdef CONFIG_PROC_FS
3025 proc_net_remove(net, "ipv6_route");
3026 proc_net_remove(net, "rt6_stats");
3030 static struct pernet_operations ip6_route_net_ops = {
3031 .init = ip6_route_net_init,
3032 .exit = ip6_route_net_exit,
3035 static int __net_init ipv6_inetpeer_init(struct net *net)
3037 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3041 inet_peer_base_init(bp);
3042 net->ipv6.peers = bp;
3046 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3048 struct inet_peer_base *bp = net->ipv6.peers;
3050 net->ipv6.peers = NULL;
3051 inetpeer_invalidate_tree(bp);
3055 static struct pernet_operations ipv6_inetpeer_ops = {
3056 .init = ipv6_inetpeer_init,
3057 .exit = ipv6_inetpeer_exit,
3060 static struct pernet_operations ip6_route_net_late_ops = {
3061 .init = ip6_route_net_init_late,
3062 .exit = ip6_route_net_exit_late,
3065 static struct notifier_block ip6_route_dev_notifier = {
3066 .notifier_call = ip6_route_dev_notify,
3070 int __init ip6_route_init(void)
3075 ip6_dst_ops_template.kmem_cachep =
3076 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3077 SLAB_HWCACHE_ALIGN, NULL);
3078 if (!ip6_dst_ops_template.kmem_cachep)
3081 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3083 goto out_kmem_cache;
3085 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3087 goto out_dst_entries;
3089 ret = register_pernet_subsys(&ip6_route_net_ops);
3091 goto out_register_inetpeer;
3093 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3095 /* Registering of the loopback is done before this portion of code,
3096 * the loopback reference in rt6_info will not be taken, do it
3097 * manually for init_net */
3098 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3099 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3100 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3101 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3102 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3103 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3104 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3108 goto out_register_subsys;
3114 ret = fib6_rules_init();
3118 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3120 goto fib6_rules_init;
3123 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3124 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3125 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3126 goto out_register_late_subsys;
3128 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3130 goto out_register_late_subsys;
3135 out_register_late_subsys:
3136 unregister_pernet_subsys(&ip6_route_net_late_ops);
3138 fib6_rules_cleanup();
3143 out_register_subsys:
3144 unregister_pernet_subsys(&ip6_route_net_ops);
3145 out_register_inetpeer:
3146 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3148 dst_entries_destroy(&ip6_dst_blackhole_ops);
3150 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3154 void ip6_route_cleanup(void)
3156 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3157 unregister_pernet_subsys(&ip6_route_net_late_ops);
3158 fib6_rules_cleanup();
3161 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3162 unregister_pernet_subsys(&ip6_route_net_ops);
3163 dst_entries_destroy(&ip6_dst_blackhole_ops);
3164 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);