2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
62 #include <asm/uaccess.h>
65 #include <linux/sysctl.h>
/*
 * Failure codes of the next-hop reachability check: rt6_check_neigh() and
 * rt6_score_route() return them and find_match() tests for them.
 */
69 RT6_NUD_FAIL_HARD = -3,
70 RT6_NUD_FAIL_PROBE = -2,
71 RT6_NUD_FAIL_DO_RR = -1,
/*
 * Forward declarations for helpers, dst_ops callbacks and the packet
 * discard/prohibit handlers that are referenced further down before their
 * definitions (for instance from the dst_ops and route-entry templates).
 */
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
95 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
98 #ifdef CONFIG_IPV6_ROUTE_INFO
99 static struct rt6_info *rt6_add_route_info(struct net *net,
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
103 static struct rt6_info *rt6_get_route_info(struct net *net,
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
/*
 * .cow_metrics hook of ip6_dst_ops_template.  Routes flagged RTF_CACHE are
 * singled out by the test below; every other route defers to
 * dst_cow_metrics_generic().
 */
108 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
110 struct rt6_info *rt = (struct rt6_info *)dst;
112 if (rt->rt6i_flags & RTF_CACHE)
115 return dst_cow_metrics_generic(dst, old);
/*
 * Pick the address used as the neighbour lookup key: the route's gateway
 * when it is set (not the unspecified address); the other return shown takes
 * the destination address from the skb's IPv6 header.
 */
118 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
122 struct in6_addr *p = &rt->rt6i_gateway;
124 if (!ipv6_addr_any(p))
125 return (const void *) p;
127 return &ipv6_hdr(skb)->daddr;
/*
 * .neigh_lookup hook of both dst_ops tables in this file.  The lookup key is
 * chosen by choose_neigh_daddr(), searched on dst->dev with
 * __ipv6_neigh_lookup(); the final return path creates an entry in nd_tbl
 * through neigh_create().
 */
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
135 struct rt6_info *rt = (struct rt6_info *) dst;
138 daddr = choose_neigh_daddr(rt, skb, daddr);
139 n = __ipv6_neigh_lookup(dst->dev, daddr);
142 return neigh_create(&nd_tbl, daddr, dst->dev);
/*
 * dst_ops callback table for regular IPv6 routes.
 * NOTE(review): presumably the template from which the per-namespace
 * net->ipv6.ip6_dst_ops used by ip6_dst_alloc() is initialised - confirm at
 * the namespace init site.
 */
145 static struct dst_ops ip6_dst_ops_template = {
149 .check = ip6_dst_check,
150 .default_advmss = ip6_default_advmss,
152 .cow_metrics = ipv6_cow_metrics,
153 .destroy = ip6_dst_destroy,
154 .ifdown = ip6_dst_ifdown,
155 .negative_advice = ip6_negative_advice,
156 .link_failure = ip6_link_failure,
157 .update_pmtu = ip6_rt_update_pmtu,
158 .redirect = rt6_do_redirect,
159 .local_out = __ip6_local_out,
160 .neigh_lookup = ip6_neigh_lookup,
/*
 * .mtu hook of ip6_dst_blackhole_ops: the raw RTAX_MTU metric when it is
 * non-zero, otherwise the MTU of the dst's device.
 */
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
165 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
167 return mtu ? : dst->dev->mtu;
/* .update_pmtu hook installed in ip6_dst_blackhole_ops. */
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171 struct sk_buff *skb, u32 mtu)
/* .redirect hook installed in ip6_dst_blackhole_ops. */
175 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
/* .cow_metrics hook installed in ip6_dst_blackhole_ops. */
180 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
/*
 * dst_ops for the dsts built by ip6_blackhole_route().  It shares destroy,
 * check, default_advmss and neigh_lookup with ip6_dst_ops_template and
 * installs blackhole-specific handlers for mtu, update_pmtu, redirect and
 * cow_metrics.
 */
186 static struct dst_ops ip6_dst_blackhole_ops = {
188 .destroy = ip6_dst_destroy,
189 .check = ip6_dst_check,
190 .mtu = ip6_blackhole_mtu,
191 .default_advmss = ip6_default_advmss,
192 .update_pmtu = ip6_rt_blackhole_update_pmtu,
193 .redirect = ip6_rt_blackhole_redirect,
194 .cow_metrics = ip6_rt_blackhole_cow_metrics,
195 .neigh_lookup = ip6_neigh_lookup,
/* Template metrics array (RTAX_MAX slots); the RTAX_HOPLIMIT slot is explicitly zero. */
198 static const u32 ip6_template_metrics[RTAX_MAX] = {
199 [RTAX_HOPLIMIT - 1] = 0,
/*
 * Template for the null route entry: a kernel-owned reject route
 * (RTF_REJECT | RTF_NONEXTHOP) with the largest possible metric, dst error
 * -ENETUNREACH and ip6_pkt_discard()/ip6_pkt_discard_out() as handlers.
 * NOTE(review): presumably instantiated as net->ipv6.ip6_null_entry, which
 * the lookups in this file return when nothing matches - confirm.
 */
202 static const struct rt6_info ip6_null_entry_template = {
204 .__refcnt = ATOMIC_INIT(1),
206 .obsolete = DST_OBSOLETE_FORCE_CHK,
207 .error = -ENETUNREACH,
208 .input = ip6_pkt_discard,
209 .output = ip6_pkt_discard_out,
211 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
212 .rt6i_protocol = RTPROT_KERNEL,
213 .rt6i_metric = ~(u32) 0,
214 .rt6i_ref = ATOMIC_INIT(1),
217 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/*
 * Template reject entry whose handlers are ip6_pkt_prohibit() and
 * ip6_pkt_prohibit_out().  Sits under the CONFIG_IPV6_MULTIPLE_TABLES
 * conditional opened just above.
 */
219 static const struct rt6_info ip6_prohibit_entry_template = {
221 .__refcnt = ATOMIC_INIT(1),
223 .obsolete = DST_OBSOLETE_FORCE_CHK,
225 .input = ip6_pkt_prohibit,
226 .output = ip6_pkt_prohibit_out,
228 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
229 .rt6i_protocol = RTPROT_KERNEL,
230 .rt6i_metric = ~(u32) 0,
231 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Template reject entry that drops traffic through the generic
 * dst_discard()/dst_discard_sk() handlers.
 */
234 static const struct rt6_info ip6_blk_hole_entry_template = {
236 .__refcnt = ATOMIC_INIT(1),
238 .obsolete = DST_OBSOLETE_FORCE_CHK,
240 .input = dst_discard,
241 .output = dst_discard_sk,
243 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
244 .rt6i_protocol = RTPROT_KERNEL,
245 .rt6i_metric = ~(u32) 0,
246 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Allocation helper for rt6_info objects.  The dst comes from the
 * namespace's ip6_dst_ops with DST_OBSOLETE_FORCE_CHK; everything that
 * follows the embedded dst_entry is zeroed and the sibling list head is
 * initialised.
 */
251 /* allocate dst with ip6_dst_ops */
252 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
253 struct net_device *dev,
255 struct fib6_table *table)
257 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
258 0, DST_OBSOLETE_FORCE_CHK, flags);
261 struct dst_entry *dst = &rt->dst;
263 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
264 INIT_LIST_HEAD(&rt->rt6i_siblings);
/*
 * .destroy hook: tears down the generic metrics and clears the route's
 * rt6i_idev pointer.  dst->from is picked up as well.
 * NOTE(review): presumably the idev and from references are dropped here -
 * confirm against the full body.
 */
269 static void ip6_dst_destroy(struct dst_entry *dst)
271 struct rt6_info *rt = (struct rt6_info *)dst;
272 struct inet6_dev *idev = rt->rt6i_idev;
273 struct dst_entry *from = dst->from;
275 dst_destroy_metrics_generic(dst);
278 rt->rt6i_idev = NULL;
/*
 * .ifdown hook.  When @dev is not the namespace's loopback device and the
 * route's inet6_dev belongs to @dev, rt6i_idev is re-pointed at the loopback
 * device's inet6_dev (obtained with in6_dev_get()).
 */
286 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
289 struct rt6_info *rt = (struct rt6_info *)dst;
290 struct inet6_dev *idev = rt->rt6i_idev;
291 struct net_device *loopback_dev =
292 dev_net(dev)->loopback_dev;
294 if (dev != loopback_dev) {
295 if (idev && idev->dev == dev) {
296 struct inet6_dev *loopback_idev =
297 in6_dev_get(loopback_dev);
299 rt->rt6i_idev = loopback_idev;
/*
 * Expiry test.  A route with RTF_EXPIRES is judged against its own
 * dst.expires; a route without that flag but with dst.from set takes the
 * verdict of dst.from (recursive call).
 */
306 static bool rt6_check_expired(const struct rt6_info *rt)
308 if (rt->rt6i_flags & RTF_EXPIRES) {
309 if (time_after(jiffies, rt->dst.expires))
311 } else if (rt->dst.from) {
312 return rt6_check_expired((struct rt6_info *) rt->dst.from);
/*
 * Folds the flow's protocol number, the hashes of its destination and source
 * addresses, the per-protocol port or ICMP type/code fields and the flow
 * label into one value, mixes it with two right shifts and reduces it modulo
 * @candidate_count.
 */
317 /* Multipath route selection:
318 * Hash based function using packet header and flowlabel.
319 * Adapted from fib_info_hashfn()
321 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322 const struct flowi6 *fl6)
324 unsigned int val = fl6->flowi6_proto;
326 val ^= ipv6_addr_hash(&fl6->daddr);
327 val ^= ipv6_addr_hash(&fl6->saddr);
329 /* Works only if this is not encapsulated */
330 switch (fl6->flowi6_proto) {
334 val ^= (__force u16)fl6->fl6_sport;
335 val ^= (__force u16)fl6->fl6_dport;
339 val ^= (__force u16)fl6->fl6_icmp_type;
340 val ^= (__force u16)fl6->fl6_icmp_code;
343 /* RFC6438 recommends to use flowlabel */
344 val ^= (__force u32)fl6->flowlabel;
346 /* Perhaps we need to tune this function? */
347 val = val ^ (val >> 7) ^ (val >> 12);
348 return val % candidate_count;
/*
 * Choose between @match and the routes on its sibling list.
 * rt6_info_hash_nhsfn() yields an index in [0, rt6i_nsiblings]; the sibling
 * list is then walked and the sibling seen when route_choosen is zero is
 * vetted with rt6_score_route() (a negative score is treated specially).
 */
351 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
352 struct flowi6 *fl6, int oif,
355 struct rt6_info *sibling, *next_sibling;
358 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359 /* Don't change the route, if route_choosen == 0
360 * (siblings does not include ourself)
363 list_for_each_entry_safe(sibling, next_sibling,
364 &match->rt6i_siblings, rt6i_siblings) {
366 if (route_choosen == 0) {
367 if (rt6_score_route(sibling, oif, strict) < 0)
377 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walk the route chain (dst.rt6_next) looking for an entry that fits the
 * requested interface index oif or the source address saddr.  The case of
 * no oif and an unspecified saddr is tested before the loop.  Loopback
 * devices are matched through the route's rt6i_idev; when only a source
 * address is available, ipv6_chk_addr() decides.  With RT6_LOOKUP_F_IFACE a
 * failed search yields net->ipv6.ip6_null_entry.
 */
380 static inline struct rt6_info *rt6_device_match(struct net *net,
382 const struct in6_addr *saddr,
386 struct rt6_info *local = NULL;
387 struct rt6_info *sprt;
389 if (!oif && ipv6_addr_any(saddr))
392 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
393 struct net_device *dev = sprt->dst.dev;
396 if (dev->ifindex == oif)
398 if (dev->flags & IFF_LOOPBACK) {
399 if (!sprt->rt6i_idev ||
400 sprt->rt6i_idev->dev->ifindex != oif) {
401 if (flags & RT6_LOOKUP_F_IFACE && oif)
403 if (local && (!oif ||
404 local->rt6i_idev->dev->ifindex == oif))
410 if (ipv6_chk_addr(net, saddr, dev,
411 flags & RT6_LOOKUP_F_IFACE))
420 if (flags & RT6_LOOKUP_F_IFACE)
421 return net->ipv6.ip6_null_entry;
427 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred probe request: the work item together with the target address
 * and the device that rt6_probe_deferred() uses to send the solicitation.
 */
428 struct __rt6_probe_work {
429 struct work_struct work;
430 struct in6_addr target;
431 struct net_device *dev;
/*
 * Work handler queued by rt6_probe().  Recovers the enclosing
 * __rt6_probe_work, derives the multicast address for the target with
 * addrconf_addr_solict_mult() and sends a neighbour solicitation on the
 * stored device via ndisc_send_ns().
 */
434 static void rt6_probe_deferred(struct work_struct *w)
436 struct in6_addr mcaddr;
437 struct __rt6_probe_work *work =
438 container_of(w, struct __rt6_probe_work, work);
440 addrconf_addr_solict_mult(&work->target, &mcaddr);
441 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
/*
 * Queue a reachability probe for the gateway of @rt.  Only routes with
 * RTF_GATEWAY are considered.  The gateway's neighbour entry is examined
 * under its write lock: NUD_VALID entries are singled out first, and the
 * rate limit compares jiffies against neigh->updated plus the interface's
 * rtr_probe_interval.  The solicitation itself is sent from a work item
 * (rt6_probe_deferred) allocated with GFP_ATOMIC; a device reference is
 * taken with dev_hold() before the work is scheduled.
 */
446 static void rt6_probe(struct rt6_info *rt)
448 struct neighbour *neigh;
450 * Okay, this does not seem to be appropriate
451 * for now, however, we need to check if it
452 * is really so; aka Router Reachability Probing.
454 * Router Reachability Probe MUST be rate-limited
455 * to no more than one per minute.
457 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
460 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
462 write_lock(&neigh->lock);
463 if (neigh->nud_state & NUD_VALID)
468 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
469 struct __rt6_probe_work *work;
471 work = kmalloc(sizeof(*work), GFP_ATOMIC);
474 __neigh_set_probe_once(neigh);
477 write_unlock(&neigh->lock);
480 INIT_WORK(&work->work, rt6_probe_deferred);
481 work->target = rt->rt6i_gateway;
482 dev_hold(rt->dst.dev);
483 work->dev = rt->dst.dev;
484 schedule_work(&work->work);
488 write_unlock(&neigh->lock);
490 rcu_read_unlock_bh();
/*
 * Second definition of rt6_probe().
 * NOTE(review): presumably the variant built when CONFIG_IPV6_ROUTER_PREF is
 * off - confirm against the surrounding conditional.
 */
493 static inline void rt6_probe(struct rt6_info *rt)
499 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Device part of the route score.  Matches when no @oif is requested, when
 * the route's device has index @oif, or when the device is a loopback whose
 * rt6i_idev belongs to interface @oif.  The result is the base value used by
 * rt6_score_route().
 */
501 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
503 struct net_device *dev = rt->dst.dev;
504 if (!oif || dev->ifindex == oif)
506 if ((dev->flags & IFF_LOOPBACK) &&
507 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * Judge next-hop reachability for @rt.  Routes with RTF_NONEXTHOP or without
 * RTF_GATEWAY succeed immediately.  Otherwise the gateway's neighbour entry
 * is inspected under its read lock: NUD_VALID succeeds; with
 * CONFIG_IPV6_ROUTER_PREF any state other than NUD_FAILED also succeeds and
 * RT6_NUD_FAIL_PROBE covers the remaining case.  The IS_ENABLED() branch
 * picks RT6_NUD_SUCCEED or RT6_NUD_FAIL_DO_RR depending on that option.
 * The initial value of the result is RT6_NUD_FAIL_HARD.
 */
512 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
514 struct neighbour *neigh;
515 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
517 if (rt->rt6i_flags & RTF_NONEXTHOP ||
518 !(rt->rt6i_flags & RTF_GATEWAY))
519 return RT6_NUD_SUCCEED;
522 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
524 read_lock(&neigh->lock);
525 if (neigh->nud_state & NUD_VALID)
526 ret = RT6_NUD_SUCCEED;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528 else if (!(neigh->nud_state & NUD_FAILED))
529 ret = RT6_NUD_SUCCEED;
531 ret = RT6_NUD_FAIL_PROBE;
533 read_unlock(&neigh->lock);
535 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
536 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
538 rcu_read_unlock_bh();
/*
 * Score @rt for route selection.  The base value is rt6_check_dev(); a zero
 * device score under RT6_LOOKUP_F_IFACE is a hard failure.  With
 * CONFIG_IPV6_ROUTER_PREF the decoded router preference is OR-ed in, shifted
 * left by two bits.  Under RT6_LOOKUP_F_REACHABLE the next hop is checked
 * with rt6_check_neigh() as well.
 */
543 static int rt6_score_route(struct rt6_info *rt, int oif,
548 m = rt6_check_dev(rt, oif);
549 if (!m && (strict & RT6_LOOKUP_F_IFACE))
550 return RT6_NUD_FAIL_HARD;
551 #ifdef CONFIG_IPV6_ROUTER_PREF
552 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
554 if (strict & RT6_LOOKUP_F_REACHABLE) {
555 int n = rt6_check_neigh(rt);
/*
 * Weigh @rt against the best candidate found so far (@match, score *mpri).
 * Expired routes are tested first.  A score of RT6_NUD_FAIL_DO_RR is mapped
 * to 0, RT6_NUD_FAIL_HARD has its own branch, and RT6_LOOKUP_F_REACHABLE is
 * consulted afterwards.  *do_rr receives the local match_do_rr flag.
 */
562 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
563 int *mpri, struct rt6_info *match,
567 bool match_do_rr = false;
569 if (rt6_check_expired(rt))
572 m = rt6_score_route(rt, oif, strict);
573 if (m == RT6_NUD_FAIL_DO_RR) {
575 m = 0; /* lowest valid score */
576 } else if (m == RT6_NUD_FAIL_HARD) {
580 if (strict & RT6_LOOKUP_F_REACHABLE)
583 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
585 *do_rr = match_do_rr;
/*
 * Search for the best route among the entries of node @fn.  The chain is
 * scanned in two passes, first from @rr_head to the end and then from
 * fn->leaf up to @rr_head; entries whose metric differs from @metric are
 * handled apart in both passes.  A final pass runs find_match() over the
 * chain that starts at cont.
 */
593 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594 struct rt6_info *rr_head,
595 u32 metric, int oif, int strict,
598 struct rt6_info *rt, *match, *cont;
603 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604 if (rt->rt6i_metric != metric) {
609 match = find_match(rt, oif, strict, &mpri, match, do_rr);
612 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613 if (rt->rt6i_metric != metric) {
618 match = find_match(rt, oif, strict, &mpri, match, do_rr);
624 for (rt = cont; rt; rt = rt->dst.rt6_next)
625 match = find_match(rt, oif, strict, &mpri, match, do_rr);
/*
 * Select the route to use from node @fn.  The starting point rt0 is tied to
 * fn->rr_ptr (one path shown re-seeds it from fn->leaf) and find_rr_leaf()
 * searches among the routes sharing rt0's metric.  The round-robin step
 * looks at whether rt0 has a successor with the same metric.  Returns the
 * match, or the ip6_null_entry of the namespace of rt0's device.
 */
630 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
632 struct rt6_info *match, *rt0;
638 fn->rr_ptr = rt0 = fn->leaf;
640 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
644 struct rt6_info *next = rt0->dst.rt6_next;
646 /* no entries matched; do round-robin */
647 if (!next || next->rt6i_metric != rt0->rt6i_metric)
654 net = dev_net(rt0->dst.dev);
655 return match ? match : net->ipv6.ip6_null_entry;
/* True when the route carries RTF_NONEXTHOP or RTF_GATEWAY. */
658 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
660 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
663 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option (@opt, @len bytes) announced by router
 * @gwaddr on @dev.
 *
 * The option is checked for minimum size and for consistency between its
 * length field and prefix_len; ICMPV6_ROUTER_PREF_INVALID is singled out.
 * The lifetime is converted with addrconf_timeout_fixup().  The prefix is
 * used in place when the length field is 3, otherwise a masked copy is built
 * with ipv6_addr_prefix().  An existing route is looked up with
 * rt6_get_dflt_router() for prefix_len 0 and rt6_get_route_info() otherwise;
 * the combination of an existing route and a zero lifetime has its own
 * branch, and rt6_add_route_info() creates a route.  Finally the preference
 * bits are refreshed and the expiry is either cleared (non-finite lifetime)
 * or set to jiffies + HZ * lifetime.
 */
664 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
665 const struct in6_addr *gwaddr)
667 struct net *net = dev_net(dev);
668 struct route_info *rinfo = (struct route_info *) opt;
669 struct in6_addr prefix_buf, *prefix;
671 unsigned long lifetime;
674 if (len < sizeof(struct route_info)) {
678 /* Sanity check for prefix_len and length */
679 if (rinfo->length > 3) {
681 } else if (rinfo->prefix_len > 128) {
683 } else if (rinfo->prefix_len > 64) {
684 if (rinfo->length < 2) {
687 } else if (rinfo->prefix_len > 0) {
688 if (rinfo->length < 1) {
693 pref = rinfo->route_pref;
694 if (pref == ICMPV6_ROUTER_PREF_INVALID)
697 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
699 if (rinfo->length == 3)
700 prefix = (struct in6_addr *)rinfo->prefix;
702 /* this function is safe */
703 ipv6_addr_prefix(&prefix_buf,
704 (struct in6_addr *)rinfo->prefix,
706 prefix = &prefix_buf;
709 if (rinfo->prefix_len == 0)
710 rt = rt6_get_dflt_router(gwaddr, dev);
712 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
713 gwaddr, dev->ifindex);
715 if (rt && !lifetime) {
721 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
724 rt->rt6i_flags = RTF_ROUTEINFO |
725 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
728 if (!addrconf_finite_timeout(lifetime))
729 rt6_clean_expires(rt);
731 rt6_set_expires(rt, jiffies + HZ * lifetime);
/*
 * Step back from @fn to another node to retry a lookup on.  The walk stops
 * at a node flagged RTN_TL_ROOT.  When pn has a subtree that is not @fn,
 * @saddr is looked up in that subtree.  A node with RTN_RTINFO set is what
 * the search is after.
 */
739 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
740 struct in6_addr *saddr)
742 struct fib6_node *pn;
744 if (fn->fn_flags & RTN_TL_ROOT)
747 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
748 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
751 if (fn->fn_flags & RTN_RTINFO)
/*
 * Per-table lookup handed to fib6_rule_lookup() by ip6_route_lookup() and
 * rt6_lookup().  With table->tb6_lock read-held (BHs off) it finds the node
 * for the flow's daddr/saddr, narrows the result with rt6_device_match(),
 * applies multipath selection when the route has siblings and no output
 * interface was requested, and backtracks via fib6_backtrack() when the
 * result is the null entry.  The chosen dst is marked used with dst_use()
 * before the lock is released.
 */
756 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
757 struct fib6_table *table,
758 struct flowi6 *fl6, int flags)
760 struct fib6_node *fn;
763 read_lock_bh(&table->tb6_lock);
764 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
767 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
768 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
769 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
770 if (rt == net->ipv6.ip6_null_entry) {
771 fn = fib6_backtrack(fn, &fl6->saddr);
775 dst_use(&rt->dst, jiffies);
776 read_unlock_bh(&table->tb6_lock);
/*
 * Exported (GPL) lookup entry point: runs ip6_pol_route_lookup() through
 * fib6_rule_lookup() for @fl6 with the caller's @flags.
 */
781 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
784 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
786 EXPORT_SYMBOL_GPL(ip6_route_lookup);
/*
 * Exported route lookup for @daddr, optionally constrained by @saddr and
 * @oif.  A non-zero @strict selects RT6_LOOKUP_F_IFACE; when the source
 * address is copied into the flow, RT6_LOOKUP_F_HAS_SADDR is added.  The
 * lookup goes through fib6_rule_lookup() with ip6_pol_route_lookup(), and
 * the return shown hands back the dst cast to struct rt6_info.
 */
788 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789 const struct in6_addr *saddr, int oif, int strict)
791 struct flowi6 fl6 = {
795 struct dst_entry *dst;
796 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
799 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
800 flags |= RT6_LOOKUP_F_HAS_SADDR;
803 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
805 return (struct rt6_info *) dst;
811 EXPORT_SYMBOL(rt6_lookup);
/*
 * Insert @rt into its own table (rt->rt6i_table) with fib6_add(), taking the
 * table's write lock with BHs disabled around the call.  @info and @mxc are
 * passed through to fib6_add() unchanged.
 */
813 /* ip6_ins_rt is called with FREE table->tb6_lock.
814 It takes new route entry, the addition fails by any reason the
815 route is freed. In any case, if caller does not hold it, it may
819 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
820 struct mx6_config *mxc)
823 struct fib6_table *table;
825 table = rt->rt6i_table;
826 write_lock_bh(&table->tb6_lock);
827 err = fib6_add(&table->tb6_root, rt, info, mxc);
828 write_unlock_bh(&table->tb6_lock);
/*
 * Convenience form of __ip6_ins_rt(): builds an nl_info bound to the
 * namespace of the route's device and an mx6_config without metrics.
 */
833 int ip6_ins_rt(struct rt6_info *rt)
835 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
836 struct mx6_config mxc = { .mx = NULL, };
838 return __ip6_ins_rt(rt, &info, &mxc);
/*
 * Build a cache clone of @ort for destination @daddr: the copy comes from
 * ip6_rt_copy() and is flagged RTF_CACHE.  For routes that are neither
 * gateway nor nonexthop routes, RTF_ANYCAST is added when @daddr equals the
 * address of a prefix shorter than 128 bits, and with CONFIG_IPV6_SUBTREES a
 * source-keyed clone is narrowed to exactly @saddr (plen 128).
 */
841 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
842 const struct in6_addr *daddr,
843 const struct in6_addr *saddr)
851 rt = ip6_rt_copy(ort, daddr);
854 rt->rt6i_flags |= RTF_CACHE;
856 if (!rt6_is_gw_or_nonexthop(ort)) {
857 if (ort->rt6i_dst.plen != 128 &&
858 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859 rt->rt6i_flags |= RTF_ANYCAST;
860 #ifdef CONFIG_IPV6_SUBTREES
861 if (rt->rt6i_src.plen && saddr) {
862 rt->rt6i_src.addr = *saddr;
863 rt->rt6i_src.plen = 128;
/*
 * Per-table route resolution shared by ip6_pol_route_input() and
 * ip6_pol_route_output(); @oif is the interface index those wrappers pass.
 *
 * RT6_LOOKUP_F_IFACE is inherited from @flags and RT6_LOOKUP_F_REACHABLE is
 * requested when forwarding is off in devconf_all.  Under the table's read
 * lock the node is looked up, rt6_select() picks a route and multipath
 * selection is applied when the route has siblings.  A null-entry result
 * leads to backtracking and, failing that, to a retry with the REACHABLE
 * requirement dropped.  RTF_CACHE routes have their own test after the lock
 * is released; otherwise a clone may be built with ip6_rt_cache_alloc() and
 * inserted with ip6_ins_rt(), and the race path restarts at
 * redo_fib6_lookup_lock.  The tail syncs metrics with
 * rt6_dst_from_metrics_check() and stamps dst.lastuse.
 */
872 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
873 struct flowi6 *fl6, int flags)
875 struct fib6_node *fn, *saved_fn;
876 struct rt6_info *rt, *nrt;
881 strict |= flags & RT6_LOOKUP_F_IFACE;
882 if (net->ipv6.devconf_all->forwarding == 0)
883 strict |= RT6_LOOKUP_F_REACHABLE;
885 redo_fib6_lookup_lock:
886 read_lock_bh(&table->tb6_lock);
888 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
892 rt = rt6_select(fn, oif, strict);
893 if (rt->rt6i_nsiblings)
894 rt = rt6_multipath_select(rt, fl6, oif, strict);
895 if (rt == net->ipv6.ip6_null_entry) {
896 fn = fib6_backtrack(fn, &fl6->saddr);
898 goto redo_rt6_select;
899 else if (strict & RT6_LOOKUP_F_REACHABLE) {
900 /* also consider unreachable route */
901 strict &= ~RT6_LOOKUP_F_REACHABLE;
903 goto redo_rt6_select;
906 read_unlock_bh(&table->tb6_lock);
912 read_unlock_bh(&table->tb6_lock);
914 if (rt->rt6i_flags & RTF_CACHE)
917 if (!rt6_is_gw_or_nonexthop(rt) ||
918 !(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
919 nrt = ip6_rt_cache_alloc(rt, &fl6->daddr, &fl6->saddr);
924 rt = nrt ? : net->ipv6.ip6_null_entry;
928 err = ip6_ins_rt(nrt);
937 * Race condition! In the gap, when table->tb6_lock was
938 * released someone could insert this route. Relookup.
941 goto redo_fib6_lookup_lock;
944 rt6_dst_from_metrics_check(rt);
945 rt->dst.lastuse = jiffies;
/*
 * Per-table lookup for the input path: ip6_pol_route() keyed on the flow's
 * incoming interface (flowi6_iif).
 */
951 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
952 struct flowi6 *fl6, int flags)
954 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
/*
 * Input-side rule lookup.  RT6_LOOKUP_F_IFACE is forced when
 * rt6_need_strict() holds for the destination and the device type is not
 * ARPHRD_PIMREG; the lookup then runs ip6_pol_route_input() through
 * fib6_rule_lookup().
 */
957 static struct dst_entry *ip6_route_input_lookup(struct net *net,
958 struct net_device *dev,
959 struct flowi6 *fl6, int flags)
961 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
962 flags |= RT6_LOOKUP_F_IFACE;
964 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
/*
 * Route a received packet: build a flowi6 from the skb (incoming ifindex,
 * flow info of the IPv6 header, skb mark, next-header value), look it up
 * with RT6_LOOKUP_F_HAS_SADDR and attach the resulting dst to the skb.
 */
967 void ip6_route_input(struct sk_buff *skb)
969 const struct ipv6hdr *iph = ipv6_hdr(skb);
970 struct net *net = dev_net(skb->dev);
971 int flags = RT6_LOOKUP_F_HAS_SADDR;
972 struct flowi6 fl6 = {
973 .flowi6_iif = skb->dev->ifindex,
976 .flowlabel = ip6_flowinfo(iph),
977 .flowi6_mark = skb->mark,
978 .flowi6_proto = iph->nexthdr,
981 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
/*
 * Per-table lookup for the output path: ip6_pol_route() keyed on the flow's
 * outgoing interface (flowi6_oif).
 */
984 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
985 struct flowi6 *fl6, int flags)
987 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
/*
 * Exported output route lookup.  The flow's iif is set to LOOPBACK_IFINDEX.
 * RT6_LOOKUP_F_IFACE is requested when the socket is bound to a device or
 * the destination needs a strict match, RT6_LOOKUP_F_HAS_SADDR when a source
 * address is present, and the socket's source-address preferences are folded
 * in through rt6_srcprefs2flags().
 */
990 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
995 fl6->flowi6_iif = LOOPBACK_IFINDEX;
997 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
998 flags |= RT6_LOOKUP_F_IFACE;
1000 if (!ipv6_addr_any(&fl6->saddr))
1001 flags |= RT6_LOOKUP_F_HAS_SADDR;
1003 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1005 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1007 EXPORT_SYMBOL(ip6_route_output);
/*
 * Build a blackhole counterpart of @dst_orig.  The new dst is allocated from
 * ip6_dst_blackhole_ops on the same device, discards on input and output,
 * and takes over the original's metrics (shared when they are read-only,
 * copied otherwise), inet6_dev (with a reference), gateway, flags and
 * destination/source keys; its metric is 0.  @dst_orig is released in all
 * cases.  Returns the new dst or ERR_PTR(-ENOMEM).
 */
1009 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1011 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1012 struct dst_entry *new = NULL;
1014 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1018 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1021 new->input = dst_discard;
1022 new->output = dst_discard_sk;
1024 if (dst_metrics_read_only(&ort->dst))
1025 new->_metrics = ort->dst._metrics;
1027 dst_copy_metrics(new, &ort->dst);
1028 rt->rt6i_idev = ort->rt6i_idev;
1030 in6_dev_hold(rt->rt6i_idev);
1032 rt->rt6i_gateway = ort->rt6i_gateway;
1033 rt->rt6i_flags = ort->rt6i_flags;
1034 rt->rt6i_metric = 0;
1036 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1037 #ifdef CONFIG_IPV6_SUBTREES
1038 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1044 dst_release(dst_orig);
1045 return new ? new : ERR_PTR(-ENOMEM);
1049 * Destination cache support functions
/*
 * Keep the route's metrics in step with dst.from: when the two metrics
 * pointers differ, the route's metrics are re-initialised from those of
 * dst.from.
 */
1052 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1055 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1056 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
/*
 * .check hook.  Validation tests, in order: the route must still be attached
 * to a fib6 node whose fn_sernum equals @cookie, and it must not be expired.
 * A route that passes has its metrics synced through
 * rt6_dst_from_metrics_check().
 */
1059 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1061 struct rt6_info *rt;
1063 rt = (struct rt6_info *) dst;
1065 /* All IPV6 dsts are created with ->obsolete set to the value
1066 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1067 * into this function always.
1069 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1072 if (rt6_check_expired(rt))
1075 rt6_dst_from_metrics_check(rt);
/*
 * .negative_advice hook.  For RTF_CACHE routes the expiry state is
 * consulted through rt6_check_expired().
 */
1080 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1082 struct rt6_info *rt = (struct rt6_info *) dst;
1085 if (rt->rt6i_flags & RTF_CACHE) {
1086 if (rt6_check_expired(rt)) {
/*
 * .link_failure hook.  Sends an ICMPv6 destination-unreachable (address
 * unreachable) error for the skb, then looks at the skb's route: RTF_CACHE
 * routes have their own branch, and for a default route attached to a node
 * the node's fn_sernum is set to -1.
 * NOTE(review): presumably so that the sernum comparison in ip6_dst_check()
 * fails for cookies taken earlier - confirm.
 */
1098 static void ip6_link_failure(struct sk_buff *skb)
1100 struct rt6_info *rt;
1102 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1104 rt = (struct rt6_info *) skb_dst(skb);
1106 if (rt->rt6i_flags & RTF_CACHE) {
1110 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1111 rt->rt6i_node->fn_sernum = -1;
/*
 * .update_pmtu hook.  Acts only on RTF_CACHE routes and only when @mtu is
 * below the current dst_mtu().  The route is flagged RTF_MODIFIED, values
 * below IPV6_MIN_MTU are tested for, the result is stored in rt6i_pmtu and
 * the expiry is refreshed using the ip6_rt_mtu_expires sysctl.
 */
1116 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1117 struct sk_buff *skb, u32 mtu)
1119 struct rt6_info *rt6 = (struct rt6_info *)dst;
1122 if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1123 struct net *net = dev_net(dst->dev);
1125 rt6->rt6i_flags |= RTF_MODIFIED;
1126 if (mtu < IPV6_MIN_MTU)
1129 rt6->rt6i_pmtu = mtu;
1130 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
/*
 * Exported (GPL) PMTU update.  The flow is rebuilt from the IPv6 header
 * found at skb->data (addresses and flow info) plus the interface index and
 * mark; a zero mark falls back to IP6_REPLY_MARK().  The route is resolved
 * with ip6_route_output() and updated through ip6_rt_update_pmtu() with @mtu
 * converted from network byte order.
 */
1134 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1137 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1138 struct dst_entry *dst;
1141 memset(&fl6, 0, sizeof(fl6));
1142 fl6.flowi6_oif = oif;
1143 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1144 fl6.daddr = iph->daddr;
1145 fl6.saddr = iph->saddr;
1146 fl6.flowlabel = ip6_flowinfo(iph);
1148 dst = ip6_route_output(net, NULL, &fl6);
1150 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1153 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
/*
 * Socket flavour of ip6_update_pmtu(): namespace, bound device and mark are
 * taken from @sk.
 */
1155 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1157 ip6_update_pmtu(skb, sock_net(sk), mtu,
1158 sk->sk_bound_dev_if, sk->sk_mark);
1160 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1162 /* Handle redirects */
/*
 * Lookup key for redirects.  ip6_route_redirect() fills in the gateway and
 * passes the embedded flow to fib6_rule_lookup(); __ip6_route_redirect()
 * casts its flowi6 argument back to this type to read the gateway.
 */
1163 struct ip6rd_flowi {
1165 struct in6_addr gateway;
/*
 * Per-table lookup used for redirects (handed to fib6_rule_lookup() by
 * ip6_route_redirect()).  With the table's read lock held, the routes of the
 * matched node are scanned and tested for expiry, for RTF_GATEWAY, for an
 * output device equal to flowi6_oif and for a gateway equal to the one
 * stored in the ip6rd_flowi.  A route carrying dst.error is replaced by the
 * null entry, and a null-entry result leads to fib6_backtrack().
 */
1168 static struct rt6_info *__ip6_route_redirect(struct net *net,
1169 struct fib6_table *table,
1173 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1174 struct rt6_info *rt;
1175 struct fib6_node *fn;
1177 /* Get the "current" route for this destination and
1178 * check if the redirect has come from approriate router.
1180 * RFC 4861 specifies that redirects should only be
1181 * accepted if they come from the nexthop to the target.
1182 * Due to the way the routes are chosen, this notion
1183 * is a bit fuzzy and one might need to check all possible
1187 read_lock_bh(&table->tb6_lock);
1188 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1190 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1191 if (rt6_check_expired(rt))
1195 if (!(rt->rt6i_flags & RTF_GATEWAY))
1197 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1199 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1205 rt = net->ipv6.ip6_null_entry;
1206 else if (rt->dst.error) {
1207 rt = net->ipv6.ip6_null_entry;
1211 if (rt == net->ipv6.ip6_null_entry) {
1212 fn = fib6_backtrack(fn, &fl6->saddr);
1220 read_unlock_bh(&table->tb6_lock);
/*
 * Find the route a redirect from @gateway applies to: the gateway is stored
 * in an ip6rd_flowi and __ip6_route_redirect() is run through
 * fib6_rule_lookup() with RT6_LOOKUP_F_HAS_SADDR.
 */
1225 static struct dst_entry *ip6_route_redirect(struct net *net,
1226 const struct flowi6 *fl6,
1227 const struct in6_addr *gateway)
1229 int flags = RT6_LOOKUP_F_HAS_SADDR;
1230 struct ip6rd_flowi rdfl;
1233 rdfl.gateway = *gateway;
1235 return fib6_rule_lookup(net, &rdfl.fl6,
1236 flags, __ip6_route_redirect);
/*
 * Exported (GPL) redirect handling.  The flow is rebuilt from the IPv6
 * header at skb->data together with @oif and @mark (iif is
 * LOOPBACK_IFINDEX); the gateway is the source address of ipv6_hdr(skb).
 * The matching route is then passed to rt6_do_redirect().
 */
1239 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1241 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1242 struct dst_entry *dst;
1245 memset(&fl6, 0, sizeof(fl6));
1246 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1247 fl6.flowi6_oif = oif;
1248 fl6.flowi6_mark = mark;
1249 fl6.daddr = iph->daddr;
1250 fl6.saddr = iph->saddr;
1251 fl6.flowlabel = ip6_flowinfo(iph);
1253 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1254 rt6_do_redirect(dst, NULL, skb);
1257 EXPORT_SYMBOL_GPL(ip6_redirect);
/*
 * Redirect handling keyed on the redirect message itself: the flow's
 * destination is msg->dest, its source is the destination address of the
 * packet's IPv6 header, and the gateway is that header's source address.
 * The matching route is passed to rt6_do_redirect().
 */
1259 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1262 const struct ipv6hdr *iph = ipv6_hdr(skb);
1263 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1264 struct dst_entry *dst;
1267 memset(&fl6, 0, sizeof(fl6));
1268 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1269 fl6.flowi6_oif = oif;
1270 fl6.flowi6_mark = mark;
1271 fl6.daddr = msg->dest;
1272 fl6.saddr = iph->daddr;
1274 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1275 rt6_do_redirect(dst, NULL, skb);
/*
 * Socket flavour of ip6_redirect(): namespace, bound device and mark are
 * taken from @sk.
 */
1279 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1281 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1283 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
/*
 * .default_advmss hook.  Starts from dst_mtu() minus the IPv6 and TCP header
 * sizes, raises the result to the ip6_rt_min_advmss sysctl when it is
 * smaller, and tests it against IPV6_MAXPLEN minus the TCP header size.
 */
1285 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1287 struct net_device *dev = dst->dev;
1288 unsigned int mtu = dst_mtu(dst);
1289 struct net *net = dev_net(dev);
1291 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1293 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1294 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1297 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1298 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1299 * IPV6_MAXPLEN is also valid and means: "any MSS,
1300 * rely only on pmtu discovery"
1302 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * MTU of an IPv6 route.  Candidate sources, in the order they appear: the
 * route's rt6i_pmtu, the raw RTAX_MTU metric and the mtu6 setting of the
 * device's inet6_dev.  The result is capped at IP6_MAX_MTU.
 */
1307 static unsigned int ip6_mtu(const struct dst_entry *dst)
1309 const struct rt6_info *rt = (const struct rt6_info *)dst;
1310 unsigned int mtu = rt->rt6i_pmtu;
1311 struct inet6_dev *idev;
1316 mtu = dst_metric_raw(dst, RTAX_MTU);
1323 idev = __in6_dev_get(dst->dev);
1325 mtu = idev->cnf.mtu6;
1329 return min_t(unsigned int, mtu, IP6_MAX_MTU);
/*
 * Singly linked list (through dst.next) of the dsts created by
 * icmp6_dst_alloc(); every access in this file is made with icmp6_dst_lock
 * held and BHs disabled.
 */
1332 static struct dst_entry *icmp6_dst_gc_list;
1333 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Allocate a host dst (plen 128) towards fl6->daddr on @dev.  The entry uses
 * ip6_output(), starts with a reference count of 1, has its hop-limit metric
 * set to 0 and is linked onto icmp6_dst_gc_list; the fib6 garbage collector
 * is kicked and the dst is passed through xfrm_lookup().
 * Error values shown: ERR_PTR(-ENODEV) when @dev has no inet6_dev and
 * ERR_PTR(-ENOMEM) when the allocation fails.
 */
1335 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1338 struct dst_entry *dst;
1339 struct rt6_info *rt;
1340 struct inet6_dev *idev = in6_dev_get(dev);
1341 struct net *net = dev_net(dev);
1343 if (unlikely(!idev))
1344 return ERR_PTR(-ENODEV);
1346 rt = ip6_dst_alloc(net, dev, 0, NULL);
1347 if (unlikely(!rt)) {
1349 dst = ERR_PTR(-ENOMEM);
1353 rt->dst.flags |= DST_HOST;
1354 rt->dst.output = ip6_output;
1355 atomic_set(&rt->dst.__refcnt, 1);
1356 rt->rt6i_gateway = fl6->daddr;
1357 rt->rt6i_dst.addr = fl6->daddr;
1358 rt->rt6i_dst.plen = 128;
1359 rt->rt6i_idev = idev;
1360 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1362 spin_lock_bh(&icmp6_dst_lock);
1363 rt->dst.next = icmp6_dst_gc_list;
1364 icmp6_dst_gc_list = &rt->dst;
1365 spin_unlock_bh(&icmp6_dst_lock);
1367 fib6_force_start_gc(net);
1369 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/*
 * Walk icmp6_dst_gc_list under icmp6_dst_lock; entries whose reference count
 * has dropped to zero are singled out.
 * NOTE(review): presumably those entries are unlinked and freed - confirm
 * against the full body.
 */
1375 int icmp6_dst_gc(void)
1377 struct dst_entry *dst, **pprev;
1380 spin_lock_bh(&icmp6_dst_lock);
1381 pprev = &icmp6_dst_gc_list;
1383 while ((dst = *pprev) != NULL) {
1384 if (!atomic_read(&dst->__refcnt)) {
1393 spin_unlock_bh(&icmp6_dst_lock);
/*
 * Walk icmp6_dst_gc_list under icmp6_dst_lock and call @func(rt, arg) on
 * every entry; a non-zero return selects the entry for the branch below.
 */
1398 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1401 struct dst_entry *dst, **pprev;
1403 spin_lock_bh(&icmp6_dst_lock);
1404 pprev = &icmp6_dst_gc_list;
1405 while ((dst = *pprev) != NULL) {
1406 struct rt6_info *rt = (struct rt6_info *) dst;
1407 if (func(rt, arg)) {
1414 spin_unlock_bh(&icmp6_dst_lock);
/*
 * Garbage-collect IPv6 dst entries for the namespace that owns @ops.
 *
 * The early test covers the case where the last run was less than
 * ip6_rt_gc_min_interval ago and the entry count does not exceed
 * ip6_rt_max_size.  Otherwise ip6_rt_gc_expire is incremented and passed to
 * fib6_run_gc(); when the recount is below ops->gc_thresh the expire value
 * is reset to half of ip6_rt_gc_timeout.  The expire value then decays by
 * its own value shifted right by ip6_rt_gc_elasticity.
 * Returns non-zero when the entry count is still above ip6_rt_max_size.
 */
1417 static int ip6_dst_gc(struct dst_ops *ops)
1419 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1420 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1421 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1422 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1423 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1424 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1427 entries = dst_entries_get_fast(ops);
1428 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1429 entries <= rt_max_size)
1432 net->ipv6.ip6_rt_gc_expire++;
1433 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1434 entries = dst_entries_get_slow(ops);
1435 if (entries < ops->gc_thresh)
1436 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1438 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1439 return entries > rt_max_size;
/*
 * Translate the netlink metric attributes of @cfg (fc_mx, fc_mx_len) into
 * @mxc.  A zeroed array of RTAX_MAX u32 values is allocated with GFP_KERNEL
 * and the attributes are walked: types above RTAX_MAX are tested for,
 * RTAX_CC_ALGO carries a congestion-control name that is converted with
 * tcp_ca_get_key_by_name() (TCP_CA_UNSPEC is tested for), and every other
 * type is read as a u32.  Bit (type - 1) of mxc->mx_valid marks each metric
 * that was supplied.
 */
1442 static int ip6_convert_metrics(struct mx6_config *mxc,
1443 const struct fib6_config *cfg)
1452 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1456 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1457 int type = nla_type(nla);
1462 if (unlikely(type > RTAX_MAX))
1464 if (type == RTAX_CC_ALGO) {
1465 char tmp[TCP_CA_NAME_MAX];
1467 nla_strlcpy(tmp, nla, sizeof(tmp));
1468 val = tcp_ca_get_key_by_name(tmp);
1469 if (val == TCP_CA_UNSPEC)
1472 val = nla_get_u32(nla);
1476 __set_bit(type - 1, mxc->mx_valid);
// Build a new IPv6 route from the request in cfg and insert it into a FIB
// table. Visible stages, in order:
//  - check the destination and source prefix lengths against 128 (and the
//    source length at all when CONFIG_IPV6_SUBTREES is not defined);
//  - look up the device and its inet6_dev when cfg->fc_ifindex is set;
//  - default the metric to IP6_RT_PRIO_USER and the protocol to RTPROT_BOOT;
//  - select or create the table, allocate the rt6_info, then fill in expiry,
//    input/output handlers, prefixes and metric;
//  - turn reject requests and non-local routes via loopback into
//    RTF_REJECT | RTF_NONEXTHOP routes bound to the loopback device;
//  - validate the gateway and the preferred source address;
//  - convert the metrics and hand the route to __ip6_ins_rt().
// cfg is modified in place (fc_metric, fc_protocol, fc_nlinfo.nl_net).
// NOTE(review): this listing omits short lines (braces, gotos, returns,
// labels), so error handling and reference cleanup are not visible here.
1488 int ip6_route_add(struct fib6_config *cfg)
1491 struct net *net = cfg->fc_nlinfo.nl_net;
1492 struct rt6_info *rt = NULL;
1493 struct net_device *dev = NULL;
1494 struct inet6_dev *idev = NULL;
1495 struct fib6_table *table;
1496 struct mx6_config mxc = { .mx = NULL, };
// Prefix lengths are compared with the 128-bit address size.
1499 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1501 #ifndef CONFIG_IPV6_SUBTREES
1502 if (cfg->fc_src_len)
1505 if (cfg->fc_ifindex) {
1507 dev = dev_get_by_index(net, cfg->fc_ifindex);
1510 idev = in6_dev_get(dev);
// A zero metric is replaced by the user-route default priority.
1515 if (cfg->fc_metric == 0)
1516 cfg->fc_metric = IP6_RT_PRIO_USER;
// Netlink requests without NLM_F_CREATE first try an existing table; a
// warning is logged on the path that still creates one.
1519 if (cfg->fc_nlinfo.nlh &&
1520 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1521 table = fib6_get_table(net, cfg->fc_table);
1523 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1524 table = fib6_new_table(net, cfg->fc_table);
1527 table = fib6_new_table(net, cfg->fc_table);
// Routes without RTF_ADDRCONF are allocated with DST_NOCOUNT.
1533 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
// Expiry comes from cfg->fc_expires (clock_t converted to jiffies) when
// RTF_EXPIRES is requested; the other visible path clears it.
1540 if (cfg->fc_flags & RTF_EXPIRES)
1541 rt6_set_expires(rt, jiffies +
1542 clock_t_to_jiffies(cfg->fc_expires));
1544 rt6_clean_expires(rt);
// An unspecified protocol is recorded as RTPROT_BOOT.
1546 if (cfg->fc_protocol == RTPROT_UNSPEC)
1547 cfg->fc_protocol = RTPROT_BOOT;
1548 rt->rt6i_protocol = cfg->fc_protocol;
1550 addr_type = ipv6_addr_type(&cfg->fc_dst);
// The input handler depends on the destination: multicast, local delivery,
// or forwarding.
1552 if (addr_type & IPV6_ADDR_MULTICAST)
1553 rt->dst.input = ip6_mc_input;
1554 else if (cfg->fc_flags & RTF_LOCAL)
1555 rt->dst.input = ip6_input;
1557 rt->dst.input = ip6_forward;
1559 rt->dst.output = ip6_output;
// Only the first fc_dst_len bits of the destination are stored; a full
// 128-bit prefix marks the dst as a host route.
1561 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1562 rt->rt6i_dst.plen = cfg->fc_dst_len;
1563 if (rt->rt6i_dst.plen == 128)
1564 rt->dst.flags |= DST_HOST;
1566 #ifdef CONFIG_IPV6_SUBTREES
1567 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1568 rt->rt6i_src.plen = cfg->fc_src_len;
1571 rt->rt6i_metric = cfg->fc_metric;
1573 /* We cannot add true routes via loopback here,
1574 they would result in kernel looping; promote them to reject routes
1576 if ((cfg->fc_flags & RTF_REJECT) ||
1577 (dev && (dev->flags & IFF_LOOPBACK) &&
1578 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1579 !(cfg->fc_flags & RTF_LOCAL))) {
1580 /* hold loopback dev/idev if we haven't done so. */
1581 if (dev != net->loopback_dev) {
1586 dev = net->loopback_dev;
1588 idev = in6_dev_get(dev);
1594 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
// The route type selects the error code and the matching pair of
// output/input handlers (discard, prohibit, or packet-discard).
1595 switch (cfg->fc_type) {
1597 rt->dst.error = -EINVAL;
1598 rt->dst.output = dst_discard_sk;
1599 rt->dst.input = dst_discard;
1602 rt->dst.error = -EACCES;
1603 rt->dst.output = ip6_pkt_prohibit_out;
1604 rt->dst.input = ip6_pkt_prohibit;
1608 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1610 rt->dst.output = ip6_pkt_discard_out;
1611 rt->dst.input = ip6_pkt_discard;
// Gateway routes: the next hop address is validated before it is stored.
1617 if (cfg->fc_flags & RTF_GATEWAY) {
1618 const struct in6_addr *gw_addr;
1621 gw_addr = &cfg->fc_gateway;
1623 /* if gw_addr is local we will fail to detect this in case
1624 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1625 * will return already-added prefix route via interface that
1626 * prefix route was assigned to, which might be non-loopback.
1629 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1632 rt->rt6i_gateway = *gw_addr;
1633 gwa_type = ipv6_addr_type(gw_addr);
// Anything other than a link-local unicast gateway takes the exception
// path below, which resolves the gateway through rt6_lookup().
1635 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1636 struct rt6_info *grt;
1638 /* IPv6 strictly inhibits using not link-local
1639 addresses as nexthop address.
1640 Otherwise, router will not able to send redirects.
1641 It is very good, but in some (rare!) circumstances
1642 (SIT, PtP, NBMA NOARP links) it is handy to allow
1643 some exceptions. --ANK
1645 if (!(gwa_type & IPV6_ADDR_UNICAST))
1648 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1650 err = -EHOSTUNREACH;
1654 if (dev != grt->dst.dev) {
1660 idev = grt->rt6i_idev;
1662 in6_dev_hold(grt->rt6i_idev);
1664 if (!(grt->rt6i_flags & RTF_GATEWAY))
1672 if (!dev || (dev->flags & IFF_LOOPBACK))
// A preferred source, when given, is checked with ipv6_chk_addr() against
// the device and stored with a prefix length of 128; the other visible
// assignment leaves the length at 0.
1680 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1681 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1685 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1686 rt->rt6i_prefsrc.plen = 128;
1688 rt->rt6i_prefsrc.plen = 0;
1690 rt->rt6i_flags = cfg->fc_flags;
1694 rt->rt6i_idev = idev;
1695 rt->rt6i_table = table;
// The notification namespace is taken from the device that was finally
// selected.
1697 cfg->fc_nlinfo.nl_net = dev_net(dev);
// Metrics are converted first, then the route is inserted.
1699 err = ip6_convert_metrics(&mxc, cfg);
1703 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
// Delete rt from the table it belongs to. fib6_del() is called with the
// table write lock held (the _bh lock variant).
// The namespace null entry is tested first; how that case is handled is on
// lines this listing does not show.
1717 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1720 struct fib6_table *table;
1721 struct net *net = dev_net(rt->dst.dev);
1723 if (rt == net->ipv6.ip6_null_entry) {
1728 table = rt->rt6i_table;
1729 write_lock_bh(&table->tb6_lock);
1730 err = fib6_del(rt, info);
1731 write_unlock_bh(&table->tb6_lock);
// Delete rt using an nl_info whose namespace is derived from the route
// device; returns whatever __ip6_del_rt() returns.
1738 int ip6_del_rt(struct rt6_info *rt)
1740 struct nl_info info = {
1741 .nl_net = dev_net(rt->dst.dev),
1743 return __ip6_del_rt(rt, &info);
// Find the route described by cfg and delete it.
// The table is taken from cfg->fc_table, the FIB node is located by the
// destination and source prefixes, and the leaf list of that node is walked
// under the table read lock. On the visible match path the lock is dropped
// and __ip6_del_rt() does the removal.
// NOTE(review): the statements attached to each filter below (likely
// continue) and the not-found return value are on lines not shown here.
1746 static int ip6_route_del(struct fib6_config *cfg)
1748 struct fib6_table *table;
1749 struct fib6_node *fn;
1750 struct rt6_info *rt;
1753 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1757 read_lock_bh(&table->tb6_lock);
1759 fn = fib6_locate(&table->tb6_root,
1760 &cfg->fc_dst, cfg->fc_dst_len,
1761 &cfg->fc_src, cfg->fc_src_len);
1764 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
// Filter 1: cache entries are only considered when the request asks for
// RTF_CACHE.
1765 if ((rt->rt6i_flags & RTF_CACHE) &&
1766 !(cfg->fc_flags & RTF_CACHE))
// Filter 2: interface index, when one was given.
1768 if (cfg->fc_ifindex &&
1770 rt->dst.dev->ifindex != cfg->fc_ifindex))
// Filter 3: gateway address, when RTF_GATEWAY was requested.
1772 if (cfg->fc_flags & RTF_GATEWAY &&
1773 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
// Filter 4: metric, when a non-zero one was given.
1775 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
// The read lock is released before the delete, which takes the write lock.
1778 read_unlock_bh(&table->tb6_lock);
1780 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1783 read_unlock_bh(&table->tb6_lock);
// Process an ICMPv6 Redirect carried in skb for the route dst.
// Visible steps, in order: compute the option length; check destination
// and target addresses; check the forwarding and accept_redirects settings
// of the receiving interface; parse the ND options and the optional target
// link-layer address; confirm the current route; update the neighbour
// entry of the target; copy the route for msg->dest with the new gateway,
// insert it, and raise NETEVENT_REDIRECT.
// NOTE(review): short lines (returns, gotos, labels, the on_link
// assignments) are absent from this listing, so exit paths are not visible.
// NOTE(review): the debug messages use two prefixes, rt6_do_redirect and
// rt6_redirect; worth unifying in a code change.
1788 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1790 struct net *net = dev_net(skb->dev);
1791 struct netevent_redirect netevent;
1792 struct rt6_info *rt, *nrt = NULL;
1793 struct ndisc_options ndopts;
1794 struct inet6_dev *in6_dev;
1795 struct neighbour *neigh;
1797 int optlen, on_link;
// Option bytes = data after the transport header minus the fixed message.
1800 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1801 optlen -= sizeof(*msg);
1804 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1808 msg = (struct rd_msg *)icmp6_hdr(skb);
1810 if (ipv6_addr_is_multicast(&msg->dest)) {
1811 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
// A target equal to the destination takes its own branch; any other target
// must be a link-local unicast address.
1816 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1818 } else if (ipv6_addr_type(&msg->target) !=
1819 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1820 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1824 in6_dev = __in6_dev_get(skb->dev);
// Tested together: forwarding enabled, or accept_redirects disabled.
1827 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1831 * The IP source address of the Redirect MUST be the same as the current
1832 * first-hop router for the specified ICMP Destination Address.
1835 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1836 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1841 if (ndopts.nd_opts_tgt_lladdr) {
1842 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1845 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1850 rt = (struct rt6_info *) dst;
1851 if (rt == net->ipv6.ip6_null_entry) {
1852 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1856 /* Redirect received -> path was valid.
1857 * Look, redirects are sent only in response to data packets,
1858 * so that this nexthop apparently is reachable. --ANK
1860 dst_confirm(&rt->dst);
1862 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1867 * We have finally decided to accept it.
// The neighbour moves to NUD_STALE with override flags; the router flags
// are added only when on_link is zero.
1870 neigh_update(neigh, lladdr, NUD_STALE,
1871 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1872 NEIGH_UPDATE_F_OVERRIDE|
1873 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1874 NEIGH_UPDATE_F_ISROUTER))
1877 nrt = ip6_rt_copy(rt, &msg->dest);
// Flags of the new cache route; RTF_GATEWAY is cleared again on a path
// whose condition is not shown (presumably the on-link case).
1881 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1883 nrt->rt6i_flags &= ~RTF_GATEWAY;
// The gateway of the new route is the key of the neighbour entry.
1885 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1887 if (ip6_ins_rt(nrt))
1890 netevent.old = &rt->dst;
1891 netevent.new = &nrt->dst;
1892 netevent.daddr = &msg->dest;
1893 netevent.neigh = neigh;
1894 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
// When the old route is itself a cache entry an extra reference is taken;
// what is then done with it is on a line not shown.
1896 if (rt->rt6i_flags & RTF_CACHE) {
1897 rt = (struct rt6_info *) dst_clone(&rt->dst);
1902 neigh_release(neigh);
1906 * Misc support functions
// Tie rt to the route it was derived from: clear RTF_EXPIRES on rt, take a
// reference on from, record it in rt->dst.from, and initialise the metrics
// of rt from the metrics pointer of from.
// from must not itself have a dst.from; BUG_ON enforces that.
1909 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1911 BUG_ON(from->dst.from);
1913 rt->rt6i_flags &= ~RTF_EXPIRES;
1914 dst_hold(&from->dst);
1915 rt->dst.from = &from->dst;
1916 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
// Allocate a host route (prefix length 128) for dest that copies handlers,
// error, idev, gateway, flags, preferred source and table from ort, and
// links the copy to ort through rt6_set_from().
// NOTE(review): the allocation failure check and the return statement are
// on lines this listing does not show.
1919 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1920 const struct in6_addr *dest)
1922 struct net *net = dev_net(ort->dst.dev);
1923 struct rt6_info *rt;
// A cache entry is never copied directly; its dst.from is used as the
// source instead.
1925 if (ort->rt6i_flags & RTF_CACHE)
1926 ort = (struct rt6_info *)ort->dst.from;
1928 rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1932 rt->dst.input = ort->dst.input;
1933 rt->dst.output = ort->dst.output;
1934 rt->dst.flags |= DST_HOST;
1936 rt->rt6i_dst.addr = *dest;
1937 rt->rt6i_dst.plen = 128;
1938 rt->dst.error = ort->dst.error;
1939 rt->rt6i_idev = ort->rt6i_idev;
// The copy holds its own reference on the shared inet6_dev.
1941 in6_dev_hold(rt->rt6i_idev);
1942 rt->dst.lastuse = jiffies;
1943 rt->rt6i_gateway = ort->rt6i_gateway;
1944 rt->rt6i_flags = ort->rt6i_flags;
1945 rt6_set_from(rt, ort);
// The metric of the copy is reset to zero rather than inherited.
1946 rt->rt6i_metric = 0;
1948 #ifdef CONFIG_IPV6_SUBTREES
1949 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1951 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1952 rt->rt6i_table = ort->rt6i_table;
1957 #ifdef CONFIG_IPV6_ROUTE_INFO
// Search RT6_TABLE_INFO for a route to prefix/prefixlen on interface
// ifindex that has both RTF_ROUTEINFO and RTF_GATEWAY set and whose gateway
// equals gwaddr. The node lookup and the list walk run under the table
// read lock.
// NOTE(review): the per-filter statements, the reference taken on a match
// and the return are on lines not shown here.
1958 static struct rt6_info *rt6_get_route_info(struct net *net,
1959 const struct in6_addr *prefix, int prefixlen,
1960 const struct in6_addr *gwaddr, int ifindex)
1962 struct fib6_node *fn;
1963 struct rt6_info *rt = NULL;
1964 struct fib6_table *table;
1966 table = fib6_get_table(net, RT6_TABLE_INFO);
1970 read_lock_bh(&table->tb6_lock);
// No source prefix is used for this lookup (NULL, 0).
1971 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1975 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1976 if (rt->dst.dev->ifindex != ifindex)
1978 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1980 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1986 read_unlock_bh(&table->tb6_lock);
// Add a gateway route for prefix/prefixlen via gwaddr on ifindex to
// RT6_TABLE_INFO, flagged RTF_ADDRCONF | RTF_ROUTEINFO with the given
// router preference, then return what rt6_get_route_info() finds.
// The result of ip6_route_add() is not examined on the visible lines, so a
// failed add shows up only as the outcome of the follow-up lookup.
1990 static struct rt6_info *rt6_add_route_info(struct net *net,
1991 const struct in6_addr *prefix, int prefixlen,
1992 const struct in6_addr *gwaddr, int ifindex,
1995 struct fib6_config cfg = {
1996 .fc_table = RT6_TABLE_INFO,
1997 .fc_metric = IP6_RT_PRIO_USER,
1998 .fc_ifindex = ifindex,
1999 .fc_dst_len = prefixlen,
2000 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2001 RTF_UP | RTF_PREF(pref),
2002 .fc_nlinfo.portid = 0,
2003 .fc_nlinfo.nlh = NULL,
2004 .fc_nlinfo.nl_net = net,
2007 cfg.fc_dst = *prefix;
2008 cfg.fc_gateway = *gwaddr;
2010 /* We should treat it as a default route if prefix length is 0. */
2012 cfg.fc_flags |= RTF_DEFAULT;
2014 ip6_route_add(&cfg);
2016 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
// Look in RT6_TABLE_DFLT (namespace of dev) for the route on dev that has
// both RTF_ADDRCONF and RTF_DEFAULT set and whose gateway equals addr.
// Only the leaf list of the table root is walked, under the read lock.
// NOTE(review): the match action and the return are on lines not shown.
2020 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2022 struct rt6_info *rt;
2023 struct fib6_table *table;
2025 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2029 read_lock_bh(&table->tb6_lock);
2030 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2031 if (dev == rt->dst.dev &&
2032 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2033 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2038 read_unlock_bh(&table->tb6_lock);
// Add an expiring default route via gwaddr on dev to RT6_TABLE_DFLT
// (flags RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES
// plus the router preference) and return what rt6_get_dflt_router() finds.
// The result of ip6_route_add() is not examined on the visible lines.
2042 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2043 struct net_device *dev,
2046 struct fib6_config cfg = {
2047 .fc_table = RT6_TABLE_DFLT,
2048 .fc_metric = IP6_RT_PRIO_USER,
2049 .fc_ifindex = dev->ifindex,
2050 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2051 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2052 .fc_nlinfo.portid = 0,
2053 .fc_nlinfo.nlh = NULL,
2054 .fc_nlinfo.nl_net = dev_net(dev),
2057 cfg.fc_gateway = *gwaddr;
2059 ip6_route_add(&cfg);
2061 return rt6_get_dflt_router(gwaddr, dev);
// Walk the root leaf list of RT6_TABLE_DFLT in net and select routes that
// carry RTF_DEFAULT or RTF_ADDRCONF and whose idev is missing or has
// accept_ra different from 2. The read lock is released inside the match
// branch and again after the loop.
// NOTE(review): the action taken on a match (presumably delete and
// rescan) is on lines not shown in this listing.
// NOTE(review): the flag test here is true when either flag is set, while
// rt6_get_dflt_router() requires both; confirm this difference is intended
// given the keep-consistent note below.
2064 void rt6_purge_dflt_routers(struct net *net)
2066 struct rt6_info *rt;
2067 struct fib6_table *table;
2069 /* NOTE: Keep consistent with rt6_get_dflt_router */
2070 table = fib6_get_table(net, RT6_TABLE_DFLT);
2075 read_lock_bh(&table->tb6_lock);
2076 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2077 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2078 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2080 read_unlock_bh(&table->tb6_lock);
2085 read_unlock_bh(&table->tb6_lock);
// Convert an ioctl-style in6_rtmsg into a fib6_config.
// cfg is zeroed first, so every field not assigned below stays 0. The
// route always targets RT6_TABLE_MAIN, and rtmsg_info is used as the
// expiry value.
2088 static void rtmsg_to_fib6_config(struct net *net,
2089 struct in6_rtmsg *rtmsg,
2090 struct fib6_config *cfg)
2092 memset(cfg, 0, sizeof(*cfg));
2094 cfg->fc_table = RT6_TABLE_MAIN;
2095 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2096 cfg->fc_metric = rtmsg->rtmsg_metric;
2097 cfg->fc_expires = rtmsg->rtmsg_info;
2098 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2099 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2100 cfg->fc_flags = rtmsg->rtmsg_flags;
2102 cfg->fc_nlinfo.nl_net = net;
2104 cfg->fc_dst = rtmsg->rtmsg_dst;
2105 cfg->fc_src = rtmsg->rtmsg_src;
2106 cfg->fc_gateway = rtmsg->rtmsg_gateway;
// Handle the SIOCADDRT and SIOCDELRT route ioctls.
// The caller needs CAP_NET_ADMIN in the user namespace of net. The
// in6_rtmsg is copied from user space, converted to a fib6_config, and
// passed to ip6_route_add() or ip6_route_del().
// NOTE(review): the switch statement, the error returns and any locking
// around the add/delete calls are on lines not shown in this listing.
2109 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2111 struct fib6_config cfg;
2112 struct in6_rtmsg rtmsg;
2116 case SIOCADDRT: /* Add a route */
2117 case SIOCDELRT: /* Delete a route */
2118 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2120 err = copy_from_user(&rtmsg, arg,
2121 sizeof(struct in6_rtmsg));
2125 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2130 err = ip6_route_add(&cfg);
2133 err = ip6_route_del(&cfg);
2147 * Drop the packet on the floor
// Account for a packet that has no usable route and answer it with an
// ICMPv6 Destination Unreachable carrying the given code.
// ipstats_mib_noroutes selects the counter: for the input case a
// destination of type IPV6_ADDR_ANY is counted as INADDRERRORS instead;
// the output case counts ipstats_mib_noroutes itself.
// NOTE(review): break/fallthrough between the cases, freeing of skb and
// the return value are on lines not shown here.
2150 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2153 struct dst_entry *dst = skb_dst(skb);
2154 switch (ipstats_mib_noroutes) {
2155 case IPSTATS_MIB_INNOROUTES:
2156 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2157 if (type == IPV6_ADDR_ANY) {
2158 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2159 IPSTATS_MIB_INADDRERRORS);
2163 case IPSTATS_MIB_OUTNOROUTES:
2164 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2165 ipstats_mib_noroutes);
2168 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
// Input-path drop: no route to destination (ICMPV6_NOROUTE), counted as
// IPSTATS_MIB_INNOROUTES.
2173 static int ip6_pkt_discard(struct sk_buff *skb)
2175 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
// Output-path drop: no route to destination (ICMPV6_NOROUTE), counted as
// IPSTATS_MIB_OUTNOROUTES. skb->dev is first set to the device of the
// attached dst. The sk argument is not used on the visible lines.
2178 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2180 skb->dev = skb_dst(skb)->dev;
2181 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
// Input-path drop: administratively prohibited (ICMPV6_ADM_PROHIBITED),
// counted as IPSTATS_MIB_INNOROUTES.
2184 static int ip6_pkt_prohibit(struct sk_buff *skb)
2186 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
// Output-path drop: administratively prohibited (ICMPV6_ADM_PROHIBITED),
// counted as IPSTATS_MIB_OUTNOROUTES. skb->dev is first set to the device
// of the attached dst. The sk argument is not used on the visible lines.
2189 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2191 skb->dev = skb_dst(skb)->dev;
2192 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2196 * Allocate a dst for local (unicast / anycast) address.
// Allocate a host route (prefix length 128) for a local address addr of
// idev. The dst is bound to the loopback device of the namespace, uses
// ip6_input/ip6_output, belongs to RT6_TABLE_LOCAL, and starts with a
// reference count of 1. Returns ERR_PTR(-ENOMEM) on the visible failure
// path.
// NOTE(review): the third parameter and the condition that chooses between
// RTF_ANYCAST and RTF_LOCAL are on lines not shown in this listing.
2199 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2200 const struct in6_addr *addr,
2203 struct net *net = dev_net(idev->dev);
2204 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2207 return ERR_PTR(-ENOMEM);
2211 rt->dst.flags |= DST_HOST;
2212 rt->dst.input = ip6_input;
2213 rt->dst.output = ip6_output;
2214 rt->rt6i_idev = idev;
2216 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2218 rt->rt6i_flags |= RTF_ANYCAST;
2220 rt->rt6i_flags |= RTF_LOCAL;
// Gateway and destination are both set to the address itself.
2222 rt->rt6i_gateway = *addr;
2223 rt->rt6i_dst.addr = *addr;
2224 rt->rt6i_dst.plen = 128;
2225 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2227 atomic_set(&rt->dst.__refcnt, 1);
// Choose a source address for daddr and store it in saddr.
// When rt has a preferred source (non-zero prefsrc.plen) that address is
// used; the other visible path asks ipv6_dev_get_saddr(), passing the
// device of the route idev, or NULL when there is no route or idev.
// NOTE(review): the prefs parameter declaration and the return are on
// lines not shown here.
2232 int ip6_route_get_saddr(struct net *net,
2233 struct rt6_info *rt,
2234 const struct in6_addr *daddr,
2236 struct in6_addr *saddr)
2238 struct inet6_dev *idev =
2239 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2241 if (rt && rt->rt6i_prefsrc.plen)
2242 *saddr = rt->rt6i_prefsrc.addr;
2244 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2245 daddr, prefs, saddr);
// Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all():
// the device, the namespace (member on a line not shown; it is read as
// ->net by the callback) and the address being removed.
2249 /* remove deleted ip from prefsrc entries */
2250 struct arg_dev_net_ip {
2251 struct net_device *dev;
2253 struct in6_addr *addr;
// Per-route callback: if rt is on the given device (or no device was
// given), is not the null entry, and its preferred source equals the
// address in arg, drop the preferred source by setting its length to 0.
// arg points to a struct arg_dev_net_ip.
2256 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2258 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2259 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2260 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2262 if (((void *)rt->dst.dev == dev || !dev) &&
2263 rt != net->ipv6.ip6_null_entry &&
2264 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2265 /* remove prefsrc entry */
2266 rt->rt6i_prefsrc.plen = 0;
// Run fib6_remove_prefsrc() over every route in the namespace of ifp, with
// the device taken from ifp->idev->dev.
// NOTE(review): the remaining initialisers of adni are on lines not shown.
2271 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2273 struct net *net = dev_net(ifp->idev->dev);
2274 struct arg_dev_net_ip adni = {
2275 .dev = ifp->idev->dev,
2279 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
// Flag combinations tested by fib6_clean_tohost(): a route matches only
// when all flags of one combination are set.
2282 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2283 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
// Per-route callback: selects routes whose gateway equals the address in
// arg and whose flags contain either all of RTF_RA_ROUTER or all of
// RTF_CACHE_GATEWAY.
// NOTE(review): the value returned for a selected route is on a line not
// shown in this listing.
2285 /* Remove routers and update dst entries when gateway turn into host. */
2286 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2288 struct in6_addr *gateway = (struct in6_addr *)arg;
2290 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2291 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2292 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
// Apply fib6_clean_tohost() to every route in net for the given gateway.
2298 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2300 fib6_clean_all(net, fib6_clean_tohost, gateway);
// Argument bundle for fib6_ifdown(): the device going down plus the
// namespace (member on a line not shown; the callback reads it as ->net).
2303 struct arg_dev_net {
2304 struct net_device *dev;
// Per-route callback: selects every route on the given device (or every
// route when no device is given), except the namespace null entry.
// NOTE(review): the values returned are on lines not shown here.
2308 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2310 const struct arg_dev_net *adn = arg;
2311 const struct net_device *dev = adn->dev;
2313 if ((rt->dst.dev == dev || !dev) &&
2314 rt != adn->net->ipv6.ip6_null_entry)
// Run fib6_ifdown() over all FIB routes of net and, through
// icmp6_clean_all(), over a second set of entries.
// NOTE(review): the initialisers of adn are on lines not shown here.
2320 void rt6_ifdown(struct net *net, struct net_device *dev)
2322 struct arg_dev_net adn = {
2327 fib6_clean_all(net, fib6_ifdown, &adn);
2328 icmp6_clean_all(fib6_ifdown, &adn);
// Argument bundle for rt6_mtu_change_route(): the device whose MTU changed
// plus the new MTU (member on a line not shown; the callback reads it as
// ->mtu).
2331 struct rt6_mtu_change_arg {
2332 struct net_device *dev;
// Per-route callback run after a device MTU change. p_arg points to a
// struct rt6_mtu_change_arg.
// Only routes on arg->dev whose RTAX_MTU metric is not locked are touched:
//  - cache routes with a non-zero rt6i_pmtu are only ever lowered to the
//    new MTU;
//  - other routes get RTAX_MTU set to the new MTU when their current MTU
//    is at least the new one, or when it is smaller but equal to the
//    mtu6 value recorded on the interface.
// NOTE(review): the NULL check on idev and the return value are on lines
// not shown in this listing.
2336 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2338 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2339 struct inet6_dev *idev;
2341 /* In IPv6 pmtu discovery is not optional,
2342 so that RTAX_MTU lock cannot disable it.
2343 We still use this lock to block changes
2344 caused by addrconf/ndisc.
2347 idev = __in6_dev_get(arg->dev);
2351 /* For administrative MTU increase, there is no way to discover
2352 IPv6 PMTU increase, so PMTU increase should be updated here.
2353 Since RFC 1981 doesn't include administrative MTU increase
2354 update PMTU increase is a MUST. (i.e. jumbo frame)
2357 If new MTU is less than route PMTU, this new MTU will be the
2358 lowest MTU in the path, update the route PMTU to reflect PMTU
2359 decreases; if new MTU is greater than route PMTU, and the
2360 old MTU is the lowest MTU in the path, update the route PMTU
2361 to reflect the increase. In this case if the other nodes' MTU
2362 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2365 if (rt->dst.dev == arg->dev &&
2366 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2367 if (rt->rt6i_flags & RTF_CACHE) {
2368 /* For RTF_CACHE with rt6i_pmtu == 0
2369 * (i.e. a redirected route),
2370 * the metrics of its rt->dst.from has already
2373 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2374 rt->rt6i_pmtu = arg->mtu;
2375 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2376 (dst_mtu(&rt->dst) < arg->mtu &&
2377 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2378 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
// Propagate a new MTU of dev to the routes in its namespace by running
// rt6_mtu_change_route() over all of them.
// NOTE(review): the initialisers of arg are on lines not shown here.
2384 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2386 struct rt6_mtu_change_arg arg = {
2391 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
// Netlink attribute policy used when parsing IPv6 route messages in this
// file (passed to nlmsg_parse()). Entries give either a minimum payload
// length (.len) or an attribute type (.type). Attributes not listed here
// have no policy entry.
2394 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2395 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2396 [RTA_OIF] = { .type = NLA_U32 },
2397 [RTA_IIF] = { .type = NLA_U32 },
2398 [RTA_PRIORITY] = { .type = NLA_U32 },
2399 [RTA_METRICS] = { .type = NLA_NESTED },
2400 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2401 [RTA_PREF] = { .type = NLA_U8 },
// Parse a netlink route message (nlh, received in skb) into cfg.
// The message is validated against rtm_ipv6_policy, cfg is zeroed, the
// fixed rtmsg header fields are copied, flags are derived from the route
// type and RTM_F_CLONED, and then each optional attribute present in tb[]
// overrides or extends the configuration.
// NOTE(review): several attribute presence tests and all returns are on
// lines not shown in this listing.
2404 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2405 struct fib6_config *cfg)
2408 struct nlattr *tb[RTA_MAX+1];
2412 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2417 rtm = nlmsg_data(nlh);
2418 memset(cfg, 0, sizeof(*cfg));
2420 cfg->fc_table = rtm->rtm_table;
2421 cfg->fc_dst_len = rtm->rtm_dst_len;
2422 cfg->fc_src_len = rtm->rtm_src_len;
2423 cfg->fc_flags = RTF_UP;
2424 cfg->fc_protocol = rtm->rtm_protocol;
2425 cfg->fc_type = rtm->rtm_type;
// These four route types all become reject routes.
2427 if (rtm->rtm_type == RTN_UNREACHABLE ||
2428 rtm->rtm_type == RTN_BLACKHOLE ||
2429 rtm->rtm_type == RTN_PROHIBIT ||
2430 rtm->rtm_type == RTN_THROW)
2431 cfg->fc_flags |= RTF_REJECT;
2433 if (rtm->rtm_type == RTN_LOCAL)
2434 cfg->fc_flags |= RTF_LOCAL;
2436 if (rtm->rtm_flags & RTM_F_CLONED)
2437 cfg->fc_flags |= RTF_CACHE;
// Sender identity and namespace, kept for later notifications.
2439 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2440 cfg->fc_nlinfo.nlh = nlh;
2441 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2443 if (tb[RTA_GATEWAY]) {
2444 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2445 cfg->fc_flags |= RTF_GATEWAY;
// Destination: only the bytes needed for the prefix length (rounded up)
// are required and copied; the rest of fc_dst stays zero from the memset.
2449 int plen = (rtm->rtm_dst_len + 7) >> 3;
2451 if (nla_len(tb[RTA_DST]) < plen)
2454 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
// Source: same treatment as the destination.
2458 int plen = (rtm->rtm_src_len + 7) >> 3;
2460 if (nla_len(tb[RTA_SRC]) < plen)
2463 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2466 if (tb[RTA_PREFSRC])
2467 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2470 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2472 if (tb[RTA_PRIORITY])
2473 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
// Metrics and multipath payloads are referenced in place, not copied.
2475 if (tb[RTA_METRICS]) {
2476 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2477 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
// RTA_TABLE replaces the table id taken from the rtmsg header above.
2481 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2483 if (tb[RTA_MULTIPATH]) {
2484 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2485 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
// Any preference other than LOW or HIGH is normalised to MEDIUM.
2489 pref = nla_get_u8(tb[RTA_PREF]);
2490 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2491 pref != ICMPV6_ROUTER_PREF_HIGH)
2492 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2493 cfg->fc_flags |= RTF_PREF(pref);
// Add (add != 0) or delete (add == 0) every next hop listed in the
// multipath attribute of cfg, one ordinary route per next hop.
// Each iteration starts from a fresh copy of cfg, overrides the interface
// index and the gateway from the rtnexthop entry when present, and calls
// ip6_route_add() or ip6_route_del().
// cfg->fc_nlinfo.nlh->nlmsg_flags is modified in place after a next hop
// has been processed.
// NOTE(review): the branch structure around the error handling (which
// lines run for add and which for delete) is on lines not shown here.
2501 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2503 struct fib6_config r_cfg;
2504 struct rtnexthop *rtnh;
2507 int err = 0, last_err = 0;
2509 remaining = cfg->fc_mp_len;
2511 rtnh = (struct rtnexthop *)cfg->fc_mp;
2513 /* Parse a Multipath Entry */
2514 while (rtnh_ok(rtnh, remaining)) {
2515 memcpy(&r_cfg, cfg, sizeof(*cfg));
2516 if (rtnh->rtnh_ifindex)
2517 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2519 attrlen = rtnh_attrlen(rtnh);
2521 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2523 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2525 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2526 r_cfg.fc_flags |= RTF_GATEWAY;
2529 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2532 /* If we are trying to remove a route, do not stop the
2533 * loop when ip6_route_del() fails (because next hop is
2534 * already gone), we should try to remove all next hops.
2537 /* If add fails, we should try to delete all
2538 * next hops that have been already added.
// remaining becomes the number of bytes already consumed, which bounds the
// walk over the next hops processed so far.
2541 remaining = cfg->fc_mp_len - remaining;
2545 /* Because each route is added like a single route we remove
2546 * these flags after the first nexthop: if there is a collision,
2547 * we have already failed to add the first nexthop:
2548 * fib6_add_rt2node() has rejected it; when replacing, old
2549 * nexthops have been replaced by first new, the rest should
2552 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2554 rtnh = rtnh_next(rtnh, &remaining);
// Netlink handler for route deletion: parse the message into a
// fib6_config, then delete either each multipath next hop or the single
// route.
// NOTE(review): the error check after parsing and the condition that
// selects the multipath path are on lines not shown here.
2560 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2562 struct fib6_config cfg;
2565 err = rtm_to_fib6_config(skb, nlh, &cfg);
2570 return ip6_route_multipath(&cfg, 0);
2572 return ip6_route_del(&cfg);
// Netlink handler for route creation: parse the message into a
// fib6_config, then add either each multipath next hop or the single
// route.
// NOTE(review): the error check after parsing and the condition that
// selects the multipath path are on lines not shown here.
2575 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2577 struct fib6_config cfg;
2580 err = rtm_to_fib6_config(skb, nlh, &cfg);
2585 return ip6_route_multipath(&cfg, 1);
2587 return ip6_route_add(&cfg);
// Upper bound, in bytes, for one route notification message: the aligned
// rtmsg header plus room for each attribute listed below. It is used to
// size the skb allocated in inet6_rt_notify(), so an attribute emitted by
// rt6_fill_node() in that path needs a term here.
2590 static inline size_t rt6_nlmsg_size(void)
2592 return NLMSG_ALIGN(sizeof(struct rtmsg))
2593 + nla_total_size(16) /* RTA_SRC */
2594 + nla_total_size(16) /* RTA_DST */
2595 + nla_total_size(16) /* RTA_GATEWAY */
2596 + nla_total_size(16) /* RTA_PREFSRC */
2597 + nla_total_size(4) /* RTA_TABLE */
2598 + nla_total_size(4) /* RTA_IIF */
2599 + nla_total_size(4) /* RTA_OIF */
2600 + nla_total_size(4) /* RTA_PRIORITY */
2601 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2602 + nla_total_size(sizeof(struct rta_cacheinfo))
2603 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2604 + nla_total_size(1); /* RTA_PREF */
// Serialise route rt into skb as one netlink route message.
//   dst, src  - addresses reported in RTA_DST / RTA_SRC on the visible
//               branches that also force the reported prefix length to 128;
//               otherwise the prefix stored in the route is reported
//   iif       - input interface index, emitted as RTA_IIF on one branch
//   type, portid, seq, flags - passed through to nlmsg_put()
//   prefix    - non-zero restricts output to routes with RTF_PREFIX_RT
//   nowait    - forwarded to ip6mr_get_route() for multicast destinations
// Every attribute failure jumps to nla_put_failure, where the partly built
// message is cancelled.
// NOTE(review): many branch conditions and all returns are on lines not
// shown in this listing.
2607 static int rt6_fill_node(struct net *net,
2608 struct sk_buff *skb, struct rt6_info *rt,
2609 struct in6_addr *dst, struct in6_addr *src,
2610 int iif, int type, u32 portid, u32 seq,
2611 int prefix, int nowait, unsigned int flags)
2613 u32 metrics[RTAX_MAX];
2615 struct nlmsghdr *nlh;
2619 if (prefix) { /* user wants prefix routes only */
2620 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2621 /* success since this is not a prefix route */
2626 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2630 rtm = nlmsg_data(nlh);
2631 rtm->rtm_family = AF_INET6;
2632 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2633 rtm->rtm_src_len = rt->rt6i_src.plen;
// The table id goes both into the header field and into RTA_TABLE.
2636 table = rt->rt6i_table->tb6_id;
2638 table = RT6_TABLE_UNSPEC;
2639 rtm->rtm_table = table;
2640 if (nla_put_u32(skb, RTA_TABLE, table))
2641 goto nla_put_failure;
// Route type: reject routes are mapped back from dst.error; otherwise
// RTF_LOCAL or a loopback device gives RTN_LOCAL, and RTN_UNICAST is the
// remaining assignment.
2642 if (rt->rt6i_flags & RTF_REJECT) {
2643 switch (rt->dst.error) {
2645 rtm->rtm_type = RTN_BLACKHOLE;
2648 rtm->rtm_type = RTN_PROHIBIT;
2651 rtm->rtm_type = RTN_THROW;
2654 rtm->rtm_type = RTN_UNREACHABLE;
2658 else if (rt->rt6i_flags & RTF_LOCAL)
2659 rtm->rtm_type = RTN_LOCAL;
2660 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2661 rtm->rtm_type = RTN_LOCAL;
2663 rtm->rtm_type = RTN_UNICAST;
2665 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
// Protocol: the stored value, overridden for dynamic (redirect) routes and
// for autoconfigured routes.
2666 rtm->rtm_protocol = rt->rt6i_protocol;
2667 if (rt->rt6i_flags & RTF_DYNAMIC)
2668 rtm->rtm_protocol = RTPROT_REDIRECT;
2669 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2670 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2671 rtm->rtm_protocol = RTPROT_RA;
2673 rtm->rtm_protocol = RTPROT_KERNEL;
2676 if (rt->rt6i_flags & RTF_CACHE)
2677 rtm->rtm_flags |= RTM_F_CLONED;
2680 if (nla_put_in6_addr(skb, RTA_DST, dst))
2681 goto nla_put_failure;
2682 rtm->rtm_dst_len = 128;
2683 } else if (rtm->rtm_dst_len)
2684 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2685 goto nla_put_failure;
2686 #ifdef CONFIG_IPV6_SUBTREES
2688 if (nla_put_in6_addr(skb, RTA_SRC, src))
2689 goto nla_put_failure;
2690 rtm->rtm_src_len = 128;
2691 } else if (rtm->rtm_src_len &&
2692 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2693 goto nla_put_failure;
2696 #ifdef CONFIG_IPV6_MROUTE
2697 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2698 int err = ip6mr_get_route(net, skb, rtm, nowait);
2703 goto nla_put_failure;
2705 if (err == -EMSGSIZE)
2706 goto nla_put_failure;
2711 if (nla_put_u32(skb, RTA_IIF, iif))
2712 goto nla_put_failure;
// Preferred source: on this branch it is computed for dst through
// ip6_route_get_saddr() and emitted only when that call returns 0.
2714 struct in6_addr saddr_buf;
2715 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2716 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2717 goto nla_put_failure;
// A preferred source configured on the route is reported as well.
2720 if (rt->rt6i_prefsrc.plen) {
2721 struct in6_addr saddr_buf;
2722 saddr_buf = rt->rt6i_prefsrc.addr;
2723 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2724 goto nla_put_failure;
// Metrics are copied to a local array so that the MTU slot can be replaced
// by the per-route PMTU without touching the shared metrics.
2727 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2729 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2730 if (rtnetlink_put_metrics(skb, metrics) < 0)
2731 goto nla_put_failure;
2733 if (rt->rt6i_flags & RTF_GATEWAY) {
2734 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2735 goto nla_put_failure;
2739 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2740 goto nla_put_failure;
2741 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2742 goto nla_put_failure;
// Remaining lifetime in jiffies, or 0 for routes without RTF_EXPIRES.
2744 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2746 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2747 goto nla_put_failure;
2749 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2750 goto nla_put_failure;
2752 nlmsg_end(skb, nlh);
2756 nlmsg_cancel(skb, nlh);
// Dump callback: emit rt as an RTM_NEWROUTE message with NLM_F_MULTI into
// the skb of the dump in progress. p_arg points to a rt6_rtnl_dump_arg.
// The prefix-only filter is taken from RTM_F_PREFIX in the request header,
// and only when the request is long enough to contain an rtmsg.
2760 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2762 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2765 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2766 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2767 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2771 return rt6_fill_node(arg->net,
2772 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2773 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2774 prefix, 0, NLM_F_MULTI);
// Netlink handler for a route query: build a flow from the request
// attributes, resolve it through the input lookup (when an input interface
// is given) or the output lookup, serialise the resulting route with
// rt6_fill_node(), and unicast the reply to the requester.
// NOTE(review): the attribute presence tests, the branch between the input
// and output lookups, and all error paths are on lines not shown here.
2777 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2779 struct net *net = sock_net(in_skb->sk);
2780 struct nlattr *tb[RTA_MAX+1];
2781 struct rt6_info *rt;
2782 struct sk_buff *skb;
2785 int err, iif = 0, oif = 0;
2787 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2792 memset(&fl6, 0, sizeof(fl6));
// Unlike route add/delete, a query needs full 16-byte addresses.
2795 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2798 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2802 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2805 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2809 iif = nla_get_u32(tb[RTA_IIF]);
2812 oif = nla_get_u32(tb[RTA_OIF]);
2815 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2818 struct net_device *dev;
2821 dev = __dev_get_by_index(net, iif);
2827 fl6.flowi6_iif = iif;
2829 if (!ipv6_addr_any(&fl6.saddr))
2830 flags |= RT6_LOOKUP_F_HAS_SADDR;
2832 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2835 fl6.flowi6_oif = oif;
2837 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2840 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2847 /* Reserve room for dummy headers, this skb can pass
2848 through good chunk of routing engine.
2850 skb_reset_mac_header(skb);
2851 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
// The route is attached to the reply skb.
2853 skb_dst_set(skb, &rt->dst);
2855 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2856 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2857 nlh->nlmsg_seq, 0, 0, 0);
2863 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
// Notify RTNLGRP_IPV6_ROUTE listeners about a route event (event is used
// as the netlink message type). The message buffer is sized by
// rt6_nlmsg_size(); an -EMSGSIZE result from rt6_fill_node() therefore
// triggers a warning. The visible rtnl_set_sk_err() call reports err to
// the group.
// NOTE(review): the allocation check, the branch after the fill, and the
// labels are on lines not shown in this listing.
2868 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2870 struct sk_buff *skb;
2871 struct net *net = info->nl_net;
// The sequence number is taken from the triggering request when there is
// one, and is 0 otherwise.
2876 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2878 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2882 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2883 event, info->portid, seq, 0, 0, 0);
2885 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2886 WARN_ON(err == -EMSGSIZE);
2890 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2891 info->nlh, gfp_any());
2895 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
// Netdevice notifier: when a loopback device registers, point the special
// route entries of its namespace at it. The null entry is always updated;
// the prohibit and blackhole entries only with CONFIG_IPV6_MULTIPLE_TABLES.
// Each entry gets its own in6_dev_get() result for the device.
// NOTE(review): the return value is on a line not shown in this listing.
2898 static int ip6_route_dev_notify(struct notifier_block *this,
2899 unsigned long event, void *ptr)
2901 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2902 struct net *net = dev_net(dev);
2904 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2905 net->ipv6.ip6_null_entry->dst.dev = dev;
2906 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2907 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2908 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2909 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2910 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2911 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2922 #ifdef CONFIG_PROC_FS
// File operations for the IPv6 route listing in procfs: opened through
// ipv6_route_open(), seekable through seq_lseek(), released through
// seq_release_net().
// NOTE(review): the .read member is on a line not shown in this listing.
2924 static const struct file_operations ipv6_route_proc_fops = {
2925 .owner = THIS_MODULE,
2926 .open = ipv6_route_open,
2928 .llseek = seq_lseek,
2929 .release = seq_release_net,
// Print one line of routing statistics for the namespace stored in
// seq->private: seven hexadecimal fields, each zero-padded to at least
// four digits. Field order: fib_nodes, fib_route_nodes, fib_rt_alloc,
// fib_rt_entries, fib_rt_cache, the slow (exact) dst entry count, and
// fib_discarded_routes.
2932 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2934 struct net *net = (struct net *)seq->private;
2935 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2936 net->ipv6.rt6_stats->fib_nodes,
2937 net->ipv6.rt6_stats->fib_route_nodes,
2938 net->ipv6.rt6_stats->fib_rt_alloc,
2939 net->ipv6.rt6_stats->fib_rt_entries,
2940 net->ipv6.rt6_stats->fib_rt_cache,
2941 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2942 net->ipv6.rt6_stats->fib_discarded_routes);
// Open handler for the statistics file: a single-record seq_file bound to
// rt6_stats_seq_show() through single_open_net().
2947 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2949 return single_open_net(inode, file, rt6_stats_seq_show);
// File operations for the routing statistics file in procfs: opened
// through rt6_stats_seq_open() and released through single_release_net(),
// the counterpart of single_open_net().
// NOTE(review): the .read member is on a line not shown in this listing.
2952 static const struct file_operations rt6_stats_seq_fops = {
2953 .owner = THIS_MODULE,
2954 .open = rt6_stats_seq_open,
2956 .llseek = seq_lseek,
2957 .release = single_release_net,
2959 #endif /* CONFIG_PROC_FS */
2961 #ifdef CONFIG_SYSCTL
// Handler for the flush sysctl: stores the written integer through
// proc_dointvec() and runs the FIB garbage collector for the namespace
// found in ctl->extra1.
// delay is sampled before proc_dointvec() runs, so the collector call below
// uses the value held before this write. A delay of 0 or less is passed as
// an expiry of 0 with the last argument false; a positive delay is passed
// as is with the last argument true.
// NOTE(review): the write check, the use of the proc_dointvec() result and
// the return are on lines not shown in this listing.
2964 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2965 void __user *buffer, size_t *lenp, loff_t *ppos)
2972 net = (struct net *)ctl->extra1;
2973 delay = net->ipv6.sysctl.flush_delay;
2974 proc_dointvec(ctl, write, buffer, lenp, ppos);
2975 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
/*
 * Template for the IPv6 route sysctl table.
 *
 * The .data pointers here reference init_net (or the dst ops template);
 * ipv6_route_sysctl_init() duplicates this table and re-points every
 * entry at the target namespace BY INDEX, so the order of the entries
 * below must stay in sync with the table[N] assignments in that
 * function.  The bracketed numbers are those indices.
 */
2979 struct ctl_table ipv6_route_table_template[] = {
/* [0] handled by ipv6_sysctl_rtcache_flush() rather than a generic helper */
2981 .procname = "flush",
2982 .data = &init_net.ipv6.sysctl.flush_delay,
2983 .maxlen = sizeof(int),
2985 .proc_handler = ipv6_sysctl_rtcache_flush
/* [1] */
2988 .procname = "gc_thresh",
2989 .data = &ip6_dst_ops_template.gc_thresh,
2990 .maxlen = sizeof(int),
2992 .proc_handler = proc_dointvec,
/* [2] */
2995 .procname = "max_size",
2996 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2997 .maxlen = sizeof(int),
2999 .proc_handler = proc_dointvec,
/* [3] shares its storage with entry [9] ("gc_min_interval_ms") */
3002 .procname = "gc_min_interval",
3003 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3004 .maxlen = sizeof(int),
3006 .proc_handler = proc_dointvec_jiffies,
/* [4] */
3009 .procname = "gc_timeout",
3010 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3011 .maxlen = sizeof(int),
3013 .proc_handler = proc_dointvec_jiffies,
/* [5] */
3016 .procname = "gc_interval",
3017 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3018 .maxlen = sizeof(int),
3020 .proc_handler = proc_dointvec_jiffies,
/* [6] */
3023 .procname = "gc_elasticity",
3024 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3025 .maxlen = sizeof(int),
3027 .proc_handler = proc_dointvec,
/* [7] */
3030 .procname = "mtu_expires",
3031 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3032 .maxlen = sizeof(int),
3034 .proc_handler = proc_dointvec_jiffies,
/* [8] */
3037 .procname = "min_adv_mss",
3038 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3039 .maxlen = sizeof(int),
3041 .proc_handler = proc_dointvec,
/* [9] same variable as entry [3], exposed through the ms<->jiffies handler */
3044 .procname = "gc_min_interval_ms",
3045 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3046 .maxlen = sizeof(int),
3048 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * Build the per-namespace copy of the IPv6 route sysctl table.
 *
 * The template is duplicated with kmemdup() and each entry's .data is
 * redirected from the init_net/template storage to the matching field of
 * @net.  Entry 0 ("flush") additionally carries @net in .extra1, which
 * is where ipv6_sysctl_rtcache_flush() reads the namespace from.
 *
 * The indices used here depend on the entry order of
 * ipv6_route_table_template.
 */
3053 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3055 struct ctl_table *table;
3057 table = kmemdup(ipv6_route_table_template,
3058 sizeof(ipv6_route_table_template),
3062 table[0].data = &net->ipv6.sysctl.flush_delay;
3063 table[0].extra1 = net;
3064 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3065 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3066 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3067 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3068 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3069 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3070 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3071 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
/* Entries 3 and 9 intentionally point at the same variable. */
3072 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3074 /* Don't export sysctls to unprivileged users */
/*
 * Clearing the first entry's procname presumably makes the whole table
 * appear empty to the sysctl core - TODO confirm.
 */
3075 if (net->user_ns != &init_user_ns)
3076 table[0].procname = NULL;
/*
 * Per-namespace setup of the IPv6 routing state (the .init hook of
 * ip6_route_net_ops).
 *
 * Steps, in order:
 *  - copy the dst ops template into the namespace and initialise its
 *    entry counter;
 *  - duplicate the null route template (plus the prohibit and blackhole
 *    templates with CONFIG_IPV6_MULTIPLE_TABLES); each copy gets dst.path
 *    pointing at itself, dst.ops pointing at the namespace's dst ops, and
 *    its metrics set from ip6_template_metrics;
 *  - load the default sysctl values.
 *
 * On failure, control jumps to the labels at the end, which release what
 * was set up before the failing step.
 */
3083 static int __net_init ip6_route_net_init(struct net *net)
3087 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3088 sizeof(net->ipv6.ip6_dst_ops));
3090 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3091 goto out_ip6_dst_ops;
3093 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3094 sizeof(*net->ipv6.ip6_null_entry),
3096 if (!net->ipv6.ip6_null_entry)
3097 goto out_ip6_dst_entries;
/* The special entries are their own dst path. */
3098 net->ipv6.ip6_null_entry->dst.path =
3099 (struct dst_entry *)net->ipv6.ip6_null_entry;
3100 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3101 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3102 ip6_template_metrics, true);
3104 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3105 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3106 sizeof(*net->ipv6.ip6_prohibit_entry),
3108 if (!net->ipv6.ip6_prohibit_entry)
3109 goto out_ip6_null_entry;
3110 net->ipv6.ip6_prohibit_entry->dst.path =
3111 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3112 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3113 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3114 ip6_template_metrics, true);
3116 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3117 sizeof(*net->ipv6.ip6_blk_hole_entry),
3119 if (!net->ipv6.ip6_blk_hole_entry)
3120 goto out_ip6_prohibit_entry;
3121 net->ipv6.ip6_blk_hole_entry->dst.path =
3122 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3123 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3124 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3125 ip6_template_metrics, true);
/* Default sysctl values; time-based ones are expressed in jiffies (HZ). */
3128 net->ipv6.sysctl.flush_delay = 0;
3129 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3130 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3131 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3132 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3133 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3134 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
/* NOTE(review): 20 and 40 are presumably the TCP and IPv6 header sizes - confirm. */
3135 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3137 net->ipv6.ip6_rt_gc_expire = 30*HZ;
/* Error unwinding: free the entries allocated so far, then the counter. */
3143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3144 out_ip6_prohibit_entry:
3145 kfree(net->ipv6.ip6_prohibit_entry);
3147 kfree(net->ipv6.ip6_null_entry);
3149 out_ip6_dst_entries:
3150 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * Per-namespace teardown (the .exit hook of ip6_route_net_ops): frees
 * the special route entries allocated by ip6_route_net_init() and
 * destroys the namespace's dst entry counter.
 */
3155 static void __net_exit ip6_route_net_exit(struct net *net)
3157 kfree(net->ipv6.ip6_null_entry);
3158 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3159 kfree(net->ipv6.ip6_prohibit_entry);
3160 kfree(net->ipv6.ip6_blk_hole_entry);
3162 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * Late per-namespace init (the .init hook of ip6_route_net_late_ops):
 * with CONFIG_PROC_FS, creates the "ipv6_route" and "rt6_stats" entries
 * under the namespace's proc_net directory.
 *
 * NOTE(review): the proc_create() results are not checked on the lines
 * below - confirm a creation failure is meant to be ignored.
 */
3165 static int __net_init ip6_route_net_init_late(struct net *net)
3167 #ifdef CONFIG_PROC_FS
3168 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3169 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
/*
 * Late per-namespace teardown (the .exit hook of ip6_route_net_late_ops):
 * removes the two proc entries created by ip6_route_net_init_late().
 */
3174 static void __net_exit ip6_route_net_exit_late(struct net *net)
3176 #ifdef CONFIG_PROC_FS
3177 remove_proc_entry("ipv6_route", net->proc_net);
3178 remove_proc_entry("rt6_stats", net->proc_net);
/*
 * Per-namespace hooks for the core routing state (dst ops, special
 * route entries, sysctl defaults).  Registered from ip6_route_init().
 */
3182 static struct pernet_operations ip6_route_net_ops = {
3183 .init = ip6_route_net_init,
3184 .exit = ip6_route_net_exit,
/*
 * Per-namespace init of the IPv6 inet_peer base: allocates the base,
 * initialises it and publishes it in net->ipv6.peers.
 *
 * NOTE(review): bp comes from kmalloc() and is handed to
 * inet_peer_base_init(); make sure the allocation is checked for NULL
 * before that call.
 */
3187 static int __net_init ipv6_inetpeer_init(struct net *net)
3189 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3193 inet_peer_base_init(bp);
3194 net->ipv6.peers = bp;
/*
 * Per-namespace teardown of the IPv6 inet_peer base.  The pointer is
 * cleared from the namespace first, and only then is the tree behind it
 * invalidated through the saved local copy.
 */
3198 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3200 struct inet_peer_base *bp = net->ipv6.peers;
3202 net->ipv6.peers = NULL;
3203 inetpeer_invalidate_tree(bp);
/*
 * Per-namespace hooks managing net->ipv6.peers.  Registered from
 * ip6_route_init() before ip6_route_net_ops.
 */
3207 static struct pernet_operations ipv6_inetpeer_ops = {
3208 .init = ipv6_inetpeer_init,
3209 .exit = ipv6_inetpeer_exit,
/*
 * Per-namespace hooks for the proc entries.  Registered from
 * ip6_route_init() after fib6_rules_init() has run.
 */
3212 static struct pernet_operations ip6_route_net_late_ops = {
3213 .init = ip6_route_net_init_late,
3214 .exit = ip6_route_net_exit_late,
/*
 * Notifier block that routes netdevice events to
 * ip6_route_dev_notify().  Registered as the last step of
 * ip6_route_init().
 */
3217 static struct notifier_block ip6_route_dev_notifier = {
3218 .notifier_call = ip6_route_dev_notify,
/*
 * One-time initialisation of the IPv6 routing subsystem.
 *
 * Visible order of setup:
 *  1. create the "ip6_dst_cache" slab cache for struct rt6_info;
 *  2. initialise the entry counter of the blackhole dst ops;
 *  3. register the inetpeer and core route pernet subsystems;
 *  4. share the slab cache with the blackhole dst ops;
 *  5. bind init_net's special route entries to its loopback device;
 *  6. initialise the fib6 rules and register the late pernet subsystem;
 *  7. register the RTM_NEWROUTE/DELROUTE/GETROUTE rtnetlink handlers;
 *  8. register the netdevice notifier.
 *
 * A failing step jumps to the label chain at the end, which undoes the
 * earlier steps.
 */
3222 int __init ip6_route_init(void)
3227 ip6_dst_ops_template.kmem_cachep =
3228 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3229 SLAB_HWCACHE_ALIGN, NULL);
3230 if (!ip6_dst_ops_template.kmem_cachep)
3233 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3235 goto out_kmem_cache;
3237 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3239 goto out_dst_entries;
3241 ret = register_pernet_subsys(&ip6_route_net_ops);
3243 goto out_register_inetpeer;
/* Blackhole dsts are allocated from the same slab cache. */
3245 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3247 /* Registering of the loopback is done before this portion of code,
3248 * the loopback reference in rt6_info will not be taken, do it
3249 * manually for init_net */
3250 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3251 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3252 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3253 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3254 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3255 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3256 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3260 goto out_register_subsys;
3266 ret = fib6_rules_init();
3270 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3272 goto fib6_rules_init;
/* Any one of the three registrations failing aborts the init. */
3275 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3276 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3277 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3278 goto out_register_late_subsys;
3280 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3282 goto out_register_late_subsys;
/* Error unwinding, in reverse order of the setup above. */
3287 out_register_late_subsys:
3288 unregister_pernet_subsys(&ip6_route_net_late_ops);
3290 fib6_rules_cleanup();
3295 out_register_subsys:
3296 unregister_pernet_subsys(&ip6_route_net_ops);
3297 out_register_inetpeer:
3298 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3300 dst_entries_destroy(&ip6_dst_blackhole_ops);
3302 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * pernet subsystems, the fib6 rules, the blackhole dst entry counter and
 * finally the dst slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the error path of ip6_route_init()
 * unregisters them in the opposite order (the strict reverse of
 * registration) - confirm the ordering here is intentional.
 */
3306 void ip6_route_cleanup(void)
3308 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3309 unregister_pernet_subsys(&ip6_route_net_late_ops);
3310 fib6_rules_cleanup();
3313 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3314 unregister_pernet_subsys(&ip6_route_net_ops);
3315 dst_entries_destroy(&ip6_dst_blackhole_ops);
3316 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);