2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
62 #include <asm/uaccess.h>
65 #include <linux/sysctl.h>
69 RT6_NUD_FAIL_HARD = -3,
70 RT6_NUD_FAIL_PROBE = -2,
71 RT6_NUD_FAIL_DO_RR = -1,
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
95 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
98 #ifdef CONFIG_IPV6_ROUTE_INFO
99 static struct rt6_info *rt6_add_route_info(struct net *net,
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
103 static struct rt6_info *rt6_get_route_info(struct net *net,
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
/* dst_ops.cow_metrics hook: unshare the metrics array before a write.
 * Cloned (RTF_CACHE) routes take a separate path; that branch body is not
 * present in this excerpt. Falls back to dst_cow_metrics_generic().
 */
108 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
110 	struct rt6_info *rt = (struct rt6_info *)dst;
112 	if (rt->rt6i_flags & RTF_CACHE)
115 	return dst_cow_metrics_generic(dst, old);
/* Pick the neighbour-table lookup key for @rt: the route's gateway when one
 * is set, otherwise the destination address of the packet being sent.
 */
118 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
122 	struct in6_addr *p = &rt->rt6i_gateway;
124 	if (!ipv6_addr_any(p))
125 		return (const void *) p;
127 	return &ipv6_hdr(skb)->daddr;
/* dst_ops.neigh_lookup hook: find (or create) the neighbour entry for this
 * dst, keyed by the gateway or the packet's destination address.
 */
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
135 	struct rt6_info *rt = (struct rt6_info *) dst;
138 	daddr = choose_neigh_daddr(rt, skb, daddr);
139 	n = __ipv6_neigh_lookup(dst->dev, daddr);
142 	return neigh_create(&nd_tbl, daddr, dst->dev);
/* dst_ops template for ordinary IPv6 routes; copied per-netns into
 * net->ipv6.ip6_dst_ops.
 */
145 static struct dst_ops ip6_dst_ops_template = {
149 	.check			=	ip6_dst_check,
150 	.default_advmss		=	ip6_default_advmss,
152 	.cow_metrics		=	ipv6_cow_metrics,
153 	.destroy		=	ip6_dst_destroy,
154 	.ifdown			=	ip6_dst_ifdown,
155 	.negative_advice	=	ip6_negative_advice,
156 	.link_failure		=	ip6_link_failure,
157 	.update_pmtu		=	ip6_rt_update_pmtu,
158 	.redirect		=	rt6_do_redirect,
159 	.local_out		=	__ip6_local_out,
160 	.neigh_lookup		=	ip6_neigh_lookup,
/* MTU for blackhole dsts: the raw RTAX_MTU metric if set, else device MTU. */
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
165 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
167 	return mtu ? : dst->dev->mtu;
/* Blackhole dsts (returned by ip6_blackhole_route) must never react to
 * PMTU updates, redirects or metric writes, so these hooks are stubs.
 */
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171 					 struct sk_buff *skb, u32 mtu)
175 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
180 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
/* dst_ops used for blackhole routes; shares lookup/check with normal dsts. */
186 static struct dst_ops ip6_dst_blackhole_ops = {
188 	.destroy		=	ip6_dst_destroy,
189 	.check			=	ip6_dst_check,
190 	.mtu			=	ip6_blackhole_mtu,
191 	.default_advmss		=	ip6_default_advmss,
192 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
193 	.redirect		=	ip6_rt_blackhole_redirect,
194 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
195 	.neigh_lookup		=	ip6_neigh_lookup,
/* Initial metrics for the built-in special routes below. */
198 static const u32 ip6_template_metrics[RTAX_MAX] = {
199 	[RTAX_HOPLIMIT - 1] = 0,
/* The "null" route: returned when no route matches; drops packets and
 * reports -ENETUNREACH. Copied per-netns as net->ipv6.ip6_null_entry.
 */
202 static const struct rt6_info ip6_null_entry_template = {
204 		.__refcnt	= ATOMIC_INIT(1),
206 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
207 		.error		= -ENETUNREACH,
208 		.input		= ip6_pkt_discard,
209 		.output		= ip6_pkt_discard_out,
211 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
212 	.rt6i_protocol  = RTPROT_KERNEL,
213 	.rt6i_metric	= ~(u32) 0,
214 	.rt6i_ref	= ATOMIC_INIT(1),
217 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* "prohibit" route template: rejects packets with an administrative error
 * (via ip6_pkt_prohibit*); only used with policy routing tables.
 */
219 static const struct rt6_info ip6_prohibit_entry_template = {
221 		.__refcnt	= ATOMIC_INIT(1),
223 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
225 		.input		= ip6_pkt_prohibit,
226 		.output		= ip6_pkt_prohibit_out,
228 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
229 	.rt6i_protocol  = RTPROT_KERNEL,
230 	.rt6i_metric	= ~(u32) 0,
231 	.rt6i_ref	= ATOMIC_INIT(1),
/* "blackhole" route template: silently discards traffic in both directions. */
234 static const struct rt6_info ip6_blk_hole_entry_template = {
236 		.__refcnt	= ATOMIC_INIT(1),
238 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
240 		.input		= dst_discard,
241 		.output		= dst_discard_sk,
243 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
244 	.rt6i_protocol  = RTPROT_KERNEL,
245 	.rt6i_metric	= ~(u32) 0,
246 	.rt6i_ref	= ATOMIC_INIT(1),
251 /* allocate dst with ip6_dst_ops */
/* Zeroes everything past the embedded dst_entry and initialises the ECMP
 * sibling list. Returns NULL on allocation failure (check elided here).
 */
252 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
253 					     struct net_device *dev,
255 					     struct fib6_table *table)
257 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
258 					0, DST_OBSOLETE_FORCE_CHK, flags);
261 		struct dst_entry *dst = &rt->dst;
263 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
264 		INIT_LIST_HEAD(&rt->rt6i_siblings);
/* dst_ops.destroy hook: free generic metrics and drop the references the
 * route holds (inet6_dev, dst->from chain — release calls elided here).
 */
269 static void ip6_dst_destroy(struct dst_entry *dst)
271 	struct rt6_info *rt = (struct rt6_info *)dst;
272 	struct inet6_dev *idev = rt->rt6i_idev;
273 	struct dst_entry *from = dst->from;
275 	dst_destroy_metrics_generic(dst);
278 		rt->rt6i_idev = NULL;
/* dst_ops.ifdown hook: when @dev goes away, re-point the route's inet6_dev
 * reference at the netns loopback device so the dst stays valid.
 */
286 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
289 	struct rt6_info *rt = (struct rt6_info *)dst;
290 	struct inet6_dev *idev = rt->rt6i_idev;
291 	struct net_device *loopback_dev =
292 		dev_net(dev)->loopback_dev;
294 	if (dev != loopback_dev) {
295 		if (idev && idev->dev == dev) {
296 			struct inet6_dev *loopback_idev =
297 				in6_dev_get(loopback_dev);
299 				rt->rt6i_idev = loopback_idev;
/* True if @rt has timed out: either its own RTF_EXPIRES deadline has passed,
 * or (for clones) the parent route it was copied from has expired.
 */
306 static bool rt6_check_expired(const struct rt6_info *rt)
308 	if (rt->rt6i_flags & RTF_EXPIRES) {
309 		if (time_after(jiffies, rt->dst.expires))
311 	} else if (rt->dst.from) {
312 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
317 /* Multipath route selection:
318  *   Hash based function using packet header and flowlabel.
319  * Adapted from fib_info_hashfn()
 *
 * Mixes protocol, addresses, transport ports (or ICMPv6 type/code) and the
 * flow label, then reduces the hash modulo @candidate_count.
 */
321 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322 			       const struct flowi6 *fl6)
324 	unsigned int val = fl6->flowi6_proto;
326 	val ^= ipv6_addr_hash(&fl6->daddr);
327 	val ^= ipv6_addr_hash(&fl6->saddr);
329 	/* Work only if this not encapsulated */
330 	switch (fl6->flowi6_proto) {
334 		val ^= (__force u16)fl6->fl6_sport;
335 		val ^= (__force u16)fl6->fl6_dport;
339 		val ^= (__force u16)fl6->fl6_icmp_type;
340 		val ^= (__force u16)fl6->fl6_icmp_code;
343 	/* RFC6438 recommands to use flowlabel */
344 	val ^= (__force u32)fl6->flowlabel;
346 	/* Perhaps, we need to tune, this function? */
347 	val = val ^ (val >> 7) ^ (val >> 12);
348 	return val % candidate_count;
/* ECMP next-hop selection: hash the flow over @match and its siblings and
 * walk the sibling list to the chosen entry, skipping routes whose score is
 * negative. route_choosen == 0 keeps @match itself.
 */
351 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
352 					     struct flowi6 *fl6, int oif,
355 	struct rt6_info *sibling, *next_sibling;
358 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359 	/* Don't change the route, if route_choosen == 0
360 	 * (siblings does not include ourself)
363 	list_for_each_entry_safe(sibling, next_sibling,
364 				 &match->rt6i_siblings, rt6i_siblings) {
366 		if (route_choosen == 0) {
367 			if (rt6_score_route(sibling, oif, strict) < 0)
377  *	Route lookup. Any table->tb6_lock is implied.
 *
 * Walk the fib6 leaf chain starting at @rt and return the entry whose
 * device matches @oif (or whose saddr matches when oif == 0). Loopback
 * entries are remembered in @local as a fallback; with RT6_LOOKUP_F_IFACE
 * a failed device match yields the netns null entry.
 */
380 static inline struct rt6_info *rt6_device_match(struct net *net,
382 						    const struct in6_addr *saddr,
386 	struct rt6_info *local = NULL;
387 	struct rt6_info *sprt;
389 	if (!oif && ipv6_addr_any(saddr))
392 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
393 		struct net_device *dev = sprt->dst.dev;
396 			if (dev->ifindex == oif)
398 			if (dev->flags & IFF_LOOPBACK) {
399 				if (!sprt->rt6i_idev ||
400 				    sprt->rt6i_idev->dev->ifindex != oif) {
401 					if (flags & RT6_LOOKUP_F_IFACE && oif)
403 					if (local && (!oif ||
404 						      local->rt6i_idev->dev->ifindex == oif))
410 			if (ipv6_chk_addr(net, saddr, dev,
411 					  flags & RT6_LOOKUP_F_IFACE))
420 		if (flags & RT6_LOOKUP_F_IFACE)
421 			return net->ipv6.ip6_null_entry;
427 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred-work container for a router reachability probe (target router
 * address plus the device a reference is held on).
 */
428 struct __rt6_probe_work {
429 	struct work_struct work;
430 	struct in6_addr target;
431 	struct net_device *dev;
/* Workqueue handler: send a unicast-solicit NS to the probed router via its
 * solicited-node multicast address (device reference put elided here).
 */
434 static void rt6_probe_deferred(struct work_struct *w)
436 	struct in6_addr mcaddr;
437 	struct __rt6_probe_work *work =
438 		container_of(w, struct __rt6_probe_work, work);
440 	addrconf_addr_solict_mult(&work->target, &mcaddr);
441 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
/* Router Reachability Probing (RFC 4191 sec. 3.5): if the gateway's
 * neighbour entry is not VALID and the per-idev probe interval has elapsed,
 * schedule a deferred neighbour solicitation. Rate-limited via
 * __neigh_set_probe_once(); only gateway routes are probed.
 */
446 static void rt6_probe(struct rt6_info *rt)
448 	struct neighbour *neigh;
450 	 * Okay, this does not seem to be appropriate
451 	 * for now, however, we need to check if it
452 	 * is really so; aka Router Reachability Probing.
454 	 * Router Reachability Probe MUST be rate-limited
455 	 * to no more than one per minute.
457 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
460 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
462 		write_lock(&neigh->lock);
463 		if (neigh->nud_state & NUD_VALID)
468 		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
469 			struct __rt6_probe_work *work;
471 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
474 				__neigh_set_probe_once(neigh);
477 			write_unlock(&neigh->lock);
480 			INIT_WORK(&work->work, rt6_probe_deferred);
481 			work->target = rt->rt6i_gateway;
482 			dev_hold(rt->dst.dev);
483 			work->dev = rt->dst.dev;
484 			schedule_work(&work->work);
488 		write_unlock(&neigh->lock);
490 	rcu_read_unlock_bh();
/* CONFIG_IPV6_ROUTER_PREF disabled: router probing is a no-op. */
493 static inline void rt6_probe(struct rt6_info *rt)
499  * Default Router Selection (RFC 2461 6.3.6)
 *
 * Score the route's device against @oif: nonzero when it matches (or no
 * oif constraint), including loopback routes whose idev matches @oif.
 */
501 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
503 	struct net_device *dev = rt->dst.dev;
504 	if (!oif || dev->ifindex == oif)
506 	if ((dev->flags & IFF_LOOPBACK) &&
507 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/* Classify the gateway's neighbour reachability for route scoring:
 * SUCCEED for non-gateway routes or VALID neighbours; FAIL_PROBE when a
 * probe is warranted (router-pref builds); FAIL_DO_RR to trigger
 * round-robin when no neighbour entry exists and router-pref is off.
 */
512 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
514 	struct neighbour *neigh;
515 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
517 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
518 	    !(rt->rt6i_flags & RTF_GATEWAY))
519 		return RT6_NUD_SUCCEED;
522 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
524 		read_lock(&neigh->lock);
525 		if (neigh->nud_state & NUD_VALID)
526 			ret = RT6_NUD_SUCCEED;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528 		else if (!(neigh->nud_state & NUD_FAILED))
529 			ret = RT6_NUD_SUCCEED;
531 			ret = RT6_NUD_FAIL_PROBE;
533 		read_unlock(&neigh->lock);
535 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
536 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
538 	rcu_read_unlock_bh();
/* Combined route score: device match (hard-fails with RT6_LOOKUP_F_IFACE),
 * decoded router preference bits, and — when F_REACHABLE is requested —
 * the neighbour reachability verdict from rt6_check_neigh().
 */
543 static int rt6_score_route(struct rt6_info *rt, int oif,
548 	m = rt6_check_dev(rt, oif);
549 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
550 		return RT6_NUD_FAIL_HARD;
551 #ifdef CONFIG_IPV6_ROUTER_PREF
552 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
554 	if (strict & RT6_LOOKUP_F_REACHABLE) {
555 		int n = rt6_check_neigh(rt);
/* Compare @rt's score against the best seen so far (*mpri) and return the
 * better of @rt / @match. FAIL_DO_RR scores count as 0 and request
 * round-robin via *do_rr; FAIL_HARD routes are probed (under F_REACHABLE)
 * and skipped. Expired routes never match.
 */
562 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
563 				   int *mpri, struct rt6_info *match,
567 	bool match_do_rr = false;
569 	if (rt6_check_expired(rt))
572 	m = rt6_score_route(rt, oif, strict);
573 	if (m == RT6_NUD_FAIL_DO_RR) {
575 		m = 0; /* lowest valid score */
576 	} else if (m == RT6_NUD_FAIL_HARD) {
580 		if (strict & RT6_LOOKUP_F_REACHABLE)
583 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
585 		*do_rr = match_do_rr;
/* Scan all routes with metric @metric in @fn's leaf chain, starting at the
 * round-robin head and wrapping around to fn->leaf, feeding each candidate
 * to find_match(). A continuation pointer (@cont) covers entries past the
 * metric boundary.
 */
593 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594 				     struct rt6_info *rr_head,
595 				     u32 metric, int oif, int strict,
598 	struct rt6_info *rt, *match, *cont;
603 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604 		if (rt->rt6i_metric != metric) {
609 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
612 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613 		if (rt->rt6i_metric != metric) {
618 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
624 	for (rt = cont; rt; rt = rt->dst.rt6_next)
625 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
/* Default-router selection for @fn: pick the best-scoring route at the
 * lowest metric; when round-robin was requested, advance fn->rr_ptr so the
 * next lookup starts at the following same-metric entry. Falls back to the
 * netns null entry when nothing matches.
 */
630 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
632 	struct rt6_info *match, *rt0;
638 		fn->rr_ptr = rt0 = fn->leaf;
640 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
644 		struct rt6_info *next = rt0->dst.rt6_next;
646 		/* no entries matched; do round-robin */
647 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
654 	net = dev_net(rt0->dst.dev);
655 	return match ? match : net->ipv6.ip6_null_entry;
/* True when @rt has a gateway or no next hop at all (RTF_NONEXTHOP). */
658 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
660 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
663 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process an RFC 4191 Route Information Option received from router @gwaddr
 * on @dev: validate length/prefix_len, then add, refresh or (lifetime == 0)
 * remove the corresponding RTF_ROUTEINFO route, updating its preference and
 * expiry. prefix_len == 0 maps to the default route for that router.
 */
664 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
665 		  const struct in6_addr *gwaddr)
667 	struct net *net = dev_net(dev);
668 	struct route_info *rinfo = (struct route_info *) opt;
669 	struct in6_addr prefix_buf, *prefix;
671 	unsigned long lifetime;
674 	if (len < sizeof(struct route_info)) {
678 	/* Sanity check for prefix_len and length */
679 	if (rinfo->length > 3) {
681 	} else if (rinfo->prefix_len > 128) {
683 	} else if (rinfo->prefix_len > 64) {
684 		if (rinfo->length < 2) {
687 	} else if (rinfo->prefix_len > 0) {
688 		if (rinfo->length < 1) {
693 	pref = rinfo->route_pref;
694 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
697 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
699 	if (rinfo->length == 3)
700 		prefix = (struct in6_addr *)rinfo->prefix;
702 		/* this function is safe */
703 		ipv6_addr_prefix(&prefix_buf,
704 				 (struct in6_addr *)rinfo->prefix,
706 		prefix = &prefix_buf;
709 	if (rinfo->prefix_len == 0)
710 		rt = rt6_get_dflt_router(gwaddr, dev);
712 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
713 					gwaddr, dev->ifindex);
715 	if (rt && !lifetime) {
721 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
724 		rt->rt6i_flags = RTF_ROUTEINFO |
725 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
728 		if (!addrconf_finite_timeout(lifetime))
729 			rt6_clean_expires(rt);
731 			rt6_set_expires(rt, jiffies + HZ * lifetime);
/* Step upward in the fib6 tree after a failed lookup, descending into a
 * parent's source-address subtree when present; returns NULL once the tree
 * root (RTN_TL_ROOT) is reached, or the next node carrying route info.
 */
739 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
740 					struct in6_addr *saddr)
742 	struct fib6_node *pn;
744 		if (fn->fn_flags & RTN_TL_ROOT)
747 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
748 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
751 		if (fn->fn_flags & RTN_RTINFO)
/* Fast-path route lookup under tb6_lock: fib6_lookup, device match, ECMP
 * selection, backtracking on the null entry, then dst_use() to take a
 * reference and bump the usage timestamp.
 */
756 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
757 					     struct fib6_table *table,
758 					     struct flowi6 *fl6, int flags)
760 	struct fib6_node *fn;
763 	read_lock_bh(&table->tb6_lock);
764 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
767 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
768 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
769 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
770 	if (rt == net->ipv6.ip6_null_entry) {
771 		fn = fib6_backtrack(fn, &fl6->saddr);
775 	dst_use(&rt->dst, jiffies);
776 	read_unlock_bh(&table->tb6_lock);
/* Public wrapper: run ip6_pol_route_lookup through the policy-rule engine. */
781 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
784 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
786 EXPORT_SYMBOL_GPL(ip6_route_lookup);
/* Convenience lookup by daddr/saddr/oif: builds a flowi6, sets
 * RT6_LOOKUP_F_IFACE when @strict and F_HAS_SADDR when @saddr is given,
 * and returns the resulting rt6_info (error dsts filtered out, elided).
 */
788 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789 			    const struct in6_addr *saddr, int oif, int strict)
791 	struct flowi6 fl6 = {
795 	struct dst_entry *dst;
796 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
799 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
800 		flags |= RT6_LOOKUP_F_HAS_SADDR;
803 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
805 		return (struct rt6_info *) dst;
811 EXPORT_SYMBOL(rt6_lookup);
813 /* ip6_ins_rt is called with FREE table->tb6_lock.
814    It takes new route entry, the addition fails by any reason the
815    route is freed. In any case, if caller does not hold it, it may
 */
/* Insert @rt into its fib6 table under the table write lock, passing the
 * netlink info and converted metrics (@mxc) down to fib6_add().
 */
819 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
820 			struct mx6_config *mxc)
823 	struct fib6_table *table;
825 	table = rt->rt6i_table;
826 	write_lock_bh(&table->tb6_lock);
827 	err = fib6_add(&table->tb6_root, rt, info, mxc);
828 	write_unlock_bh(&table->tb6_lock);
/* Insert @rt with default (empty) netlink info and no extra metrics. */
833 int ip6_ins_rt(struct rt6_info *rt)
835 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
836 	struct mx6_config mxc = { .mx = NULL, };
838 	return __ip6_ins_rt(rt, &info, &mxc);
/* Clone @ort into an RTF_CACHE entry for @daddr (and, with subtrees, pin
 * rt6i_src to @saddr/128). Host routes whose daddr equals the prefix are
 * flagged RTF_ANYCAST.
 */
841 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
842 					   const struct in6_addr *daddr,
843 					   const struct in6_addr *saddr)
851 	rt = ip6_rt_copy(ort, daddr);
854 	rt->rt6i_flags |= RTF_CACHE;
856 	if (!rt6_is_gw_or_nonexthop(ort)) {
857 		if (ort->rt6i_dst.plen != 128 &&
858 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859 			rt->rt6i_flags |= RTF_ANYCAST;
860 #ifdef CONFIG_IPV6_SUBTREES
861 		if (rt->rt6i_src.plen && saddr) {
862 			rt->rt6i_src.addr = *saddr;
863 			rt->rt6i_src.plen = 128;
/* Main policy-routing lookup: select the best route (reachability-strict
 * when forwarding is off), retry after backtracking or after relaxing
 * F_REACHABLE, then refresh inherited metrics and the lastuse stamp.
 */
872 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
873 				      struct flowi6 *fl6, int flags)
875 	struct fib6_node *fn, *saved_fn;
879 	strict |= flags & RT6_LOOKUP_F_IFACE;
880 	if (net->ipv6.devconf_all->forwarding == 0)
881 		strict |= RT6_LOOKUP_F_REACHABLE;
883 	read_lock_bh(&table->tb6_lock);
885 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
889 	rt = rt6_select(fn, oif, strict);
890 	if (rt->rt6i_nsiblings)
891 		rt = rt6_multipath_select(rt, fl6, oif, strict);
892 	if (rt == net->ipv6.ip6_null_entry) {
893 		fn = fib6_backtrack(fn, &fl6->saddr);
895 			goto redo_rt6_select;
896 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
897 			/* also consider unreachable route */
898 			strict &= ~RT6_LOOKUP_F_REACHABLE;
900 			goto redo_rt6_select;
905 		read_unlock_bh(&table->tb6_lock);
907 		rt6_dst_from_metrics_check(rt);
908 		rt->dst.lastuse = jiffies;
/* Input-path lookup: route on the incoming interface (flowi6_iif). */
914 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
915 					    struct flowi6 *fl6, int flags)
917 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
/* Input lookup entry point: force strict interface matching for link-local
 * style destinations (except on PIM register devices), then run the rules.
 */
920 static struct dst_entry *ip6_route_input_lookup(struct net *net,
921 						struct net_device *dev,
922 						struct flowi6 *fl6, int flags)
924 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
925 		flags |= RT6_LOOKUP_F_IFACE;
927 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
/* Attach a routing decision to an incoming skb: build a flowi6 from the
 * IPv6 header (iif, flow label, mark, next header) and set skb's dst.
 */
930 void ip6_route_input(struct sk_buff *skb)
932 	const struct ipv6hdr *iph = ipv6_hdr(skb);
933 	struct net *net = dev_net(skb->dev);
934 	int flags = RT6_LOOKUP_F_HAS_SADDR;
935 	struct flowi6 fl6 = {
936 		.flowi6_iif = skb->dev->ifindex,
939 		.flowlabel = ip6_flowinfo(iph),
940 		.flowi6_mark = skb->mark,
941 		.flowi6_proto = iph->nexthdr,
944 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
/* Output-path lookup: route on the outgoing interface (flowi6_oif). */
947 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
948 					     struct flowi6 *fl6, int flags)
950 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
/* Public output lookup: strict iface matching for bound sockets or
 * scope-sensitive destinations; honours socket source-address preferences
 * and a supplied source address.
 */
953 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
958 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
960 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
961 		flags |= RT6_LOOKUP_F_IFACE;
963 	if (!ipv6_addr_any(&fl6->saddr))
964 		flags |= RT6_LOOKUP_F_HAS_SADDR;
966 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
968 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
970 EXPORT_SYMBOL(ip6_route_output);
/* Build a frozen "blackhole" copy of @dst_orig (used by xfrm): discards all
 * traffic via dst_discard*, copies keys/metrics/idev from the original, and
 * releases @dst_orig. Returns -ENOMEM on allocation failure.
 */
972 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
974 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
975 	struct dst_entry *new = NULL;
977 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
981 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
984 		new->input = dst_discard;
985 		new->output = dst_discard_sk;
987 		if (dst_metrics_read_only(&ort->dst))
988 			new->_metrics = ort->dst._metrics;
990 			dst_copy_metrics(new, &ort->dst);
991 		rt->rt6i_idev = ort->rt6i_idev;
993 			in6_dev_hold(rt->rt6i_idev);
995 		rt->rt6i_gateway = ort->rt6i_gateway;
996 		rt->rt6i_flags = ort->rt6i_flags;
999 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1000 #ifdef CONFIG_IPV6_SUBTREES
1001 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007 	dst_release(dst_orig);
1008 	return new ? new : ERR_PTR(-ENOMEM);
1012  *	Destination cache support functions
 *
 * Re-sync a clone's metrics pointer with its parent (dst.from) if the
 * parent's metrics were replaced since the clone was made.
 */
1015 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1018 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1019 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
/* dst_ops.check hook: validate a cached dst against the fib serial number
 * (@cookie) and expiry; stale or expired entries are rejected (NULL path
 * elided here).
 */
1022 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1024 	struct rt6_info *rt;
1026 	rt = (struct rt6_info *) dst;
1028 	/* All IPV6 dsts are created with ->obsolete set to the value
1029 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1030 	 * into this function always.
1032 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1035 	if (rt6_check_expired(rt))
1038 	rt6_dst_from_metrics_check(rt);
/* dst_ops.negative_advice hook: drop an expired RTF_CACHE clone when the
 * socket reports trouble with this dst (deletion path elided here).
 */
1043 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045 	struct rt6_info *rt = (struct rt6_info *) dst;
1048 		if (rt->rt6i_flags & RTF_CACHE) {
1049 			if (rt6_check_expired(rt)) {
/* dst_ops.link_failure hook: report ICMPv6 address-unreachable to the
 * sender, then invalidate the offending route — cached clones are removed,
 * default routes have their node's serial bumped to force re-lookup.
 */
1061 static void ip6_link_failure(struct sk_buff *skb)
1063 	struct rt6_info *rt;
1065 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1067 	rt = (struct rt6_info *) skb_dst(skb);
1069 		if (rt->rt6i_flags & RTF_CACHE) {
1073 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1074 			rt->rt6i_node->fn_sernum = -1;
/* Record a learned path MTU on @rt and arm the per-netns PMTU expiry. */
1079 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1081 	struct net *net = dev_net(rt->dst.dev);
1083 	rt->rt6i_flags |= RTF_MODIFIED;
1084 	rt->rt6i_pmtu = mtu;
1085 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
/* Apply a Packet-Too-Big MTU to @dst: clamp to IPV6_MIN_MTU, ignore
 * non-shrinking updates and local routes. Cached routes are updated in
 * place; otherwise a new RTF_CACHE clone is created (keyed by the packet
 * header or the socket addresses) so the fib serial bump invalidates
 * sk->sk_dst_cache.
 */
1088 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1089 				 const struct ipv6hdr *iph, u32 mtu)
1091 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1093 	if (rt6->rt6i_flags & RTF_LOCAL)
1097 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1098 	if (mtu >= dst_mtu(dst))
1101 	if (rt6->rt6i_flags & RTF_CACHE) {
1102 		rt6_do_update_pmtu(rt6, mtu);
1104 		const struct in6_addr *daddr, *saddr;
1105 		struct rt6_info *nrt6;
1108 			daddr = &iph->daddr;
1109 			saddr = &iph->saddr;
1111 			daddr = &sk->sk_v6_daddr;
1112 			saddr = &inet6_sk(sk)->saddr;
1116 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1118 			rt6_do_update_pmtu(nrt6, mtu);
1120 			/* ip6_ins_rt(nrt6) will bump the
1121 			 * rt6->rt6i_node->fn_sernum
1122 			 * which will fail the next rt6_check() and
1123 			 * invalidate the sk->sk_dst_cache.
/* dst_ops.update_pmtu hook: forward to __ip6_rt_update_pmtu, passing the
 * IPv6 header when an skb is available.
 */
1130 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1131 			       struct sk_buff *skb, u32 mtu)
1133 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
/* Entry point for ICMPv6 Packet-Too-Big handling: rebuild the flow from the
 * offending packet's header, look up the route and update its PMTU.
 */
1136 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1139 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1140 	struct dst_entry *dst;
1143 	memset(&fl6, 0, sizeof(fl6));
1144 	fl6.flowi6_oif = oif;
1145 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1146 	fl6.daddr = iph->daddr;
1147 	fl6.saddr = iph->saddr;
1148 	fl6.flowlabel = ip6_flowinfo(iph);
1150 	dst = ip6_route_output(net, NULL, &fl6);
1152 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1155 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
/* Socket variant: derive netns, bound device and mark from @sk. */
1157 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1159 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1160 			sk->sk_bound_dev_if, sk->sk_mark);
1162 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1164 /* Handle redirects */
/* flowi6 extended with the redirecting router's address, so the lookup can
 * verify the redirect came from the current next hop.
 */
1165 struct ip6rd_flowi {
1167 	struct in6_addr gateway;
/* Locate the route a redirect applies to: scan @fn's leaves for an
 * unexpired gateway route on the right interface whose gateway matches the
 * redirecting router (RFC 4861 sanity check), backtracking as needed.
 */
1170 static struct rt6_info *__ip6_route_redirect(struct net *net,
1171 					     struct fib6_table *table,
1175 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1176 	struct rt6_info *rt;
1177 	struct fib6_node *fn;
1179 	/* Get the "current" route for this destination and
1180 	 * check if the redirect has come from approriate router.
1182 	 * RFC 4861 specifies that redirects should only be
1183 	 * accepted if they come from the nexthop to the target.
1184 	 * Due to the way the routes are chosen, this notion
1185 	 * is a bit fuzzy and one might need to check all possible
1189 	read_lock_bh(&table->tb6_lock);
1190 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1192 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1193 		if (rt6_check_expired(rt))
1197 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1199 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1201 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1207 		rt = net->ipv6.ip6_null_entry;
1208 	else if (rt->dst.error) {
1209 		rt = net->ipv6.ip6_null_entry;
1213 	if (rt == net->ipv6.ip6_null_entry) {
1214 		fn = fib6_backtrack(fn, &fl6->saddr);
1222 	read_unlock_bh(&table->tb6_lock);
/* Wrap @fl6 + @gateway into an ip6rd_flowi and resolve it through the
 * policy rules with __ip6_route_redirect.
 */
1227 static struct dst_entry *ip6_route_redirect(struct net *net,
1228 					const struct flowi6 *fl6,
1229 					const struct in6_addr *gateway)
1231 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1232 	struct ip6rd_flowi rdfl;
1235 	rdfl.gateway = *gateway;
1237 	return fib6_rule_lookup(net, &rdfl.fl6,
1238 				flags, __ip6_route_redirect);
/* Process an ICMPv6 redirect: rebuild the flow from the embedded packet
 * header, find the affected route (source of the redirect is the outer
 * saddr) and apply it via rt6_do_redirect.
 */
1241 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1243 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1244 	struct dst_entry *dst;
1247 	memset(&fl6, 0, sizeof(fl6));
1248 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1249 	fl6.flowi6_oif = oif;
1250 	fl6.flowi6_mark = mark;
1251 	fl6.daddr = iph->daddr;
1252 	fl6.saddr = iph->saddr;
1253 	fl6.flowlabel = ip6_flowinfo(iph);
1255 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1256 	rt6_do_redirect(dst, NULL, skb);
1259 EXPORT_SYMBOL_GPL(ip6_redirect);
/* Redirect variant for messages without the full offending header: take the
 * destination from the rd_msg and the flow endpoints from the outer header.
 */
1261 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1264 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1265 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1266 	struct dst_entry *dst;
1269 	memset(&fl6, 0, sizeof(fl6));
1270 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1271 	fl6.flowi6_oif = oif;
1272 	fl6.flowi6_mark = mark;
1273 	fl6.daddr = msg->dest;
1274 	fl6.saddr = iph->daddr;
1276 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1277 	rt6_do_redirect(dst, NULL, skb);
/* Socket variant: derive netns, bound device and mark from @sk. */
1281 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1283 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1285 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
/* dst_ops.default_advmss hook: advertised MSS = path MTU minus IPv6+TCP
 * headers, floored at the ip6_rt_min_advmss sysctl and capped so only
 * jumbogram-capable paths advertise above IPV6_MAXPLEN.
 */
1287 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1289 	struct net_device *dev = dst->dev;
1290 	unsigned int mtu = dst_mtu(dst);
1291 	struct net *net = dev_net(dev);
1293 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1295 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1296 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1299 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1300 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1301 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1302 	 * rely only on pmtu discovery"
1304 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* dst_ops.mtu hook: prefer the learned path MTU (rt6i_pmtu), then the raw
 * RTAX_MTU metric, then the device's configured IPv6 MTU; capped at
 * IP6_MAX_MTU.
 */
1309 static unsigned int ip6_mtu(const struct dst_entry *dst)
1311 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1312 	unsigned int mtu = rt->rt6i_pmtu;
1313 	struct inet6_dev *idev;
1318 	mtu = dst_metric_raw(dst, RTAX_MTU);
1325 	idev = __in6_dev_get(dst->dev);
1327 		mtu = idev->cnf.mtu6;
1331 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
/* Global list of ICMPv6 dsts awaiting garbage collection, and its lock. */
1334 static struct dst_entry *icmp6_dst_gc_list;
1335 static DEFINE_SPINLOCK(icmp6_dst_lock);
/* Allocate a standalone host dst for sending an ICMPv6 message: not
 * inserted into the fib, but chained on icmp6_dst_gc_list so icmp6_dst_gc()
 * can reap it; passed through xfrm_lookup before being returned.
 */
1337 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1340 	struct dst_entry *dst;
1341 	struct rt6_info *rt;
1342 	struct inet6_dev *idev = in6_dev_get(dev);
1343 	struct net *net = dev_net(dev);
1345 	if (unlikely(!idev))
1346 		return ERR_PTR(-ENODEV);
1348 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1349 	if (unlikely(!rt)) {
1351 		dst = ERR_PTR(-ENOMEM);
1355 	rt->dst.flags |= DST_HOST;
1356 	rt->dst.output  = ip6_output;
1357 	atomic_set(&rt->dst.__refcnt, 1);
1358 	rt->rt6i_gateway  = fl6->daddr;
1359 	rt->rt6i_dst.addr = fl6->daddr;
1360 	rt->rt6i_dst.plen = 128;
1361 	rt->rt6i_idev     = idev;
1362 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1364 	spin_lock_bh(&icmp6_dst_lock);
1365 	rt->dst.next = icmp6_dst_gc_list;
1366 	icmp6_dst_gc_list = &rt->dst;
1367 	spin_unlock_bh(&icmp6_dst_lock);
1369 	fib6_force_start_gc(net);
1371 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/* Reap unreferenced entries from icmp6_dst_gc_list (unlink/free path
 * elided here); returns whether any entries remain.
 */
1377 int icmp6_dst_gc(void)
1379 	struct dst_entry *dst, **pprev;
1382 	spin_lock_bh(&icmp6_dst_lock);
1383 	pprev = &icmp6_dst_gc_list;
1385 	while ((dst = *pprev) != NULL) {
1386 		if (!atomic_read(&dst->__refcnt)) {
1395 	spin_unlock_bh(&icmp6_dst_lock);
/* Walk icmp6_dst_gc_list and unlink every entry for which @func returns
 * true (e.g. device-removal cleanup); unlink body elided here.
 */
1400 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1403 	struct dst_entry *dst, **pprev;
1405 	spin_lock_bh(&icmp6_dst_lock);
1406 	pprev = &icmp6_dst_gc_list;
1407 	while ((dst = *pprev) != NULL) {
1408 		struct rt6_info *rt = (struct rt6_info *) dst;
1409 		if (func(rt, arg)) {
1416 	spin_unlock_bh(&icmp6_dst_lock);
/* dst_ops.gc hook: run fib6 garbage collection when the entry count
 * exceeds ip6_rt_max_size or the min GC interval has elapsed, adapting the
 * per-netns expiry (ip6_rt_gc_expire) by the elasticity sysctl. Returns
 * nonzero when the table is still over its size limit.
 */
1419 static int ip6_dst_gc(struct dst_ops *ops)
1421 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1422 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1423 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1424 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1425 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1426 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1429 	entries = dst_entries_get_fast(ops);
1430 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1431 	    entries <= rt_max_size)
1434 	net->ipv6.ip6_rt_gc_expire++;
1435 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1436 	entries = dst_entries_get_slow(ops);
1437 	if (entries < ops->gc_thresh)
1438 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1440 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1441 	return entries > rt_max_size;
/* Convert RTA_METRICS netlink attributes from @cfg into an mx6_config
 * array: validate attribute types against RTAX_MAX, resolve RTAX_CC_ALGO
 * congestion-control names to keys, and track set entries in mx_valid.
 */
1444 static int ip6_convert_metrics(struct mx6_config *mxc,
1445 			       const struct fib6_config *cfg)
1454 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1458 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1459 		int type = nla_type(nla);
1464 			if (unlikely(type > RTAX_MAX))
1466 			if (type == RTAX_CC_ALGO) {
1467 				char tmp[TCP_CA_NAME_MAX];
1469 				nla_strlcpy(tmp, nla, sizeof(tmp));
1470 				val = tcp_ca_get_key_by_name(tmp);
1471 				if (val == TCP_CA_UNSPEC)
1474 				val = nla_get_u32(nla);
1478 			__set_bit(type - 1, mxc->mx_valid);
/* Add an IPv6 route described by @cfg (from netlink, ioctl, or
 * addrconf) to the appropriate fib6 table.
 * Validates prefix lengths, resolves the output device and table,
 * builds the rt6_info, handles reject/blackhole/prohibit types and
 * gateway validation, then inserts via __ip6_ins_rt().
 * NOTE(review): many error-handling lines (goto out/err paths, dev/idev
 * release) are elided in this view; comments describe the visible flow.
 */
1490 int ip6_route_add(struct fib6_config *cfg)
1493 struct net *net = cfg->fc_nlinfo.nl_net;
1494 struct rt6_info *rt = NULL;
1495 struct net_device *dev = NULL;
1496 struct inet6_dev *idev = NULL;
1497 struct fib6_table *table;
1498 struct mx6_config mxc = { .mx = NULL, };
/* An IPv6 prefix can be at most /128. */
1501 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
/* Source-based routing only exists with subtrees support. */
1503 #ifndef CONFIG_IPV6_SUBTREES
1504 if (cfg->fc_src_len)
1507 if (cfg->fc_ifindex) {
1509 dev = dev_get_by_index(net, cfg->fc_ifindex);
1512 idev = in6_dev_get(dev);
/* Metric 0 means "unspecified"; default to the user priority. */
1517 if (cfg->fc_metric == 0)
1518 cfg->fc_metric = IP6_RT_PRIO_USER;
/* Without NLM_F_CREATE we only look the table up; creating it
 * anyway (with a warning) preserves old userspace behaviour. */
1521 if (cfg->fc_nlinfo.nlh &&
1522 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1523 table = fib6_get_table(net, cfg->fc_table);
1525 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1526 table = fib6_new_table(net, cfg->fc_table);
1529 table = fib6_new_table(net, cfg->fc_table);
/* addrconf routes are accounted against the dst entry count;
 * others are DST_NOCOUNT. */
1535 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1542 if (cfg->fc_flags & RTF_EXPIRES)
1543 rt6_set_expires(rt, jiffies +
1544 clock_t_to_jiffies(cfg->fc_expires));
1546 rt6_clean_expires(rt);
1548 if (cfg->fc_protocol == RTPROT_UNSPEC)
1549 cfg->fc_protocol = RTPROT_BOOT;
1550 rt->rt6i_protocol = cfg->fc_protocol;
1552 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* Pick the input handler from the destination class. */
1554 if (addr_type & IPV6_ADDR_MULTICAST)
1555 rt->dst.input = ip6_mc_input;
1556 else if (cfg->fc_flags & RTF_LOCAL)
1557 rt->dst.input = ip6_input;
1559 rt->dst.input = ip6_forward;
1561 rt->dst.output = ip6_output;
1563 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1564 rt->rt6i_dst.plen = cfg->fc_dst_len;
1565 if (rt->rt6i_dst.plen == 128)
1566 rt->dst.flags |= DST_HOST;
1568 #ifdef CONFIG_IPV6_SUBTREES
1569 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1570 rt->rt6i_src.plen = cfg->fc_src_len;
1573 rt->rt6i_metric = cfg->fc_metric;
1575 /* We cannot add true routes via loopback here,
1576 they would result in kernel looping; promote them to reject routes
1578 if ((cfg->fc_flags & RTF_REJECT) ||
1579 (dev && (dev->flags & IFF_LOOPBACK) &&
1580 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1581 !(cfg->fc_flags & RTF_LOCAL))) {
1582 /* hold loopback dev/idev if we haven't done so. */
1583 if (dev != net->loopback_dev) {
1588 dev = net->loopback_dev;
1590 idev = in6_dev_get(dev);
1596 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Map the route type to its dst error code and packet handlers. */
1597 switch (cfg->fc_type) {
1599 rt->dst.error = -EINVAL;
1600 rt->dst.output = dst_discard_sk;
1601 rt->dst.input = dst_discard;
1604 rt->dst.error = -EACCES;
1605 rt->dst.output = ip6_pkt_prohibit_out;
1606 rt->dst.input = ip6_pkt_prohibit;
1610 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1612 rt->dst.output = ip6_pkt_discard_out;
1613 rt->dst.input = ip6_pkt_discard;
1619 if (cfg->fc_flags & RTF_GATEWAY) {
1620 const struct in6_addr *gw_addr;
1623 gw_addr = &cfg->fc_gateway;
1625 /* if gw_addr is local we will fail to detect this in case
1626 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1627 * will return already-added prefix route via interface that
1628 * prefix route was assigned to, which might be non-loopback.
1631 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1634 rt->rt6i_gateway = *gw_addr;
1635 gwa_type = ipv6_addr_type(gw_addr);
1637 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1638 struct rt6_info *grt;
1640 /* IPv6 strictly inhibits using not link-local
1641 addresses as nexthop address.
1642 Otherwise, router will not able to send redirects.
1643 It is very good, but in some (rare!) circumstances
1644 (SIT, PtP, NBMA NOARP links) it is handy to allow
1645 some exceptions. --ANK
1647 if (!(gwa_type & IPV6_ADDR_UNICAST))
/* The gateway must itself be reachable by a non-gateway route. */
1650 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1652 err = -EHOSTUNREACH;
1656 if (dev != grt->dst.dev) {
1662 idev = grt->rt6i_idev;
1664 in6_dev_hold(grt->rt6i_idev);
1666 if (!(grt->rt6i_flags & RTF_GATEWAY))
1674 if (!dev || (dev->flags & IFF_LOOPBACK))
/* A requested preferred source must be an address on the device. */
1682 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1683 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1687 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1688 rt->rt6i_prefsrc.plen = 128;
1690 rt->rt6i_prefsrc.plen = 0;
1692 rt->rt6i_flags = cfg->fc_flags;
1696 rt->rt6i_idev = idev;
1697 rt->rt6i_table = table;
1699 cfg->fc_nlinfo.nl_net = dev_net(dev);
1701 err = ip6_convert_metrics(&mxc, cfg);
1705 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
/* Remove @rt from its fib6 table under the table write lock and send
 * the netlink notification carried in @info. Deleting the per-netns
 * null entry is refused (it must always exist).
 * NOTE(review): the error-return for the null-entry case and the final
 * return are elided in this view.
 */
1719 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1722 struct fib6_table *table;
1723 struct net *net = dev_net(rt->dst.dev);
1725 if (rt == net->ipv6.ip6_null_entry) {
1730 table = rt->rt6i_table;
1731 write_lock_bh(&table->tb6_lock);
1732 err = fib6_del(rt, info);
1733 write_unlock_bh(&table->tb6_lock);
/* Public route-delete entry point: build a minimal nl_info for the
 * route's own namespace and delegate to __ip6_del_rt(). */
1740 int ip6_del_rt(struct rt6_info *rt)
1742 struct nl_info info = {
1743 .nl_net = dev_net(rt->dst.dev),
1745 return __ip6_del_rt(rt, &info);
/* Delete the route matching @cfg: locate the fib6 node for the
 * dst/src prefixes, then scan its leaf chain for an entry that matches
 * the optional ifindex, gateway and metric filters. The matching rt is
 * deleted via __ip6_del_rt() after dropping the read lock (a reference
 * is presumably taken on rt before unlock — elided in this view).
 */
1748 static int ip6_route_del(struct fib6_config *cfg)
1750 struct fib6_table *table;
1751 struct fib6_node *fn;
1752 struct rt6_info *rt;
1755 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1759 read_lock_bh(&table->tb6_lock);
1761 fn = fib6_locate(&table->tb6_root,
1762 &cfg->fc_dst, cfg->fc_dst_len,
1763 &cfg->fc_src, cfg->fc_src_len);
1766 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
/* Skip cached clones unless the caller explicitly asked for them. */
1767 if ((rt->rt6i_flags & RTF_CACHE) &&
1768 !(cfg->fc_flags & RTF_CACHE))
1770 if (cfg->fc_ifindex &&
1772 rt->dst.dev->ifindex != cfg->fc_ifindex))
1774 if (cfg->fc_flags & RTF_GATEWAY &&
1775 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1777 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1780 read_unlock_bh(&table->tb6_lock);
1782 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1785 read_unlock_bh(&table->tb6_lock);
/* Process a received ICMPv6 Redirect (RFC 4861 §8) for @dst.
 * Validates the message (length, non-multicast destination, link-local
 * unicast target unless on-link), checks that redirects are acceptable
 * on this interface, updates the neighbour cache from the target
 * link-layer address option, then clones the route with the new
 * gateway and inserts it, firing a NETEVENT_REDIRECT notification.
 * NOTE(review): several early-return/goto lines and the on_link
 * assignment are elided in this view.
 */
1790 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1792 struct net *net = dev_net(skb->dev);
1793 struct netevent_redirect netevent;
1794 struct rt6_info *rt, *nrt = NULL;
1795 struct ndisc_options ndopts;
1796 struct inet6_dev *in6_dev;
1797 struct neighbour *neigh;
1799 int optlen, on_link;
1802 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1803 optlen -= sizeof(*msg);
1806 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1810 msg = (struct rd_msg *)icmp6_hdr(skb);
1812 if (ipv6_addr_is_multicast(&msg->dest)) {
1813 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
/* dest == target means the destination is on-link (no gateway). */
1818 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1820 } else if (ipv6_addr_type(&msg->target) !=
1821 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1822 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1826 in6_dev = __in6_dev_get(skb->dev);
/* Routers and interfaces with accept_redirects=0 ignore redirects. */
1829 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1833 * The IP source address of the Redirect MUST be the same as the current
1834 * first-hop router for the specified ICMP Destination Address.
1837 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1838 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1843 if (ndopts.nd_opts_tgt_lladdr) {
1844 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1847 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1852 rt = (struct rt6_info *) dst;
1853 if (rt == net->ipv6.ip6_null_entry) {
1854 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1858 /* Redirect received -> path was valid.
1859 * Look, redirects are sent only in response to data packets,
1860 * so that this nexthop apparently is reachable. --ANK
1862 dst_confirm(&rt->dst);
1864 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1869 * We have finally decided to accept it.
/* Redirect implies the target is a router unless it is on-link. */
1872 neigh_update(neigh, lladdr, NUD_STALE,
1873 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1874 NEIGH_UPDATE_F_OVERRIDE|
1875 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1876 NEIGH_UPDATE_F_ISROUTER))
1879 nrt = ip6_rt_copy(rt, &msg->dest);
1883 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1885 nrt->rt6i_flags &= ~RTF_GATEWAY;
1887 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1889 if (ip6_ins_rt(nrt))
1892 netevent.old = &rt->dst;
1893 netevent.new = &nrt->dst;
1894 netevent.daddr = &msg->dest;
1895 netevent.neigh = neigh;
1896 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* The superseded cached route is removed (deletion path elided). */
1898 if (rt->rt6i_flags & RTF_CACHE) {
1899 rt = (struct rt6_info *) dst_clone(&rt->dst);
1904 neigh_release(neigh);
1908 * Misc support functions
/* Link clone @rt to its parent @from: take a reference on the parent
 * dst, record it in rt->dst.from, and share the parent's metrics
 * (read-only). The parent itself must not be a clone (BUG_ON). */
1911 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1913 BUG_ON(from->dst.from);
1915 rt->rt6i_flags &= ~RTF_EXPIRES;
1916 dst_hold(&from->dst);
1917 rt->dst.from = &from->dst;
1918 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
/* Clone route @ort into a new host (/128) rt6_info for destination
 * @dest, copying handlers, flags, gateway, idev and table, and linking
 * metrics to the (non-cache) parent via rt6_set_from().
 * NOTE(review): the NULL check after ip6_dst_alloc() and the final
 * return are elided in this view.
 */
1921 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1922 const struct in6_addr *dest)
1924 struct net *net = dev_net(ort->dst.dev);
1925 struct rt6_info *rt;
/* Always clone from the original route, never from a cached clone. */
1927 if (ort->rt6i_flags & RTF_CACHE)
1928 ort = (struct rt6_info *)ort->dst.from;
1930 rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1934 rt->dst.input = ort->dst.input;
1935 rt->dst.output = ort->dst.output;
1936 rt->dst.flags |= DST_HOST;
1938 rt->rt6i_dst.addr = *dest;
1939 rt->rt6i_dst.plen = 128;
1940 rt->dst.error = ort->dst.error;
1941 rt->rt6i_idev = ort->rt6i_idev;
1943 in6_dev_hold(rt->rt6i_idev);
1944 rt->dst.lastuse = jiffies;
1945 rt->rt6i_gateway = ort->rt6i_gateway;
1946 rt->rt6i_flags = ort->rt6i_flags;
1947 rt6_set_from(rt, ort);
/* Clones carry metric 0; priority lives on the parent route. */
1948 rt->rt6i_metric = 0;
1950 #ifdef CONFIG_IPV6_SUBTREES
1951 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1953 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1954 rt->rt6i_table = ort->rt6i_table;
1959 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA Route Information route for @prefix/@prefixlen
 * learned via @gwaddr on interface @ifindex in the RT6_TABLE_INFO
 * table. Returns the matching rt6_info (reference handling and the
 * loop's match/exit lines are elided in this view) or NULL. */
1960 static struct rt6_info *rt6_get_route_info(struct net *net,
1961 const struct in6_addr *prefix, int prefixlen,
1962 const struct in6_addr *gwaddr, int ifindex)
1964 struct fib6_node *fn;
1965 struct rt6_info *rt = NULL;
1966 struct fib6_table *table;
1968 table = fib6_get_table(net, RT6_TABLE_INFO);
1972 read_lock_bh(&table->tb6_lock);
1973 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1977 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1978 if (rt->dst.dev->ifindex != ifindex)
/* Must be a gatewayed route-information route, exactly. */
1980 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1982 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1988 read_unlock_bh(&table->tb6_lock);
/* Install a route learned from an RA Route Information option:
 * build a fib6_config for RT6_TABLE_INFO with the router preference
 * encoded in the flags, add it, then look it back up to return the
 * inserted entry. A zero prefix length is treated as a default route. */
1992 static struct rt6_info *rt6_add_route_info(struct net *net,
1993 const struct in6_addr *prefix, int prefixlen,
1994 const struct in6_addr *gwaddr, int ifindex,
1997 struct fib6_config cfg = {
1998 .fc_table = RT6_TABLE_INFO,
1999 .fc_metric = IP6_RT_PRIO_USER,
2000 .fc_ifindex = ifindex,
2001 .fc_dst_len = prefixlen,
2002 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2003 RTF_UP | RTF_PREF(pref),
2004 .fc_nlinfo.portid = 0,
2005 .fc_nlinfo.nlh = NULL,
2006 .fc_nlinfo.nl_net = net,
2009 cfg.fc_dst = *prefix;
2010 cfg.fc_gateway = *gwaddr;
2012 /* We should treat it as a default route if prefix length is 0. */
2014 cfg.fc_flags |= RTF_DEFAULT;
/* ip6_route_add() may fail; the subsequent lookup then returns NULL. */
2016 ip6_route_add(&cfg);
2018 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/* Look up the addrconf default-router entry for gateway @addr on
 * device @dev in RT6_TABLE_DFLT. Scans the root leaf chain for an
 * RTF_ADDRCONF|RTF_DEFAULT route with a matching gateway.
 * (Reference taking on the match is elided in this view.) */
2022 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2024 struct rt6_info *rt;
2025 struct fib6_table *table;
2027 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2031 read_lock_bh(&table->tb6_lock);
2032 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2033 if (dev == rt->dst.dev &&
2034 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2035 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2040 read_unlock_bh(&table->tb6_lock);
/* Install a default router learned from a Router Advertisement:
 * an expiring (RTF_EXPIRES) gateway default route in RT6_TABLE_DFLT
 * with the RA's router preference. Returns the inserted entry via a
 * fresh lookup (NULL if the add failed). */
2044 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2045 struct net_device *dev,
2048 struct fib6_config cfg = {
2049 .fc_table = RT6_TABLE_DFLT,
2050 .fc_metric = IP6_RT_PRIO_USER,
2051 .fc_ifindex = dev->ifindex,
2052 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2053 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2054 .fc_nlinfo.portid = 0,
2055 .fc_nlinfo.nlh = NULL,
2056 .fc_nlinfo.nl_net = dev_net(dev),
2059 cfg.fc_gateway = *gwaddr;
2061 ip6_route_add(&cfg);
2063 return rt6_get_dflt_router(gwaddr, dev);
/* Remove all RA-learned default routes from RT6_TABLE_DFLT, except on
 * interfaces with accept_ra == 2 ("accept RA even when forwarding").
 * The read lock is dropped before each deletion (the delete + restart
 * of the scan is elided in this view). */
2066 void rt6_purge_dflt_routers(struct net *net)
2068 struct rt6_info *rt;
2069 struct fib6_table *table;
2071 /* NOTE: Keep consistent with rt6_get_dflt_router */
2072 table = fib6_get_table(net, RT6_TABLE_DFLT);
2077 read_lock_bh(&table->tb6_lock);
2078 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2079 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2080 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2082 read_unlock_bh(&table->tb6_lock);
2087 read_unlock_bh(&table->tb6_lock);
/* Translate a legacy ioctl in6_rtmsg into the common fib6_config
 * representation used by ip6_route_add()/ip6_route_del(). Legacy
 * routes always target the main table. */
2090 static void rtmsg_to_fib6_config(struct net *net,
2091 struct in6_rtmsg *rtmsg,
2092 struct fib6_config *cfg)
2094 memset(cfg, 0, sizeof(*cfg));
2096 cfg->fc_table = RT6_TABLE_MAIN;
2097 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2098 cfg->fc_metric = rtmsg->rtmsg_metric;
2099 cfg->fc_expires = rtmsg->rtmsg_info;
2100 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2101 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2102 cfg->fc_flags = rtmsg->rtmsg_flags;
2104 cfg->fc_nlinfo.nl_net = net;
2106 cfg->fc_dst = rtmsg->rtmsg_dst;
2107 cfg->fc_src = rtmsg->rtmsg_src;
2108 cfg->fc_gateway = rtmsg->rtmsg_gateway;
/* Handle the legacy SIOCADDRT/SIOCDELRT route ioctls. Requires
 * CAP_NET_ADMIN in the owning user namespace; copies the in6_rtmsg
 * from userspace, converts it, and dispatches to add/del.
 * (rtnl locking and the copy_from_user error path are elided.) */
2111 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2113 struct fib6_config cfg;
2114 struct in6_rtmsg rtmsg;
2118 case SIOCADDRT: /* Add a route */
2119 case SIOCDELRT: /* Delete a route */
2120 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2122 err = copy_from_user(&rtmsg, arg,
2123 sizeof(struct in6_rtmsg));
2127 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2132 err = ip6_route_add(&cfg);
2135 err = ip6_route_del(&cfg);
2149 * Drop the packet on the floor
/* Drop @skb, bump the appropriate SNMP no-route counter, and send an
 * ICMPv6 Destination Unreachable with the given @code. Packets to the
 * unspecified address are counted as address errors instead.
 * (The kfree_skb/return tail is elided in this view.) */
2152 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2155 struct dst_entry *dst = skb_dst(skb);
2156 switch (ipstats_mib_noroutes) {
2157 case IPSTATS_MIB_INNOROUTES:
2158 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2159 if (type == IPV6_ADDR_ANY) {
2160 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2161 IPSTATS_MIB_INADDRERRORS);
/* fallthrough to the common no-routes accounting */
2165 case IPSTATS_MIB_OUTNOROUTES:
2166 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2167 ipstats_mib_noroutes);
2170 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
/* dst input/output handlers for blackhole (discard) routes:
 * drop with "no route" ICMP error. */
2175 static int ip6_pkt_discard(struct sk_buff *skb)
2177 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* Output path variant: point skb->dev at the route's device first. */
2180 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2182 skb->dev = skb_dst(skb)->dev;
2183 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
/* Handlers for prohibit routes: drop with "administratively
 * prohibited" ICMP error. */
2186 static int ip6_pkt_prohibit(struct sk_buff *skb)
2188 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2191 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2193 skb->dev = skb_dst(skb)->dev;
2194 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2198 * Allocate a dst for local (unicast / anycast) address.
/* Allocate a host (/128) route for a local unicast or anycast address
 * @addr on @idev, placed in the local table with the loopback device
 * as the dst device. The returned route starts with refcount 1.
 * (The anycast/unicast selector parameter's declaration is elided.) */
2201 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2202 const struct in6_addr *addr,
2205 struct net *net = dev_net(idev->dev);
2206 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2209 return ERR_PTR(-ENOMEM);
2213 rt->dst.flags |= DST_HOST;
2214 rt->dst.input = ip6_input;
2215 rt->dst.output = ip6_output;
2216 rt->rt6i_idev = idev;
2218 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2220 rt->rt6i_flags |= RTF_ANYCAST;
2222 rt->rt6i_flags |= RTF_LOCAL;
/* For a local route the gateway field just mirrors the address. */
2224 rt->rt6i_gateway = *addr;
2225 rt->rt6i_dst.addr = *addr;
2226 rt->rt6i_dst.plen = 128;
2227 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2229 atomic_set(&rt->dst.__refcnt, 1);
/* Select a source address for @daddr: use the route's preferred
 * source (rt6i_prefsrc) when one is set, otherwise fall back to the
 * standard source-address selection on the route's device.
 * (The prefs parameter declaration and final return are elided.) */
2234 int ip6_route_get_saddr(struct net *net,
2235 struct rt6_info *rt,
2236 const struct in6_addr *daddr,
2238 struct in6_addr *saddr)
2240 struct inet6_dev *idev =
2241 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2243 if (rt && rt->rt6i_prefsrc.plen)
2244 *saddr = rt->rt6i_prefsrc.addr;
2246 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2247 daddr, prefs, saddr);
2251 /* remove deleted ip from prefsrc entries */
/* Walker argument: the device/netns/address whose prefsrc entries
 * should be cleared. */
2252 struct arg_dev_net_ip {
2253 struct net_device *dev;
2255 struct in6_addr *addr;
/* fib6_clean_all() callback: clear the preferred-source field of any
 * route (except the null entry) whose prefsrc matches the removed
 * address, on the given device (or on any device when dev is NULL). */
2258 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2260 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2261 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2262 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2264 if (((void *)rt->dst.dev == dev || !dev) &&
2265 rt != net->ipv6.ip6_null_entry &&
2266 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2267 /* remove prefsrc entry */
2268 rt->rt6i_prefsrc.plen = 0;
/* When address @ifp is deleted, walk all fib6 tables and drop it as a
 * preferred source from any route that referenced it. */
2273 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2275 struct net *net = dev_net(ifp->idev->dev);
2276 struct arg_dev_net_ip adni = {
2277 .dev = ifp->idev->dev,
2281 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
/* Flag combinations identifying RA-learned router routes and cached
 * gateway clones, used by the tohost cleanup below. */
2284 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2285 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2287 /* Remove routers and update dst entries when gateway turn into host. */
2288 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2290 struct in6_addr *gateway = (struct in6_addr *)arg;
2292 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2293 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2294 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
/* Purge routes that used @gateway as a router once it is known to be
 * an ordinary host (e.g. it stopped advertising as a router). */
2300 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2302 fib6_clean_all(net, fib6_clean_tohost, gateway);
/* Walker argument for interface-down cleanup. */
2305 struct arg_dev_net {
2306 struct net_device *dev;
/* fib6_clean_all() callback: select for removal every route on @dev
 * (or every route, when dev is NULL) except the namespace null entry.
 * (The removal return value is elided in this view.) */
2310 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2312 const struct arg_dev_net *adn = arg;
2313 const struct net_device *dev = adn->dev;
2315 if ((rt->dst.dev == dev || !dev) &&
2316 rt != adn->net->ipv6.ip6_null_entry)
/* Remove all routes (and ICMP rate-limit dsts) referencing @dev when
 * the interface goes down. */
2322 void rt6_ifdown(struct net *net, struct net_device *dev)
2324 struct arg_dev_net adn = {
2329 fib6_clean_all(net, fib6_ifdown, &adn);
2330 icmp6_clean_all(fib6_ifdown, &adn);
/* Walker argument: device whose MTU changed and the new MTU value. */
2333 struct rt6_mtu_change_arg {
2334 struct net_device *dev;
/* fib6_clean_all() callback: propagate a device MTU change to routes
 * on that device whose MTU metric is not administratively locked.
 * Cached routes update rt6i_pmtu only downward; non-cached routes
 * update the RTAX_MTU metric when the new MTU is lower, or when the
 * old route MTU equalled the device MTU (administrative increase). */
2338 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2340 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2341 struct inet6_dev *idev;
2343 /* In IPv6 pmtu discovery is not optional,
2344 so that RTAX_MTU lock cannot disable it.
2345 We still use this lock to block changes
2346 caused by addrconf/ndisc.
2349 idev = __in6_dev_get(arg->dev);
2353 /* For administrative MTU increase, there is no way to discover
2354 IPv6 PMTU increase, so PMTU increase should be updated here.
2355 Since RFC 1981 doesn't include administrative MTU increase
2356 update PMTU increase is a MUST. (i.e. jumbo frame)
2359 If new MTU is less than route PMTU, this new MTU will be the
2360 lowest MTU in the path, update the route PMTU to reflect PMTU
2361 decreases; if new MTU is greater than route PMTU, and the
2362 old MTU is the lowest MTU in the path, update the route PMTU
2363 to reflect the increase. In this case if the other nodes' MTU
2364 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2367 if (rt->dst.dev == arg->dev &&
2368 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2369 if (rt->rt6i_flags & RTF_CACHE) {
2370 /* For RTF_CACHE with rt6i_pmtu == 0
2371 * (i.e. a redirected route),
2372 * the metrics of its rt->dst.from has already
2375 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2376 rt->rt6i_pmtu = arg->mtu;
2377 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2378 (dst_mtu(&rt->dst) < arg->mtu &&
2379 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2380 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
/* Entry point for device MTU change notification: walk all fib6
 * tables in @dev's namespace and update affected routes. */
2386 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2388 struct rt6_mtu_change_arg arg = {
2393 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
/* Netlink attribute validation policy for RTM_* IPv6 route messages;
 * consumed by nlmsg_parse() in rtm_to_fib6_config() and
 * inet6_rtm_getroute(). */
2396 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2397 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2398 [RTA_OIF] = { .type = NLA_U32 },
2399 [RTA_IIF] = { .type = NLA_U32 },
2400 [RTA_PRIORITY] = { .type = NLA_U32 },
2401 [RTA_METRICS] = { .type = NLA_NESTED },
2402 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2403 [RTA_PREF] = { .type = NLA_U8 },
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * fib6_config: validate attributes against rtm_ipv6_policy, map the
 * rtm_type to RTF_* flags, and copy the dst/src prefixes, gateway,
 * prefsrc, oif, priority, metrics, multipath data and RTA_PREF.
 * (Error-return lines after nlmsg_parse/length checks are elided.) */
2406 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2407 struct fib6_config *cfg)
2410 struct nlattr *tb[RTA_MAX+1];
2414 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2419 rtm = nlmsg_data(nlh);
2420 memset(cfg, 0, sizeof(*cfg));
2422 cfg->fc_table = rtm->rtm_table;
2423 cfg->fc_dst_len = rtm->rtm_dst_len;
2424 cfg->fc_src_len = rtm->rtm_src_len;
2425 cfg->fc_flags = RTF_UP;
2426 cfg->fc_protocol = rtm->rtm_protocol;
2427 cfg->fc_type = rtm->rtm_type;
/* All the "negative" route types become reject routes; the exact
 * subtype is kept in fc_type for ip6_route_add()'s switch. */
2429 if (rtm->rtm_type == RTN_UNREACHABLE ||
2430 rtm->rtm_type == RTN_BLACKHOLE ||
2431 rtm->rtm_type == RTN_PROHIBIT ||
2432 rtm->rtm_type == RTN_THROW)
2433 cfg->fc_flags |= RTF_REJECT;
2435 if (rtm->rtm_type == RTN_LOCAL)
2436 cfg->fc_flags |= RTF_LOCAL;
2438 if (rtm->rtm_flags & RTM_F_CLONED)
2439 cfg->fc_flags |= RTF_CACHE;
2441 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2442 cfg->fc_nlinfo.nlh = nlh;
2443 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2445 if (tb[RTA_GATEWAY]) {
2446 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2447 cfg->fc_flags |= RTF_GATEWAY;
/* Prefixes are transferred in the minimum number of whole bytes. */
2451 int plen = (rtm->rtm_dst_len + 7) >> 3;
2453 if (nla_len(tb[RTA_DST]) < plen)
2456 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2460 int plen = (rtm->rtm_src_len + 7) >> 3;
2462 if (nla_len(tb[RTA_SRC]) < plen)
2465 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2468 if (tb[RTA_PREFSRC])
2469 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2472 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2474 if (tb[RTA_PRIORITY])
2475 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2477 if (tb[RTA_METRICS]) {
2478 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2479 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2483 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2485 if (tb[RTA_MULTIPATH]) {
2486 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2487 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
/* Unknown preference values fall back to medium (RFC 4191). */
2491 pref = nla_get_u8(tb[RTA_PREF]);
2492 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2493 pref != ICMPV6_ROUTER_PREF_HIGH)
2494 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2495 cfg->fc_flags |= RTF_PREF(pref);
/* Add (@add != 0) or delete each nexthop of an RTA_MULTIPATH route as
 * an individual single-path route. On delete, failures are tolerated
 * so all remaining nexthops are still attempted; on add, a failure
 * triggers rollback of the nexthops added so far (rollback body is
 * elided in this view). */
2503 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2505 struct fib6_config r_cfg;
2506 struct rtnexthop *rtnh;
2509 int err = 0, last_err = 0;
2511 remaining = cfg->fc_mp_len;
2513 rtnh = (struct rtnexthop *)cfg->fc_mp;
2515 /* Parse a Multipath Entry */
2516 while (rtnh_ok(rtnh, remaining)) {
/* Start each nexthop from a copy of the shared config. */
2517 memcpy(&r_cfg, cfg, sizeof(*cfg));
2518 if (rtnh->rtnh_ifindex)
2519 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2521 attrlen = rtnh_attrlen(rtnh);
2523 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2525 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2527 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2528 r_cfg.fc_flags |= RTF_GATEWAY;
2531 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2534 /* If we are trying to remove a route, do not stop the
2535 * loop when ip6_route_del() fails (because next hop is
2536 * already gone), we should try to remove all next hops.
2539 /* If add fails, we should try to delete all
2540 * next hops that have been already added.
2543 remaining = cfg->fc_mp_len - remaining;
2547 /* Because each route is added like a single route we remove
2548 * these flags after the first nexthop: if there is a collision,
2549 * we have already failed to add the first nexthop:
2550 * fib6_add_rt2node() has rejected it; when replacing, old
2551 * nexthops have been replaced by first new, the rest should
2554 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2556 rtnh = rtnh_next(rtnh, &remaining);
/* RTM_DELROUTE handler: parse the message and dispatch to the
 * multipath or single-route delete path. */
2562 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2564 struct fib6_config cfg;
2567 err = rtm_to_fib6_config(skb, nlh, &cfg);
2572 return ip6_route_multipath(&cfg, 0);
2574 return ip6_route_del(&cfg);
/* RTM_NEWROUTE handler: parse the message and dispatch to the
 * multipath or single-route add path. */
2577 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2579 struct fib6_config cfg;
2582 err = rtm_to_fib6_config(skb, nlh, &cfg);
2587 return ip6_route_multipath(&cfg, 1);
2589 return ip6_route_add(&cfg);
/* Worst-case netlink message size for one route, used to size the
 * notification skb in inet6_rt_notify(). Must cover every attribute
 * rt6_fill_node() can emit — keep the two functions in sync. */
2592 static inline size_t rt6_nlmsg_size(void)
2594 return NLMSG_ALIGN(sizeof(struct rtmsg))
2595 + nla_total_size(16) /* RTA_SRC */
2596 + nla_total_size(16) /* RTA_DST */
2597 + nla_total_size(16) /* RTA_GATEWAY */
2598 + nla_total_size(16) /* RTA_PREFSRC */
2599 + nla_total_size(4) /* RTA_TABLE */
2600 + nla_total_size(4) /* RTA_IIF */
2601 + nla_total_size(4) /* RTA_OIF */
2602 + nla_total_size(4) /* RTA_PRIORITY */
2603 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2604 + nla_total_size(sizeof(struct rta_cacheinfo))
2605 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2606 + nla_total_size(1); /* RTA_PREF */
/* Serialize @rt into a netlink RTM message on @skb. @dst/@src, when
 * non-NULL, are the specific addresses of a route-get query (reported
 * as /128); @iif is included for input routes. When @prefix is set
 * only RTF_PREFIX_RT routes are emitted (dump filter); @nowait is
 * passed through to ip6mr_get_route(). On overflow the message is
 * cancelled (nla_put_failure path).
 * NOTE(review): several conditional/label lines are elided in this
 * view; annotations follow the visible flow.
 */
2609 static int rt6_fill_node(struct net *net,
2610 struct sk_buff *skb, struct rt6_info *rt,
2611 struct in6_addr *dst, struct in6_addr *src,
2612 int iif, int type, u32 portid, u32 seq,
2613 int prefix, int nowait, unsigned int flags)
2615 u32 metrics[RTAX_MAX];
2617 struct nlmsghdr *nlh;
2621 if (prefix) { /* user wants prefix routes only */
2622 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2623 /* success since this is not a prefix route */
2628 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2632 rtm = nlmsg_data(nlh);
2633 rtm->rtm_family = AF_INET6;
2634 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2635 rtm->rtm_src_len = rt->rt6i_src.plen;
2638 table = rt->rt6i_table->tb6_id;
2640 table = RT6_TABLE_UNSPEC;
2641 rtm->rtm_table = table;
2642 if (nla_put_u32(skb, RTA_TABLE, table))
2643 goto nla_put_failure;
/* Map the dst error of reject routes back to the RTN_* type the
 * user originally requested. */
2644 if (rt->rt6i_flags & RTF_REJECT) {
2645 switch (rt->dst.error) {
2647 rtm->rtm_type = RTN_BLACKHOLE;
2650 rtm->rtm_type = RTN_PROHIBIT;
2653 rtm->rtm_type = RTN_THROW;
2656 rtm->rtm_type = RTN_UNREACHABLE;
2660 else if (rt->rt6i_flags & RTF_LOCAL)
2661 rtm->rtm_type = RTN_LOCAL;
2662 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2663 rtm->rtm_type = RTN_LOCAL;
2665 rtm->rtm_type = RTN_UNICAST;
2667 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2668 rtm->rtm_protocol = rt->rt6i_protocol;
2669 if (rt->rt6i_flags & RTF_DYNAMIC)
2670 rtm->rtm_protocol = RTPROT_REDIRECT;
2671 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2672 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2673 rtm->rtm_protocol = RTPROT_RA;
2675 rtm->rtm_protocol = RTPROT_KERNEL;
2678 if (rt->rt6i_flags & RTF_CACHE)
2679 rtm->rtm_flags |= RTM_F_CLONED;
/* Route-get replies report the queried address as a host route. */
2682 if (nla_put_in6_addr(skb, RTA_DST, dst))
2683 goto nla_put_failure;
2684 rtm->rtm_dst_len = 128;
2685 } else if (rtm->rtm_dst_len)
2686 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2687 goto nla_put_failure;
2688 #ifdef CONFIG_IPV6_SUBTREES
2690 if (nla_put_in6_addr(skb, RTA_SRC, src))
2691 goto nla_put_failure;
2692 rtm->rtm_src_len = 128;
2693 } else if (rtm->rtm_src_len &&
2694 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2695 goto nla_put_failure;
2698 #ifdef CONFIG_IPV6_MROUTE
/* Multicast destinations are resolved through the mroute engine. */
2699 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2700 int err = ip6mr_get_route(net, skb, rtm, nowait);
2705 goto nla_put_failure;
2707 if (err == -EMSGSIZE)
2708 goto nla_put_failure;
2713 if (nla_put_u32(skb, RTA_IIF, iif))
2714 goto nla_put_failure;
2716 struct in6_addr saddr_buf;
2717 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2718 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2719 goto nla_put_failure;
2722 if (rt->rt6i_prefsrc.plen) {
2723 struct in6_addr saddr_buf;
2724 saddr_buf = rt->rt6i_prefsrc.addr;
2725 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2726 goto nla_put_failure;
/* A per-route PMTU (cached clone) overrides the MTU metric. */
2729 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2731 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2732 if (rtnetlink_put_metrics(skb, metrics) < 0)
2733 goto nla_put_failure;
2735 if (rt->rt6i_flags & RTF_GATEWAY) {
2736 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2737 goto nla_put_failure;
2741 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2742 goto nla_put_failure;
2743 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2744 goto nla_put_failure;
2746 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2748 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2749 goto nla_put_failure;
2751 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2752 goto nla_put_failure;
2754 nlmsg_end(skb, nlh);
2758 nlmsg_cancel(skb, nlh);
/* fib6 dump callback: emit one route into the dump skb. Honors the
 * RTM_F_PREFIX filter flag if present in the request header. */
2762 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2764 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2767 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2768 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2769 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2773 return rt6_fill_node(arg->net,
2774 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2775 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2776 prefix, 0, NLM_F_MULTI);
/* RTM_GETROUTE handler: parse src/dst/iif/oif/mark from the request,
 * perform an input- or output-path route lookup accordingly, and
 * unicast the serialized result back to the requester.
 * (Error returns after parse/length/device checks are elided.) */
2779 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2781 struct net *net = sock_net(in_skb->sk);
2782 struct nlattr *tb[RTA_MAX+1];
2783 struct rt6_info *rt;
2784 struct sk_buff *skb;
2787 int err, iif = 0, oif = 0;
2789 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2794 memset(&fl6, 0, sizeof(fl6));
2797 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2800 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2804 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2807 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2811 iif = nla_get_u32(tb[RTA_IIF]);
2814 oif = nla_get_u32(tb[RTA_OIF]);
2817 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
/* With an input interface, simulate the forwarding-path lookup. */
2820 struct net_device *dev;
2823 dev = __dev_get_by_index(net, iif);
2829 fl6.flowi6_iif = iif;
2831 if (!ipv6_addr_any(&fl6.saddr))
2832 flags |= RT6_LOOKUP_F_HAS_SADDR;
2834 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
/* Otherwise do an output (locally-originated) lookup. */
2837 fl6.flowi6_oif = oif;
2839 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2842 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2849 /* Reserve room for dummy headers, this skb can pass
2850 through good chunk of routing engine.
2852 skb_reset_mac_header(skb);
2853 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2855 skb_dst_set(skb, &rt->dst);
2857 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2858 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2859 nlh->nlmsg_seq, 0, 0, 0);
2865 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/* Broadcast an RTM route notification (@event) for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group. An -EMSGSIZE from
 * rt6_fill_node() means rt6_nlmsg_size() is out of sync (WARN). */
2870 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2872 struct sk_buff *skb;
2873 struct net *net = info->nl_net;
2878 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2880 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2884 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2885 event, info->portid, seq, 0, 0, 0);
2887 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2888 WARN_ON(err == -EMSGSIZE);
2892 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2893 info->nlh, gfp_any());
/* On failure, record the error on the multicast group so listeners
 * learn they missed a notification. */
2897 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/* Netdevice notifier: when the namespace's loopback device registers,
 * attach it (and its inet6_dev) to the special null / prohibit /
 * blackhole route entries, which need a valid device. */
2900 static int ip6_route_dev_notify(struct notifier_block *this,
2901 unsigned long event, void *ptr)
2903 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2904 struct net *net = dev_net(dev);
2906 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2907 net->ipv6.ip6_null_entry->dst.dev = dev;
2908 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2909 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2911 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2912 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2913 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2924 #ifdef CONFIG_PROC_FS
/* /proc/net/ipv6_route seq_file operations (read entry elided in
 * this view). */
2926 static const struct file_operations ipv6_route_proc_fops = {
2927 .owner = THIS_MODULE,
2928 .open = ipv6_route_open,
2930 .llseek = seq_lseek,
2931 .release = seq_release_net,
/* /proc/net/rt6_stats: one line of hex-formatted fib6 statistics for
 * the namespace (nodes, route nodes, allocations, entries, cache,
 * live dst entries, discarded routes). */
2934 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2936 struct net *net = (struct net *)seq->private;
2937 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2938 net->ipv6.rt6_stats->fib_nodes,
2939 net->ipv6.rt6_stats->fib_route_nodes,
2940 net->ipv6.rt6_stats->fib_rt_alloc,
2941 net->ipv6.rt6_stats->fib_rt_entries,
2942 net->ipv6.rt6_stats->fib_rt_cache,
2943 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2944 net->ipv6.rt6_stats->fib_discarded_routes,
2945 );
2949 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2951 return single_open_net(inode, file, rt6_stats_seq_show);
/* /proc/net/rt6_stats file operations (read entry elided in this
 * view). */
2954 static const struct file_operations rt6_stats_seq_fops = {
2955 .owner = THIS_MODULE,
2956 .open = rt6_stats_seq_open,
2958 .llseek = seq_lseek,
2959 .release = single_release_net,
2961 #endif /* CONFIG_PROC_FS */
2963 #ifdef CONFIG_SYSCTL
/*
 * Handler for net.ipv6.route.flush: writing a delay value triggers an
 * immediate FIB6 garbage-collection run for the owning netns.
 * NOTE(review): the write-only guard and local declarations appear to
 * sit on lines not shown here — confirm against the full file.
 */
2966 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2967 void __user *buffer, size_t *lenp, loff_t *ppos)
/* ->extra1 was set to the owning struct net in ipv6_route_sysctl_init(). */
2974 net = (struct net *)ctl->extra1;
/* Snapshot the delay before proc_dointvec() overwrites flush_delay. */
2975 delay = net->ipv6.sysctl.flush_delay;
2976 proc_dointvec(ctl, write, buffer, lenp, ppos);
/* delay <= 0 means flush now; otherwise expire entries older than delay. */
2977 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
/*
 * Template for the per-netns net.ipv6.route sysctl table.  The .data
 * pointers reference init_net here; ipv6_route_sysctl_init() duplicates
 * this array and repoints each entry at the child netns's fields, so
 * entry ORDER must stay in sync with the table[N].data assignments there.
 */
2981 struct ctl_table ipv6_route_table_template[] = {
2983 .procname = "flush",
2984 .data = &init_net.ipv6.sysctl.flush_delay,
2985 .maxlen = sizeof(int),
2987 .proc_handler = ipv6_sysctl_rtcache_flush
2990 .procname = "gc_thresh",
2991 .data = &ip6_dst_ops_template.gc_thresh,
2992 .maxlen = sizeof(int),
2994 .proc_handler = proc_dointvec,
2997 .procname = "max_size",
2998 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2999 .maxlen = sizeof(int),
3001 .proc_handler = proc_dointvec,
3004 .procname = "gc_min_interval",
3005 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3006 .maxlen = sizeof(int),
3008 .proc_handler = proc_dointvec_jiffies,
3011 .procname = "gc_timeout",
3012 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3013 .maxlen = sizeof(int),
3015 .proc_handler = proc_dointvec_jiffies,
3018 .procname = "gc_interval",
3019 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3020 .maxlen = sizeof(int),
3022 .proc_handler = proc_dointvec_jiffies,
3025 .procname = "gc_elasticity",
3026 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3027 .maxlen = sizeof(int),
3029 .proc_handler = proc_dointvec,
3032 .procname = "mtu_expires",
3033 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3034 .maxlen = sizeof(int),
3036 .proc_handler = proc_dointvec_jiffies,
3039 .procname = "min_adv_mss",
3040 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3041 .maxlen = sizeof(int),
3043 .proc_handler = proc_dointvec,
/* Same variable as gc_min_interval, but exposed in milliseconds. */
3046 .procname = "gc_min_interval_ms",
3047 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3048 .maxlen = sizeof(int),
3050 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * Duplicate ipv6_route_table_template for a new netns and repoint every
 * .data field at that netns's own sysctl storage.  Index order must
 * match the template array.  Returns the table (caller registers and
 * later frees it).
 */
3055 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3057 struct ctl_table *table;
3059 table = kmemdup(ipv6_route_table_template,
3060 sizeof(ipv6_route_table_template),
3064 table[0].data = &net->ipv6.sysctl.flush_delay;
/* extra1 carries the owning netns to ipv6_sysctl_rtcache_flush(). */
3065 table[0].extra1 = net;
3066 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3067 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3068 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3069 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3070 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3071 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3072 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3073 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
/* gc_min_interval_ms aliases the same variable as gc_min_interval. */
3074 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3076 /* Don't export sysctls to unprivileged users */
3077 if (net->user_ns != &init_user_ns)
/* NULL procname terminates the table early, hiding all entries. */
3078 table[0].procname = NULL;
/*
 * Per-netns setup for IPv6 routing: clone the dst_ops template, allocate
 * the special route entries (null, plus prohibit/blackhole with multiple
 * tables), and seed the per-netns GC/MTU sysctl defaults.  On failure,
 * unwinds in reverse allocation order via the labels at the bottom.
 */
3085 static int __net_init ip6_route_net_init(struct net *net)
3089 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3090 sizeof(net->ipv6.ip6_dst_ops));
3092 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3093 goto out_ip6_dst_ops;
/* The -ENETUNREACH "null" route, duplicated from the global template. */
3095 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3096 sizeof(*net->ipv6.ip6_null_entry),
3098 if (!net->ipv6.ip6_null_entry)
3099 goto out_ip6_dst_entries;
/* Each template is its own dst.path and uses this netns's dst_ops. */
3100 net->ipv6.ip6_null_entry->dst.path =
3101 (struct dst_entry *)net->ipv6.ip6_null_entry;
3102 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3103 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3104 ip6_template_metrics, true);
3106 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* -EACCES "prohibit" route, only with policy routing tables. */
3107 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3108 sizeof(*net->ipv6.ip6_prohibit_entry),
3110 if (!net->ipv6.ip6_prohibit_entry)
3111 goto out_ip6_null_entry;
3112 net->ipv6.ip6_prohibit_entry->dst.path =
3113 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3114 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3115 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3116 ip6_template_metrics, true);
/* Silent-drop "blackhole" route. */
3118 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3119 sizeof(*net->ipv6.ip6_blk_hole_entry),
3121 if (!net->ipv6.ip6_blk_hole_entry)
3122 goto out_ip6_prohibit_entry;
3123 net->ipv6.ip6_blk_hole_entry->dst.path =
3124 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3125 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3126 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3127 ip6_template_metrics, true);
/* Default per-netns sysctl values (tunable via net.ipv6.route.*). */
3130 net->ipv6.sysctl.flush_delay = 0;
3131 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3132 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3133 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3134 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3135 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3136 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
/* min advmss: IPv6 minimum MTU minus TCP (20) and IPv6 (40) headers. */
3137 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3139 net->ipv6.ip6_rt_gc_expire = 30*HZ;
/* Error unwind: free in reverse order of allocation. */
3145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3146 out_ip6_prohibit_entry:
3147 kfree(net->ipv6.ip6_prohibit_entry);
3149 kfree(net->ipv6.ip6_null_entry);
3151 out_ip6_dst_entries:
3152 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * Per-netns teardown: release the special route entries allocated by
 * ip6_route_net_init() and the dst entry counter.
 */
3157 static void __net_exit ip6_route_net_exit(struct net *net)
3159 kfree(net->ipv6.ip6_null_entry);
3160 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3161 kfree(net->ipv6.ip6_prohibit_entry);
3162 kfree(net->ipv6.ip6_blk_hole_entry);
3164 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * Late per-netns init: create the /proc/net entries once the rest of
 * the routing state is in place.
 */
3167 static int __net_init ip6_route_net_init_late(struct net *net)
3169 #ifdef CONFIG_PROC_FS
3170 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3171 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
/* Late per-netns teardown: remove the /proc/net entries created above. */
3176 static void __net_exit ip6_route_net_exit_late(struct net *net)
3178 #ifdef CONFIG_PROC_FS
3179 remove_proc_entry("ipv6_route", net->proc_net);
3180 remove_proc_entry("rt6_stats", net->proc_net);
/* Main per-netns lifecycle hooks for IPv6 routing state. */
3184 static struct pernet_operations ip6_route_net_ops = {
3185 .init = ip6_route_net_init,
3186 .exit = ip6_route_net_exit,
/* Allocate and attach the per-netns IPv6 inet_peer base. */
3189 static int __net_init ipv6_inetpeer_init(struct net *net)
3191 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3195 inet_peer_base_init(bp);
3196 net->ipv6.peers = bp;
/*
 * Detach and invalidate the per-netns inet_peer tree; clearing the
 * pointer first prevents further lookups through it.
 */
3202 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3204 struct inet_peer_base *bp = net->ipv6.peers;
3206 net->ipv6.peers = NULL;
3207 inetpeer_invalidate_tree(bp);
/* Per-netns lifecycle hooks for the IPv6 inet_peer base. */
3210 static struct pernet_operations ipv6_inetpeer_ops = {
3211 .init = ipv6_inetpeer_init,
3212 .exit = ipv6_inetpeer_exit,
/* Late per-netns hooks (proc entries); registered after fib6 rules. */
3215 static struct pernet_operations ip6_route_net_late_ops = {
3216 .init = ip6_route_net_init_late,
3217 .exit = ip6_route_net_exit_late,
/* Notifier block wiring ip6_route_dev_notify into netdevice events. */
3220 static struct notifier_block ip6_route_dev_notifier = {
3221 .notifier_call = ip6_route_dev_notify,
/*
 * Boot-time initialization of the IPv6 routing subsystem: dst slab
 * cache, pernet subsystems (inetpeer, routing state, late proc bits),
 * loopback wiring for init_net, fib6/rules init, rtnetlink handlers,
 * and the netdevice notifier.  Error paths unwind in reverse order via
 * the labels at the bottom.
 */
3224 int __init ip6_route_init(void)
3229 ip6_dst_ops_template.kmem_cachep =
3230 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3231 SLAB_HWCACHE_ALIGN, NULL);
3232 if (!ip6_dst_ops_template.kmem_cachep)
3235 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3237 goto out_kmem_cache;
3239 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3241 goto out_dst_entries;
3243 ret = register_pernet_subsys(&ip6_route_net_ops);
3245 goto out_register_inetpeer;
/* Blackhole dsts share the same slab cache as regular rt6_info. */
3247 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3249 /* Registering of the loopback is done before this portion of code,
3250 * the loopback reference in rt6_info will not be taken, do it
3251 * manually for init_net */
3252 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3253 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3254 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3255 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3256 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3257 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3258 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3262 goto out_register_subsys;
3268 ret = fib6_rules_init();
3272 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3274 goto fib6_rules_init;
/* __rtnl_register returns nonzero on failure for any of the three. */
3277 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3278 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3279 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3280 goto out_register_late_subsys;
3282 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3284 goto out_register_late_subsys;
/* Error unwind: undo registrations in reverse order. */
3289 out_register_late_subsys:
3290 unregister_pernet_subsys(&ip6_route_net_late_ops);
3292 fib6_rules_cleanup();
3297 out_register_subsys:
3298 unregister_pernet_subsys(&ip6_route_net_ops);
3299 out_register_inetpeer:
3300 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3302 dst_entries_destroy(&ip6_dst_blackhole_ops);
3304 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3308 void ip6_route_cleanup(void)
3310 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3311 unregister_pernet_subsys(&ip6_route_net_late_ops);
3312 fib6_rules_cleanup();
3315 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3316 unregister_pernet_subsys(&ip6_route_net_ops);
3317 dst_entries_destroy(&ip6_dst_blackhole_ops);
3318 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);