2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
69 #include "fib_lookup.h"
71 static struct ipv4_devconf ipv4_devconf = {
73 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
80 static struct ipv4_devconf ipv4_devconf_dflt = {
82 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
90 #define IPV4_DEVCONF_DFLT(net, attr) \
91 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
93 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
94 [IFA_LOCAL] = { .type = NLA_U32 },
95 [IFA_ADDRESS] = { .type = NLA_U32 },
96 [IFA_BROADCAST] = { .type = NLA_U32 },
97 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
98 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
101 #define IN4_ADDR_HSIZE_SHIFT 8
102 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
104 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
105 static DEFINE_SPINLOCK(inet_addr_hash_lock);
107 static u32 inet_addr_hash(struct net *net, __be32 addr)
109 u32 val = (__force u32) addr ^ net_hash_mix(net);
111 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
116 u32 hash = inet_addr_hash(net, ifa->ifa_local);
118 spin_lock(&inet_addr_hash_lock);
119 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120 spin_unlock(&inet_addr_hash_lock);
123 static void inet_hash_remove(struct in_ifaddr *ifa)
125 spin_lock(&inet_addr_hash_lock);
126 hlist_del_init_rcu(&ifa->hash);
127 spin_unlock(&inet_addr_hash_lock);
131 * __ip_dev_find - find the first device with a given source address.
132 * @net: the net namespace
133 * @addr: the source address
134 * @devref: if true, take a reference on the found device
136 * If a caller uses devref=false, it should be protected by RCU, or RTNL
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 u32 hash = inet_addr_hash(net, addr);
141 struct net_device *result = NULL;
142 struct in_ifaddr *ifa;
145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 if (ifa->ifa_local == addr) {
147 struct net_device *dev = ifa->ifa_dev->dev;
149 if (!net_eq(dev_net(dev), net))
156 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 };
158 struct fib_table *local;
160 /* Fallback to FIB local table so that communication
161 * over loopback subnets work.
163 local = fib_get_table(net, RT_TABLE_LOCAL);
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res);
169 if (result && devref)
174 EXPORT_SYMBOL(__ip_dev_find);
/* Forward declarations and the inetaddr notifier chain head.
 * NOTE(review): the extraction appears to have dropped lines here —
 * upstream wraps the devinet_sysctl_{register,unregister} pairs in
 * #ifdef CONFIG_SYSCTL (real functions) / #else (empty stubs).
 * Verify against the full file before editing.
 */
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
185 static void devinet_sysctl_register(struct in_device *idev)
188 static void devinet_sysctl_unregister(struct in_device *idev)
193 /* Locks all the inet devices. */
195 static struct in_ifaddr *inet_alloc_ifa(void)
197 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 static void inet_rcu_free_ifa(struct rcu_head *head)
202 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 in_dev_put(ifa->ifa_dev);
208 static void inet_free_ifa(struct in_ifaddr *ifa)
210 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 void in_dev_finish_destroy(struct in_device *idev)
215 struct net_device *dev = idev->dev;
217 WARN_ON(idev->ifa_list);
218 WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 pr_err("Freeing alive in_device %p\n", idev);
228 EXPORT_SYMBOL(in_dev_finish_destroy);
230 static struct in_device *inetdev_init(struct net_device *dev)
232 struct in_device *in_dev;
236 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240 sizeof(in_dev->cnf));
241 in_dev->cnf.sysctl = NULL;
243 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244 if (!in_dev->arp_parms)
246 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247 dev_disable_lro(dev);
248 /* Reference in_dev->dev */
250 /* Account for reference dev->ip_ptr (below) */
253 devinet_sysctl_register(in_dev);
254 ip_mc_init_dev(in_dev);
255 if (dev->flags & IFF_UP)
258 /* we can receive as soon as ip_ptr is set -- do this last */
259 rcu_assign_pointer(dev->ip_ptr, in_dev);
268 static void in_dev_rcu_put(struct rcu_head *head)
270 struct in_device *idev = container_of(head, struct in_device, rcu_head);
274 static void inetdev_destroy(struct in_device *in_dev)
276 struct in_ifaddr *ifa;
277 struct net_device *dev;
285 ip_mc_destroy_dev(in_dev);
287 while ((ifa = in_dev->ifa_list) != NULL) {
288 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
292 RCU_INIT_POINTER(dev->ip_ptr, NULL);
294 devinet_sysctl_unregister(in_dev);
295 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 for_primary_ifa(in_dev) {
305 if (inet_ifa_match(a, ifa)) {
306 if (!b || inet_ifa_match(b, ifa)) {
311 } endfor_ifa(in_dev);
/* Remove the address *ifap from in_dev's list and announce the removal
 * (RTM_DELADDR + NETDEV_DOWN notifier).  Deleting a primary address
 * also deletes its secondaries unless promote_secondaries is set, in
 * which case the first matching secondary is promoted to primary and
 * the remaining secondaries are re-announced with the new prefsrc.
 * NOTE(review): this extraction has dropped lines (braces, the
 * promotion bookkeeping, destroy handling) — do not edit logic from
 * this view; consult the complete file.
 */
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317 int destroy, struct nlmsghdr *nlh, u32 portid)
319 struct in_ifaddr *promote = NULL;
320 struct in_ifaddr *ifa, *ifa1 = *ifap;
321 struct in_ifaddr *last_prim = in_dev->ifa_list;
322 struct in_ifaddr *prev_prom = NULL;
323 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
327 /* 1. Deleting primary ifaddr forces deletion all secondaries
328 * unless alias promotion is set
331 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
334 while ((ifa = *ifap1) != NULL) {
335 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336 ifa1->ifa_scope <= ifa->ifa_scope)
339 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340 ifa1->ifa_mask != ifa->ifa_mask ||
341 !inet_ifa_match(ifa1->ifa_address, ifa)) {
342 ifap1 = &ifa->ifa_next;
348 inet_hash_remove(ifa);
349 *ifap1 = ifa->ifa_next;
351 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352 blocking_notifier_call_chain(&inetaddr_chain,
362 /* On promotion all secondaries from subnet are changing
363 * the primary IP, we must remove all their routes silently
364 * and later to add them back with new prefsrc. Do this
365 * while all addresses are on the device list.
367 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368 if (ifa1->ifa_mask == ifa->ifa_mask &&
369 inet_ifa_match(ifa1->ifa_address, ifa))
370 fib_del_ifaddr(ifa, ifa1);
375 *ifap = ifa1->ifa_next;
376 inet_hash_remove(ifa1);
378 /* 3. Announce address deletion */
380 /* Send message first, then call notifier.
381 At first sight, FIB update triggered by notifier
382 will refer to already deleted ifaddr, that could confuse
383 netlink listeners. It is not true: look, gated sees
384 that route deleted and if it still thinks that ifaddr
385 is valid, it will try to restore deleted routes... Grr.
386 So that, this order is correct.
388 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
392 struct in_ifaddr *next_sec = promote->ifa_next;
/* Re-link the promoted entry after the last primary so list order
 * (primaries first) is preserved. */
395 prev_prom->ifa_next = promote->ifa_next;
396 promote->ifa_next = last_prim->ifa_next;
397 last_prim->ifa_next = promote;
400 promote->ifa_flags &= ~IFA_F_SECONDARY;
401 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402 blocking_notifier_call_chain(&inetaddr_chain,
404 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405 if (ifa1->ifa_mask != ifa->ifa_mask ||
406 !inet_ifa_match(ifa1->ifa_address, ifa))
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
422 static void check_lifetime(struct work_struct *work);
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/* Insert @ifa into its device's address list, keeping primaries before
 * secondaries; an address whose subnet already has a primary becomes
 * IFA_F_SECONDARY.  On success the address is hashed, the lifetime
 * worker is kicked, and RTM_NEWADDR + NETDEV_UP are announced.
 * NOTE(review): extraction dropped lines here (duplicate/EEXIST
 * handling, list splice) — consult the complete file before editing.
 */
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
429 struct in_device *in_dev = ifa->ifa_dev;
430 struct in_ifaddr *ifa1, **ifap, **last_primary;
434 if (!ifa->ifa_local) {
439 ifa->ifa_flags &= ~IFA_F_SECONDARY;
440 last_primary = &in_dev->ifa_list;
442 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443 ifap = &ifa1->ifa_next) {
444 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445 ifa->ifa_scope <= ifa1->ifa_scope)
446 last_primary = &ifa1->ifa_next;
447 if (ifa1->ifa_mask == ifa->ifa_mask &&
448 inet_ifa_match(ifa1->ifa_address, ifa)) {
449 if (ifa1->ifa_local == ifa->ifa_local) {
453 if (ifa1->ifa_scope != ifa->ifa_scope) {
457 ifa->ifa_flags |= IFA_F_SECONDARY;
461 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462 net_srandom(ifa->ifa_local);
466 ifa->ifa_next = *ifap;
469 inet_hash_insert(dev_net(in_dev->dev), ifa);
471 cancel_delayed_work(&check_lifetime_work);
472 schedule_delayed_work(&check_lifetime_work, 0);
474 /* Send message first, then call notifier.
475 Notifier will trigger FIB update, so that
476 listeners of netlink will know about new ifaddr */
477 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
485 return __inet_insert_ifa(ifa, NULL, 0);
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
490 struct in_device *in_dev = __in_dev_get_rtnl(dev);
498 ipv4_devconf_setall(in_dev);
499 if (ifa->ifa_dev != in_dev) {
500 WARN_ON(ifa->ifa_dev);
502 ifa->ifa_dev = in_dev;
504 if (ipv4_is_loopback(ifa->ifa_local))
505 ifa->ifa_scope = RT_SCOPE_HOST;
506 return inet_insert_ifa(ifa);
509 /* Caller must hold RCU or RTNL :
510 * We dont take a reference on found in_device
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
514 struct net_device *dev;
515 struct in_device *in_dev = NULL;
518 dev = dev_get_by_index_rcu(net, ifindex);
520 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
524 EXPORT_SYMBOL(inetdev_by_index);
526 /* Called only from RTNL semaphored context. No locks. */
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
533 for_primary_ifa(in_dev) {
534 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
536 } endfor_ifa(in_dev);
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
542 struct net *net = sock_net(skb->sk);
543 struct nlattr *tb[IFA_MAX+1];
544 struct in_device *in_dev;
545 struct ifaddrmsg *ifm;
546 struct in_ifaddr *ifa, **ifap;
551 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
555 ifm = nlmsg_data(nlh);
556 in_dev = inetdev_by_index(net, ifm->ifa_index);
557 if (in_dev == NULL) {
562 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563 ifap = &ifa->ifa_next) {
565 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
568 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
571 if (tb[IFA_ADDRESS] &&
572 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
576 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
580 err = -EADDRNOTAVAIL;
585 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/* Deferred-work handler: walk the global address hash and age out
 * addresses by their valid/preferred lifetimes.  Pass 1 (RCU, lockless)
 * only computes whether any bucket needs changes and the next wakeup
 * time; pass 2 (the _safe walk) performs deletions of expired
 * addresses and marks past-preferred ones IFA_F_DEPRECATED.  Finally
 * reschedules itself for the earliest pending expiry.
 * NOTE(review): extraction dropped lines (now/next init, locking around
 * pass 2, change_needed gating) — consult the complete file.
 */
587 static void check_lifetime(struct work_struct *work)
589 unsigned long now, next, next_sec, next_sched;
590 struct in_ifaddr *ifa;
591 struct hlist_node *n;
595 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
597 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598 bool change_needed = false;
601 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
604 if (ifa->ifa_flags & IFA_F_PERMANENT)
607 /* We try to batch several events at once. */
608 age = (now - ifa->ifa_tstamp +
609 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
611 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612 age >= ifa->ifa_valid_lft) {
613 change_needed = true;
614 } else if (ifa->ifa_preferred_lft ==
615 INFINITY_LIFE_TIME) {
617 } else if (age >= ifa->ifa_preferred_lft) {
618 if (time_before(ifa->ifa_tstamp +
619 ifa->ifa_valid_lft * HZ, next))
620 next = ifa->ifa_tstamp +
621 ifa->ifa_valid_lft * HZ;
623 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624 change_needed = true;
625 } else if (time_before(ifa->ifa_tstamp +
626 ifa->ifa_preferred_lft * HZ,
628 next = ifa->ifa_tstamp +
629 ifa->ifa_preferred_lft * HZ;
636 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
639 if (ifa->ifa_flags & IFA_F_PERMANENT)
642 /* We try to batch several events at once. */
643 age = (now - ifa->ifa_tstamp +
644 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
646 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647 age >= ifa->ifa_valid_lft) {
648 struct in_ifaddr **ifap;
650 for (ifap = &ifa->ifa_dev->ifa_list;
651 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
653 inet_del_ifa(ifa->ifa_dev,
658 } else if (ifa->ifa_preferred_lft !=
659 INFINITY_LIFE_TIME &&
660 age >= ifa->ifa_preferred_lft &&
661 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662 ifa->ifa_flags |= IFA_F_DEPRECATED;
663 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
669 next_sec = round_jiffies_up(next);
672 /* If rounded timeout is accurate enough, accept it. */
673 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674 next_sched = next_sec;
677 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
681 schedule_delayed_work(&check_lifetime_work, next_sched - now);
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
687 unsigned long timeout;
689 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
691 timeout = addrconf_timeout_fixup(valid_lft, HZ);
692 if (addrconf_finite_timeout(timeout))
693 ifa->ifa_valid_lft = timeout;
695 ifa->ifa_flags |= IFA_F_PERMANENT;
697 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698 if (addrconf_finite_timeout(timeout)) {
700 ifa->ifa_flags |= IFA_F_DEPRECATED;
701 ifa->ifa_preferred_lft = timeout;
703 ifa->ifa_tstamp = jiffies;
704 if (!ifa->ifa_cstamp)
705 ifa->ifa_cstamp = ifa->ifa_tstamp;
/* Parse an RTM_NEWADDR request into a freshly allocated in_ifaddr bound
 * to the target device, and extract the requested lifetimes into
 * *pvalid_lft / *pprefered_lft.  IFA_ADDRESS defaults to IFA_LOCAL and
 * the label defaults to the device name.
 * NOTE(review): extraction dropped lines here (error returns for parse
 * failure, missing device/in_dev, bad cacheinfo) — consult the full
 * file; error paths presumably return ERR_PTR(err).
 */
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709 __u32 *pvalid_lft, __u32 *pprefered_lft)
711 struct nlattr *tb[IFA_MAX+1];
712 struct in_ifaddr *ifa;
713 struct ifaddrmsg *ifm;
714 struct net_device *dev;
715 struct in_device *in_dev;
718 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
722 ifm = nlmsg_data(nlh);
724 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
727 dev = __dev_get_by_index(net, ifm->ifa_index);
732 in_dev = __in_dev_get_rtnl(dev);
737 ifa = inet_alloc_ifa();
740 * A potential indev allocation can be left alive, it stays
741 * assigned to its device and is destroy with it.
745 ipv4_devconf_setall(in_dev);
748 if (tb[IFA_ADDRESS] == NULL)
749 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
751 INIT_HLIST_NODE(&ifa->hash);
752 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754 ifa->ifa_flags = ifm->ifa_flags;
755 ifa->ifa_scope = ifm->ifa_scope;
756 ifa->ifa_dev = in_dev;
758 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
761 if (tb[IFA_BROADCAST])
762 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
765 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
767 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
769 if (tb[IFA_CACHEINFO]) {
770 struct ifa_cacheinfo *ci;
772 ci = nla_data(tb[IFA_CACHEINFO]);
/* preferred lifetime may not exceed valid lifetime */
773 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
777 *pvalid_lft = ci->ifa_valid;
778 *pprefered_lft = ci->ifa_prefered;
789 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
791 struct in_device *in_dev = ifa->ifa_dev;
792 struct in_ifaddr *ifa1, **ifap;
797 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
798 ifap = &ifa1->ifa_next) {
799 if (ifa1->ifa_mask == ifa->ifa_mask &&
800 inet_ifa_match(ifa1->ifa_address, ifa) &&
801 ifa1->ifa_local == ifa->ifa_local)
/* RTM_NEWADDR handler: build an in_ifaddr from the request and insert
 * it.  If an identical address already exists, NLM_F_EXCL or a missing
 * NLM_F_REPLACE is an error; with NLM_F_REPLACE only the lifetimes are
 * refreshed and the change re-announced.
 * NOTE(review): extraction dropped lines (IS_ERR check on ifa, the
 * EEXIST path, freeing of the duplicate) — consult the full file.
 */
807 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
809 struct net *net = sock_net(skb->sk);
810 struct in_ifaddr *ifa;
811 struct in_ifaddr *ifa_existing;
812 __u32 valid_lft = INFINITY_LIFE_TIME;
813 __u32 prefered_lft = INFINITY_LIFE_TIME;
817 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
821 ifa_existing = find_matching_ifa(ifa);
823 /* It would be best to check for !NLM_F_CREATE here but
824 * userspace alreay relies on not having to provide this.
826 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
827 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
831 if (nlh->nlmsg_flags & NLM_F_EXCL ||
832 !(nlh->nlmsg_flags & NLM_F_REPLACE))
835 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
836 cancel_delayed_work(&check_lifetime_work);
837 schedule_delayed_work(&check_lifetime_work, 0);
838 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
839 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
845 * Determine a default network mask, based on the IP address.
848 static int inet_abc_len(__be32 addr)
850 int rc = -1; /* Something else, probably a multicast. */
852 if (ipv4_is_zeronet(addr))
855 __u32 haddr = ntohl(addr);
857 if (IN_CLASSA(haddr))
859 else if (IN_CLASSB(haddr))
861 else if (IN_CLASSC(haddr))
/* Handle the legacy SIOC[GS]IF* address ioctls (plus the out-of-tree
 * Android SIOCKILLADDR, which calls tcp_nuke_addr()).  Copies the
 * struct ifreq from userspace, matches an address by label and — for
 * the 4.4BSD-style get ioctls — by address, performs the get/set, and
 * copies the result back for the get commands.
 * NOTE(review): this extraction has dropped many lines (ifr/colon
 * declarations, rtnl_lock/unlock, several error branches, the alias
 * colon restore) — treat this view as incomplete; do not edit logic
 * from it.
 */
869 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
872 struct sockaddr_in sin_orig;
873 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
874 struct in_device *in_dev;
875 struct in_ifaddr **ifap = NULL;
876 struct in_ifaddr *ifa = NULL;
877 struct net_device *dev;
880 int tryaddrmatch = 0;
883 * Fetch the caller's info block into kernel space
886 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
888 ifr.ifr_name[IFNAMSIZ - 1] = 0;
890 /* save original address for comparison */
891 memcpy(&sin_orig, sin, sizeof(*sin));
893 colon = strchr(ifr.ifr_name, ':');
897 dev_load(net, ifr.ifr_name);
900 case SIOCGIFADDR: /* Get interface address */
901 case SIOCGIFBRDADDR: /* Get the broadcast address */
902 case SIOCGIFDSTADDR: /* Get the destination address */
903 case SIOCGIFNETMASK: /* Get the netmask for the interface */
904 /* Note that these ioctls will not sleep,
905 so that we do not impose a lock.
906 One day we will be forced to put shlock here (I mean SMP)
908 tryaddrmatch = (sin_orig.sin_family == AF_INET);
909 memset(sin, 0, sizeof(*sin));
910 sin->sin_family = AF_INET;
915 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
918 case SIOCSIFADDR: /* Set interface address (and family) */
919 case SIOCSIFBRDADDR: /* Set the broadcast address */
920 case SIOCSIFDSTADDR: /* Set the destination address */
921 case SIOCSIFNETMASK: /* Set the netmask for the interface */
922 case SIOCKILLADDR: /* Nuke all sockets on this address */
924 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
927 if (sin->sin_family != AF_INET)
938 dev = __dev_get_by_name(net, ifr.ifr_name)
945 in_dev = __in_dev_get_rtnl(dev);
948 /* Matthias Andree */
949 /* compare label and address (4.4BSD style) */
950 /* note: we only do this for a limited set of ioctls
951 and only if the original address family was AF_INET.
952 This is checked above. */
953 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
954 ifap = &ifa->ifa_next) {
955 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
956 sin_orig.sin_addr.s_addr ==
962 /* we didn't get a match, maybe the application is
963 4.3BSD-style and passed in junk so we fall back to
964 comparing just the label */
966 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
967 ifap = &ifa->ifa_next)
968 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
973 ret = -EADDRNOTAVAIL;
974 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
975 && cmd != SIOCKILLADDR)
979 case SIOCGIFADDR: /* Get interface address */
980 sin->sin_addr.s_addr = ifa->ifa_local;
983 case SIOCGIFBRDADDR: /* Get the broadcast address */
984 sin->sin_addr.s_addr = ifa->ifa_broadcast;
987 case SIOCGIFDSTADDR: /* Get the destination address */
988 sin->sin_addr.s_addr = ifa->ifa_address;
991 case SIOCGIFNETMASK: /* Get the netmask for the interface */
992 sin->sin_addr.s_addr = ifa->ifa_mask;
997 ret = -EADDRNOTAVAIL;
1001 if (!(ifr.ifr_flags & IFF_UP))
1002 inet_del_ifa(in_dev, ifap, 1);
1005 ret = dev_change_flags(dev, ifr.ifr_flags);
1008 case SIOCSIFADDR: /* Set interface address (and family) */
1010 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1015 ifa = inet_alloc_ifa();
1018 INIT_HLIST_NODE(&ifa->hash);
1020 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1022 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1025 if (ifa->ifa_local == sin->sin_addr.s_addr)
1027 inet_del_ifa(in_dev, ifap, 0);
1028 ifa->ifa_broadcast = 0;
1032 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1034 if (!(dev->flags & IFF_POINTOPOINT)) {
1035 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1036 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1037 if ((dev->flags & IFF_BROADCAST) &&
1038 ifa->ifa_prefixlen < 31)
1039 ifa->ifa_broadcast = ifa->ifa_address |
1042 ifa->ifa_prefixlen = 32;
1043 ifa->ifa_mask = inet_make_mask(32);
1045 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1046 ret = inet_set_ifa(dev, ifa);
1049 case SIOCSIFBRDADDR: /* Set the broadcast address */
1051 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1052 inet_del_ifa(in_dev, ifap, 0);
1053 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1054 inet_insert_ifa(ifa);
1058 case SIOCSIFDSTADDR: /* Set the destination address */
1060 if (ifa->ifa_address == sin->sin_addr.s_addr)
1063 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1066 inet_del_ifa(in_dev, ifap, 0);
1067 ifa->ifa_address = sin->sin_addr.s_addr;
1068 inet_insert_ifa(ifa);
1071 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1074 * The mask we set must be legal.
1077 if (bad_mask(sin->sin_addr.s_addr, 0))
1080 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1081 __be32 old_mask = ifa->ifa_mask;
1082 inet_del_ifa(in_dev, ifap, 0);
1083 ifa->ifa_mask = sin->sin_addr.s_addr;
1084 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1086 /* See if current broadcast address matches
1087 * with current netmask, then recalculate
1088 * the broadcast address. Otherwise it's a
1089 * funny address, so don't touch it since
1090 * the user seems to know what (s)he's doing...
1092 if ((dev->flags & IFF_BROADCAST) &&
1093 (ifa->ifa_prefixlen < 31) &&
1094 (ifa->ifa_broadcast ==
1095 (ifa->ifa_local|~old_mask))) {
1096 ifa->ifa_broadcast = (ifa->ifa_local |
1097 ~sin->sin_addr.s_addr);
1099 inet_insert_ifa(ifa);
1102 case SIOCKILLADDR: /* Nuke all connections on this address */
1103 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
1112 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/* SIOCGIFCONF backend: emit one struct ifreq (label + AF_INET local
 * address) per address on @dev into the user buffer @buf, or — when
 * @buf is NULL — just count the bytes needed.  Returns bytes written
 * (or needed), or -EFAULT on copy failure.
 * NOTE(review): extraction dropped lines here (ifr/done declarations,
 * the branch selecting ifa_label vs dev->name, -EFAULT assignment,
 * loop exits) — consult the complete file.
 */
1116 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1118 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1119 struct in_ifaddr *ifa;
1126 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1128 done += sizeof(ifr);
1131 if (len < (int) sizeof(ifr))
1133 memset(&ifr, 0, sizeof(struct ifreq));
1135 strcpy(ifr.ifr_name, ifa->ifa_label);
1137 strcpy(ifr.ifr_name, dev->name);
1139 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1140 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1143 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1147 buf += sizeof(struct ifreq);
1148 len -= sizeof(struct ifreq);
1149 done += sizeof(struct ifreq);
/* Select a source address on @dev with scope <= @scope, preferring a
 * primary address whose subnet contains @dst; when @dev has none, fall
 * back to scanning every device in the namespace for a non-link-scope
 * candidate.  Returns 0 if nothing suitable is found.
 * NOTE(review): extraction dropped lines (rcu_read_lock/unlock, the
 * addr/no_in_dev/out labels and early exits) — consult the full file.
 */
1155 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1158 struct in_device *in_dev;
1159 struct net *net = dev_net(dev);
1162 in_dev = __in_dev_get_rcu(dev);
1166 for_primary_ifa(in_dev) {
1167 if (ifa->ifa_scope > scope)
1169 if (!dst || inet_ifa_match(dst, ifa)) {
1170 addr = ifa->ifa_local;
1174 addr = ifa->ifa_local;
1175 } endfor_ifa(in_dev);
1181 /* Not loopback addresses on loopback should be preferred
1182 in this case. It is important that lo is the first interface
1185 for_each_netdev_rcu(net, dev) {
1186 in_dev = __in_dev_get_rcu(dev);
1190 for_primary_ifa(in_dev) {
1191 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1192 ifa->ifa_scope <= scope) {
1193 addr = ifa->ifa_local;
1196 } endfor_ifa(in_dev);
/* Helper for inet_confirm_addr(): confirm on a single in_device that a
 * local address exists matching the (dst, local, scope) wildcards;
 * returns the confirmed/selected address or 0.
 * NOTE(review): extraction dropped lines (the for_ifa() opening, the
 * addr/same initializations and several closing braces) — consult the
 * complete file before editing.
 */
1204 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1205 __be32 local, int scope)
1212 (local == ifa->ifa_local || !local) &&
1213 ifa->ifa_scope <= scope) {
1214 addr = ifa->ifa_local;
1219 same = (!local || inet_ifa_match(local, ifa)) &&
1220 (!dst || inet_ifa_match(dst, ifa));
1224 /* Is the selected addr into dst subnet? */
1225 if (inet_ifa_match(addr, ifa))
1227 /* No, then can we use new local src? */
1228 if (ifa->ifa_scope <= scope) {
1229 addr = ifa->ifa_local;
1232 /* search for large dst subnet for addr */
1236 } endfor_ifa(in_dev);
1238 return same ? addr : 0;
1242 * Confirm that local IP address exists using wildcards:
1243 * - in_dev: only on this interface, 0=any interface
1244 * - dst: only in the same subnet as dst, 0=any dst
1245 * - local: address, 0=autoselect the local address
1246 * - scope: maximum allowed scope value for the local address
1248 __be32 inet_confirm_addr(struct in_device *in_dev,
1249 __be32 dst, __be32 local, int scope)
1252 struct net_device *dev;
1255 if (scope != RT_SCOPE_LINK)
1256 return confirm_addr_indev(in_dev, dst, local, scope);
1258 net = dev_net(in_dev->dev);
1260 for_each_netdev_rcu(net, dev) {
1261 in_dev = __in_dev_get_rcu(dev);
1263 addr = confirm_addr_indev(in_dev, dst, local, scope);
1272 EXPORT_SYMBOL(inet_confirm_addr);
1278 int register_inetaddr_notifier(struct notifier_block *nb)
1280 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1282 EXPORT_SYMBOL(register_inetaddr_notifier);
1284 int unregister_inetaddr_notifier(struct notifier_block *nb)
1286 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1288 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1290 /* Rename ifa_labels for a device name change. Make some effort to preserve
1291 * existing alias numbering and to create unique labels if possible.
1293 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1295 struct in_ifaddr *ifa;
1298 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1299 char old[IFNAMSIZ], *dot;
1301 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1302 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1305 dot = strchr(old, ':');
1307 sprintf(old, ":%d", named);
1310 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1311 strcat(ifa->ifa_label, dot);
1313 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1315 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1319 static bool inetdev_valid_mtu(unsigned int mtu)
1324 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1325 struct in_device *in_dev)
1328 struct in_ifaddr *ifa;
1330 for (ifa = in_dev->ifa_list; ifa;
1331 ifa = ifa->ifa_next) {
1332 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1333 ifa->ifa_local, dev,
1334 ifa->ifa_local, NULL,
1335 dev->dev_addr, NULL);
/* Called only under RTNL semaphore */

/* Netdevice notifier: creates the in_device on NETDEV_REGISTER (or on
 * MTU re-validation), auto-configures 127.0.0.1/8 when loopback comes
 * up, sends gratuitous ARP on address/link change, destroys state on
 * unregister or too-small MTU, and re-registers sysctls on rename.
 * NOTE(review): extraction dropped lines (the switch(event) opening,
 * several break statements, ifa NULL checks, the NOTIFY_DONE return) —
 * consult the complete file before editing.
 */
1341 static int inetdev_event(struct notifier_block *this, unsigned long event,
1344 struct net_device *dev = ptr;
1345 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1350 if (event == NETDEV_REGISTER) {
1351 in_dev = inetdev_init(dev);
1353 return notifier_from_errno(-ENOMEM);
1354 if (dev->flags & IFF_LOOPBACK) {
1355 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1356 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1358 } else if (event == NETDEV_CHANGEMTU) {
1359 /* Re-enabling IP */
1360 if (inetdev_valid_mtu(dev->mtu))
1361 in_dev = inetdev_init(dev);
1367 case NETDEV_REGISTER:
1368 pr_debug("%s: bug\n", __func__);
1369 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1372 if (!inetdev_valid_mtu(dev->mtu))
1374 if (dev->flags & IFF_LOOPBACK) {
1375 struct in_ifaddr *ifa = inet_alloc_ifa();
1378 INIT_HLIST_NODE(&ifa->hash);
1380 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1381 ifa->ifa_prefixlen = 8;
1382 ifa->ifa_mask = inet_make_mask(8);
1383 in_dev_hold(in_dev);
1384 ifa->ifa_dev = in_dev;
1385 ifa->ifa_scope = RT_SCOPE_HOST;
1386 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1387 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1388 INFINITY_LIFE_TIME);
1389 inet_insert_ifa(ifa);
1394 case NETDEV_CHANGEADDR:
1395 if (!IN_DEV_ARP_NOTIFY(in_dev))
1398 case NETDEV_NOTIFY_PEERS:
1399 /* Send gratuitous ARP to notify of link change */
1400 inetdev_send_gratuitous_arp(dev, in_dev);
1405 case NETDEV_PRE_TYPE_CHANGE:
1406 ip_mc_unmap(in_dev);
1408 case NETDEV_POST_TYPE_CHANGE:
1409 ip_mc_remap(in_dev);
1411 case NETDEV_CHANGEMTU:
1412 if (inetdev_valid_mtu(dev->mtu))
1414 /* disable IP when MTU is not enough */
1415 case NETDEV_UNREGISTER:
1416 inetdev_destroy(in_dev);
1418 case NETDEV_CHANGENAME:
1419 /* Do not notify about label change, this event is
1420 * not interesting to applications using netlink.
1422 inetdev_changename(dev, in_dev);
1424 devinet_sysctl_unregister(in_dev);
1425 devinet_sysctl_register(in_dev);
1432 static struct notifier_block ip_netdev_notifier = {
1433 .notifier_call = inetdev_event,
1436 static size_t inet_nlmsg_size(void)
1438 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1439 + nla_total_size(4) /* IFA_ADDRESS */
1440 + nla_total_size(4) /* IFA_LOCAL */
1441 + nla_total_size(4) /* IFA_BROADCAST */
1442 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1443 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1446 static inline u32 cstamp_delta(unsigned long cstamp)
1448 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1451 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1452 unsigned long tstamp, u32 preferred, u32 valid)
1454 struct ifa_cacheinfo ci;
1456 ci.cstamp = cstamp_delta(cstamp);
1457 ci.tstamp = cstamp_delta(tstamp);
1458 ci.ifa_prefered = preferred;
1459 ci.ifa_valid = valid;
1461 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Fill one RTM address message (ifaddrmsg header + IFA_* attributes)
 * for @ifa into @skb.  Remaining lifetimes are computed relative to
 * ifa_tstamp for non-permanent addresses; permanent ones report
 * INFINITY_LIFE_TIME.
 * NOTE(review): extraction dropped lines (nlh NULL check, the tval
 * subtraction/clamp branches, -EMSGSIZE return) — consult the full
 * file before editing.
 */
1464 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1465 u32 portid, u32 seq, int event, unsigned int flags)
1467 struct ifaddrmsg *ifm;
1468 struct nlmsghdr *nlh;
1469 u32 preferred, valid;
1471 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1475 ifm = nlmsg_data(nlh);
1476 ifm->ifa_family = AF_INET;
1477 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1478 ifm->ifa_flags = ifa->ifa_flags;
1479 ifm->ifa_scope = ifa->ifa_scope;
1480 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1482 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1483 preferred = ifa->ifa_preferred_lft;
1484 valid = ifa->ifa_valid_lft;
1485 if (preferred != INFINITY_LIFE_TIME) {
1486 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488 if (preferred > tval)
1492 if (valid != INFINITY_LIFE_TIME) {
1500 preferred = INFINITY_LIFE_TIME;
1501 valid = INFINITY_LIFE_TIME;
1503 if ((ifa->ifa_address &&
1504 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1507 (ifa->ifa_broadcast &&
1508 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1509 (ifa->ifa_label[0] &&
1510 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1511 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1513 goto nla_put_failure;
1515 return nlmsg_end(skb, nlh);
1518 nlmsg_cancel(skb, nlh);
/* RTM_GETADDR dump callback: walk every device in the per-netns
 * dev_index_head hash and emit one RTM_NEWADDR message per address.
 * cb->args[] carries the (hash bucket, device index, address index)
 * resume point across partial dumps; cb->seq is derived from the
 * address generation id so userspace can detect mid-dump changes. */
1522 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1524 struct net *net = sock_net(skb->sk);
1527 int ip_idx, s_ip_idx;
1528 struct net_device *dev;
1529 struct in_device *in_dev;
1530 struct in_ifaddr *ifa;
1531 struct hlist_head *head;
1534 s_idx = idx = cb->args[1];
1535 s_ip_idx = ip_idx = cb->args[2];
1537 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1539 head = &net->dev_index_head[h];
1541 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1543 hlist_for_each_entry_rcu(dev, head, index_hlist) {
/* Skip entries already sent in a previous dump round. */
1546 if (h > s_h || idx > s_idx)
1548 in_dev = __in_dev_get_rcu(dev);
1552 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1553 ifa = ifa->ifa_next, ip_idx++) {
1554 if (ip_idx < s_ip_idx)
1556 if (inet_fill_ifaddr(skb, ifa,
1557 NETLINK_CB(cb->skb).portid,
1559 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1563 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Save the resume point for the next invocation. */
1574 cb->args[2] = ip_idx;
/* Broadcast an address change (event = RTM_NEWADDR/RTM_DELADDR) to the
 * RTNLGRP_IPV4_IFADDR multicast group. On failure the group is told
 * via rtnl_set_sk_err() so listeners can resync. */
1579 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582 struct sk_buff *skb;
1583 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1587 net = dev_net(ifa->ifa_dev->dev);
1588 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1592 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1594 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1595 WARN_ON(err == -EMSGSIZE);
1599 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1603 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/* rtnl_af_ops hook: size of the IFLA_INET_CONF attribute (one u32 per
 * devconf entry) for RTM_GETLINK replies. */
1606 static size_t inet_get_link_af_size(const struct net_device *dev)
1608 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1613 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/* rtnl_af_ops hook: copy the device's ipv4_devconf array into a
 * reserved IFLA_INET_CONF attribute for RTM_GETLINK. */
1616 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1618 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1625 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1629 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1630 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/* Parsing policy for the IFLA_AF_SPEC/AF_INET sub-attributes. */
1635 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1636 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/* rtnl_af_ops hook: validate the AF_INET part of an RTM_SETLINK request
 * before inet_set_link_af() applies it. Rejects devices with no
 * in_device and config ids outside [1, IPV4_DEVCONF_MAX]. */
1639 static int inet_validate_link_af(const struct net_device *dev,
1640 const struct nlattr *nla)
1642 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645 if (dev && !__in_dev_get_rtnl(dev))
1646 return -EAFNOSUPPORT;
1648 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1652 if (tb[IFLA_INET_CONF]) {
1653 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1654 int cfgid = nla_type(a);
1659 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/* rtnl_af_ops hook: apply IFLA_INET_CONF values to the device's
 * ipv4_devconf. Input was already checked by inet_validate_link_af(),
 * so the nested parse here uses no policy. */
1667 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1669 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1670 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1674 return -EAFNOSUPPORT;
1676 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679 if (tb[IFLA_INET_CONF]) {
1680 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1681 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/* Message size needed for an RTM_NEWNETCONF notification carrying the
 * given attribute type; type == -1 means "all attributes".  Must stay
 * in sync with inet_netconf_fill_devconf(). */
1687 static int inet_netconf_msgsize_devconf(int type)
1689 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1690 + nla_total_size(4); /* NETCONFA_IFINDEX */
1692 /* type -1 is used for ALL */
1693 if (type == -1 || type == NETCONFA_FORWARDING)
1694 size += nla_total_size(4);
1695 if (type == -1 || type == NETCONFA_RP_FILTER)
1696 size += nla_total_size(4);
1697 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1698 size += nla_total_size(4);
/* Build an RTM_NEWNETCONF message for one ifindex/devconf pair,
 * including only the attribute selected by 'type' (-1 = all).
 * Returns nlmsg_end() on success; cancels the message on overflow. */
1703 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1704 struct ipv4_devconf *devconf, u32 portid,
1705 u32 seq, int event, unsigned int flags,
1708 struct nlmsghdr *nlh;
1709 struct netconfmsg *ncm;
1711 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716 ncm = nlmsg_data(nlh);
1717 ncm->ncm_family = AF_INET;
1719 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1720 goto nla_put_failure;
1722 /* type -1 is used for ALL */
1723 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1724 nla_put_s32(skb, NETCONFA_FORWARDING,
1725 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1726 goto nla_put_failure;
1727 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1728 nla_put_s32(skb, NETCONFA_RP_FILTER,
1729 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1730 goto nla_put_failure;
1731 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1732 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1733 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1734 goto nla_put_failure;
1736 return nlmsg_end(skb, nlh);
/* Undo the partial message so the skb stays usable. */
1739 nlmsg_cancel(skb, nlh);
/* Notify RTNLGRP_IPV4_NETCONF listeners that one devconf attribute
 * changed for ifindex (or NETCONFA_IFINDEX_ALL/_DEFAULT). Uses
 * GFP_ATOMIC since callers may not be able to sleep. */
1743 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1744 struct ipv4_devconf *devconf)
1746 struct sk_buff *skb;
1749 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1753 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1754 RTM_NEWNETCONF, 0, type);
1756 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1757 WARN_ON(err == -EMSGSIZE);
1761 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1765 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/* Policy for parsing RTM_GETNETCONF requests in
 * inet_netconf_get_devconf(). */
1768 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1769 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1770 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1771 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/* RTM_GETNETCONF handler: look up the devconf selected by
 * NETCONFA_IFINDEX (a real ifindex, or the ALL/DEFAULT sentinels) and
 * unicast a full RTM_NEWNETCONF reply (type -1 = all attrs) back to
 * the requester. */
1774 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1775 struct nlmsghdr *nlh)
1777 struct net *net = sock_net(in_skb->sk);
1778 struct nlattr *tb[NETCONFA_MAX+1];
1779 struct netconfmsg *ncm;
1780 struct sk_buff *skb;
1781 struct ipv4_devconf *devconf;
1782 struct in_device *in_dev;
1783 struct net_device *dev;
1787 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1788 devconf_ipv4_policy);
1793 if (!tb[NETCONFA_IFINDEX])
1796 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1798 case NETCONFA_IFINDEX_ALL:
1799 devconf = net->ipv4.devconf_all;
1801 case NETCONFA_IFINDEX_DEFAULT:
1802 devconf = net->ipv4.devconf_dflt;
/* Otherwise resolve a concrete device's per-device conf. */
1805 dev = __dev_get_by_index(net, ifindex);
1808 in_dev = __in_dev_get_rtnl(dev);
1811 devconf = &in_dev->cnf;
1816 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1820 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1821 NETLINK_CB(in_skb).portid,
1822 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1825 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1826 WARN_ON(err == -EMSGSIZE);
1830 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/* RTM_GETNETCONF dump callback: emit one RTM_NEWNETCONF per device
 * (walking the dev_index_head hash, resuming from cb->args), then two
 * extra passes (h == NETDEV_HASHENTRIES and +1) for the "all" and
 * "default" pseudo-entries. */
1835 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1836 struct netlink_callback *cb)
1838 struct net *net = sock_net(skb->sk);
1841 struct net_device *dev;
1842 struct in_device *in_dev;
1843 struct hlist_head *head;
1846 s_idx = idx = cb->args[1];
1848 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1850 head = &net->dev_index_head[h];
1852 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1854 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1857 in_dev = __in_dev_get_rcu(dev);
1861 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1863 NETLINK_CB(cb->skb).portid,
1871 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Pseudo-entry for the netns-wide "all" configuration. */
1877 if (h == NETDEV_HASHENTRIES) {
1878 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1879 net->ipv4.devconf_all,
1880 NETLINK_CB(cb->skb).portid,
1882 RTM_NEWNETCONF, NLM_F_MULTI,
/* Pseudo-entry for the "default" template configuration. */
1888 if (h == NETDEV_HASHENTRIES + 1) {
1889 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1890 net->ipv4.devconf_dflt,
1891 NETLINK_CB(cb->skb).portid,
1893 RTM_NEWNETCONF, NLM_F_MULTI,
1906 #ifdef CONFIG_SYSCTL
/* Propagate a changed "default" devconf entry (index i) to every
 * device in the netns that has not explicitly overridden it (bit i
 * clear in cnf.state). */
1908 static void devinet_copy_dflt_conf(struct net *net, int i)
1910 struct net_device *dev;
1913 for_each_netdev_rcu(net, dev) {
1914 struct in_device *in_dev;
1916 in_dev = __in_dev_get_rcu(dev);
1917 if (in_dev && !test_bit(i, in_dev->cnf.state))
1918 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1923 /* called with RTNL locked */
/* Apply a change of the "all" forwarding setting: flip redirect
 * acceptance, copy the value to the default template and every device,
 * disable LRO on each device, and emit NETCONFA_FORWARDING
 * notifications for all/default/per-device scopes. */
1924 static void inet_forward_change(struct net *net)
1926 struct net_device *dev;
1927 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1929 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1930 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1931 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1932 NETCONFA_IFINDEX_ALL,
1933 net->ipv4.devconf_all);
1934 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1935 NETCONFA_IFINDEX_DEFAULT,
1936 net->ipv4.devconf_dflt);
1938 for_each_netdev(net, dev) {
1939 struct in_device *in_dev;
1941 dev_disable_lro(dev);
/* NOTE(review): this uses the RCU accessor while holding RTNL
 * (per the comment above) rather than inside an RCU read-side
 * section — verify against __in_dev_get_rtnl(). */
1943 in_dev = __in_dev_get_rcu(dev);
1945 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1946 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1947 dev->ifindex, &in_dev->cnf);
/* Generic sysctl handler for per-device conf entries. On write it
 * marks the entry as explicitly set, re-propagates defaults when the
 * "default" table changed, flushes the route cache for entries that
 * affect routing decisions, and notifies rp_filter changes over
 * netlink. */
1953 static int devinet_conf_proc(ctl_table *ctl, int write,
1954 void __user *buffer,
1955 size_t *lenp, loff_t *ppos)
1957 int old_value = *(int *)ctl->data;
1958 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1959 int new_value = *(int *)ctl->data;
1962 struct ipv4_devconf *cnf = ctl->extra1;
1963 struct net *net = ctl->extra2;
/* Index of this entry within the devconf data[] array. */
1964 int i = (int *)ctl->data - cnf->data;
1966 set_bit(i, cnf->state);
1968 if (cnf == net->ipv4.devconf_dflt)
1969 devinet_copy_dflt_conf(net, i);
1970 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1971 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1972 if ((new_value == 0) && (old_value != 0))
1973 rt_cache_flush(net);
1974 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1975 new_value != old_value) {
/* Map the conf table back to the ifindex scope to notify. */
1978 if (cnf == net->ipv4.devconf_dflt)
1979 ifindex = NETCONFA_IFINDEX_DEFAULT;
1980 else if (cnf == net->ipv4.devconf_all)
1981 ifindex = NETCONFA_IFINDEX_ALL;
1983 struct in_device *idev =
1984 container_of(cnf, struct in_device,
1986 ifindex = idev->dev->ifindex;
1988 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/* sysctl handler for "forwarding" entries. A write that changes the
 * value must run under RTNL; rtnl_trylock() failure restores the old
 * value and restarts the syscall to avoid a lock inversion. Changing
 * "all" fans out via inet_forward_change(); a per-device change
 * disables LRO and notifies; "default" is only notified. */
1996 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1997 void __user *buffer,
1998 size_t *lenp, loff_t *ppos)
2000 int *valp = ctl->data;
2003 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2005 if (write && *valp != val) {
2006 struct net *net = ctl->extra2;
2008 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2009 if (!rtnl_trylock()) {
2010 /* Restore the original values before restarting */
2013 return restart_syscall();
2015 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2016 inet_forward_change(net);
2018 struct ipv4_devconf *cnf = ctl->extra1;
2019 struct in_device *idev =
2020 container_of(cnf, struct in_device, cnf);
2022 dev_disable_lro(idev->dev);
2023 inet_netconf_notify_devconf(net,
2024 NETCONFA_FORWARDING,
2029 rt_cache_flush(net);
/* "default" scope: no device fan-out, just notify. */
2031 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2032 NETCONFA_IFINDEX_DEFAULT,
2033 net->ipv4.devconf_dflt);
/* sysctl handler that flushes the route cache whenever the integer
 * value actually changes on write (used by the *_FLUSHING entries). */
2039 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2040 void __user *buffer,
2041 size_t *lenp, loff_t *ppos)
2043 int *valp = ctl->data;
2045 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2046 struct net *net = ctl->extra2;
2048 if (write && *valp != val)
2049 rt_cache_flush(net);
/* Template for one devinet sysctl entry. The .data/.extra1 pointers
 * reference the global ipv4_devconf template here; they are rebased to
 * the per-netns/per-device copy in __devinet_sysctl_register(). */
2054 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2057 .data = ipv4_devconf.data + \
2058 IPV4_DEVCONF_ ## attr - 1, \
2059 .maxlen = sizeof(int), \
2061 .proc_handler = proc, \
2062 .extra1 = &ipv4_devconf, \
/* Read-write / read-only variants share devinet_conf_proc. */
2065 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2066 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2068 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2069 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
/* COMPLEX: custom handler; FLUSHING: flush routes on change. */
2071 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2072 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2074 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2075 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/* Master template for the net/ipv4/conf/<dev>/ sysctl tree. A copy is
 * made (and rebased) per netns/per device by
 * __devinet_sysctl_register(). */
2077 static struct devinet_sysctl_table {
2078 struct ctl_table_header *sysctl_header;
2079 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2080 } devinet_sysctl = {
2082 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2083 devinet_sysctl_forward),
2084 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2086 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2087 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2088 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2089 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2090 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2091 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2092 "accept_source_route"),
2093 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2094 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2095 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2096 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2097 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2098 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2099 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2100 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2101 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2102 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2103 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2104 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2105 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2107 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2108 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2109 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2110 "force_igmp_version"),
2111 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2112 "promote_secondaries"),
2113 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/* Register a sysctl tree at net/ipv4/conf/<dev_name> for the devconf
 * 'p': duplicate the devinet_sysctl template and rebase each entry's
 * data/extra pointers from the global template onto p/net. */
2118 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2119 struct ipv4_devconf *p)
2122 struct devinet_sysctl_table *t;
2123 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2125 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
/* Last slot is the table terminator; leave it untouched. */
2129 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2130 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2131 t->devinet_vars[i].extra1 = p;
2132 t->devinet_vars[i].extra2 = net;
2135 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2137 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2138 if (!t->sysctl_header)
/* Tear down the sysctl tree registered for cnf, if any. */
2150 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2152 struct devinet_sysctl_table *t = cnf->sysctl;
2158 unregister_net_sysctl_table(t->sysctl_header);
/* Register both the neighbour (ARP) and devinet sysctl trees for a
 * newly set-up in_device. */
2162 static void devinet_sysctl_register(struct in_device *idev)
2164 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2165 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/* Inverse of devinet_sysctl_register(): drop devinet then neighbour
 * sysctl trees. */
2169 static void devinet_sysctl_unregister(struct in_device *idev)
2171 __devinet_sysctl_unregister(&idev->cnf);
2172 neigh_sysctl_unregister(idev->arp_parms);
/* Template for the top-level net/ipv4/ip_forward sysctl; data/extra
 * pointers are rebased per-netns in devinet_init_net(). */
2175 static struct ctl_table ctl_forward_entry[] = {
2177 .procname = "ip_forward",
2178 .data = &ipv4_devconf.data[
2179 IPV4_DEVCONF_FORWARDING - 1],
2180 .maxlen = sizeof(int),
2182 .proc_handler = devinet_sysctl_forward,
2183 .extra1 = &ipv4_devconf,
2184 .extra2 = &init_net,
/* Per-netns init: init_net uses the static devconf tables directly;
 * other namespaces get kmemdup'd copies of "all", "default" and the
 * ip_forward ctl_table. Registers the all/default conf sysctl trees
 * and the net/ipv4/ip_forward entry, then publishes the tables into
 * net->ipv4. Error paths (labels not fully visible here) unwind in
 * reverse and free only namespace-private copies. */
2190 static __net_init int devinet_init_net(struct net *net)
2193 struct ipv4_devconf *all, *dflt;
2194 #ifdef CONFIG_SYSCTL
2195 struct ctl_table *tbl = ctl_forward_entry;
2196 struct ctl_table_header *forw_hdr;
2200 all = &ipv4_devconf;
2201 dflt = &ipv4_devconf_dflt;
2203 if (!net_eq(net, &init_net)) {
2204 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2208 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2210 goto err_alloc_dflt;
2212 #ifdef CONFIG_SYSCTL
2213 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Rebase the duplicated ip_forward entry onto this netns' copy. */
2217 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2218 tbl[0].extra1 = all;
2219 tbl[0].extra2 = net;
2223 #ifdef CONFIG_SYSCTL
2224 err = __devinet_sysctl_register(net, "all", all);
2228 err = __devinet_sysctl_register(net, "default", dflt);
2233 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2234 if (forw_hdr == NULL)
2236 net->ipv4.forw_hdr = forw_hdr;
2239 net->ipv4.devconf_all = all;
2240 net->ipv4.devconf_dflt = dflt;
2243 #ifdef CONFIG_SYSCTL
2245 __devinet_sysctl_unregister(dflt);
2247 __devinet_sysctl_unregister(all);
/* Free only if these are namespace-private duplicates. */
2249 if (tbl != ctl_forward_entry)
2253 if (dflt != &ipv4_devconf_dflt)
2256 if (all != &ipv4_devconf)
/* Per-netns teardown: unregister the ip_forward and all/default conf
 * sysctl trees, then free the namespace's devconf copies. */
2262 static __net_exit void devinet_exit_net(struct net *net)
2264 #ifdef CONFIG_SYSCTL
2265 struct ctl_table *tbl;
2267 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2268 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2269 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2270 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2273 kfree(net->ipv4.devconf_dflt);
2274 kfree(net->ipv4.devconf_all);
/* Pernet operations tying per-namespace setup/teardown together. */
2277 static __net_initdata struct pernet_operations devinet_ops = {
2278 .init = devinet_init_net,
2279 .exit = devinet_exit_net,
/* AF_INET hooks for rtnetlink link messages (IFLA_AF_SPEC handling). */
2282 static struct rtnl_af_ops inet_af_ops = {
2284 .fill_link_af = inet_fill_link_af,
2285 .get_link_af_size = inet_get_link_af_size,
2286 .validate_link_af = inet_validate_link_af,
2287 .set_link_af = inet_set_link_af,
2290 void __init devinet_init(void)
2294 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2295 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2297 register_pernet_subsys(&devinet_ops);
2299 register_gifconf(PF_INET, inet_gifconf);
2300 register_netdevice_notifier(&ip_netdev_notifier);
2302 schedule_delayed_work(&check_lifetime_work, 0);
2304 rtnl_af_register(&inet_af_ops);
2306 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2307 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2308 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2309 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2310 inet_netconf_dump_devconf, NULL);