2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
/* File-local ipv4_devconf instance. Entries are indexed by
 * (IPV4_DEVCONF_xxx - 1); redirect acceptance, redirect sending, secure
 * redirects and shared-media all start at 1.
 */
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
/* Second file-local ipv4_devconf instance with the same four entries set to 1
 * as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt, which
 * inetdev_init() copies into each new in_device - confirm against the init
 * code that is not part of this listing.
 */
79 static struct ipv4_devconf ipv4_devconf_dflt = {
81 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/* Read devconf attribute attr from the default devconf of namespace net
 * (the object net->ipv4.devconf_dflt points to).
 */
89 #define IPV4_DEVCONF_DFLT(net, attr) \
90 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/* Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR paths in this file: the three address attributes are NLA_U32,
 * IFA_LABEL is an NLA_STRING with .len IFNAMSIZ - 1, and IFA_CACHEINFO has
 * .len sizeof(struct ifa_cacheinfo).
 */
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 [IFA_LOCAL] = { .type = NLA_U32 },
94 [IFA_ADDRESS] = { .type = NLA_U32 },
95 [IFA_BROADCAST] = { .type = NLA_U32 },
96 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
/* Hash table of in_ifaddr entries (linked through their hash member) with
 * 1 << IN4_ADDR_HSIZE_SHIFT = 256 buckets. The bucket is chosen by
 * inet_addr_hash(); insertions and removals take inet_addr_hash_lock.
 */
100 #define IN4_ADDR_HSIZE_SHIFT 8
101 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
/* Map (net, addr) to a bucket index of inet_addr_lst: the address is XORed
 * with net_hash_mix(net) and reduced to IN4_ADDR_HSIZE_SHIFT bits by hash_32().
 */
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
108 u32 val = (__force u32) addr ^ net_hash_mix(net);
110 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/* Add ifa at the head of the inet_addr_lst bucket selected by its local
 * address and namespace. The RCU list insertion runs under
 * inet_addr_hash_lock.
 */
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 u32 hash = inet_addr_hash(net, ifa->ifa_local);
117 spin_lock(&inet_addr_hash_lock);
118 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 spin_unlock(&inet_addr_hash_lock);
/* Unlink ifa from the address hash table with hlist_del_init_rcu(), under
 * inet_addr_hash_lock.
 */
122 static void inet_hash_remove(struct in_ifaddr *ifa)
124 spin_lock(&inet_addr_hash_lock);
125 hlist_del_init_rcu(&ifa->hash);
126 spin_unlock(&inet_addr_hash_lock);
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
/* Implementation notes: the hash bucket for (net, addr) is walked with the
 * RCU list iterator looking for an entry whose ifa_local equals addr and
 * whose device belongs to net. The fallback visible below asks the
 * RT_TABLE_LOCAL FIB table (FIB_LOOKUP_NOREF) and accepts the result device
 * only for an RTN_LOCAL route.
 * NOTE(review): the condition guarding the fallback, the RCU read-side
 * section and the reference taken when devref is set are on lines not
 * present in this listing.
 */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 u32 hash = inet_addr_hash(net, addr);
140 struct net_device *result = NULL;
141 struct in_ifaddr *ifa;
144 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 if (ifa->ifa_local == addr) {
146 struct net_device *dev = ifa->ifa_dev->dev;
148 if (!net_eq(dev_net(dev), net))
155 struct flowi4 fl4 = { .daddr = addr };
156 struct fib_result res = { 0 };
157 struct fib_table *local;
159 /* Fallback to FIB local table so that communication
160 * over loopback subnets work.
162 local = fib_get_table(net, RT_TABLE_LOCAL);
164 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 res.type == RTN_LOCAL)
166 result = FIB_RES_DEV(res);
168 if (result && devref)
173 EXPORT_SYMBOL(__ip_dev_find);
/* Forward declarations for helpers defined further down, the blocking
 * notifier chain (inetaddr_chain) that the address add and delete paths call,
 * and the devinet sysctl register/unregister hooks.
 * NOTE(review): the sysctl hooks appear twice (prototype and definition);
 * their bodies and any preprocessor guards are not present in this listing.
 */
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
184 static void devinet_sysctl_register(struct in_device *idev)
187 static void devinet_sysctl_unregister(struct in_device *idev)
192 /* Locks all the inet devices. */
/* Allocate a zero-filled struct in_ifaddr with GFP_KERNEL.
 * Returns the kzalloc() result, so callers must check for NULL.
 */
194 static struct in_ifaddr *inet_alloc_ifa(void)
196 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/* RCU callback queued by inet_free_ifa(): recovers the in_ifaddr from its
 * embedded rcu_head and drops the reference on ifa->ifa_dev.
 * NOTE(review): the guard around in_dev_put() and the release of the ifa
 * memory itself are not present in this listing.
 */
199 static void inet_rcu_free_ifa(struct rcu_head *head)
201 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 in_dev_put(ifa->ifa_dev);
/* Defer the release of ifa: queue inet_rcu_free_ifa() on ifa->rcu_head with
 * call_rcu().
 */
207 static void inet_free_ifa(struct in_ifaddr *ifa)
209 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/* Final stage of in_device teardown (exported). Warns if the address list or
 * the multicast list is still populated, optionally logs the device under
 * NET_REFCNT_DEBUG, and can report "Freeing alive in_device".
 * NOTE(review): the condition selecting the pr_err() branch and the actual
 * release of idev and its device reference are not present in this listing.
 */
212 void in_dev_finish_destroy(struct in_device *idev)
214 struct net_device *dev = idev->dev;
216 WARN_ON(idev->ifa_list);
217 WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 pr_err("Freeing alive in_device %p\n", idev);
227 EXPORT_SYMBOL(in_dev_finish_destroy);
/* Create the IPv4 per-device state (struct in_device) for dev.
 *
 * Visible steps: allocate a zeroed in_device, copy the namespace default
 * devconf into in_dev->cnf (with the sysctl pointer cleared), allocate ARP
 * neighbour parameters, disable LRO when forwarding is enabled, register the
 * sysctl entries, initialise multicast state, and finally publish the
 * in_device through dev->ip_ptr with rcu_assign_pointer().
 *
 * NOTE(review): the error paths, the reference-count operations the two
 * "Reference"/"Account" comments refer to, the call made when the device is
 * already IFF_UP and the return statements are not present in this listing.
 * The caller in inetdev_event() maps a failure to -ENOMEM.
 */
229 static struct in_device *inetdev_init(struct net_device *dev)
231 struct in_device *in_dev;
235 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 sizeof(in_dev->cnf));
240 in_dev->cnf.sysctl = NULL;
242 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 if (!in_dev->arp_parms)
245 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 dev_disable_lro(dev);
247 /* Reference in_dev->dev */
249 /* Account for reference dev->ip_ptr (below) */
252 devinet_sysctl_register(in_dev);
253 ip_mc_init_dev(in_dev);
254 if (dev->flags & IFF_UP)
257 /* we can receive as soon as ip_ptr is set -- do this last */
258 rcu_assign_pointer(dev->ip_ptr, in_dev);
/* RCU callback queued by inetdev_destroy(): recovers the in_device from its
 * embedded rcu_head.
 * NOTE(review): the statement acting on idev is not present in this listing;
 * the name suggests it drops a reference - confirm.
 */
267 static void in_dev_rcu_put(struct rcu_head *head)
269 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/* Tear down the IPv4 state of a device.
 *
 * Visible steps, in order: destroy multicast state, delete every address on
 * in_dev->ifa_list through inet_del_ifa(), clear dev->ip_ptr, unregister the
 * sysctl entries, release the ARP neighbour parameters and queue
 * in_dev_rcu_put() through call_rcu().
 * NOTE(review): the assignment of dev and any statements between the visible
 * ones (for example freeing each deleted ifa) are not present in this listing.
 */
273 static void inetdev_destroy(struct in_device *in_dev)
275 struct in_ifaddr *ifa;
276 struct net_device *dev;
284 ip_mc_destroy_dev(in_dev);
286 while ((ifa = in_dev->ifa_list) != NULL) {
287 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 devinet_sysctl_unregister(in_dev);
294 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/* Walk the primary addresses of in_dev looking for one that
 * inet_ifa_match() accepts for a and, when b is non-zero, for b as well.
 * NOTE(review): the return statements and any locking are not present in
 * this listing.
 */
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
303 for_primary_ifa(in_dev) {
304 if (inet_ifa_match(a, ifa)) {
305 if (!b || inet_ifa_match(b, ifa)) {
310 } endfor_ifa(in_dev);
/* Remove the address *ifap from in_dev and announce the removal.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link that currently points at the address to delete (ifa1)
 * @destroy: NOTE(review): its use is not present in this listing
 * @nlh, @portid: forwarded to rtmsg_ifa() for the netlink notifications
 *
 * Visible behaviour:
 *  - If ifa1 is a primary address, the entries after it are scanned.
 *    last_prim tracks the last primary whose scope is not smaller than that
 *    of ifa1. Secondaries with the same mask and subnet as ifa1 are either
 *    removed (unhashed, unlinked, RTM_DELADDR sent, notifier chain called)
 *    or, presumably when do_promote is set, one becomes the promotion
 *    candidate; the selecting branch is not visible.
 *  - Before ifa1 is unlinked, fib_del_ifaddr() is called for the candidate
 *    and every later entry in the same subnet.
 *  - ifa1 is unlinked and unhashed, then RTM_DELADDR is sent before the
 *    NETDEV_DOWN notifier runs (see the ordering comment below).
 *  - A promoted address is relinked after last_prim, loses IFA_F_SECONDARY
 *    and is announced with RTM_NEWADDR; remaining same-subnet secondaries
 *    are then revisited.
 */
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316 int destroy, struct nlmsghdr *nlh, u32 portid)
318 struct in_ifaddr *promote = NULL;
319 struct in_ifaddr *ifa, *ifa1 = *ifap;
320 struct in_ifaddr *last_prim = in_dev->ifa_list;
321 struct in_ifaddr *prev_prom = NULL;
322 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326 /* 1. Deleting primary ifaddr forces deletion all secondaries
327 * unless alias promotion is set
330 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333 while ((ifa = *ifap1) != NULL) {
334 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335 ifa1->ifa_scope <= ifa->ifa_scope)
338 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339 ifa1->ifa_mask != ifa->ifa_mask ||
340 !inet_ifa_match(ifa1->ifa_address, ifa)) {
341 ifap1 = &ifa->ifa_next;
347 inet_hash_remove(ifa);
348 *ifap1 = ifa->ifa_next;
350 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351 blocking_notifier_call_chain(&inetaddr_chain,
361 /* On promotion all secondaries from subnet are changing
362 * the primary IP, we must remove all their routes silently
363 * and later to add them back with new prefsrc. Do this
364 * while all addresses are on the device list.
366 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367 if (ifa1->ifa_mask == ifa->ifa_mask &&
368 inet_ifa_match(ifa1->ifa_address, ifa))
369 fib_del_ifaddr(ifa, ifa1);
374 *ifap = ifa1->ifa_next;
375 inet_hash_remove(ifa1);
377 /* 3. Announce address deletion */
379 /* Send message first, then call notifier.
380 At first sight, FIB update triggered by notifier
381 will refer to already deleted ifaddr, that could confuse
382 netlink listeners. It is not true: look, gated sees
383 that route deleted and if it still thinks that ifaddr
384 is valid, it will try to restore deleted routes... Grr.
385 So that, this order is correct.
387 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
391 struct in_ifaddr *next_sec = promote->ifa_next;
394 prev_prom->ifa_next = promote->ifa_next;
395 promote->ifa_next = last_prim->ifa_next;
396 last_prim->ifa_next = promote;
399 promote->ifa_flags &= ~IFA_F_SECONDARY;
400 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401 blocking_notifier_call_chain(&inetaddr_chain,
403 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404 if (ifa1->ifa_mask != ifa->ifa_mask ||
405 !inet_ifa_match(ifa1->ifa_address, ifa))
/* Convenience wrapper: delete *ifap through __inet_del_ifa() with no
 * originating netlink message (nlh NULL, portid 0).
 */
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
418 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/* Delayed work item that runs check_lifetime(); it is (re)scheduled by the
 * address insert and update paths and by check_lifetime() itself.
 */
421 static void check_lifetime(struct work_struct *work);
423 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/* Insert ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa: address to insert; ifa_dev must already be set
 * @nlh, @portid: forwarded to rtmsg_ifa() for the RTM_NEWADDR message
 *
 * Visible behaviour:
 *  - An address with ifa_local == 0 takes an early path (not visible).
 *  - IFA_F_SECONDARY is cleared, then the list is scanned. last_primary
 *    follows each primary whose scope is not smaller than the new one. An
 *    existing entry with the same mask and subnet makes the new address a
 *    secondary; an identical local address or a differing scope take
 *    branches whose bodies are not visible.
 *  - For a primary address, net_srandom() is seeded with ifa_local.
 *  - The address is linked in, added to the address hash, and
 *    check_lifetime_work is cancelled and rescheduled with zero delay.
 *  - RTM_NEWADDR is sent before the NETDEV_UP notifier chain runs.
 *
 * NOTE(review): return values are not present in this listing.
 */
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
428 struct in_device *in_dev = ifa->ifa_dev;
429 struct in_ifaddr *ifa1, **ifap, **last_primary;
433 if (!ifa->ifa_local) {
438 ifa->ifa_flags &= ~IFA_F_SECONDARY;
439 last_primary = &in_dev->ifa_list;
441 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442 ifap = &ifa1->ifa_next) {
443 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444 ifa->ifa_scope <= ifa1->ifa_scope)
445 last_primary = &ifa1->ifa_next;
446 if (ifa1->ifa_mask == ifa->ifa_mask &&
447 inet_ifa_match(ifa1->ifa_address, ifa)) {
448 if (ifa1->ifa_local == ifa->ifa_local) {
452 if (ifa1->ifa_scope != ifa->ifa_scope) {
456 ifa->ifa_flags |= IFA_F_SECONDARY;
460 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461 net_srandom(ifa->ifa_local);
465 ifa->ifa_next = *ifap;
468 inet_hash_insert(dev_net(in_dev->dev), ifa);
470 cancel_delayed_work(&check_lifetime_work);
471 schedule_delayed_work(&check_lifetime_work, 0);
473 /* Send message first, then call notifier.
474 Notifier will trigger FIB update, so that
475 listeners of netlink will know about new ifaddr */
476 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/* Convenience wrapper: insert ifa through __inet_insert_ifa() with no
 * originating netlink message (nlh NULL, portid 0) and return its result.
 */
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 return __inet_insert_ifa(ifa, NULL, 0);
/* Bind ifa to the in_device of dev and insert it.
 *
 * Calls ipv4_devconf_setall() on the in_device, points ifa->ifa_dev at it if
 * it differs (warning when ifa already belonged to another in_device), forces
 * RT_SCOPE_HOST for loopback addresses, and returns the result of
 * inet_insert_ifa().
 * NOTE(review): handling of a missing in_device and the reference taken when
 * ifa_dev is assigned are not present in this listing.
 */
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 struct in_device *in_dev = __in_dev_get_rtnl(dev);
497 ipv4_devconf_setall(in_dev);
498 if (ifa->ifa_dev != in_dev) {
499 WARN_ON(ifa->ifa_dev);
501 ifa->ifa_dev = in_dev;
503 if (ipv4_is_loopback(ifa->ifa_local))
504 ifa->ifa_scope = RT_SCOPE_HOST;
505 return inet_insert_ifa(ifa);
508 /* Caller must hold RCU or RTNL :
509 * We don't take a reference on the found in_device
/* Resolve ifindex in namespace net with dev_get_by_index_rcu() and read the
 * in_device of that device through rcu_dereference_rtnl(dev->ip_ptr).
 * The result starts out as NULL. Exported.
 * NOTE(review): the check for a missing device and the return statement are
 * not present in this listing.
 */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 struct net_device *dev;
514 struct in_device *in_dev = NULL;
517 dev = dev_get_by_index_rcu(net, ifindex);
519 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
523 EXPORT_SYMBOL(inetdev_by_index);
525 /* Called only from RTNL semaphored context. No locks. */
/* Search the primary addresses of in_dev for one whose mask equals mask and
 * which inet_ifa_match() accepts for prefix.
 * NOTE(review): the return statements are not present in this listing.
 */
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
532 for_primary_ifa(in_dev) {
533 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535 } endfor_ifa(in_dev);
/* Netlink handler for RTM_DELADDR.
 *
 * Parses the message against ifa_ipv4_policy, resolves the in_device from
 * ifm->ifa_index, then scans its address list. An entry is skipped when a
 * supplied IFA_LOCAL differs from ifa_local, a supplied IFA_LABEL differs
 * from ifa_label, or a supplied IFA_ADDRESS does not match the prefix length
 * and subnet. The first entry that survives is deleted through
 * __inet_del_ifa() with destroy = 1 and the sender portid.
 * err is set to -EADDRNOTAVAIL after the loop, i.e. when nothing matched.
 * NOTE(review): the remaining error codes and return statements are not
 * present in this listing.
 */
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 struct net *net = sock_net(skb->sk);
542 struct nlattr *tb[IFA_MAX+1];
543 struct in_device *in_dev;
544 struct ifaddrmsg *ifm;
545 struct in_ifaddr *ifa, **ifap;
550 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
554 ifm = nlmsg_data(nlh);
555 in_dev = inetdev_by_index(net, ifm->ifa_index);
556 if (in_dev == NULL) {
561 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 ifap = &ifa->ifa_next) {
564 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
567 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
570 if (tb[IFA_ADDRESS] &&
571 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
575 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
579 err = -EADDRNOTAVAIL;
/* Lifetime value meaning "never expires": the lifetime checks in this file
 * skip any valid or preferred lifetime equal to this all-ones 32-bit value.
 */
584 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/* Delayed-work handler that expires and deprecates addresses.
 *
 * For every bucket of inet_addr_lst it makes two passes over the
 * non-permanent addresses. The age of an address is the time since
 * ifa_tstamp in seconds (with ADDRCONF_TIMER_FUZZ_MINUS added before the
 * division by HZ). Afterwards the work item reschedules itself for the
 * earliest upcoming deadline, rounded with round_jiffies_up() when that is
 * accurate enough and never sooner than now + ADDRCONF_TIMER_FUZZ_MAX.
 * NOTE(review): the locking around the two passes, the test of
 * change_needed between them and the initialisation of now are not present
 * in this listing.
 */
586 static void check_lifetime(struct work_struct *work)
588 unsigned long now, next, next_sec, next_sched;
589 struct in_ifaddr *ifa;
590 struct hlist_node *n;
594 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 bool change_needed = false;
/* Pass 1 (RCU iterator): only decide whether anything in this bucket must
 * change and pull next forward to the earliest pending deadline.
 */
600 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
603 if (ifa->ifa_flags & IFA_F_PERMANENT)
606 /* We try to batch several events at once. */
607 age = (now - ifa->ifa_tstamp +
608 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
610 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
611 age >= ifa->ifa_valid_lft) {
612 change_needed = true;
613 } else if (ifa->ifa_preferred_lft ==
614 INFINITY_LIFE_TIME) {
616 } else if (age >= ifa->ifa_preferred_lft) {
617 if (time_before(ifa->ifa_tstamp +
618 ifa->ifa_valid_lft * HZ, next))
619 next = ifa->ifa_tstamp +
620 ifa->ifa_valid_lft * HZ;
622 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
623 change_needed = true;
624 } else if (time_before(ifa->ifa_tstamp +
625 ifa->ifa_preferred_lft * HZ,
627 next = ifa->ifa_tstamp +
628 ifa->ifa_preferred_lft * HZ;
/* Pass 2 (removal-safe iterator): delete addresses whose valid lifetime has
 * run out and flag those past their preferred lifetime as deprecated.
 */
635 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
638 if (ifa->ifa_flags & IFA_F_PERMANENT)
641 /* We try to batch several events at once. */
642 age = (now - ifa->ifa_tstamp +
643 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646 age >= ifa->ifa_valid_lft) {
647 struct in_ifaddr **ifap;
649 for (ifap = &ifa->ifa_dev->ifa_list;
650 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
652 inet_del_ifa(ifa->ifa_dev,
657 } else if (ifa->ifa_preferred_lft !=
658 INFINITY_LIFE_TIME &&
659 age >= ifa->ifa_preferred_lft &&
660 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
661 ifa->ifa_flags |= IFA_F_DEPRECATED;
662 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
668 next_sec = round_jiffies_up(next);
671 /* If rounded timeout is accurate enough, accept it. */
672 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
673 next_sched = next_sec;
676 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
677 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
678 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
680 schedule_delayed_work(&check_lifetime_work, next_sched - now);
/* Record the valid and preferred lifetimes on ifa.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are cleared first. Each lifetime is
 * passed through addrconf_timeout_fixup(); a finite valid lifetime is stored
 * in ifa_valid_lft, otherwise the address is flagged IFA_F_PERMANENT. A
 * finite preferred lifetime is stored in ifa_preferred_lft. ifa_tstamp is set
 * to the current jiffies and ifa_cstamp is initialised from it when still 0.
 * NOTE(review): the condition under which IFA_F_DEPRECATED is set again is
 * not present in this listing.
 */
683 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
686 unsigned long timeout;
688 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690 timeout = addrconf_timeout_fixup(valid_lft, HZ);
691 if (addrconf_finite_timeout(timeout))
692 ifa->ifa_valid_lft = timeout;
694 ifa->ifa_flags |= IFA_F_PERMANENT;
696 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
697 if (addrconf_finite_timeout(timeout)) {
699 ifa->ifa_flags |= IFA_F_DEPRECATED;
700 ifa->ifa_preferred_lft = timeout;
702 ifa->ifa_tstamp = jiffies;
703 if (!ifa->ifa_cstamp)
704 ifa->ifa_cstamp = ifa->ifa_tstamp;
/* Build a new in_ifaddr from an RTM_NEWADDR netlink message.
 *
 * @net: namespace used to resolve ifm->ifa_index
 * @nlh: the request
 * @pvalid_lft, @pprefered_lft: written only when IFA_CACHEINFO is present
 *   and acceptable; otherwise left as the caller initialised them
 *
 * Validation visible here: prefix length at most 32, IFA_LOCAL mandatory, the
 * device and its in_device must resolve, and a supplied IFA_CACHEINFO must
 * have a non-zero valid lifetime that is not shorter than the preferred one.
 * IFA_ADDRESS defaults to IFA_LOCAL; the label comes from IFA_LABEL or else
 * from dev->name.
 * NOTE(review): the error codes, the cleanup on failure and the return
 * statements are not present in this listing.
 */
707 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
708 __u32 *pvalid_lft, __u32 *pprefered_lft)
710 struct nlattr *tb[IFA_MAX+1];
711 struct in_ifaddr *ifa;
712 struct ifaddrmsg *ifm;
713 struct net_device *dev;
714 struct in_device *in_dev;
717 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
721 ifm = nlmsg_data(nlh);
723 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
726 dev = __dev_get_by_index(net, ifm->ifa_index);
731 in_dev = __in_dev_get_rtnl(dev);
736 ifa = inet_alloc_ifa();
739 * A potential indev allocation can be left alive, it stays
740 * assigned to its device and is destroy with it.
744 ipv4_devconf_setall(in_dev);
747 if (tb[IFA_ADDRESS] == NULL)
748 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750 INIT_HLIST_NODE(&ifa->hash);
751 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
752 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
753 ifa->ifa_flags = ifm->ifa_flags;
754 ifa->ifa_scope = ifm->ifa_scope;
755 ifa->ifa_dev = in_dev;
757 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
758 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760 if (tb[IFA_BROADCAST])
761 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
764 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768 if (tb[IFA_CACHEINFO]) {
769 struct ifa_cacheinfo *ci;
771 ci = nla_data(tb[IFA_CACHEINFO]);
772 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
776 *pvalid_lft = ci->ifa_valid;
777 *pprefered_lft = ci->ifa_prefered;
/* Look on the device of ifa for an already configured address with the same
 * mask, the same subnet (inet_ifa_match on ifa_address) and the same local
 * address.
 * NOTE(review): the return statements are not present in this listing.
 */
788 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
790 struct in_device *in_dev = ifa->ifa_dev;
791 struct in_ifaddr *ifa1, **ifap;
796 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
797 ifap = &ifa1->ifa_next) {
798 if (ifa1->ifa_mask == ifa->ifa_mask &&
799 inet_ifa_match(ifa1->ifa_address, ifa) &&
800 ifa1->ifa_local == ifa->ifa_local)
/* Netlink handler for RTM_NEWADDR.
 *
 * Builds an in_ifaddr from the request (lifetimes default to infinite) and
 * checks for an identical existing address with find_matching_ifa().
 *  - No existing address: lifetimes are applied and the address is inserted
 *    through __inet_insert_ifa(), whose result is returned.
 *  - Existing address: the request is refused when NLM_F_EXCL is set or
 *    NLM_F_REPLACE is missing; otherwise lifetimes are updated,
 *    check_lifetime_work is rescheduled immediately, and RTM_NEWADDR plus the
 *    NETDEV_UP notifier are emitted.
 * NOTE(review): error codes, the handling of the freshly built ifa on the
 * update path (presumably it is swapped for the existing entry) and the
 * final return are not present in this listing.
 */
806 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
808 struct net *net = sock_net(skb->sk);
809 struct in_ifaddr *ifa;
810 struct in_ifaddr *ifa_existing;
811 __u32 valid_lft = INFINITY_LIFE_TIME;
812 __u32 prefered_lft = INFINITY_LIFE_TIME;
816 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
820 ifa_existing = find_matching_ifa(ifa);
822 /* It would be best to check for !NLM_F_CREATE here but
823 * userspace alreay relies on not having to provide this.
825 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
826 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
830 if (nlh->nlmsg_flags & NLM_F_EXCL ||
831 !(nlh->nlmsg_flags & NLM_F_REPLACE))
834 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
835 cancel_delayed_work(&check_lifetime_work);
836 schedule_delayed_work(&check_lifetime_work, 0);
837 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
838 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
844 * Determine a default network mask, based on the IP address.
/* Classful prefix length for addr. The result starts at -1, which callers in
 * this file treat as "not a usable unicast address"; the zero network and
 * class A, B and C addresses (tested on the host-order value) each select
 * their own result.
 * NOTE(review): the values assigned in each branch and the return statement
 * are not present in this listing.
 */
847 static int inet_abc_len(__be32 addr)
849 int rc = -1; /* Something else, probably a multicast. */
851 if (ipv4_is_zeronet(addr))
854 __u32 haddr = ntohl(addr);
856 if (IN_CLASSA(haddr))
858 else if (IN_CLASSB(haddr))
860 else if (IN_CLASSC(haddr))
/* IPv4 handler for the interface address ioctls (SIOCGIF... and SIOCSIF...
 * for ADDR, BRDADDR, DSTADDR and NETMASK, plus SIOCSIFFLAGS).
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: ioctl command
 * @arg: user pointer to a struct ifreq
 *
 * Visible flow:
 *  1. Copy the ifreq from user space, force NUL termination of the name,
 *     keep a copy of the caller-supplied sockaddr and locate a ':' alias
 *     separator in the name; dev_load() is given the name.
 *  2. Per-command preparation: the get commands remember whether the caller
 *     passed AF_INET (tryaddrmatch) and reset the sockaddr to an empty
 *     AF_INET one; the set commands require CAP_NET_ADMIN in net->user_ns
 *     and an AF_INET sockaddr.
 *  3. Resolve the device by name and its in_device, then find the address:
 *     first by label plus original address, then by label alone.
 *  4. Without a match, only SIOCSIFADDR and SIOCSIFFLAGS proceed; the rest
 *     end with -EADDRNOTAVAIL.
 *  5. Command handling: the get commands copy the requested field into the
 *     sockaddr; SIOCSIFFLAGS deletes an alias address when IFF_UP is cleared
 *     or forwards to dev_change_flags(); the set commands delete the address
 *     from the list, modify it and insert it again.
 *  6. copy_to_user() writes the ifreq back, giving -EFAULT on failure.
 *
 * NOTE(review): locking, several conditions, error codes and the labels
 * that connect these steps are not present in this listing.
 */
868 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
871 struct sockaddr_in sin_orig;
872 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
873 struct in_device *in_dev;
874 struct in_ifaddr **ifap = NULL;
875 struct in_ifaddr *ifa = NULL;
876 struct net_device *dev;
879 int tryaddrmatch = 0;
882 * Fetch the caller's info block into kernel space
885 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
887 ifr.ifr_name[IFNAMSIZ - 1] = 0;
889 /* save original address for comparison */
890 memcpy(&sin_orig, sin, sizeof(*sin));
892 colon = strchr(ifr.ifr_name, ':');
896 dev_load(net, ifr.ifr_name);
899 case SIOCGIFADDR: /* Get interface address */
900 case SIOCGIFBRDADDR: /* Get the broadcast address */
901 case SIOCGIFDSTADDR: /* Get the destination address */
902 case SIOCGIFNETMASK: /* Get the netmask for the interface */
903 /* Note that these ioctls will not sleep,
904 so that we do not impose a lock.
905 One day we will be forced to put shlock here (I mean SMP)
907 tryaddrmatch = (sin_orig.sin_family == AF_INET);
908 memset(sin, 0, sizeof(*sin));
909 sin->sin_family = AF_INET;
914 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
917 case SIOCSIFADDR: /* Set interface address (and family) */
918 case SIOCSIFBRDADDR: /* Set the broadcast address */
919 case SIOCSIFDSTADDR: /* Set the destination address */
920 case SIOCSIFNETMASK: /* Set the netmask for the interface */
922 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
925 if (sin->sin_family != AF_INET)
936 dev = __dev_get_by_name(net, ifr.ifr_name);
943 in_dev = __in_dev_get_rtnl(dev);
946 /* Matthias Andree */
947 /* compare label and address (4.4BSD style) */
948 /* note: we only do this for a limited set of ioctls
949 and only if the original address family was AF_INET.
950 This is checked above. */
951 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
952 ifap = &ifa->ifa_next) {
953 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
954 sin_orig.sin_addr.s_addr ==
960 /* we didn't get a match, maybe the application is
961 4.3BSD-style and passed in junk so we fall back to
962 comparing just the label */
964 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
965 ifap = &ifa->ifa_next)
966 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
971 ret = -EADDRNOTAVAIL;
972 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
976 case SIOCGIFADDR: /* Get interface address */
977 sin->sin_addr.s_addr = ifa->ifa_local;
980 case SIOCGIFBRDADDR: /* Get the broadcast address */
981 sin->sin_addr.s_addr = ifa->ifa_broadcast;
984 case SIOCGIFDSTADDR: /* Get the destination address */
985 sin->sin_addr.s_addr = ifa->ifa_address;
988 case SIOCGIFNETMASK: /* Get the netmask for the interface */
989 sin->sin_addr.s_addr = ifa->ifa_mask;
994 ret = -EADDRNOTAVAIL;
998 if (!(ifr.ifr_flags & IFF_UP))
999 inet_del_ifa(in_dev, ifap, 1);
1002 ret = dev_change_flags(dev, ifr.ifr_flags);
1005 case SIOCSIFADDR: /* Set interface address (and family) */
1007 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1012 ifa = inet_alloc_ifa();
1015 INIT_HLIST_NODE(&ifa->hash);
1017 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1019 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1022 if (ifa->ifa_local == sin->sin_addr.s_addr)
1024 inet_del_ifa(in_dev, ifap, 0);
1025 ifa->ifa_broadcast = 0;
1029 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1031 if (!(dev->flags & IFF_POINTOPOINT)) {
1032 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1033 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1034 if ((dev->flags & IFF_BROADCAST) &&
1035 ifa->ifa_prefixlen < 31)
1036 ifa->ifa_broadcast = ifa->ifa_address |
1039 ifa->ifa_prefixlen = 32;
1040 ifa->ifa_mask = inet_make_mask(32);
1042 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1043 ret = inet_set_ifa(dev, ifa);
1046 case SIOCSIFBRDADDR: /* Set the broadcast address */
1048 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1049 inet_del_ifa(in_dev, ifap, 0);
1050 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1051 inet_insert_ifa(ifa);
1055 case SIOCSIFDSTADDR: /* Set the destination address */
1057 if (ifa->ifa_address == sin->sin_addr.s_addr)
1060 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1063 inet_del_ifa(in_dev, ifap, 0);
1064 ifa->ifa_address = sin->sin_addr.s_addr;
1065 inet_insert_ifa(ifa);
1068 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1071 * The mask we set must be legal.
1074 if (bad_mask(sin->sin_addr.s_addr, 0))
1077 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1078 __be32 old_mask = ifa->ifa_mask;
1079 inet_del_ifa(in_dev, ifap, 0);
1080 ifa->ifa_mask = sin->sin_addr.s_addr;
1081 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1083 /* See if current broadcast address matches
1084 * with current netmask, then recalculate
1085 * the broadcast address. Otherwise it's a
1086 * funny address, so don't touch it since
1087 * the user seems to know what (s)he's doing...
1089 if ((dev->flags & IFF_BROADCAST) &&
1090 (ifa->ifa_prefixlen < 31) &&
1091 (ifa->ifa_broadcast ==
1092 (ifa->ifa_local|~old_mask))) {
1093 ifa->ifa_broadcast = (ifa->ifa_local |
1094 ~sin->sin_addr.s_addr);
1096 inet_insert_ifa(ifa);
1106 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/* Emit one struct ifreq per IPv4 address of dev into the user buffer buf
 * (len bytes available), accumulating the byte count in done.
 *
 * Each record is zeroed, named after the address label or else the device
 * name, and carries an AF_INET sockaddr. Output stops when fewer than
 * sizeof(ifr) bytes remain.
 * NOTE(review): one visible path only adds sizeof(ifr) to done without
 * copying, which looks like the size-query case when no buffer is given;
 * its condition, the copy_to_user() failure handling and the return
 * statement are not present in this listing.
 */
1110 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1112 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1113 struct in_ifaddr *ifa;
1120 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1122 done += sizeof(ifr);
1125 if (len < (int) sizeof(ifr))
1127 memset(&ifr, 0, sizeof(struct ifreq));
1129 strcpy(ifr.ifr_name, ifa->ifa_label);
1131 strcpy(ifr.ifr_name, dev->name);
1133 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1134 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1137 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1141 buf += sizeof(struct ifreq);
1142 len -= sizeof(struct ifreq);
1143 done += sizeof(struct ifreq);
/* Choose a local source address for traffic leaving through dev (exported).
 *
 * @dev:   outgoing device
 * @dst:   destination, or 0 for no subnet preference
 * @scope: largest acceptable ifa_scope value
 *
 * First the primary addresses of dev are examined: entries whose scope
 * exceeds scope are skipped, an entry on the subnet of dst (or any entry when
 * dst is 0) is taken, and otherwise an entry in scope is remembered as a
 * candidate. Failing that, every device in the namespace is scanned for a
 * primary address whose scope is not RT_SCOPE_LINK and does not exceed scope.
 * NOTE(review): RCU locking, the early exits and the return statement are
 * not present in this listing.
 */
1149 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1152 struct in_device *in_dev;
1153 struct net *net = dev_net(dev);
1156 in_dev = __in_dev_get_rcu(dev);
1160 for_primary_ifa(in_dev) {
1161 if (ifa->ifa_scope > scope)
1163 if (!dst || inet_ifa_match(dst, ifa)) {
1164 addr = ifa->ifa_local;
1168 addr = ifa->ifa_local;
1169 } endfor_ifa(in_dev);
1175 /* Not loopback addresses on loopback should be preferred
1176 in this case. It is importnat that lo is the first interface
1179 for_each_netdev_rcu(net, dev) {
1180 in_dev = __in_dev_get_rcu(dev);
1184 for_primary_ifa(in_dev) {
1185 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1186 ifa->ifa_scope <= scope) {
1187 addr = ifa->ifa_local;
1190 } endfor_ifa(in_dev);
/* Per-device worker for inet_confirm_addr(): scan the addresses of in_dev
 * for a local address compatible with dst, local and scope.
 *
 * A candidate addr is taken from an entry whose local address equals local
 * (or any entry when local is 0) and whose scope does not exceed scope.
 * same records whether an entry covers both local and dst (a zero value
 * acts as a wildcard). Returns addr when same is set, otherwise 0.
 * NOTE(review): the loop header, the declarations and several conditions
 * are not present in this listing.
 */
1198 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1199 __be32 local, int scope)
1206 (local == ifa->ifa_local || !local) &&
1207 ifa->ifa_scope <= scope) {
1208 addr = ifa->ifa_local;
1213 same = (!local || inet_ifa_match(local, ifa)) &&
1214 (!dst || inet_ifa_match(dst, ifa));
1218 /* Is the selected addr into dst subnet? */
1219 if (inet_ifa_match(addr, ifa))
1221 /* No, then can we use new local src? */
1222 if (ifa->ifa_scope <= scope) {
1223 addr = ifa->ifa_local;
1226 /* search for large dst subnet for addr */
1230 } endfor_ifa(in_dev);
1232 return same ? addr : 0;
1236 * Confirm that local IP address exists using wildcards:
1237 * - in_dev: only on this interface, 0=any interface
1238 * - dst: only in the same subnet as dst, 0=any dst
1239 * - local: address, 0=autoselect the local address
1240 * - scope: maximum allowed scope value for the local address
/* Implementation notes (exported): for any scope other than RT_SCOPE_LINK the
 * answer comes from confirm_addr_indev() on in_dev alone. For RT_SCOPE_LINK
 * every device in the namespace of in_dev is tried in turn.
 * NOTE(review): RCU locking, the loop exit and the return statement are not
 * present in this listing.
 */
1242 __be32 inet_confirm_addr(struct in_device *in_dev,
1243 __be32 dst, __be32 local, int scope)
1246 struct net_device *dev;
1249 if (scope != RT_SCOPE_LINK)
1250 return confirm_addr_indev(in_dev, dst, local, scope);
1252 net = dev_net(in_dev->dev);
1254 for_each_netdev_rcu(net, dev) {
1255 in_dev = __in_dev_get_rcu(dev);
1257 addr = confirm_addr_indev(in_dev, dst, local, scope);
1266 EXPORT_SYMBOL(inet_confirm_addr);
/* Subscribe nb to IPv4 address events (the inetaddr_chain blocking notifier
 * chain). Returns the result of blocking_notifier_chain_register(). Exported.
 */
1272 int register_inetaddr_notifier(struct notifier_block *nb)
1274 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1276 EXPORT_SYMBOL(register_inetaddr_notifier);
/* Remove nb from the inetaddr_chain blocking notifier chain. Returns the
 * result of blocking_notifier_chain_unregister(). Exported.
 */
1278 int unregister_inetaddr_notifier(struct notifier_block *nb)
1280 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1282 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1284 /* Rename ifa_labels for a device name change. Make some effort to preserve
1285 * existing alias numbering and to create unique labels if possible.
/* Implementation notes: for every address the old label is saved, the label
 * is overwritten with the new device name, and the alias suffix (the part of
 * the old label starting at ':') is appended again. When the suffix does not
 * fit it is written over the tail of the new name so the label stays within
 * IFNAMSIZ. When the old label had no ':', a numeric ":N" suffix built from
 * the counter named can be generated. RTM_NEWADDR is sent per address.
 * NOTE(review): the conditions choosing between these paths and the
 * handling of the first (unsuffixed) address are not present in this listing.
 */
1287 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1289 struct in_ifaddr *ifa;
1292 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1293 char old[IFNAMSIZ], *dot;
1295 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1296 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1299 dot = strchr(old, ':');
1301 sprintf(old, ":%d", named);
1304 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1305 strcat(ifa->ifa_label, dot);
1307 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1309 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/* Tell whether mtu is large enough for IPv4 to stay enabled on a device;
 * inetdev_event() uses the result to enable or disable IP on MTU changes.
 * NOTE(review): the body, and therefore the actual threshold, is not present
 * in this listing.
 */
1313 static bool inetdev_valid_mtu(unsigned int mtu)
/* Send one ARP request per configured address of in_dev in which both the
 * source and the target IP are the address itself (a gratuitous ARP), using
 * the hardware address of dev as the sender.
 */
1318 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1319 struct in_device *in_dev)
1322 struct in_ifaddr *ifa;
1324 for (ifa = in_dev->ifa_list; ifa;
1325 ifa = ifa->ifa_next) {
1326 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1327 ifa->ifa_local, dev,
1328 ifa->ifa_local, NULL,
1329 dev->dev_addr, NULL);
1333 /* Called only under RTNL semaphore */
/* Netdevice notifier callback that keeps the IPv4 per-device state in step
 * with device events.
 *
 * First part (apparently taken when the device has no in_device yet):
 *  - NETDEV_REGISTER creates the in_device; a failure is reported as
 *    -ENOMEM, and loopback devices get NOXFRM and NOPOLICY set.
 *  - NETDEV_CHANGEMTU re-creates the in_device once the MTU is valid again.
 *
 * Event dispatch visible below:
 *  - NETDEV_REGISTER: logs "bug" and clears dev->ip_ptr.
 *  - Device coming up (case label not visible): requires a valid MTU and, on
 *    loopback, installs the 127.0.0.1/8 host-scope address with infinite
 *    lifetimes.
 *  - NETDEV_CHANGEADDR (only with arp_notify) and NETDEV_NOTIFY_PEERS send
 *    gratuitous ARP.
 *  - NETDEV_PRE_TYPE_CHANGE / NETDEV_POST_TYPE_CHANGE unmap and remap
 *    multicast state.
 *  - NETDEV_CHANGEMTU with an invalid MTU falls into NETDEV_UNREGISTER,
 *    which destroys the in_device.
 *  - NETDEV_CHANGENAME renames the address labels and re-registers sysctls.
 *
 * NOTE(review): several case labels, break statements and the return value
 * are not present in this listing.
 */
1335 static int inetdev_event(struct notifier_block *this, unsigned long event,
1338 struct net_device *dev = ptr;
1339 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1344 if (event == NETDEV_REGISTER) {
1345 in_dev = inetdev_init(dev);
1347 return notifier_from_errno(-ENOMEM);
1348 if (dev->flags & IFF_LOOPBACK) {
1349 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1350 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1352 } else if (event == NETDEV_CHANGEMTU) {
1353 /* Re-enabling IP */
1354 if (inetdev_valid_mtu(dev->mtu))
1355 in_dev = inetdev_init(dev);
1361 case NETDEV_REGISTER:
1362 pr_debug("%s: bug\n", __func__);
1363 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1366 if (!inetdev_valid_mtu(dev->mtu))
1368 if (dev->flags & IFF_LOOPBACK) {
1369 struct in_ifaddr *ifa = inet_alloc_ifa();
1372 INIT_HLIST_NODE(&ifa->hash);
1374 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1375 ifa->ifa_prefixlen = 8;
1376 ifa->ifa_mask = inet_make_mask(8);
1377 in_dev_hold(in_dev);
1378 ifa->ifa_dev = in_dev;
1379 ifa->ifa_scope = RT_SCOPE_HOST;
1380 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1381 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1382 INFINITY_LIFE_TIME);
1383 inet_insert_ifa(ifa);
1388 case NETDEV_CHANGEADDR:
1389 if (!IN_DEV_ARP_NOTIFY(in_dev))
1392 case NETDEV_NOTIFY_PEERS:
1393 /* Send gratuitous ARP to notify of link change */
1394 inetdev_send_gratuitous_arp(dev, in_dev);
1399 case NETDEV_PRE_TYPE_CHANGE:
1400 ip_mc_unmap(in_dev);
1402 case NETDEV_POST_TYPE_CHANGE:
1403 ip_mc_remap(in_dev);
1405 case NETDEV_CHANGEMTU:
1406 if (inetdev_valid_mtu(dev->mtu))
1408 /* disable IP when MTU is not enough */
1409 case NETDEV_UNREGISTER:
1410 inetdev_destroy(in_dev);
1412 case NETDEV_CHANGENAME:
1413 /* Do not notify about label change, this event is
1414 * not interesting to applications using netlink.
1416 inetdev_changename(dev, in_dev);
1418 devinet_sysctl_unregister(in_dev);
1419 devinet_sysctl_register(in_dev);
/* Notifier block that routes netdevice events to inetdev_event(). */
1426 static struct notifier_block ip_netdev_notifier = {
1427 .notifier_call = inetdev_event,
/* Payload size used by rtmsg_ifa() when allocating an address notification:
 * the aligned ifaddrmsg header plus three 4-byte address attributes and a
 * label of IFNAMSIZ bytes.
 * NOTE(review): inet_fill_ifaddr() also emits IFA_CACHEINFO through
 * put_cacheinfo(), which is not counted here. This looks like an
 * under-estimate that should include
 * nla_total_size(sizeof(struct ifa_cacheinfo)) - confirm and fix in the
 * real source.
 */
1430 static size_t inet_nlmsg_size(void)
1432 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1433 + nla_total_size(4) /* IFA_ADDRESS */
1434 + nla_total_size(4) /* IFA_LOCAL */
1435 + nla_total_size(4) /* IFA_BROADCAST */
1436 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/* Convert a jiffies timestamp into hundredths of a second elapsed since
 * INITIAL_JIFFIES (the result is truncated to u32).
 */
1439 static inline u32 cstamp_delta(unsigned long cstamp)
1441 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/* Append an IFA_CACHEINFO attribute to skb.
 *
 * @cstamp, @tstamp: jiffies timestamps, converted with cstamp_delta()
 * @preferred, @valid: lifetimes copied into the attribute unchanged
 *
 * Returns the result of nla_put().
 */
1444 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1445 unsigned long tstamp, u32 preferred, u32 valid)
1447 struct ifa_cacheinfo ci;
1449 ci.cstamp = cstamp_delta(cstamp);
1450 ci.tstamp = cstamp_delta(tstamp);
1451 ci.ifa_prefered = preferred;
1452 ci.ifa_valid = valid;
1454 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Serialise ifa as one netlink address message in skb.
 *
 * @portid, @seq, @event, @flags: passed to nlmsg_put() for the header
 *
 * The ifaddrmsg header is filled from ifa (family AF_INET, prefix length,
 * flags, scope, interface index). For addresses without IFA_F_PERMANENT the
 * reported lifetimes are derived from the stored ones and the seconds
 * elapsed since ifa_tstamp; permanent addresses report both lifetimes as
 * INFINITY_LIFE_TIME. IFA_ADDRESS, IFA_BROADCAST and IFA_LABEL are emitted
 * only when set, followed by IFA_LOCAL and IFA_CACHEINFO.
 *
 * Returns the result of nlmsg_end() on success; when an attribute does not
 * fit, the message is cancelled.
 * NOTE(review): the exact lifetime arithmetic, the guard on IFA_LOCAL and
 * the error return values are not present in this listing.
 */
1457 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1458 u32 portid, u32 seq, int event, unsigned int flags)
1460 struct ifaddrmsg *ifm;
1461 struct nlmsghdr *nlh;
1462 u32 preferred, valid;
1464 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1468 ifm = nlmsg_data(nlh);
1469 ifm->ifa_family = AF_INET;
1470 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1471 ifm->ifa_flags = ifa->ifa_flags;
1472 ifm->ifa_scope = ifa->ifa_scope;
1473 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1475 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1476 preferred = ifa->ifa_preferred_lft;
1477 valid = ifa->ifa_valid_lft;
1478 if (preferred != INFINITY_LIFE_TIME) {
1479 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1481 if (preferred > tval)
1485 if (valid != INFINITY_LIFE_TIME) {
1493 preferred = INFINITY_LIFE_TIME;
1494 valid = INFINITY_LIFE_TIME;
1496 if ((ifa->ifa_address &&
1497 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1499 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1500 (ifa->ifa_broadcast &&
1501 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1502 (ifa->ifa_label[0] &&
1503 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1504 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1506 goto nla_put_failure;
1508 return nlmsg_end(skb, nlh);
1511 nlmsg_cancel(skb, nlh);
/*
 * Netlink dump callback registered for RTM_GETADDR in devinet_init().
 *
 * Walks the per-namespace device index hash (net->dev_index_head[]) and,
 * for each address on a device's ifa_list, emits an RTM_NEWADDR message
 * flagged NLM_F_MULTI through inet_fill_ifaddr().
 *
 * Resume state lives in cb->args[]: args[1] is read as the device
 * position and args[2] as the address position; args[2] is written back
 * at the end.
 *
 * NOTE(review): this extract omits lines of the function (bucket index
 * declarations and initialisation, the skip/continue statements, the
 * exit path and return value); comments cover visible code only.
 */
1515 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1517 struct net *net = sock_net(skb->sk);
1520 int ip_idx, s_ip_idx;
1521 struct net_device *dev;
1522 struct in_device *in_dev;
1523 struct in_ifaddr *ifa;
1524 struct hlist_head *head;
1527 s_idx = idx = cb->args[1];
1528 s_ip_idx = ip_idx = cb->args[2];
/* s_idx is cleared each time the loop advances to the next bucket. */
1530 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1532 head = &net->dev_index_head[h];
/* cb->seq is derived from dev_addr_genid (second XOR operand not visible). */
1534 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1536 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1539 if (h > s_h || idx > s_idx)
1541 in_dev = __in_dev_get_rcu(dev);
1545 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1546 ifa = ifa->ifa_next, ip_idx++) {
/* Address positions below the saved s_ip_idx are tested here; presumably skipped. */
1547 if (ip_idx < s_ip_idx)
/* A result <= 0 from the fill enters the branch below (its body is not visible). */
1549 if (inet_fill_ifaddr(skb, ifa,
1550 NETLINK_CB(cb->skb).portid,
1552 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1556 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Save the address position for the next invocation. */
1567 cb->args[2] = ip_idx;
/*
 * Build a netlink message of type @event describing @ifa and pass it to
 * rtnl_notify() for the RTNLGRP_IPV4_IFADDR group.
 *
 * The sequence number comes from @nlh when one is supplied, otherwise 0.
 * The skb is sized with inet_nlmsg_size() and allocated with GFP_KERNEL.
 *
 * NOTE(review): the branch structure is not visible in this extract;
 * presumably the trailing rtnl_set_sk_err() call is the error path.
 */
1572 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1575 struct sk_buff *skb;
1576 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1580 net = dev_net(ifa->ifa_dev->dev);
1581 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1585 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1587 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1588 WARN_ON(err == -EMSGSIZE);
1592 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
/* Report err to listeners of the group. */
1596 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * get_link_af_size hook of inet_af_ops: space needed for the
 * IFLA_INET_CONF attribute, i.e. IPV4_DEVCONF_MAX values of 4 bytes each.
 *
 * NOTE(review): in_dev is fetched from dev->ip_ptr but the lines using
 * it are not visible in this extract.
 */
1599 static size_t inet_get_link_af_size(const struct net_device *dev)
1601 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1606 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * fill_link_af hook of inet_af_ops: reserves an IFLA_INET_CONF attribute
 * of IPV4_DEVCONF_MAX * 4 bytes in @skb and copies the entries of
 * in_dev->cnf.data[] into it as u32 values.
 *
 * NOTE(review): the checks on in_dev / nla and the return statements are
 * not visible in this extract.
 */
1609 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1611 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1618 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
/* Copy every devconf value into the reserved attribute payload. */
1622 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1623 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/*
 * Netlink policy for IFLA_INET_* attributes: IFLA_INET_CONF must be a
 * nested attribute. Used by inet_validate_link_af().
 */
1628 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1629 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * validate_link_af hook of inet_af_ops.
 *
 * Returns -EAFNOSUPPORT when @dev is given but has no IPv4 in_device.
 * Otherwise parses @nla against inet_af_policy and, when IFLA_INET_CONF
 * is present, inspects each nested attribute: its type is taken as the
 * devconf id and tested against the range 1..IPV4_DEVCONF_MAX.
 *
 * NOTE(review): the statements executed on a failed parse or an
 * out-of-range id, and the final return, are not visible in this extract.
 */
1632 static int inet_validate_link_af(const struct net_device *dev,
1633 const struct nlattr *nla)
1635 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1638 if (dev && !__in_dev_get_rtnl(dev))
1639 return -EAFNOSUPPORT;
1641 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1645 if (tb[IFLA_INET_CONF]) {
1646 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
/* The nested attribute type is the devconf id. */
1647 int cfgid = nla_type(a);
1652 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * set_link_af hook of inet_af_ops: applies the values carried in the
 * nested IFLA_INET_CONF attribute to @dev's in_device. Each nested
 * attribute's type is used as the devconf id and its u32 payload as the
 * new value, passed to ipv4_devconf_set().
 *
 * The attribute is parsed without a policy here (NULL); the policy is
 * applied in inet_validate_link_af().
 *
 * NOTE(review): the condition guarding the -EAFNOSUPPORT return and the
 * remaining return statements are not visible in this extract.
 */
1660 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1662 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1663 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1667 return -EAFNOSUPPORT;
1669 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1672 if (tb[IFLA_INET_CONF]) {
1673 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1674 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Compute the payload size of a netconf message for attribute @type.
 *
 * The base is the aligned struct netconfmsg plus NETCONFA_IFINDEX; one
 * 4-byte attribute is added for each of FORWARDING, RP_FILTER and
 * MC_FORWARDING that @type selects. @type == -1 selects all of them.
 *
 * NOTE(review): the return statement is not visible in this extract;
 * presumably the function returns size.
 */
1680 static int inet_netconf_msgsize_devconf(int type)
1682 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1683 + nla_total_size(4); /* NETCONFA_IFINDEX */
1685 /* type -1 is used for ALL */
1686 if (type == -1 || type == NETCONFA_FORWARDING)
1687 size += nla_total_size(4);
1688 if (type == -1 || type == NETCONFA_RP_FILTER)
1689 size += nla_total_size(4);
1690 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1691 size += nla_total_size(4);
/*
 * Build one netconf message of type @event in @skb for the configuration
 * set @devconf, identified to user space by @ifindex.
 *
 * NETCONFA_IFINDEX is always added. The FORWARDING, RP_FILTER and
 * MC_FORWARDING values are added when the type argument names that
 * attribute, or all three when it is -1.
 *
 * Returns the value of nlmsg_end() on success. A failed nla_put_s32()
 * jumps to nla_put_failure; the visible code there cancels the message.
 *
 * NOTE(review): the last parameter line, the nlmsg_put() result check
 * and the failure return value are not visible in this extract.
 */
1696 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1697 struct ipv4_devconf *devconf, u32 portid,
1698 u32 seq, int event, unsigned int flags,
1701 struct nlmsghdr *nlh;
1702 struct netconfmsg *ncm;
1704 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1709 ncm = nlmsg_data(nlh);
1710 ncm->ncm_family = AF_INET;
1712 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1713 goto nla_put_failure;
1715 /* type -1 is used for ALL */
1716 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1717 nla_put_s32(skb, NETCONFA_FORWARDING,
1718 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1719 goto nla_put_failure;
1720 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1721 nla_put_s32(skb, NETCONFA_RP_FILTER,
1722 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1723 goto nla_put_failure;
1724 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1725 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1726 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1727 goto nla_put_failure;
1729 return nlmsg_end(skb, nlh);
/* Discard the partially built message. */
1732 nlmsg_cancel(skb, nlh);
/*
 * Send an RTM_NEWNETCONF notification to the RTNLGRP_IPV4_NETCONF group
 * carrying attribute @type of @devconf for @ifindex.
 *
 * The skb is sized with inet_netconf_msgsize_devconf(@type) and both the
 * allocation and rtnl_notify() use GFP_ATOMIC.
 *
 * NOTE(review): the branch structure is not visible in this extract;
 * presumably the trailing rtnl_set_sk_err() call is the error path.
 */
1736 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1737 struct ipv4_devconf *devconf)
1739 struct sk_buff *skb;
1742 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1746 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1747 RTM_NEWNETCONF, 0, type);
1749 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1750 WARN_ON(err == -EMSGSIZE);
1754 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
/* Report err to listeners of the group. */
1758 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Netlink policy applied to netconf requests in
 * inet_netconf_get_devconf(): the three listed attributes are declared
 * with a length of sizeof(int).
 */
1761 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1762 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1763 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1764 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/*
 * Handler registered for RTM_GETNETCONF requests in devinet_init().
 *
 * Parses the request against devconf_ipv4_policy, reads the target from
 * NETCONFA_IFINDEX and selects the configuration set:
 *   NETCONFA_IFINDEX_ALL     -> net->ipv4.devconf_all
 *   NETCONFA_IFINDEX_DEFAULT -> net->ipv4.devconf_dflt
 *   a device index           -> &in_dev->cnf of that device
 * The reply skb is sized for all attributes (type -1), filled as an
 * RTM_NEWNETCONF message and sent to the requester with rtnl_unicast().
 *
 * NOTE(review): error checks, the switch statement itself and the
 * return paths are not visible in this extract.
 */
1767 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1768 struct nlmsghdr *nlh)
1770 struct net *net = sock_net(in_skb->sk);
1771 struct nlattr *tb[NETCONFA_MAX+1];
1772 struct netconfmsg *ncm;
1773 struct sk_buff *skb;
1774 struct ipv4_devconf *devconf;
1775 struct in_device *in_dev;
1776 struct net_device *dev;
1780 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1781 devconf_ipv4_policy);
/* NETCONFA_IFINDEX is tested for presence before it is read. */
1786 if (!tb[NETCONFA_IFINDEX])
1789 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1791 case NETCONFA_IFINDEX_ALL:
1792 devconf = net->ipv4.devconf_all;
1794 case NETCONFA_IFINDEX_DEFAULT:
1795 devconf = net->ipv4.devconf_dflt;
/* Look the device up by index and use its per-device configuration. */
1798 dev = __dev_get_by_index(net, ifindex);
1801 in_dev = __in_dev_get_rtnl(dev);
1804 devconf = &in_dev->cnf;
1809 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1813 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1814 NETLINK_CB(in_skb).portid,
1815 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1818 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1819 WARN_ON(err == -EMSGSIZE);
1823 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * Dump callback registered for RTM_GETNETCONF in devinet_init().
 *
 * Walks the per-namespace device index hash and emits one netconf
 * message per device through inet_netconf_fill_devconf(). After the hash
 * buckets, two extra positions of h are used: h == NETDEV_HASHENTRIES
 * emits net->ipv4.devconf_all (NETCONFA_IFINDEX_ALL) and
 * h == NETDEV_HASHENTRIES + 1 emits net->ipv4.devconf_dflt
 * (NETCONFA_IFINDEX_DEFAULT), both flagged NLM_F_MULTI.
 *
 * cb->args[1] is read as the saved device position.
 *
 * NOTE(review): index declarations, the skip/continue statements, the
 * handling of fill failures and the exit path are not visible in this
 * extract.
 */
1828 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1829 struct netlink_callback *cb)
1831 struct net *net = sock_net(skb->sk);
1834 struct net_device *dev;
1835 struct in_device *in_dev;
1836 struct hlist_head *head;
1839 s_idx = idx = cb->args[1];
/* s_idx is cleared each time the loop advances to the next bucket. */
1841 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1843 head = &net->dev_index_head[h];
/* cb->seq is derived from dev_addr_genid (second XOR operand not visible). */
1845 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1847 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1850 in_dev = __in_dev_get_rcu(dev);
1854 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1856 NETLINK_CB(cb->skb).portid,
1864 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Pseudo-entry for the "all" configuration set. */
1870 if (h == NETDEV_HASHENTRIES) {
1871 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1872 net->ipv4.devconf_all,
1873 NETLINK_CB(cb->skb).portid,
1875 RTM_NEWNETCONF, NLM_F_MULTI,
/* Pseudo-entry for the "default" configuration set. */
1881 if (h == NETDEV_HASHENTRIES + 1) {
1882 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1883 net->ipv4.devconf_dflt,
1884 NETLINK_CB(cb->skb).portid,
1886 RTM_NEWNETCONF, NLM_F_MULTI,
1899 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry @i of the namespace's default configuration
 * (net->ipv4.devconf_dflt) to every device in @net that has an
 * in_device, except devices whose bit @i is set in cnf.state.
 * devinet_conf_proc() sets that bit when it handles a sysctl entry.
 *
 * The device list is walked with for_each_netdev_rcu().
 * NOTE(review): the surrounding RCU lock calls are not visible in this
 * extract.
 */
1901 static void devinet_copy_dflt_conf(struct net *net, int i)
1903 struct net_device *dev;
1906 for_each_netdev_rcu(net, dev) {
1907 struct in_device *in_dev;
1909 in_dev = __in_dev_get_rcu(dev);
1910 if (in_dev && !test_bit(i, in_dev->cnf.state))
1911 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1916 /* called with RTNL locked */
/*
 * Apply a change of the namespace-wide "all" FORWARDING value.
 *
 * With on = the new "all" value: ACCEPT_REDIRECTS in the "all" set
 * becomes !on, FORWARDING in the "default" set becomes on, and
 * NETCONFA_FORWARDING notifications are sent for both sets. Then, for
 * each device in @net, the visible code calls dev_disable_lro(), sets
 * the device's FORWARDING value to on and sends a per-device
 * notification.
 *
 * NOTE(review): the conditions guarding dev_disable_lro() and the
 * in_dev use, and any RCU lock calls, are not visible in this extract.
 */
1917 static void inet_forward_change(struct net *net)
1919 struct net_device *dev;
1920 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1922 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1923 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1924 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1925 NETCONFA_IFINDEX_ALL,
1926 net->ipv4.devconf_all);
1927 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1928 NETCONFA_IFINDEX_DEFAULT,
1929 net->ipv4.devconf_dflt);
/* Push the new value to every device of the namespace. */
1931 for_each_netdev(net, dev) {
1932 struct in_device *in_dev;
1934 dev_disable_lro(dev);
1936 in_dev = __in_dev_get_rcu(dev);
1938 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1939 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1940 dev->ifindex, &in_dev->cnf);
/*
 * sysctl handler used for the plain integer devconf entries (see
 * DEVINET_SYSCTL_RW_ENTRY / DEVINET_SYSCTL_RO_ENTRY).
 *
 * Runs proc_dointvec() and samples the value before and after it.
 * ctl->extra1 is the struct ipv4_devconf being edited, ctl->extra2 the
 * struct net, and i is the index of ctl->data within cnf->data[].
 *
 * The visible follow-up actions are:
 *  - set bit i in cnf->state;
 *  - if the "default" set was edited, call devinet_copy_dflt_conf();
 *  - flush the route cache when ACCEPT_LOCAL or ROUTE_LOCALNET goes
 *    from non-zero to zero;
 *  - when RP_FILTER changed, send a NETCONFA_RP_FILTER notification with
 *    NETCONFA_IFINDEX_DEFAULT, NETCONFA_IFINDEX_ALL, or the ifindex of
 *    the device that embeds cnf.
 *
 * NOTE(review): the condition that guards these actions (presumably a
 * test of @write) and the return statement are not visible in this
 * extract.
 */
1946 static int devinet_conf_proc(ctl_table *ctl, int write,
1947 void __user *buffer,
1948 size_t *lenp, loff_t *ppos)
1950 int old_value = *(int *)ctl->data;
1951 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1952 int new_value = *(int *)ctl->data;
1955 struct ipv4_devconf *cnf = ctl->extra1;
1956 struct net *net = ctl->extra2;
/* Pointer difference gives the entry's position in cnf->data[]. */
1957 int i = (int *)ctl->data - cnf->data;
1959 set_bit(i, cnf->state);
1961 if (cnf == net->ipv4.devconf_dflt)
1962 devinet_copy_dflt_conf(net, i);
1963 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1964 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1965 if ((new_value == 0) && (old_value != 0))
1966 rt_cache_flush(net);
1967 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1968 new_value != old_value) {
/* Pick the ifindex that identifies the edited set in the notification. */
1971 if (cnf == net->ipv4.devconf_dflt)
1972 ifindex = NETCONFA_IFINDEX_DEFAULT;
1973 else if (cnf == net->ipv4.devconf_all)
1974 ifindex = NETCONFA_IFINDEX_ALL;
1976 struct in_device *idev =
1977 container_of(cnf, struct in_device,
1979 ifindex = idev->dev->ifindex;
1981 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/*
 * sysctl handler for the "forwarding" entries and for ctl_forward_entry
 * ("ip_forward").
 *
 * Runs proc_dointvec() and, when a write changed the value, acts on
 * which FORWARDING slot ctl->data points at:
 *  - any slot other than the "default" one needs the RTNL lock; it is
 *    taken with rtnl_trylock() and, on contention, the syscall is
 *    restarted via restart_syscall();
 *  - the "all" slot is handled by inet_forward_change();
 *  - otherwise the slot belongs to a device: the visible code calls
 *    dev_disable_lro() on it and sends a NETCONFA_FORWARDING
 *    notification;
 *  - a NETCONFA_FORWARDING notification with NETCONFA_IFINDEX_DEFAULT is
 *    sent for net->ipv4.devconf_dflt, presumably on the "default" branch.
 * rt_cache_flush() is also called in the visible code.
 *
 * NOTE(review): the declaration of val, the statements restoring the old
 * values before the restart, the else/unlock lines and the return are
 * not visible in this extract.
 */
1989 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1990 void __user *buffer,
1991 size_t *lenp, loff_t *ppos)
1993 int *valp = ctl->data;
1996 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1998 if (write && *valp != val) {
1999 struct net *net = ctl->extra2;
2001 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2002 if (!rtnl_trylock()) {
2003 /* Restore the original values before restarting */
2006 return restart_syscall();
2008 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2009 inet_forward_change(net);
/* Per-device slot: recover the owning in_device from its embedded cnf. */
2011 struct ipv4_devconf *cnf = ctl->extra1;
2012 struct in_device *idev =
2013 container_of(cnf, struct in_device, cnf);
2015 dev_disable_lro(idev->dev);
2016 inet_netconf_notify_devconf(net,
2017 NETCONFA_FORWARDING,
2022 rt_cache_flush(net);
2024 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2025 NETCONFA_IFINDEX_DEFAULT,
2026 net->ipv4.devconf_dflt);
/*
 * sysctl handler used by DEVINET_SYSCTL_FLUSHING_ENTRY: runs
 * proc_dointvec() and flushes the route cache of the namespace in
 * ctl->extra2 when a write left *valp different from val.
 *
 * NOTE(review): the declaration of val (presumably the value sampled
 * before proc_dointvec()) and the return statement are not visible in
 * this extract.
 */
2032 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2033 void __user *buffer,
2034 size_t *lenp, loff_t *ppos)
2036 int *valp = ctl->data;
2038 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2039 struct net *net = ctl->extra2;
2041 if (write && *valp != val)
2042 rt_cache_flush(net);
/*
 * Helpers for building the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) expands to one ctl_table
 * initializer whose .data points at slot IPV4_DEVCONF_<attr> - 1 of the
 * global ipv4_devconf.data[], with .maxlen sizeof(int), handler proc and
 * .extra1 = &ipv4_devconf. __devinet_sysctl_register() later rebases
 * .data and rewrites .extra1 and .extra2 for the actual target.
 * NOTE(review): some initializer lines of the macro (presumably those
 * using name and mval) are not visible in this extract.
 *
 * The wrappers fix the mode and handler:
 *   DEVINET_SYSCTL_RW_ENTRY       0644, devinet_conf_proc
 *   DEVINET_SYSCTL_RO_ENTRY       0444, devinet_conf_proc
 *   DEVINET_SYSCTL_COMPLEX_ENTRY  0644, caller-supplied handler
 *   DEVINET_SYSCTL_FLUSHING_ENTRY 0644, ipv4_doint_and_flush
 */
2047 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2050 .data = ipv4_devconf.data + \
2051 IPV4_DEVCONF_ ## attr - 1, \
2052 .maxlen = sizeof(int), \
2054 .proc_handler = proc, \
2055 .extra1 = &ipv4_devconf, \
2058 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2059 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2061 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2062 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2064 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2065 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2067 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2068 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template for the per-configuration-set sysctl table.
 *
 * Holds the registration header and one ctl_table slot per devconf
 * entry (array size __IPV4_DEVCONF_MAX). __devinet_sysctl_register()
 * duplicates this template with kmemdup() and retargets the copy.
 *
 * "forwarding" uses devinet_sysctl_forward, "mc_forwarding" is
 * read-only, the *_FLUSHING_ENTRY items use ipv4_doint_and_flush and the
 * remaining items use devinet_conf_proc.
 */
2070 static struct devinet_sysctl_table {
2071 struct ctl_table_header *sysctl_header;
2072 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2073 } devinet_sysctl = {
2075 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2076 devinet_sysctl_forward),
2077 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2079 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2080 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2081 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2082 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2083 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2084 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2085 "accept_source_route"),
2086 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2087 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2088 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2089 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2090 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2091 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2092 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2093 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2094 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2095 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2096 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2097 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2098 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
/* Entries below flush the route cache when their value changes. */
2100 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2101 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2102 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2103 "force_igmp_version"),
2104 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2105 "promote_secondaries"),
2106 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the sysctl directory "net/ipv4/conf/<dev_name>" in @net for
 * the configuration set @p.
 *
 * A private copy of the devinet_sysctl template is made with kmemdup().
 * For every entry but the last array slot, .data is shifted by the byte
 * offset between @p and the global ipv4_devconf so that it points into
 * @p, .extra1 is set to @p and .extra2 to @net. The copy is then
 * registered with register_net_sysctl().
 *
 * NOTE(review): the allocation check, the success path (presumably
 * storing the table in p->sysctl, which __devinet_sysctl_unregister()
 * reads) and the failure cleanup and return values are not visible in
 * this extract.
 */
2111 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2112 struct ipv4_devconf *p)
2115 struct devinet_sysctl_table *t;
/* sizeof() of the literal already includes room for the terminating NUL. */
2116 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2118 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2122 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
/* Rebase the template pointer from ipv4_devconf onto @p. */
2123 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2124 t->devinet_vars[i].extra1 = p;
2125 t->devinet_vars[i].extra2 = net;
2128 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2130 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2131 if (!t->sysctl_header)
/*
 * Unregister the sysctl table attached to @cnf (cnf->sysctl) through its
 * saved header.
 *
 * NOTE(review): any NULL check, clearing of cnf->sysctl and freeing of
 * the table are not visible in this extract.
 */
2143 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2145 struct devinet_sysctl_table *t = cnf->sysctl;
2151 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries of one in_device: first the neighbour
 * parameters (idev->arp_parms, under "ipv4"), then the devconf table
 * named after the device.
 */
2155 static void devinet_sysctl_register(struct in_device *idev)
2157 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2158 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Remove the sysctl entries of one in_device, in the reverse order of
 * devinet_sysctl_register(): the devconf table first, then the neighbour
 * parameters.
 */
2162 static void devinet_sysctl_unregister(struct in_device *idev)
2164 __devinet_sysctl_unregister(&idev->cnf);
2165 neigh_sysctl_unregister(idev->arp_parms);
/*
 * sysctl table for "ip_forward", registered under "net/ipv4" by
 * devinet_init_net(). It exposes the FORWARDING slot of the global
 * ipv4_devconf through devinet_sysctl_forward, with .extra2 pointing at
 * init_net. devinet_init_net() duplicates and retargets this table for
 * other namespaces.
 */
2168 static struct ctl_table ctl_forward_entry[] = {
2170 .procname = "ip_forward",
2171 .data = &ipv4_devconf.data[
2172 IPV4_DEVCONF_FORWARDING - 1],
2173 .maxlen = sizeof(int),
2175 .proc_handler = devinet_sysctl_forward,
2176 .extra1 = &ipv4_devconf,
2177 .extra2 = &init_net,
/*
 * Per-namespace init hook (devinet_ops.init).
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt and
 * ctl_forward_entry directly. Any other namespace gets kmemdup() copies
 * of all three, with the copied forward table retargeted at the
 * namespace's own "all" set. With CONFIG_SYSCTL, the "all" and
 * "default" conf directories and the "net/ipv4" forward table are then
 * registered, and the resulting pointers are stored in net->ipv4.
 *
 * The tail of the function is the unwind path: it unregisters the
 * sysctl tables and tests each pointer against its static original,
 * presumably so that only duplicated objects are freed.
 *
 * NOTE(review): allocation and registration checks, goto labels, kfree
 * calls and return statements are not visible in this extract.
 */
2183 static __net_init int devinet_init_net(struct net *net)
2186 struct ipv4_devconf *all, *dflt;
2187 #ifdef CONFIG_SYSCTL
2188 struct ctl_table *tbl = ctl_forward_entry;
2189 struct ctl_table_header *forw_hdr;
2193 all = &ipv4_devconf;
2194 dflt = &ipv4_devconf_dflt;
/* Namespaces other than init_net work on private copies. */
2196 if (!net_eq(net, &init_net)) {
2197 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2201 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2203 goto err_alloc_dflt;
2205 #ifdef CONFIG_SYSCTL
2206 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Point the copied "ip_forward" entry at this namespace's data. */
2210 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2211 tbl[0].extra1 = all;
2212 tbl[0].extra2 = net;
2216 #ifdef CONFIG_SYSCTL
2217 err = __devinet_sysctl_register(net, "all", all);
2221 err = __devinet_sysctl_register(net, "default", dflt);
2226 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2227 if (forw_hdr == NULL)
2229 net->ipv4.forw_hdr = forw_hdr;
2232 net->ipv4.devconf_all = all;
2233 net->ipv4.devconf_dflt = dflt;
/* Error unwinding, in reverse order of setup. */
2236 #ifdef CONFIG_SYSCTL
2238 __devinet_sysctl_unregister(dflt);
2240 __devinet_sysctl_unregister(all);
2242 if (tbl != ctl_forward_entry)
2246 if (dflt != &ipv4_devconf_dflt)
2249 if (all != &ipv4_devconf)
/*
 * Per-namespace exit hook (devinet_ops.exit).
 *
 * With CONFIG_SYSCTL, fetches the forward table from
 * net->ipv4.forw_hdr->ctl_table_arg before unregistering that header,
 * then unregisters the "default" and "all" conf tables. Finally the two
 * devconf sets are released with kfree().
 *
 * NOTE(review): the statement that disposes of tbl and the #endif are
 * not visible in this extract.
 */
2255 static __net_exit void devinet_exit_net(struct net *net)
2257 #ifdef CONFIG_SYSCTL
2258 struct ctl_table *tbl;
/* Read the table pointer before the header is unregistered. */
2260 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2261 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2262 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2263 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2266 kfree(net->ipv4.devconf_dflt);
2267 kfree(net->ipv4.devconf_all);
/*
 * Per-network-namespace operations for this file; registered with
 * register_pernet_subsys() in devinet_init().
 */
2270 static __net_initdata struct pernet_operations devinet_ops = {
2271 .init = devinet_init_net,
2272 .exit = devinet_exit_net,
/*
 * rtnetlink address-family hooks for per-link IPv4 configuration
 * (IFLA_INET_CONF); registered with rtnl_af_register() in
 * devinet_init().
 * NOTE(review): the initializer line between the opening brace and
 * .fill_link_af is not visible in this extract.
 */
2275 static struct rtnl_af_ops inet_af_ops = {
2277 .fill_link_af = inet_fill_link_af,
2278 .get_link_af_size = inet_get_link_af_size,
2279 .validate_link_af = inet_validate_link_af,
2280 .set_link_af = inet_set_link_af,
/*
 * Boot-time initialisation of IPv4 device support.
 *
 * Initialises the inet_addr_lst[] hash heads, registers the per-namespace
 * operations, the PF_INET gifconf handler and the netdevice notifier,
 * queues check_lifetime_work with a delay of 0, registers the link
 * address-family hooks and finally the rtnetlink handlers:
 *   RTM_NEWADDR    -> inet_rtm_newaddr
 *   RTM_DELADDR    -> inet_rtm_deladdr
 *   RTM_GETADDR    -> dump via inet_dump_ifaddr
 *   RTM_GETNETCONF -> inet_netconf_get_devconf, dump via
 *                     inet_netconf_dump_devconf
 */
2283 void __init devinet_init(void)
2287 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2288 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2290 register_pernet_subsys(&devinet_ops);
2292 register_gifconf(PF_INET, inet_gifconf);
2293 register_netdevice_notifier(&ip_netdev_notifier);
2295 schedule_delayed_work(&check_lifetime_work, 0);
2297 rtnl_af_register(&inet_af_ops);
2299 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2300 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2301 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2302 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2303 inet_netconf_dump_devconf, NULL);