netfilter: have ip*t REJECT set the sock err when an icmp is to be sent
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/tcp.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 #include "fib_lookup.h"
70
71 static struct ipv4_devconf ipv4_devconf = {
72         .data = {
73                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87         },
88 };
89
/*
 * Read option @attr from the devconf_dflt block of namespace @net.
 * @net is parenthesised so that an expression argument binds correctly
 * to the '->' operator.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
92
93 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
94         [IFA_LOCAL]             = { .type = NLA_U32 },
95         [IFA_ADDRESS]           = { .type = NLA_U32 },
96         [IFA_BROADCAST]         = { .type = NLA_U32 },
97         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
98         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
99 };
100
101 #define IN4_ADDR_HSIZE_SHIFT    8
102 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
103
104 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
105 static DEFINE_SPINLOCK(inet_addr_hash_lock);
106
107 static u32 inet_addr_hash(struct net *net, __be32 addr)
108 {
109         u32 val = (__force u32) addr ^ net_hash_mix(net);
110
111         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
112 }
113
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 {
116         u32 hash = inet_addr_hash(net, ifa->ifa_local);
117
118         spin_lock(&inet_addr_hash_lock);
119         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120         spin_unlock(&inet_addr_hash_lock);
121 }
122
123 static void inet_hash_remove(struct in_ifaddr *ifa)
124 {
125         spin_lock(&inet_addr_hash_lock);
126         hlist_del_init_rcu(&ifa->hash);
127         spin_unlock(&inet_addr_hash_lock);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         u32 hash = inet_addr_hash(net, addr);
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143
144         rcu_read_lock();
145         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146                 if (ifa->ifa_local == addr) {
147                         struct net_device *dev = ifa->ifa_dev->dev;
148
149                         if (!net_eq(dev_net(dev), net))
150                                 continue;
151                         result = dev;
152                         break;
153                 }
154         }
155         if (!result) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         }
169         if (result && devref)
170                 dev_hold(result);
171         rcu_read_unlock();
172         return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180                          int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192
193 /* Locks all the inet devices. */
194
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203         if (ifa->ifa_dev)
204                 in_dev_put(ifa->ifa_dev);
205         kfree(ifa);
206 }
207
208 static void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215         struct net_device *dev = idev->dev;
216
217         WARN_ON(idev->ifa_list);
218         WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222         dev_put(dev);
223         if (!idev->dead)
224                 pr_err("Freeing alive in_device %p\n", idev);
225         else
226                 kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232         struct in_device *in_dev;
233
234         ASSERT_RTNL();
235
236         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237         if (!in_dev)
238                 goto out;
239         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240                         sizeof(in_dev->cnf));
241         in_dev->cnf.sysctl = NULL;
242         in_dev->dev = dev;
243         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244         if (!in_dev->arp_parms)
245                 goto out_kfree;
246         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247                 dev_disable_lro(dev);
248         /* Reference in_dev->dev */
249         dev_hold(dev);
250         /* Account for reference dev->ip_ptr (below) */
251         in_dev_hold(in_dev);
252
253         devinet_sysctl_register(in_dev);
254         ip_mc_init_dev(in_dev);
255         if (dev->flags & IFF_UP)
256                 ip_mc_up(in_dev);
257
258         /* we can receive as soon as ip_ptr is set -- do this last */
259         rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261         return in_dev;
262 out_kfree:
263         kfree(in_dev);
264         in_dev = NULL;
265         goto out;
266 }
267
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270         struct in_device *idev = container_of(head, struct in_device, rcu_head);
271         in_dev_put(idev);
272 }
273
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276         struct in_ifaddr *ifa;
277         struct net_device *dev;
278
279         ASSERT_RTNL();
280
281         dev = in_dev->dev;
282
283         in_dev->dead = 1;
284
285         ip_mc_destroy_dev(in_dev);
286
287         while ((ifa = in_dev->ifa_list) != NULL) {
288                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289                 inet_free_ifa(ifa);
290         }
291
292         RCU_INIT_POINTER(dev->ip_ptr, NULL);
293
294         devinet_sysctl_unregister(in_dev);
295         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296         arp_ifdown(dev);
297
298         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303         rcu_read_lock();
304         for_primary_ifa(in_dev) {
305                 if (inet_ifa_match(a, ifa)) {
306                         if (!b || inet_ifa_match(b, ifa)) {
307                                 rcu_read_unlock();
308                                 return 1;
309                         }
310                 }
311         } endfor_ifa(in_dev);
312         rcu_read_unlock();
313         return 0;
314 }
315
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317                          int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319         struct in_ifaddr *promote = NULL;
320         struct in_ifaddr *ifa, *ifa1 = *ifap;
321         struct in_ifaddr *last_prim = in_dev->ifa_list;
322         struct in_ifaddr *prev_prom = NULL;
323         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324
325         ASSERT_RTNL();
326
327         /* 1. Deleting primary ifaddr forces deletion all secondaries
328          * unless alias promotion is set
329          **/
330
331         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333
334                 while ((ifa = *ifap1) != NULL) {
335                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336                             ifa1->ifa_scope <= ifa->ifa_scope)
337                                 last_prim = ifa;
338
339                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340                             ifa1->ifa_mask != ifa->ifa_mask ||
341                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
342                                 ifap1 = &ifa->ifa_next;
343                                 prev_prom = ifa;
344                                 continue;
345                         }
346
347                         if (!do_promote) {
348                                 inet_hash_remove(ifa);
349                                 *ifap1 = ifa->ifa_next;
350
351                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352                                 blocking_notifier_call_chain(&inetaddr_chain,
353                                                 NETDEV_DOWN, ifa);
354                                 inet_free_ifa(ifa);
355                         } else {
356                                 promote = ifa;
357                                 break;
358                         }
359                 }
360         }
361
362         /* On promotion all secondaries from subnet are changing
363          * the primary IP, we must remove all their routes silently
364          * and later to add them back with new prefsrc. Do this
365          * while all addresses are on the device list.
366          */
367         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368                 if (ifa1->ifa_mask == ifa->ifa_mask &&
369                     inet_ifa_match(ifa1->ifa_address, ifa))
370                         fib_del_ifaddr(ifa, ifa1);
371         }
372
373         /* 2. Unlink it */
374
375         *ifap = ifa1->ifa_next;
376         inet_hash_remove(ifa1);
377
378         /* 3. Announce address deletion */
379
380         /* Send message first, then call notifier.
381            At first sight, FIB update triggered by notifier
382            will refer to already deleted ifaddr, that could confuse
383            netlink listeners. It is not true: look, gated sees
384            that route deleted and if it still thinks that ifaddr
385            is valid, it will try to restore deleted routes... Grr.
386            So that, this order is correct.
387          */
388         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390
391         if (promote) {
392                 struct in_ifaddr *next_sec = promote->ifa_next;
393
394                 if (prev_prom) {
395                         prev_prom->ifa_next = promote->ifa_next;
396                         promote->ifa_next = last_prim->ifa_next;
397                         last_prim->ifa_next = promote;
398                 }
399
400                 promote->ifa_flags &= ~IFA_F_SECONDARY;
401                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402                 blocking_notifier_call_chain(&inetaddr_chain,
403                                 NETDEV_UP, promote);
404                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405                         if (ifa1->ifa_mask != ifa->ifa_mask ||
406                             !inet_ifa_match(ifa1->ifa_address, ifa))
407                                         continue;
408                         fib_add_ifaddr(ifa);
409                 }
410
411         }
412         if (destroy)
413                 inet_free_ifa(ifa1);
414 }
415
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417                          int destroy)
418 {
419         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421
/* Delayed work item whose handler is check_lifetime(), defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427                              u32 portid)
428 {
429         struct in_device *in_dev = ifa->ifa_dev;
430         struct in_ifaddr *ifa1, **ifap, **last_primary;
431
432         ASSERT_RTNL();
433
434         if (!ifa->ifa_local) {
435                 inet_free_ifa(ifa);
436                 return 0;
437         }
438
439         ifa->ifa_flags &= ~IFA_F_SECONDARY;
440         last_primary = &in_dev->ifa_list;
441
442         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443              ifap = &ifa1->ifa_next) {
444                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445                     ifa->ifa_scope <= ifa1->ifa_scope)
446                         last_primary = &ifa1->ifa_next;
447                 if (ifa1->ifa_mask == ifa->ifa_mask &&
448                     inet_ifa_match(ifa1->ifa_address, ifa)) {
449                         if (ifa1->ifa_local == ifa->ifa_local) {
450                                 inet_free_ifa(ifa);
451                                 return -EEXIST;
452                         }
453                         if (ifa1->ifa_scope != ifa->ifa_scope) {
454                                 inet_free_ifa(ifa);
455                                 return -EINVAL;
456                         }
457                         ifa->ifa_flags |= IFA_F_SECONDARY;
458                 }
459         }
460
461         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462                 net_srandom(ifa->ifa_local);
463                 ifap = last_primary;
464         }
465
466         ifa->ifa_next = *ifap;
467         *ifap = ifa;
468
469         inet_hash_insert(dev_net(in_dev->dev), ifa);
470
471         cancel_delayed_work(&check_lifetime_work);
472         schedule_delayed_work(&check_lifetime_work, 0);
473
474         /* Send message first, then call notifier.
475            Notifier will trigger FIB update, so that
476            listeners of netlink will know about new ifaddr */
477         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479
480         return 0;
481 }
482
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485         return __inet_insert_ifa(ifa, NULL, 0);
486 }
487
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490         struct in_device *in_dev = __in_dev_get_rtnl(dev);
491
492         ASSERT_RTNL();
493
494         if (!in_dev) {
495                 inet_free_ifa(ifa);
496                 return -ENOBUFS;
497         }
498         ipv4_devconf_setall(in_dev);
499         if (ifa->ifa_dev != in_dev) {
500                 WARN_ON(ifa->ifa_dev);
501                 in_dev_hold(in_dev);
502                 ifa->ifa_dev = in_dev;
503         }
504         if (ipv4_is_loopback(ifa->ifa_local))
505                 ifa->ifa_scope = RT_SCOPE_HOST;
506         return inet_insert_ifa(ifa);
507 }
508
509 /* Caller must hold RCU or RTNL :
510  * We dont take a reference on found in_device
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514         struct net_device *dev;
515         struct in_device *in_dev = NULL;
516
517         rcu_read_lock();
518         dev = dev_get_by_index_rcu(net, ifindex);
519         if (dev)
520                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521         rcu_read_unlock();
522         return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525
526 /* Called only from RTNL semaphored context. No locks. */
527
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529                                     __be32 mask)
530 {
531         ASSERT_RTNL();
532
533         for_primary_ifa(in_dev) {
534                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535                         return ifa;
536         } endfor_ifa(in_dev);
537         return NULL;
538 }
539
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 {
542         struct net *net = sock_net(skb->sk);
543         struct nlattr *tb[IFA_MAX+1];
544         struct in_device *in_dev;
545         struct ifaddrmsg *ifm;
546         struct in_ifaddr *ifa, **ifap;
547         int err = -EINVAL;
548
549         ASSERT_RTNL();
550
551         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552         if (err < 0)
553                 goto errout;
554
555         ifm = nlmsg_data(nlh);
556         in_dev = inetdev_by_index(net, ifm->ifa_index);
557         if (in_dev == NULL) {
558                 err = -ENODEV;
559                 goto errout;
560         }
561
562         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563              ifap = &ifa->ifa_next) {
564                 if (tb[IFA_LOCAL] &&
565                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566                         continue;
567
568                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569                         continue;
570
571                 if (tb[IFA_ADDRESS] &&
572                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574                         continue;
575
576                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577                 return 0;
578         }
579
580         err = -EADDRNOTAVAIL;
581 errout:
582         return err;
583 }
584
585 #define INFINITY_LIFE_TIME      0xFFFFFFFF
586
587 static void check_lifetime(struct work_struct *work)
588 {
589         unsigned long now, next, next_sec, next_sched;
590         struct in_ifaddr *ifa;
591         struct hlist_node *n;
592         int i;
593
594         now = jiffies;
595         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596
597         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598                 bool change_needed = false;
599
600                 rcu_read_lock();
601                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
602                         unsigned long age;
603
604                         if (ifa->ifa_flags & IFA_F_PERMANENT)
605                                 continue;
606
607                         /* We try to batch several events at once. */
608                         age = (now - ifa->ifa_tstamp +
609                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
610
611                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612                             age >= ifa->ifa_valid_lft) {
613                                 change_needed = true;
614                         } else if (ifa->ifa_preferred_lft ==
615                                    INFINITY_LIFE_TIME) {
616                                 continue;
617                         } else if (age >= ifa->ifa_preferred_lft) {
618                                 if (time_before(ifa->ifa_tstamp +
619                                                 ifa->ifa_valid_lft * HZ, next))
620                                         next = ifa->ifa_tstamp +
621                                                ifa->ifa_valid_lft * HZ;
622
623                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624                                         change_needed = true;
625                         } else if (time_before(ifa->ifa_tstamp +
626                                                ifa->ifa_preferred_lft * HZ,
627                                                next)) {
628                                 next = ifa->ifa_tstamp +
629                                        ifa->ifa_preferred_lft * HZ;
630                         }
631                 }
632                 rcu_read_unlock();
633                 if (!change_needed)
634                         continue;
635                 rtnl_lock();
636                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
637                         unsigned long age;
638
639                         if (ifa->ifa_flags & IFA_F_PERMANENT)
640                                 continue;
641
642                         /* We try to batch several events at once. */
643                         age = (now - ifa->ifa_tstamp +
644                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645
646                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647                             age >= ifa->ifa_valid_lft) {
648                                 struct in_ifaddr **ifap;
649
650                                 for (ifap = &ifa->ifa_dev->ifa_list;
651                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
652                                         if (*ifap == ifa) {
653                                                 inet_del_ifa(ifa->ifa_dev,
654                                                              ifap, 1);
655                                                 break;
656                                         }
657                                 }
658                         } else if (ifa->ifa_preferred_lft !=
659                                    INFINITY_LIFE_TIME &&
660                                    age >= ifa->ifa_preferred_lft &&
661                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
663                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
664                         }
665                 }
666                 rtnl_unlock();
667         }
668
669         next_sec = round_jiffies_up(next);
670         next_sched = next;
671
672         /* If rounded timeout is accurate enough, accept it. */
673         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674                 next_sched = next_sec;
675
676         now = jiffies;
677         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
680
681         schedule_delayed_work(&check_lifetime_work, next_sched - now);
682 }
683
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
685                              __u32 prefered_lft)
686 {
687         unsigned long timeout;
688
689         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690
691         timeout = addrconf_timeout_fixup(valid_lft, HZ);
692         if (addrconf_finite_timeout(timeout))
693                 ifa->ifa_valid_lft = timeout;
694         else
695                 ifa->ifa_flags |= IFA_F_PERMANENT;
696
697         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698         if (addrconf_finite_timeout(timeout)) {
699                 if (timeout == 0)
700                         ifa->ifa_flags |= IFA_F_DEPRECATED;
701                 ifa->ifa_preferred_lft = timeout;
702         }
703         ifa->ifa_tstamp = jiffies;
704         if (!ifa->ifa_cstamp)
705                 ifa->ifa_cstamp = ifa->ifa_tstamp;
706 }
707
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
710 {
711         struct nlattr *tb[IFA_MAX+1];
712         struct in_ifaddr *ifa;
713         struct ifaddrmsg *ifm;
714         struct net_device *dev;
715         struct in_device *in_dev;
716         int err;
717
718         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
719         if (err < 0)
720                 goto errout;
721
722         ifm = nlmsg_data(nlh);
723         err = -EINVAL;
724         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
725                 goto errout;
726
727         dev = __dev_get_by_index(net, ifm->ifa_index);
728         err = -ENODEV;
729         if (dev == NULL)
730                 goto errout;
731
732         in_dev = __in_dev_get_rtnl(dev);
733         err = -ENOBUFS;
734         if (in_dev == NULL)
735                 goto errout;
736
737         ifa = inet_alloc_ifa();
738         if (ifa == NULL)
739                 /*
740                  * A potential indev allocation can be left alive, it stays
741                  * assigned to its device and is destroy with it.
742                  */
743                 goto errout;
744
745         ipv4_devconf_setall(in_dev);
746         in_dev_hold(in_dev);
747
748         if (tb[IFA_ADDRESS] == NULL)
749                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750
751         INIT_HLIST_NODE(&ifa->hash);
752         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754         ifa->ifa_flags = ifm->ifa_flags;
755         ifa->ifa_scope = ifm->ifa_scope;
756         ifa->ifa_dev = in_dev;
757
758         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760
761         if (tb[IFA_BROADCAST])
762                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
763
764         if (tb[IFA_LABEL])
765                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766         else
767                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768
769         if (tb[IFA_CACHEINFO]) {
770                 struct ifa_cacheinfo *ci;
771
772                 ci = nla_data(tb[IFA_CACHEINFO]);
773                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
774                         err = -EINVAL;
775                         goto errout;
776                 }
777                 *pvalid_lft = ci->ifa_valid;
778                 *pprefered_lft = ci->ifa_prefered;
779         }
780
781         return ifa;
782
783 errout:
784         return ERR_PTR(err);
785 }
786
787 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
788 {
789         struct in_device *in_dev = ifa->ifa_dev;
790         struct in_ifaddr *ifa1, **ifap;
791
792         if (!ifa->ifa_local)
793                 return NULL;
794
795         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
796              ifap = &ifa1->ifa_next) {
797                 if (ifa1->ifa_mask == ifa->ifa_mask &&
798                     inet_ifa_match(ifa1->ifa_address, ifa) &&
799                     ifa1->ifa_local == ifa->ifa_local)
800                         return ifa1;
801         }
802         return NULL;
803 }
804
805 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
806 {
807         struct net *net = sock_net(skb->sk);
808         struct in_ifaddr *ifa;
809         struct in_ifaddr *ifa_existing;
810         __u32 valid_lft = INFINITY_LIFE_TIME;
811         __u32 prefered_lft = INFINITY_LIFE_TIME;
812
813         ASSERT_RTNL();
814
815         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
816         if (IS_ERR(ifa))
817                 return PTR_ERR(ifa);
818
819         ifa_existing = find_matching_ifa(ifa);
820         if (!ifa_existing) {
821                 /* It would be best to check for !NLM_F_CREATE here but
822                  * userspace alreay relies on not having to provide this.
823                  */
824                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
825                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
826         } else {
827                 inet_free_ifa(ifa);
828
829                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
830                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
831                         return -EEXIST;
832                 ifa = ifa_existing;
833                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
834                 cancel_delayed_work(&check_lifetime_work);
835                 schedule_delayed_work(&check_lifetime_work, 0);
836                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
837                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
838         }
839         return 0;
840 }
841
842 /*
843  *      Determine a default network mask, based on the IP address.
844  */
845
846 static int inet_abc_len(__be32 addr)
847 {
848         int rc = -1;    /* Something else, probably a multicast. */
849
850         if (ipv4_is_zeronet(addr))
851                 rc = 0;
852         else {
853                 __u32 haddr = ntohl(addr);
854
855                 if (IN_CLASSA(haddr))
856                         rc = 8;
857                 else if (IN_CLASSB(haddr))
858                         rc = 16;
859                 else if (IN_CLASSC(haddr))
860                         rc = 24;
861         }
862
863         return rc;
864 }
865
866
867 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
868 {
869         struct ifreq ifr;
870         struct sockaddr_in sin_orig;
871         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
872         struct in_device *in_dev;
873         struct in_ifaddr **ifap = NULL;
874         struct in_ifaddr *ifa = NULL;
875         struct net_device *dev;
876         char *colon;
877         int ret = -EFAULT;
878         int tryaddrmatch = 0;
879
880         /*
881          *      Fetch the caller's info block into kernel space
882          */
883
884         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
885                 goto out;
886         ifr.ifr_name[IFNAMSIZ - 1] = 0;
887
888         /* save original address for comparison */
889         memcpy(&sin_orig, sin, sizeof(*sin));
890
891         colon = strchr(ifr.ifr_name, ':');
892         if (colon)
893                 *colon = 0;
894
895         dev_load(net, ifr.ifr_name);
896
897         switch (cmd) {
898         case SIOCGIFADDR:       /* Get interface address */
899         case SIOCGIFBRDADDR:    /* Get the broadcast address */
900         case SIOCGIFDSTADDR:    /* Get the destination address */
901         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
902                 /* Note that these ioctls will not sleep,
903                    so that we do not impose a lock.
904                    One day we will be forced to put shlock here (I mean SMP)
905                  */
906                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
907                 memset(sin, 0, sizeof(*sin));
908                 sin->sin_family = AF_INET;
909                 break;
910
911         case SIOCSIFFLAGS:
912                 ret = -EPERM;
913                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
914                         goto out;
915                 break;
916         case SIOCSIFADDR:       /* Set interface address (and family) */
917         case SIOCSIFBRDADDR:    /* Set the broadcast address */
918         case SIOCSIFDSTADDR:    /* Set the destination address */
919         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
920         case SIOCKILLADDR:      /* Nuke all sockets on this address */
921                 ret = -EPERM;
922                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
923                         goto out;
924                 ret = -EINVAL;
925                 if (sin->sin_family != AF_INET)
926                         goto out;
927                 break;
928         default:
929                 ret = -EINVAL;
930                 goto out;
931         }
932
933         rtnl_lock();
934
935         ret = -ENODEV;
936         dev = __dev_get_by_name(net, ifr.ifr_name);
937         if (!dev)
938                 goto done;
939
940         if (colon)
941                 *colon = ':';
942
943         in_dev = __in_dev_get_rtnl(dev);
944         if (in_dev) {
945                 if (tryaddrmatch) {
946                         /* Matthias Andree */
947                         /* compare label and address (4.4BSD style) */
948                         /* note: we only do this for a limited set of ioctls
949                            and only if the original address family was AF_INET.
950                            This is checked above. */
951                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
952                              ifap = &ifa->ifa_next) {
953                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
954                                     sin_orig.sin_addr.s_addr ==
955                                                         ifa->ifa_local) {
956                                         break; /* found */
957                                 }
958                         }
959                 }
960                 /* we didn't get a match, maybe the application is
961                    4.3BSD-style and passed in junk so we fall back to
962                    comparing just the label */
963                 if (!ifa) {
964                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
965                              ifap = &ifa->ifa_next)
966                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
967                                         break;
968                 }
969         }
970
971         ret = -EADDRNOTAVAIL;
972         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
973             && cmd != SIOCKILLADDR)
974                 goto done;
975
976         switch (cmd) {
977         case SIOCGIFADDR:       /* Get interface address */
978                 sin->sin_addr.s_addr = ifa->ifa_local;
979                 goto rarok;
980
981         case SIOCGIFBRDADDR:    /* Get the broadcast address */
982                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
983                 goto rarok;
984
985         case SIOCGIFDSTADDR:    /* Get the destination address */
986                 sin->sin_addr.s_addr = ifa->ifa_address;
987                 goto rarok;
988
989         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
990                 sin->sin_addr.s_addr = ifa->ifa_mask;
991                 goto rarok;
992
993         case SIOCSIFFLAGS:
994                 if (colon) {
995                         ret = -EADDRNOTAVAIL;
996                         if (!ifa)
997                                 break;
998                         ret = 0;
999                         if (!(ifr.ifr_flags & IFF_UP))
1000                                 inet_del_ifa(in_dev, ifap, 1);
1001                         break;
1002                 }
1003                 ret = dev_change_flags(dev, ifr.ifr_flags);
1004                 break;
1005
1006         case SIOCSIFADDR:       /* Set interface address (and family) */
1007                 ret = -EINVAL;
1008                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1009                         break;
1010
1011                 if (!ifa) {
1012                         ret = -ENOBUFS;
1013                         ifa = inet_alloc_ifa();
1014                         if (!ifa)
1015                                 break;
1016                         INIT_HLIST_NODE(&ifa->hash);
1017                         if (colon)
1018                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1019                         else
1020                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1021                 } else {
1022                         ret = 0;
1023                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1024                                 break;
1025                         inet_del_ifa(in_dev, ifap, 0);
1026                         ifa->ifa_broadcast = 0;
1027                         ifa->ifa_scope = 0;
1028                 }
1029
1030                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1031
1032                 if (!(dev->flags & IFF_POINTOPOINT)) {
1033                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1034                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1035                         if ((dev->flags & IFF_BROADCAST) &&
1036                             ifa->ifa_prefixlen < 31)
1037                                 ifa->ifa_broadcast = ifa->ifa_address |
1038                                                      ~ifa->ifa_mask;
1039                 } else {
1040                         ifa->ifa_prefixlen = 32;
1041                         ifa->ifa_mask = inet_make_mask(32);
1042                 }
1043                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1044                 ret = inet_set_ifa(dev, ifa);
1045                 break;
1046
1047         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1048                 ret = 0;
1049                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1050                         inet_del_ifa(in_dev, ifap, 0);
1051                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1052                         inet_insert_ifa(ifa);
1053                 }
1054                 break;
1055
1056         case SIOCSIFDSTADDR:    /* Set the destination address */
1057                 ret = 0;
1058                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1059                         break;
1060                 ret = -EINVAL;
1061                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1062                         break;
1063                 ret = 0;
1064                 inet_del_ifa(in_dev, ifap, 0);
1065                 ifa->ifa_address = sin->sin_addr.s_addr;
1066                 inet_insert_ifa(ifa);
1067                 break;
1068
1069         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1070
1071                 /*
1072                  *      The mask we set must be legal.
1073                  */
1074                 ret = -EINVAL;
1075                 if (bad_mask(sin->sin_addr.s_addr, 0))
1076                         break;
1077                 ret = 0;
1078                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1079                         __be32 old_mask = ifa->ifa_mask;
1080                         inet_del_ifa(in_dev, ifap, 0);
1081                         ifa->ifa_mask = sin->sin_addr.s_addr;
1082                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1083
1084                         /* See if current broadcast address matches
1085                          * with current netmask, then recalculate
1086                          * the broadcast address. Otherwise it's a
1087                          * funny address, so don't touch it since
1088                          * the user seems to know what (s)he's doing...
1089                          */
1090                         if ((dev->flags & IFF_BROADCAST) &&
1091                             (ifa->ifa_prefixlen < 31) &&
1092                             (ifa->ifa_broadcast ==
1093                              (ifa->ifa_local|~old_mask))) {
1094                                 ifa->ifa_broadcast = (ifa->ifa_local |
1095                                                       ~sin->sin_addr.s_addr);
1096                         }
1097                         inet_insert_ifa(ifa);
1098                 }
1099                 break;
1100         case SIOCKILLADDR:      /* Nuke all connections on this address */
1101                 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
1102                 break;
1103         }
1104 done:
1105         rtnl_unlock();
1106 out:
1107         return ret;
1108 rarok:
1109         rtnl_unlock();
1110         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1111         goto out;
1112 }
1113
1114 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1115 {
1116         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1117         struct in_ifaddr *ifa;
1118         struct ifreq ifr;
1119         int done = 0;
1120
1121         if (!in_dev)
1122                 goto out;
1123
1124         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1125                 if (!buf) {
1126                         done += sizeof(ifr);
1127                         continue;
1128                 }
1129                 if (len < (int) sizeof(ifr))
1130                         break;
1131                 memset(&ifr, 0, sizeof(struct ifreq));
1132                 if (ifa->ifa_label)
1133                         strcpy(ifr.ifr_name, ifa->ifa_label);
1134                 else
1135                         strcpy(ifr.ifr_name, dev->name);
1136
1137                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1138                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1139                                                                 ifa->ifa_local;
1140
1141                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1142                         done = -EFAULT;
1143                         break;
1144                 }
1145                 buf  += sizeof(struct ifreq);
1146                 len  -= sizeof(struct ifreq);
1147                 done += sizeof(struct ifreq);
1148         }
1149 out:
1150         return done;
1151 }
1152
1153 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1154 {
1155         __be32 addr = 0;
1156         struct in_device *in_dev;
1157         struct net *net = dev_net(dev);
1158
1159         rcu_read_lock();
1160         in_dev = __in_dev_get_rcu(dev);
1161         if (!in_dev)
1162                 goto no_in_dev;
1163
1164         for_primary_ifa(in_dev) {
1165                 if (ifa->ifa_scope > scope)
1166                         continue;
1167                 if (!dst || inet_ifa_match(dst, ifa)) {
1168                         addr = ifa->ifa_local;
1169                         break;
1170                 }
1171                 if (!addr)
1172                         addr = ifa->ifa_local;
1173         } endfor_ifa(in_dev);
1174
1175         if (addr)
1176                 goto out_unlock;
1177 no_in_dev:
1178
1179         /* Not loopback addresses on loopback should be preferred
1180            in this case. It is importnat that lo is the first interface
1181            in dev_base list.
1182          */
1183         for_each_netdev_rcu(net, dev) {
1184                 in_dev = __in_dev_get_rcu(dev);
1185                 if (!in_dev)
1186                         continue;
1187
1188                 for_primary_ifa(in_dev) {
1189                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1190                             ifa->ifa_scope <= scope) {
1191                                 addr = ifa->ifa_local;
1192                                 goto out_unlock;
1193                         }
1194                 } endfor_ifa(in_dev);
1195         }
1196 out_unlock:
1197         rcu_read_unlock();
1198         return addr;
1199 }
1200 EXPORT_SYMBOL(inet_select_addr);
1201
1202 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1203                               __be32 local, int scope)
1204 {
1205         int same = 0;
1206         __be32 addr = 0;
1207
1208         for_ifa(in_dev) {
1209                 if (!addr &&
1210                     (local == ifa->ifa_local || !local) &&
1211                     ifa->ifa_scope <= scope) {
1212                         addr = ifa->ifa_local;
1213                         if (same)
1214                                 break;
1215                 }
1216                 if (!same) {
1217                         same = (!local || inet_ifa_match(local, ifa)) &&
1218                                 (!dst || inet_ifa_match(dst, ifa));
1219                         if (same && addr) {
1220                                 if (local || !dst)
1221                                         break;
1222                                 /* Is the selected addr into dst subnet? */
1223                                 if (inet_ifa_match(addr, ifa))
1224                                         break;
1225                                 /* No, then can we use new local src? */
1226                                 if (ifa->ifa_scope <= scope) {
1227                                         addr = ifa->ifa_local;
1228                                         break;
1229                                 }
1230                                 /* search for large dst subnet for addr */
1231                                 same = 0;
1232                         }
1233                 }
1234         } endfor_ifa(in_dev);
1235
1236         return same ? addr : 0;
1237 }
1238
1239 /*
1240  * Confirm that local IP address exists using wildcards:
1241  * - in_dev: only on this interface, 0=any interface
1242  * - dst: only in the same subnet as dst, 0=any dst
1243  * - local: address, 0=autoselect the local address
1244  * - scope: maximum allowed scope value for the local address
1245  */
1246 __be32 inet_confirm_addr(struct in_device *in_dev,
1247                          __be32 dst, __be32 local, int scope)
1248 {
1249         __be32 addr = 0;
1250         struct net_device *dev;
1251         struct net *net;
1252
1253         if (scope != RT_SCOPE_LINK)
1254                 return confirm_addr_indev(in_dev, dst, local, scope);
1255
1256         net = dev_net(in_dev->dev);
1257         rcu_read_lock();
1258         for_each_netdev_rcu(net, dev) {
1259                 in_dev = __in_dev_get_rcu(dev);
1260                 if (in_dev) {
1261                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1262                         if (addr)
1263                                 break;
1264                 }
1265         }
1266         rcu_read_unlock();
1267
1268         return addr;
1269 }
1270 EXPORT_SYMBOL(inet_confirm_addr);
1271
1272 /*
1273  *      Device notifier
1274  */
1275
1276 int register_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(register_inetaddr_notifier);
1281
1282 int unregister_inetaddr_notifier(struct notifier_block *nb)
1283 {
1284         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1285 }
1286 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1287
1288 /* Rename ifa_labels for a device name change. Make some effort to preserve
1289  * existing alias numbering and to create unique labels if possible.
1290 */
1291 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1292 {
1293         struct in_ifaddr *ifa;
1294         int named = 0;
1295
1296         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1297                 char old[IFNAMSIZ], *dot;
1298
1299                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1300                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1301                 if (named++ == 0)
1302                         goto skip;
1303                 dot = strchr(old, ':');
1304                 if (dot == NULL) {
1305                         sprintf(old, ":%d", named);
1306                         dot = old;
1307                 }
1308                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1309                         strcat(ifa->ifa_label, dot);
1310                 else
1311                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1312 skip:
1313                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1314         }
1315 }
1316
/*
 * Tell whether @mtu is large enough to run IPv4 on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum - confirm).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        const unsigned int min_mtu = 68;

        if (mtu < min_mtu)
                return false;

        return true;
}
1321
1322 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1323                                         struct in_device *in_dev)
1324
1325 {
1326         struct in_ifaddr *ifa;
1327
1328         for (ifa = in_dev->ifa_list; ifa;
1329              ifa = ifa->ifa_next) {
1330                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1331                          ifa->ifa_local, dev,
1332                          ifa->ifa_local, NULL,
1333                          dev->dev_addr, NULL);
1334         }
1335 }
1336
1337 /* Called only under RTNL semaphore */
1338
1339 static int inetdev_event(struct notifier_block *this, unsigned long event,
1340                          void *ptr)
1341 {
1342         struct net_device *dev = ptr;
1343         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1344
1345         ASSERT_RTNL();
1346
1347         if (!in_dev) {
1348                 if (event == NETDEV_REGISTER) {
1349                         in_dev = inetdev_init(dev);
1350                         if (!in_dev)
1351                                 return notifier_from_errno(-ENOMEM);
1352                         if (dev->flags & IFF_LOOPBACK) {
1353                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1355                         }
1356                 } else if (event == NETDEV_CHANGEMTU) {
1357                         /* Re-enabling IP */
1358                         if (inetdev_valid_mtu(dev->mtu))
1359                                 in_dev = inetdev_init(dev);
1360                 }
1361                 goto out;
1362         }
1363
1364         switch (event) {
1365         case NETDEV_REGISTER:
1366                 pr_debug("%s: bug\n", __func__);
1367                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1368                 break;
1369         case NETDEV_UP:
1370                 if (!inetdev_valid_mtu(dev->mtu))
1371                         break;
1372                 if (dev->flags & IFF_LOOPBACK) {
1373                         struct in_ifaddr *ifa = inet_alloc_ifa();
1374
1375                         if (ifa) {
1376                                 INIT_HLIST_NODE(&ifa->hash);
1377                                 ifa->ifa_local =
1378                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1379                                 ifa->ifa_prefixlen = 8;
1380                                 ifa->ifa_mask = inet_make_mask(8);
1381                                 in_dev_hold(in_dev);
1382                                 ifa->ifa_dev = in_dev;
1383                                 ifa->ifa_scope = RT_SCOPE_HOST;
1384                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1385                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1386                                                  INFINITY_LIFE_TIME);
1387                                 inet_insert_ifa(ifa);
1388                         }
1389                 }
1390                 ip_mc_up(in_dev);
1391                 /* fall through */
1392         case NETDEV_CHANGEADDR:
1393                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1394                         break;
1395                 /* fall through */
1396         case NETDEV_NOTIFY_PEERS:
1397                 /* Send gratuitous ARP to notify of link change */
1398                 inetdev_send_gratuitous_arp(dev, in_dev);
1399                 break;
1400         case NETDEV_DOWN:
1401                 ip_mc_down(in_dev);
1402                 break;
1403         case NETDEV_PRE_TYPE_CHANGE:
1404                 ip_mc_unmap(in_dev);
1405                 break;
1406         case NETDEV_POST_TYPE_CHANGE:
1407                 ip_mc_remap(in_dev);
1408                 break;
1409         case NETDEV_CHANGEMTU:
1410                 if (inetdev_valid_mtu(dev->mtu))
1411                         break;
1412                 /* disable IP when MTU is not enough */
1413         case NETDEV_UNREGISTER:
1414                 inetdev_destroy(in_dev);
1415                 break;
1416         case NETDEV_CHANGENAME:
1417                 /* Do not notify about label change, this event is
1418                  * not interesting to applications using netlink.
1419                  */
1420                 inetdev_changename(dev, in_dev);
1421
1422                 devinet_sysctl_unregister(in_dev);
1423                 devinet_sysctl_register(in_dev);
1424                 break;
1425         }
1426 out:
1427         return NOTIFY_DONE;
1428 }
1429
1430 static struct notifier_block ip_netdev_notifier = {
1431         .notifier_call = inetdev_event,
1432 };
1433
1434 static size_t inet_nlmsg_size(void)
1435 {
1436         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1437                + nla_total_size(4) /* IFA_ADDRESS */
1438                + nla_total_size(4) /* IFA_LOCAL */
1439                + nla_total_size(4) /* IFA_BROADCAST */
1440                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1441 }
1442
1443 static inline u32 cstamp_delta(unsigned long cstamp)
1444 {
1445         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1446 }
1447
1448 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1449                          unsigned long tstamp, u32 preferred, u32 valid)
1450 {
1451         struct ifa_cacheinfo ci;
1452
1453         ci.cstamp = cstamp_delta(cstamp);
1454         ci.tstamp = cstamp_delta(tstamp);
1455         ci.ifa_prefered = preferred;
1456         ci.ifa_valid = valid;
1457
1458         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1459 }
1460
1461 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1462                             u32 portid, u32 seq, int event, unsigned int flags)
1463 {
1464         struct ifaddrmsg *ifm;
1465         struct nlmsghdr  *nlh;
1466         u32 preferred, valid;
1467
1468         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1469         if (nlh == NULL)
1470                 return -EMSGSIZE;
1471
1472         ifm = nlmsg_data(nlh);
1473         ifm->ifa_family = AF_INET;
1474         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1475         ifm->ifa_flags = ifa->ifa_flags;
1476         ifm->ifa_scope = ifa->ifa_scope;
1477         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1478
1479         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1480                 preferred = ifa->ifa_preferred_lft;
1481                 valid = ifa->ifa_valid_lft;
1482                 if (preferred != INFINITY_LIFE_TIME) {
1483                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1484
1485                         if (preferred > tval)
1486                                 preferred -= tval;
1487                         else
1488                                 preferred = 0;
1489                         if (valid != INFINITY_LIFE_TIME) {
1490                                 if (valid > tval)
1491                                         valid -= tval;
1492                                 else
1493                                         valid = 0;
1494                         }
1495                 }
1496         } else {
1497                 preferred = INFINITY_LIFE_TIME;
1498                 valid = INFINITY_LIFE_TIME;
1499         }
1500         if ((ifa->ifa_address &&
1501              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1502             (ifa->ifa_local &&
1503              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1504             (ifa->ifa_broadcast &&
1505              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1506             (ifa->ifa_label[0] &&
1507              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1508             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1509                           preferred, valid))
1510                 goto nla_put_failure;
1511
1512         return nlmsg_end(skb, nlh);
1513
1514 nla_put_failure:
1515         nlmsg_cancel(skb, nlh);
1516         return -EMSGSIZE;
1517 }
1518
1519 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1520 {
1521         struct net *net = sock_net(skb->sk);
1522         int h, s_h;
1523         int idx, s_idx;
1524         int ip_idx, s_ip_idx;
1525         struct net_device *dev;
1526         struct in_device *in_dev;
1527         struct in_ifaddr *ifa;
1528         struct hlist_head *head;
1529
1530         s_h = cb->args[0];
1531         s_idx = idx = cb->args[1];
1532         s_ip_idx = ip_idx = cb->args[2];
1533
1534         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1535                 idx = 0;
1536                 head = &net->dev_index_head[h];
1537                 rcu_read_lock();
1538                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1539                           net->dev_base_seq;
1540                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1541                         if (idx < s_idx)
1542                                 goto cont;
1543                         if (h > s_h || idx > s_idx)
1544                                 s_ip_idx = 0;
1545                         in_dev = __in_dev_get_rcu(dev);
1546                         if (!in_dev)
1547                                 goto cont;
1548
1549                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1550                              ifa = ifa->ifa_next, ip_idx++) {
1551                                 if (ip_idx < s_ip_idx)
1552                                         continue;
1553                                 if (inet_fill_ifaddr(skb, ifa,
1554                                              NETLINK_CB(cb->skb).portid,
1555                                              cb->nlh->nlmsg_seq,
1556                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1557                                         rcu_read_unlock();
1558                                         goto done;
1559                                 }
1560                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1561                         }
1562 cont:
1563                         idx++;
1564                 }
1565                 rcu_read_unlock();
1566         }
1567
1568 done:
1569         cb->args[0] = h;
1570         cb->args[1] = idx;
1571         cb->args[2] = ip_idx;
1572
1573         return skb->len;
1574 }
1575
1576 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1577                       u32 portid)
1578 {
1579         struct sk_buff *skb;
1580         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1581         int err = -ENOBUFS;
1582         struct net *net;
1583
1584         net = dev_net(ifa->ifa_dev->dev);
1585         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1586         if (skb == NULL)
1587                 goto errout;
1588
1589         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1590         if (err < 0) {
1591                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1592                 WARN_ON(err == -EMSGSIZE);
1593                 kfree_skb(skb);
1594                 goto errout;
1595         }
1596         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1597         return;
1598 errout:
1599         if (err < 0)
1600                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1601 }
1602
1603 static size_t inet_get_link_af_size(const struct net_device *dev)
1604 {
1605         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1606
1607         if (!in_dev)
1608                 return 0;
1609
1610         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1611 }
1612
1613 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1614 {
1615         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1616         struct nlattr *nla;
1617         int i;
1618
1619         if (!in_dev)
1620                 return -ENODATA;
1621
1622         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1623         if (nla == NULL)
1624                 return -EMSGSIZE;
1625
1626         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1627                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1628
1629         return 0;
1630 }
1631
1632 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1633         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1634 };
1635
1636 static int inet_validate_link_af(const struct net_device *dev,
1637                                  const struct nlattr *nla)
1638 {
1639         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1640         int err, rem;
1641
1642         if (dev && !__in_dev_get_rtnl(dev))
1643                 return -EAFNOSUPPORT;
1644
1645         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1646         if (err < 0)
1647                 return err;
1648
1649         if (tb[IFLA_INET_CONF]) {
1650                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1651                         int cfgid = nla_type(a);
1652
1653                         if (nla_len(a) < 4)
1654                                 return -EINVAL;
1655
1656                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1657                                 return -EINVAL;
1658                 }
1659         }
1660
1661         return 0;
1662 }
1663
1664 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1665 {
1666         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1667         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1668         int rem;
1669
1670         if (!in_dev)
1671                 return -EAFNOSUPPORT;
1672
1673         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1674                 BUG();
1675
1676         if (tb[IFLA_INET_CONF]) {
1677                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1678                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1679         }
1680
1681         return 0;
1682 }
1683
1684 static int inet_netconf_msgsize_devconf(int type)
1685 {
1686         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1687                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1688
1689         /* type -1 is used for ALL */
1690         if (type == -1 || type == NETCONFA_FORWARDING)
1691                 size += nla_total_size(4);
1692         if (type == -1 || type == NETCONFA_RP_FILTER)
1693                 size += nla_total_size(4);
1694         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1695                 size += nla_total_size(4);
1696
1697         return size;
1698 }
1699
1700 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1701                                      struct ipv4_devconf *devconf, u32 portid,
1702                                      u32 seq, int event, unsigned int flags,
1703                                      int type)
1704 {
1705         struct nlmsghdr  *nlh;
1706         struct netconfmsg *ncm;
1707
1708         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1709                         flags);
1710         if (nlh == NULL)
1711                 return -EMSGSIZE;
1712
1713         ncm = nlmsg_data(nlh);
1714         ncm->ncm_family = AF_INET;
1715
1716         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1717                 goto nla_put_failure;
1718
1719         /* type -1 is used for ALL */
1720         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1721             nla_put_s32(skb, NETCONFA_FORWARDING,
1722                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1723                 goto nla_put_failure;
1724         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1725             nla_put_s32(skb, NETCONFA_RP_FILTER,
1726                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1727                 goto nla_put_failure;
1728         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1729             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1730                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1731                 goto nla_put_failure;
1732
1733         return nlmsg_end(skb, nlh);
1734
1735 nla_put_failure:
1736         nlmsg_cancel(skb, nlh);
1737         return -EMSGSIZE;
1738 }
1739
1740 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1741                                  struct ipv4_devconf *devconf)
1742 {
1743         struct sk_buff *skb;
1744         int err = -ENOBUFS;
1745
1746         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1747         if (skb == NULL)
1748                 goto errout;
1749
1750         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1751                                         RTM_NEWNETCONF, 0, type);
1752         if (err < 0) {
1753                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1754                 WARN_ON(err == -EMSGSIZE);
1755                 kfree_skb(skb);
1756                 goto errout;
1757         }
1758         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1759         return;
1760 errout:
1761         if (err < 0)
1762                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1763 }
1764
1765 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1766         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1767         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1768         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1769 };
1770
1771 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1772                                     struct nlmsghdr *nlh)
1773 {
1774         struct net *net = sock_net(in_skb->sk);
1775         struct nlattr *tb[NETCONFA_MAX+1];
1776         struct netconfmsg *ncm;
1777         struct sk_buff *skb;
1778         struct ipv4_devconf *devconf;
1779         struct in_device *in_dev;
1780         struct net_device *dev;
1781         int ifindex;
1782         int err;
1783
1784         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1785                           devconf_ipv4_policy);
1786         if (err < 0)
1787                 goto errout;
1788
1789         err = EINVAL;
1790         if (!tb[NETCONFA_IFINDEX])
1791                 goto errout;
1792
1793         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1794         switch (ifindex) {
1795         case NETCONFA_IFINDEX_ALL:
1796                 devconf = net->ipv4.devconf_all;
1797                 break;
1798         case NETCONFA_IFINDEX_DEFAULT:
1799                 devconf = net->ipv4.devconf_dflt;
1800                 break;
1801         default:
1802                 dev = __dev_get_by_index(net, ifindex);
1803                 if (dev == NULL)
1804                         goto errout;
1805                 in_dev = __in_dev_get_rtnl(dev);
1806                 if (in_dev == NULL)
1807                         goto errout;
1808                 devconf = &in_dev->cnf;
1809                 break;
1810         }
1811
1812         err = -ENOBUFS;
1813         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1814         if (skb == NULL)
1815                 goto errout;
1816
1817         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1818                                         NETLINK_CB(in_skb).portid,
1819                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1820                                         -1);
1821         if (err < 0) {
1822                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1823                 WARN_ON(err == -EMSGSIZE);
1824                 kfree_skb(skb);
1825                 goto errout;
1826         }
1827         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1828 errout:
1829         return err;
1830 }
1831
1832 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1833                                      struct netlink_callback *cb)
1834 {
1835         struct net *net = sock_net(skb->sk);
1836         int h, s_h;
1837         int idx, s_idx;
1838         struct net_device *dev;
1839         struct in_device *in_dev;
1840         struct hlist_head *head;
1841
1842         s_h = cb->args[0];
1843         s_idx = idx = cb->args[1];
1844
1845         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1846                 idx = 0;
1847                 head = &net->dev_index_head[h];
1848                 rcu_read_lock();
1849                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1850                           net->dev_base_seq;
1851                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1852                         if (idx < s_idx)
1853                                 goto cont;
1854                         in_dev = __in_dev_get_rcu(dev);
1855                         if (!in_dev)
1856                                 goto cont;
1857
1858                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1859                                                       &in_dev->cnf,
1860                                                       NETLINK_CB(cb->skb).portid,
1861                                                       cb->nlh->nlmsg_seq,
1862                                                       RTM_NEWNETCONF,
1863                                                       NLM_F_MULTI,
1864                                                       -1) <= 0) {
1865                                 rcu_read_unlock();
1866                                 goto done;
1867                         }
1868                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1869 cont:
1870                         idx++;
1871                 }
1872                 rcu_read_unlock();
1873         }
1874         if (h == NETDEV_HASHENTRIES) {
1875                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1876                                               net->ipv4.devconf_all,
1877                                               NETLINK_CB(cb->skb).portid,
1878                                               cb->nlh->nlmsg_seq,
1879                                               RTM_NEWNETCONF, NLM_F_MULTI,
1880                                               -1) <= 0)
1881                         goto done;
1882                 else
1883                         h++;
1884         }
1885         if (h == NETDEV_HASHENTRIES + 1) {
1886                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1887                                               net->ipv4.devconf_dflt,
1888                                               NETLINK_CB(cb->skb).portid,
1889                                               cb->nlh->nlmsg_seq,
1890                                               RTM_NEWNETCONF, NLM_F_MULTI,
1891                                               -1) <= 0)
1892                         goto done;
1893                 else
1894                         h++;
1895         }
1896 done:
1897         cb->args[0] = h;
1898         cb->args[1] = idx;
1899
1900         return skb->len;
1901 }
1902
1903 #ifdef CONFIG_SYSCTL
1904
1905 static void devinet_copy_dflt_conf(struct net *net, int i)
1906 {
1907         struct net_device *dev;
1908
1909         rcu_read_lock();
1910         for_each_netdev_rcu(net, dev) {
1911                 struct in_device *in_dev;
1912
1913                 in_dev = __in_dev_get_rcu(dev);
1914                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1915                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1916         }
1917         rcu_read_unlock();
1918 }
1919
1920 /* called with RTNL locked */
1921 static void inet_forward_change(struct net *net)
1922 {
1923         struct net_device *dev;
1924         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1925
1926         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1927         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1928         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1929                                     NETCONFA_IFINDEX_ALL,
1930                                     net->ipv4.devconf_all);
1931         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1932                                     NETCONFA_IFINDEX_DEFAULT,
1933                                     net->ipv4.devconf_dflt);
1934
1935         for_each_netdev(net, dev) {
1936                 struct in_device *in_dev;
1937                 if (on)
1938                         dev_disable_lro(dev);
1939                 rcu_read_lock();
1940                 in_dev = __in_dev_get_rcu(dev);
1941                 if (in_dev) {
1942                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1943                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944                                                     dev->ifindex, &in_dev->cnf);
1945                 }
1946                 rcu_read_unlock();
1947         }
1948 }
1949
1950 static int devinet_conf_proc(ctl_table *ctl, int write,
1951                              void __user *buffer,
1952                              size_t *lenp, loff_t *ppos)
1953 {
1954         int old_value = *(int *)ctl->data;
1955         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1956         int new_value = *(int *)ctl->data;
1957
1958         if (write) {
1959                 struct ipv4_devconf *cnf = ctl->extra1;
1960                 struct net *net = ctl->extra2;
1961                 int i = (int *)ctl->data - cnf->data;
1962
1963                 set_bit(i, cnf->state);
1964
1965                 if (cnf == net->ipv4.devconf_dflt)
1966                         devinet_copy_dflt_conf(net, i);
1967                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1968                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1969                         if ((new_value == 0) && (old_value != 0))
1970                                 rt_cache_flush(net);
1971                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1972                     new_value != old_value) {
1973                         int ifindex;
1974
1975                         if (cnf == net->ipv4.devconf_dflt)
1976                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1977                         else if (cnf == net->ipv4.devconf_all)
1978                                 ifindex = NETCONFA_IFINDEX_ALL;
1979                         else {
1980                                 struct in_device *idev =
1981                                         container_of(cnf, struct in_device,
1982                                                      cnf);
1983                                 ifindex = idev->dev->ifindex;
1984                         }
1985                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1986                                                     ifindex, cnf);
1987                 }
1988         }
1989
1990         return ret;
1991 }
1992
1993 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1994                                   void __user *buffer,
1995                                   size_t *lenp, loff_t *ppos)
1996 {
1997         int *valp = ctl->data;
1998         int val = *valp;
1999         loff_t pos = *ppos;
2000         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2001
2002         if (write && *valp != val) {
2003                 struct net *net = ctl->extra2;
2004
2005                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2006                         if (!rtnl_trylock()) {
2007                                 /* Restore the original values before restarting */
2008                                 *valp = val;
2009                                 *ppos = pos;
2010                                 return restart_syscall();
2011                         }
2012                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2013                                 inet_forward_change(net);
2014                         } else {
2015                                 struct ipv4_devconf *cnf = ctl->extra1;
2016                                 struct in_device *idev =
2017                                         container_of(cnf, struct in_device, cnf);
2018                                 if (*valp)
2019                                         dev_disable_lro(idev->dev);
2020                                 inet_netconf_notify_devconf(net,
2021                                                             NETCONFA_FORWARDING,
2022                                                             idev->dev->ifindex,
2023                                                             cnf);
2024                         }
2025                         rtnl_unlock();
2026                         rt_cache_flush(net);
2027                 } else
2028                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2029                                                     NETCONFA_IFINDEX_DEFAULT,
2030                                                     net->ipv4.devconf_dflt);
2031         }
2032
2033         return ret;
2034 }
2035
2036 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2037                                 void __user *buffer,
2038                                 size_t *lenp, loff_t *ppos)
2039 {
2040         int *valp = ctl->data;
2041         int val = *valp;
2042         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2043         struct net *net = ctl->extra2;
2044
2045         if (write && *valp != val)
2046                 rt_cache_flush(net);
2047
2048         return ret;
2049 }
2050
/*
 * Build one ctl_table initializer for devconf slot IPV4_DEVCONF_<attr>.
 *
 * @name is the procfs name, @mval the file mode and @proc the handler.
 * .data and .extra1 point into the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2073
2074 static struct devinet_sysctl_table {
2075         struct ctl_table_header *sysctl_header;
2076         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2077 } devinet_sysctl = {
2078         .devinet_vars = {
2079                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2080                                              devinet_sysctl_forward),
2081                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2082
2083                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2084                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2085                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2086                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2087                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2088                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2089                                         "accept_source_route"),
2090                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2091                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2092                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2093                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2094                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2095                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2096                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2097                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2098                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2099                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2100                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2101                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2102                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2103
2104                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2105                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2106                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2107                                               "force_igmp_version"),
2108                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2109                                               "promote_secondaries"),
2110                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2111                                               "route_localnet"),
2112         },
2113 };
2114
2115 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2116                                         struct ipv4_devconf *p)
2117 {
2118         int i;
2119         struct devinet_sysctl_table *t;
2120         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2121
2122         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2123         if (!t)
2124                 goto out;
2125
2126         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2127                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2128                 t->devinet_vars[i].extra1 = p;
2129                 t->devinet_vars[i].extra2 = net;
2130         }
2131
2132         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2133
2134         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2135         if (!t->sysctl_header)
2136                 goto free;
2137
2138         p->sysctl = t;
2139         return 0;
2140
2141 free:
2142         kfree(t);
2143 out:
2144         return -ENOBUFS;
2145 }
2146
2147 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2148 {
2149         struct devinet_sysctl_table *t = cnf->sysctl;
2150
2151         if (t == NULL)
2152                 return;
2153
2154         cnf->sysctl = NULL;
2155         unregister_net_sysctl_table(t->sysctl_header);
2156         kfree(t);
2157 }
2158
2159 static void devinet_sysctl_register(struct in_device *idev)
2160 {
2161         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2162         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2163                                         &idev->cnf);
2164 }
2165
2166 static void devinet_sysctl_unregister(struct in_device *idev)
2167 {
2168         __devinet_sysctl_unregister(&idev->cnf);
2169         neigh_sysctl_unregister(idev->arp_parms);
2170 }
2171
2172 static struct ctl_table ctl_forward_entry[] = {
2173         {
2174                 .procname       = "ip_forward",
2175                 .data           = &ipv4_devconf.data[
2176                                         IPV4_DEVCONF_FORWARDING - 1],
2177                 .maxlen         = sizeof(int),
2178                 .mode           = 0644,
2179                 .proc_handler   = devinet_sysctl_forward,
2180                 .extra1         = &ipv4_devconf,
2181                 .extra2         = &init_net,
2182         },
2183         { },
2184 };
2185 #endif
2186
2187 static __net_init int devinet_init_net(struct net *net)
2188 {
2189         int err;
2190         struct ipv4_devconf *all, *dflt;
2191 #ifdef CONFIG_SYSCTL
2192         struct ctl_table *tbl = ctl_forward_entry;
2193         struct ctl_table_header *forw_hdr;
2194 #endif
2195
2196         err = -ENOMEM;
2197         all = &ipv4_devconf;
2198         dflt = &ipv4_devconf_dflt;
2199
2200         if (!net_eq(net, &init_net)) {
2201                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2202                 if (all == NULL)
2203                         goto err_alloc_all;
2204
2205                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2206                 if (dflt == NULL)
2207                         goto err_alloc_dflt;
2208
2209 #ifdef CONFIG_SYSCTL
2210                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2211                 if (tbl == NULL)
2212                         goto err_alloc_ctl;
2213
2214                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2215                 tbl[0].extra1 = all;
2216                 tbl[0].extra2 = net;
2217 #endif
2218         }
2219
2220 #ifdef CONFIG_SYSCTL
2221         err = __devinet_sysctl_register(net, "all", all);
2222         if (err < 0)
2223                 goto err_reg_all;
2224
2225         err = __devinet_sysctl_register(net, "default", dflt);
2226         if (err < 0)
2227                 goto err_reg_dflt;
2228
2229         err = -ENOMEM;
2230         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2231         if (forw_hdr == NULL)
2232                 goto err_reg_ctl;
2233         net->ipv4.forw_hdr = forw_hdr;
2234 #endif
2235
2236         net->ipv4.devconf_all = all;
2237         net->ipv4.devconf_dflt = dflt;
2238         return 0;
2239
2240 #ifdef CONFIG_SYSCTL
2241 err_reg_ctl:
2242         __devinet_sysctl_unregister(dflt);
2243 err_reg_dflt:
2244         __devinet_sysctl_unregister(all);
2245 err_reg_all:
2246         if (tbl != ctl_forward_entry)
2247                 kfree(tbl);
2248 err_alloc_ctl:
2249 #endif
2250         if (dflt != &ipv4_devconf_dflt)
2251                 kfree(dflt);
2252 err_alloc_dflt:
2253         if (all != &ipv4_devconf)
2254                 kfree(all);
2255 err_alloc_all:
2256         return err;
2257 }
2258
2259 static __net_exit void devinet_exit_net(struct net *net)
2260 {
2261 #ifdef CONFIG_SYSCTL
2262         struct ctl_table *tbl;
2263
2264         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2265         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2266         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2267         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2268         kfree(tbl);
2269 #endif
2270         kfree(net->ipv4.devconf_dflt);
2271         kfree(net->ipv4.devconf_all);
2272 }
2273
2274 static __net_initdata struct pernet_operations devinet_ops = {
2275         .init = devinet_init_net,
2276         .exit = devinet_exit_net,
2277 };
2278
2279 static struct rtnl_af_ops inet_af_ops = {
2280         .family           = AF_INET,
2281         .fill_link_af     = inet_fill_link_af,
2282         .get_link_af_size = inet_get_link_af_size,
2283         .validate_link_af = inet_validate_link_af,
2284         .set_link_af      = inet_set_link_af,
2285 };
2286
2287 void __init devinet_init(void)
2288 {
2289         int i;
2290
2291         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2292                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2293
2294         register_pernet_subsys(&devinet_ops);
2295
2296         register_gifconf(PF_INET, inet_gifconf);
2297         register_netdevice_notifier(&ip_netdev_notifier);
2298
2299         schedule_delayed_work(&check_lifetime_work, 0);
2300
2301         rtnl_af_register(&inet_af_ops);
2302
2303         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2304         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2305         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2306         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2307                       inet_netconf_dump_devconf, NULL);
2308 }
2309