tcp: do not forget FIN in tcp_shifted_skb()
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/*
 * Apply IPV4_DEVCONF() to the devconf_dflt structure of namespace @net for
 * attribute @attr.
 *
 * @net is parenthesised so that any pointer-valued expression (for example
 * "base + 1") can be passed; without the parentheses "->" would bind to the
 * last operand of the expression instead of to the whole argument.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
98 };
99
100 #define IN4_ADDR_HSIZE_SHIFT    8
101 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
102
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145                 if (ifa->ifa_local == addr) {
146                         struct net_device *dev = ifa->ifa_dev->dev;
147
148                         if (!net_eq(dev_net(dev), net))
149                                 continue;
150                         result = dev;
151                         break;
152                 }
153         }
154         if (!result) {
155                 struct flowi4 fl4 = { .daddr = addr };
156                 struct fib_result res = { 0 };
157                 struct fib_table *local;
158
159                 /* Fallback to FIB local table so that communication
160                  * over loopback subnets work.
161                  */
162                 local = fib_get_table(net, RT_TABLE_LOCAL);
163                 if (local &&
164                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165                     res.type == RTN_LOCAL)
166                         result = FIB_RES_DEV(res);
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179                          int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191
192 /* Locks all the inet devices. */
193
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202         if (ifa->ifa_dev)
203                 in_dev_put(ifa->ifa_dev);
204         kfree(ifa);
205 }
206
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214         struct net_device *dev = idev->dev;
215
216         WARN_ON(idev->ifa_list);
217         WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221         dev_put(dev);
222         if (!idev->dead)
223                 pr_err("Freeing alive in_device %p\n", idev);
224         else
225                 kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231         struct in_device *in_dev;
232
233         ASSERT_RTNL();
234
235         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236         if (!in_dev)
237                 goto out;
238         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239                         sizeof(in_dev->cnf));
240         in_dev->cnf.sysctl = NULL;
241         in_dev->dev = dev;
242         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243         if (!in_dev->arp_parms)
244                 goto out_kfree;
245         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246                 dev_disable_lro(dev);
247         /* Reference in_dev->dev */
248         dev_hold(dev);
249         /* Account for reference dev->ip_ptr (below) */
250         in_dev_hold(in_dev);
251
252         devinet_sysctl_register(in_dev);
253         ip_mc_init_dev(in_dev);
254         if (dev->flags & IFF_UP)
255                 ip_mc_up(in_dev);
256
257         /* we can receive as soon as ip_ptr is set -- do this last */
258         rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260         return in_dev;
261 out_kfree:
262         kfree(in_dev);
263         in_dev = NULL;
264         goto out;
265 }
266
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269         struct in_device *idev = container_of(head, struct in_device, rcu_head);
270         in_dev_put(idev);
271 }
272
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275         struct in_ifaddr *ifa;
276         struct net_device *dev;
277
278         ASSERT_RTNL();
279
280         dev = in_dev->dev;
281
282         in_dev->dead = 1;
283
284         ip_mc_destroy_dev(in_dev);
285
286         while ((ifa = in_dev->ifa_list) != NULL) {
287                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288                 inet_free_ifa(ifa);
289         }
290
291         RCU_INIT_POINTER(dev->ip_ptr, NULL);
292
293         devinet_sysctl_unregister(in_dev);
294         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295         arp_ifdown(dev);
296
297         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302         rcu_read_lock();
303         for_primary_ifa(in_dev) {
304                 if (inet_ifa_match(a, ifa)) {
305                         if (!b || inet_ifa_match(b, ifa)) {
306                                 rcu_read_unlock();
307                                 return 1;
308                         }
309                 }
310         } endfor_ifa(in_dev);
311         rcu_read_unlock();
312         return 0;
313 }
314
/*
 * __inet_del_ifa - unlink the address *@ifap from @in_dev and announce it.
 * @in_dev:  device state owning the address list
 * @ifap:    link (pointer to the "next" slot) that currently points at the
 *           address to delete
 * @destroy: when non-zero the deleted address is also released with
 *           inet_free_ifa()
 * @nlh:     netlink header forwarded to rtmsg_ifa() (may be NULL)
 * @portid:  netlink port id forwarded to rtmsg_ifa()
 *
 * When the deleted address is a primary one, the secondaries that share its
 * mask and subnet are either deleted too, or - if secondary promotion is
 * enabled on the device - the first of them is promoted to primary.
 * Must be called with RTNL held.
 */
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316                          int destroy, struct nlmsghdr *nlh, u32 portid)
317 {
318         struct in_ifaddr *promote = NULL;
319         struct in_ifaddr *ifa, *ifa1 = *ifap;
320         struct in_ifaddr *last_prim = in_dev->ifa_list;
321         struct in_ifaddr *prev_prom = NULL;
322         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
323
324         ASSERT_RTNL();
325
326         /* 1. Deleting a primary ifaddr forces deletion of all secondaries
327          * unless alias promotion is set
328          **/
329
330         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
332
333                 while ((ifa = *ifap1) != NULL) {
            /* remember the last primary whose scope is not below ifa1's */
334                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335                             ifa1->ifa_scope <= ifa->ifa_scope)
336                                 last_prim = ifa;
337
            /* skip entries that are not secondaries of ifa1's subnet */
338                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339                             ifa1->ifa_mask != ifa->ifa_mask ||
340                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
341                                 ifap1 = &ifa->ifa_next;
342                                 prev_prom = ifa;
343                                 continue;
344                         }
345
346                         if (!do_promote) {
347                                 inet_hash_remove(ifa);
348                                 *ifap1 = ifa->ifa_next;
349
350                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351                                 blocking_notifier_call_chain(&inetaddr_chain,
352                                                 NETDEV_DOWN, ifa);
353                                 inet_free_ifa(ifa);
354                         } else {
                    /* first matching secondary becomes the new primary */
355                                 promote = ifa;
356                                 break;
357                         }
358                 }
359         }
360
361         /* On promotion all secondaries from the subnet are changing
362          * the primary IP, so we must remove all their routes silently
363          * and add them back later with the new prefsrc. Do this
364          * while all addresses are on the device list.
365          */
366         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367                 if (ifa1->ifa_mask == ifa->ifa_mask &&
368                     inet_ifa_match(ifa1->ifa_address, ifa))
369                         fib_del_ifaddr(ifa, ifa1);
370         }
371
372         /* 2. Unlink it */
373
374         *ifap = ifa1->ifa_next;
375         inet_hash_remove(ifa1);
376
377         /* 3. Announce address deletion */
378
379         /* Send message first, then call notifier.
380            At first sight, FIB update triggered by notifier
381            will refer to already deleted ifaddr, that could confuse
382            netlink listeners. It is not true: look, gated sees
383            that route deleted and if it still thinks that ifaddr
384            is valid, it will try to restore deleted routes... Grr.
385            So that, this order is correct.
386          */
387         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
389
390         if (promote) {
            /* saved before the relinking below rewrites promote->ifa_next */
391                 struct in_ifaddr *next_sec = promote->ifa_next;
392
            /* move the promoted address right behind last_prim */
393                 if (prev_prom) {
394                         prev_prom->ifa_next = promote->ifa_next;
395                         promote->ifa_next = last_prim->ifa_next;
396                         last_prim->ifa_next = promote;
397                 }
398
399                 promote->ifa_flags &= ~IFA_F_SECONDARY;
400                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401                 blocking_notifier_call_chain(&inetaddr_chain,
402                                 NETDEV_UP, promote);
            /* re-add routes for the remaining secondaries of the subnet */
403                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404                         if (ifa1->ifa_mask != ifa->ifa_mask ||
405                             !inet_ifa_match(ifa1->ifa_address, ifa))
406                                         continue;
407                         fib_add_ifaddr(ifa);
408                 }
409
410         }
411         if (destroy)
412                 inet_free_ifa(ifa1);
413 }
414
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416                          int destroy)
417 {
418         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420
/* Handler of the delayed work item declared below. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426                              u32 portid)
427 {
428         struct in_device *in_dev = ifa->ifa_dev;
429         struct in_ifaddr *ifa1, **ifap, **last_primary;
430
431         ASSERT_RTNL();
432
433         if (!ifa->ifa_local) {
434                 inet_free_ifa(ifa);
435                 return 0;
436         }
437
438         ifa->ifa_flags &= ~IFA_F_SECONDARY;
439         last_primary = &in_dev->ifa_list;
440
441         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442              ifap = &ifa1->ifa_next) {
443                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444                     ifa->ifa_scope <= ifa1->ifa_scope)
445                         last_primary = &ifa1->ifa_next;
446                 if (ifa1->ifa_mask == ifa->ifa_mask &&
447                     inet_ifa_match(ifa1->ifa_address, ifa)) {
448                         if (ifa1->ifa_local == ifa->ifa_local) {
449                                 inet_free_ifa(ifa);
450                                 return -EEXIST;
451                         }
452                         if (ifa1->ifa_scope != ifa->ifa_scope) {
453                                 inet_free_ifa(ifa);
454                                 return -EINVAL;
455                         }
456                         ifa->ifa_flags |= IFA_F_SECONDARY;
457                 }
458         }
459
460         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461                 net_srandom(ifa->ifa_local);
462                 ifap = last_primary;
463         }
464
465         ifa->ifa_next = *ifap;
466         *ifap = ifa;
467
468         inet_hash_insert(dev_net(in_dev->dev), ifa);
469
470         cancel_delayed_work(&check_lifetime_work);
471         schedule_delayed_work(&check_lifetime_work, 0);
472
473         /* Send message first, then call notifier.
474            Notifier will trigger FIB update, so that
475            listeners of netlink will know about new ifaddr */
476         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478
479         return 0;
480 }
481
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484         return __inet_insert_ifa(ifa, NULL, 0);
485 }
486
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489         struct in_device *in_dev = __in_dev_get_rtnl(dev);
490
491         ASSERT_RTNL();
492
493         if (!in_dev) {
494                 inet_free_ifa(ifa);
495                 return -ENOBUFS;
496         }
497         ipv4_devconf_setall(in_dev);
498         if (ifa->ifa_dev != in_dev) {
499                 WARN_ON(ifa->ifa_dev);
500                 in_dev_hold(in_dev);
501                 ifa->ifa_dev = in_dev;
502         }
503         if (ipv4_is_loopback(ifa->ifa_local))
504                 ifa->ifa_scope = RT_SCOPE_HOST;
505         return inet_insert_ifa(ifa);
506 }
507
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513         struct net_device *dev;
514         struct in_device *in_dev = NULL;
515
516         rcu_read_lock();
517         dev = dev_get_by_index_rcu(net, ifindex);
518         if (dev)
519                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520         rcu_read_unlock();
521         return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524
525 /* Called only from RTNL semaphored context. No locks. */
526
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528                                     __be32 mask)
529 {
530         ASSERT_RTNL();
531
532         for_primary_ifa(in_dev) {
533                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534                         return ifa;
535         } endfor_ifa(in_dev);
536         return NULL;
537 }
538
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540 {
541         struct net *net = sock_net(skb->sk);
542         struct nlattr *tb[IFA_MAX+1];
543         struct in_device *in_dev;
544         struct ifaddrmsg *ifm;
545         struct in_ifaddr *ifa, **ifap;
546         int err = -EINVAL;
547
548         ASSERT_RTNL();
549
550         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551         if (err < 0)
552                 goto errout;
553
554         ifm = nlmsg_data(nlh);
555         in_dev = inetdev_by_index(net, ifm->ifa_index);
556         if (in_dev == NULL) {
557                 err = -ENODEV;
558                 goto errout;
559         }
560
561         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562              ifap = &ifa->ifa_next) {
563                 if (tb[IFA_LOCAL] &&
564                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565                         continue;
566
567                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568                         continue;
569
570                 if (tb[IFA_ADDRESS] &&
571                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573                         continue;
574
575                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576                 return 0;
577         }
578
579         err = -EADDRNOTAVAIL;
580 errout:
581         return err;
582 }
583
/* Lifetime value meaning "never expires". */
584 #define INFINITY_LIFE_TIME      0xFFFFFFFF
585
/*
 * check_lifetime - delayed-work handler enforcing address lifetimes.
 * @work: the work item (unused)
 *
 * Every bucket of the address hash table is processed in two passes:
 *
 *  1. under rcu_read_lock() the non-permanent addresses are inspected to
 *     find out whether any has outlived its valid lifetime or has to be
 *     marked deprecated, and to compute the earliest time ("next") at which
 *     another check is due;
 *  2. only if pass 1 found something to change, the bucket is walked again
 *     under rtnl_lock(): expired addresses are deleted with inet_del_ifa()
 *     and addresses past their preferred lifetime get IFA_F_DEPRECATED and
 *     an RTM_NEWADDR message.
 *
 * Finally the work item reschedules itself.
 */
586 static void check_lifetime(struct work_struct *work)
587 {
588         unsigned long now, next, next_sec, next_sched;
589         struct in_ifaddr *ifa;
590         struct hlist_node *n;
591         int i;
592
593         now = jiffies;
594         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
595
596         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597                 bool change_needed = false;
598
            /* pass 1: read-only scan, no RTNL needed */
599                 rcu_read_lock();
600                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
601                         unsigned long age;
602
603                         if (ifa->ifa_flags & IFA_F_PERMANENT)
604                                 continue;
605
606                         /* We try to batch several events at once. */
                    /* age is in seconds: jiffies difference divided by HZ */
607                         age = (now - ifa->ifa_tstamp +
608                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
609
610                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
611                             age >= ifa->ifa_valid_lft) {
612                                 change_needed = true;
613                         } else if (ifa->ifa_preferred_lft ==
614                                    INFINITY_LIFE_TIME) {
615                                 continue;
616                         } else if (age >= ifa->ifa_preferred_lft) {
                            /* already past preferred: next event is expiry */
617                                 if (time_before(ifa->ifa_tstamp +
618                                                 ifa->ifa_valid_lft * HZ, next))
619                                         next = ifa->ifa_tstamp +
620                                                ifa->ifa_valid_lft * HZ;
621
622                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
623                                         change_needed = true;
624                         } else if (time_before(ifa->ifa_tstamp +
625                                                ifa->ifa_preferred_lft * HZ,
626                                                next)) {
                            /* next event is the end of the preferred lifetime */
627                                 next = ifa->ifa_tstamp +
628                                        ifa->ifa_preferred_lft * HZ;
629                         }
630                 }
631                 rcu_read_unlock();
632                 if (!change_needed)
633                         continue;
            /* pass 2: apply the changes with RTNL held */
634                 rtnl_lock();
635                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
636                         unsigned long age;
637
638                         if (ifa->ifa_flags & IFA_F_PERMANENT)
639                                 continue;
640
641                         /* We try to batch several events at once. */
642                         age = (now - ifa->ifa_tstamp +
643                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644
645                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646                             age >= ifa->ifa_valid_lft) {
647                                 struct in_ifaddr **ifap;
648
                            /* inet_del_ifa() needs the list link pointing at ifa */
649                                 for (ifap = &ifa->ifa_dev->ifa_list;
650                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
651                                         if (*ifap == ifa) {
652                                                 inet_del_ifa(ifa->ifa_dev,
653                                                              ifap, 1);
654                                                 break;
655                                         }
656                                 }
657                         } else if (ifa->ifa_preferred_lft !=
658                                    INFINITY_LIFE_TIME &&
659                                    age >= ifa->ifa_preferred_lft &&
660                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
661                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
662                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
663                         }
664                 }
665                 rtnl_unlock();
666         }
667
668         next_sec = round_jiffies_up(next);
669         next_sched = next;
670
671         /* If rounded timeout is accurate enough, accept it. */
672         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
673                 next_sched = next_sec;
674
675         now = jiffies;
676         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
677         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
678                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
679
680         schedule_delayed_work(&check_lifetime_work, next_sched - now);
681 }
682
683 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
684                              __u32 prefered_lft)
685 {
686         unsigned long timeout;
687
688         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
689
690         timeout = addrconf_timeout_fixup(valid_lft, HZ);
691         if (addrconf_finite_timeout(timeout))
692                 ifa->ifa_valid_lft = timeout;
693         else
694                 ifa->ifa_flags |= IFA_F_PERMANENT;
695
696         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
697         if (addrconf_finite_timeout(timeout)) {
698                 if (timeout == 0)
699                         ifa->ifa_flags |= IFA_F_DEPRECATED;
700                 ifa->ifa_preferred_lft = timeout;
701         }
702         ifa->ifa_tstamp = jiffies;
703         if (!ifa->ifa_cstamp)
704                 ifa->ifa_cstamp = ifa->ifa_tstamp;
705 }
706
707 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
708                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
709 {
710         struct nlattr *tb[IFA_MAX+1];
711         struct in_ifaddr *ifa;
712         struct ifaddrmsg *ifm;
713         struct net_device *dev;
714         struct in_device *in_dev;
715         int err;
716
717         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
718         if (err < 0)
719                 goto errout;
720
721         ifm = nlmsg_data(nlh);
722         err = -EINVAL;
723         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
724                 goto errout;
725
726         dev = __dev_get_by_index(net, ifm->ifa_index);
727         err = -ENODEV;
728         if (dev == NULL)
729                 goto errout;
730
731         in_dev = __in_dev_get_rtnl(dev);
732         err = -ENOBUFS;
733         if (in_dev == NULL)
734                 goto errout;
735
736         ifa = inet_alloc_ifa();
737         if (ifa == NULL)
738                 /*
739                  * A potential indev allocation can be left alive, it stays
740                  * assigned to its device and is destroy with it.
741                  */
742                 goto errout;
743
744         ipv4_devconf_setall(in_dev);
745         in_dev_hold(in_dev);
746
747         if (tb[IFA_ADDRESS] == NULL)
748                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
749
750         INIT_HLIST_NODE(&ifa->hash);
751         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
752         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
753         ifa->ifa_flags = ifm->ifa_flags;
754         ifa->ifa_scope = ifm->ifa_scope;
755         ifa->ifa_dev = in_dev;
756
757         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
758         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
759
760         if (tb[IFA_BROADCAST])
761                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
762
763         if (tb[IFA_LABEL])
764                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
765         else
766                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
767
768         if (tb[IFA_CACHEINFO]) {
769                 struct ifa_cacheinfo *ci;
770
771                 ci = nla_data(tb[IFA_CACHEINFO]);
772                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
773                         err = -EINVAL;
774                         goto errout_free;
775                 }
776                 *pvalid_lft = ci->ifa_valid;
777                 *pprefered_lft = ci->ifa_prefered;
778         }
779
780         return ifa;
781
782 errout_free:
783         inet_free_ifa(ifa);
784 errout:
785         return ERR_PTR(err);
786 }
787
788 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
789 {
790         struct in_device *in_dev = ifa->ifa_dev;
791         struct in_ifaddr *ifa1, **ifap;
792
793         if (!ifa->ifa_local)
794                 return NULL;
795
796         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
797              ifap = &ifa1->ifa_next) {
798                 if (ifa1->ifa_mask == ifa->ifa_mask &&
799                     inet_ifa_match(ifa1->ifa_address, ifa) &&
800                     ifa1->ifa_local == ifa->ifa_local)
801                         return ifa1;
802         }
803         return NULL;
804 }
805
806 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
807 {
808         struct net *net = sock_net(skb->sk);
809         struct in_ifaddr *ifa;
810         struct in_ifaddr *ifa_existing;
811         __u32 valid_lft = INFINITY_LIFE_TIME;
812         __u32 prefered_lft = INFINITY_LIFE_TIME;
813
814         ASSERT_RTNL();
815
816         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
817         if (IS_ERR(ifa))
818                 return PTR_ERR(ifa);
819
820         ifa_existing = find_matching_ifa(ifa);
821         if (!ifa_existing) {
822                 /* It would be best to check for !NLM_F_CREATE here but
823                  * userspace alreay relies on not having to provide this.
824                  */
825                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
826                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
827         } else {
828                 inet_free_ifa(ifa);
829
830                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
831                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
832                         return -EEXIST;
833                 ifa = ifa_existing;
834                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
835                 cancel_delayed_work(&check_lifetime_work);
836                 schedule_delayed_work(&check_lifetime_work, 0);
837                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
838                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
839         }
840         return 0;
841 }
842
843 /*
844  *      Determine a default network mask, based on the IP address.
845  */
846
847 static int inet_abc_len(__be32 addr)
848 {
849         int rc = -1;    /* Something else, probably a multicast. */
850
851         if (ipv4_is_zeronet(addr))
852                 rc = 0;
853         else {
854                 __u32 haddr = ntohl(addr);
855
856                 if (IN_CLASSA(haddr))
857                         rc = 8;
858                 else if (IN_CLASSB(haddr))
859                         rc = 16;
860                 else if (IN_CLASSC(haddr))
861                         rc = 24;
862         }
863
864         return rc;
865 }
866
867
868 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
869 {
870         struct ifreq ifr;
871         struct sockaddr_in sin_orig;
872         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
873         struct in_device *in_dev;
874         struct in_ifaddr **ifap = NULL;
875         struct in_ifaddr *ifa = NULL;
876         struct net_device *dev;
877         char *colon;
878         int ret = -EFAULT;
879         int tryaddrmatch = 0;
880
881         /*
882          *      Fetch the caller's info block into kernel space
883          */
884
885         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
886                 goto out;
887         ifr.ifr_name[IFNAMSIZ - 1] = 0;
888
889         /* save original address for comparison */
890         memcpy(&sin_orig, sin, sizeof(*sin));
891
892         colon = strchr(ifr.ifr_name, ':');
893         if (colon)
894                 *colon = 0;
895
896         dev_load(net, ifr.ifr_name);
897
898         switch (cmd) {
899         case SIOCGIFADDR:       /* Get interface address */
900         case SIOCGIFBRDADDR:    /* Get the broadcast address */
901         case SIOCGIFDSTADDR:    /* Get the destination address */
902         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
903                 /* Note that these ioctls will not sleep,
904                    so that we do not impose a lock.
905                    One day we will be forced to put shlock here (I mean SMP)
906                  */
907                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
908                 memset(sin, 0, sizeof(*sin));
909                 sin->sin_family = AF_INET;
910                 break;
911
912         case SIOCSIFFLAGS:
913                 ret = -EPERM;
914                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
915                         goto out;
916                 break;
917         case SIOCSIFADDR:       /* Set interface address (and family) */
918         case SIOCSIFBRDADDR:    /* Set the broadcast address */
919         case SIOCSIFDSTADDR:    /* Set the destination address */
920         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
921                 ret = -EPERM;
922                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
923                         goto out;
924                 ret = -EINVAL;
925                 if (sin->sin_family != AF_INET)
926                         goto out;
927                 break;
928         default:
929                 ret = -EINVAL;
930                 goto out;
931         }
932
933         rtnl_lock();
934
935         ret = -ENODEV;
936         dev = __dev_get_by_name(net, ifr.ifr_name);
937         if (!dev)
938                 goto done;
939
940         if (colon)
941                 *colon = ':';
942
943         in_dev = __in_dev_get_rtnl(dev);
944         if (in_dev) {
945                 if (tryaddrmatch) {
946                         /* Matthias Andree */
947                         /* compare label and address (4.4BSD style) */
948                         /* note: we only do this for a limited set of ioctls
949                            and only if the original address family was AF_INET.
950                            This is checked above. */
951                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
952                              ifap = &ifa->ifa_next) {
953                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
954                                     sin_orig.sin_addr.s_addr ==
955                                                         ifa->ifa_local) {
956                                         break; /* found */
957                                 }
958                         }
959                 }
960                 /* we didn't get a match, maybe the application is
961                    4.3BSD-style and passed in junk so we fall back to
962                    comparing just the label */
963                 if (!ifa) {
964                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
965                              ifap = &ifa->ifa_next)
966                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
967                                         break;
968                 }
969         }
970
971         ret = -EADDRNOTAVAIL;
972         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
973                 goto done;
974
975         switch (cmd) {
976         case SIOCGIFADDR:       /* Get interface address */
977                 sin->sin_addr.s_addr = ifa->ifa_local;
978                 goto rarok;
979
980         case SIOCGIFBRDADDR:    /* Get the broadcast address */
981                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
982                 goto rarok;
983
984         case SIOCGIFDSTADDR:    /* Get the destination address */
985                 sin->sin_addr.s_addr = ifa->ifa_address;
986                 goto rarok;
987
988         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
989                 sin->sin_addr.s_addr = ifa->ifa_mask;
990                 goto rarok;
991
992         case SIOCSIFFLAGS:
993                 if (colon) {
994                         ret = -EADDRNOTAVAIL;
995                         if (!ifa)
996                                 break;
997                         ret = 0;
998                         if (!(ifr.ifr_flags & IFF_UP))
999                                 inet_del_ifa(in_dev, ifap, 1);
1000                         break;
1001                 }
1002                 ret = dev_change_flags(dev, ifr.ifr_flags);
1003                 break;
1004
1005         case SIOCSIFADDR:       /* Set interface address (and family) */
1006                 ret = -EINVAL;
1007                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1008                         break;
1009
1010                 if (!ifa) {
1011                         ret = -ENOBUFS;
1012                         ifa = inet_alloc_ifa();
1013                         if (!ifa)
1014                                 break;
1015                         INIT_HLIST_NODE(&ifa->hash);
1016                         if (colon)
1017                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1018                         else
1019                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1020                 } else {
1021                         ret = 0;
1022                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1023                                 break;
1024                         inet_del_ifa(in_dev, ifap, 0);
1025                         ifa->ifa_broadcast = 0;
1026                         ifa->ifa_scope = 0;
1027                 }
1028
1029                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1030
1031                 if (!(dev->flags & IFF_POINTOPOINT)) {
1032                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1033                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1034                         if ((dev->flags & IFF_BROADCAST) &&
1035                             ifa->ifa_prefixlen < 31)
1036                                 ifa->ifa_broadcast = ifa->ifa_address |
1037                                                      ~ifa->ifa_mask;
1038                 } else {
1039                         ifa->ifa_prefixlen = 32;
1040                         ifa->ifa_mask = inet_make_mask(32);
1041                 }
1042                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1043                 ret = inet_set_ifa(dev, ifa);
1044                 break;
1045
1046         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1047                 ret = 0;
1048                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1049                         inet_del_ifa(in_dev, ifap, 0);
1050                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1051                         inet_insert_ifa(ifa);
1052                 }
1053                 break;
1054
1055         case SIOCSIFDSTADDR:    /* Set the destination address */
1056                 ret = 0;
1057                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1058                         break;
1059                 ret = -EINVAL;
1060                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1061                         break;
1062                 ret = 0;
1063                 inet_del_ifa(in_dev, ifap, 0);
1064                 ifa->ifa_address = sin->sin_addr.s_addr;
1065                 inet_insert_ifa(ifa);
1066                 break;
1067
1068         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1069
1070                 /*
1071                  *      The mask we set must be legal.
1072                  */
1073                 ret = -EINVAL;
1074                 if (bad_mask(sin->sin_addr.s_addr, 0))
1075                         break;
1076                 ret = 0;
1077                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1078                         __be32 old_mask = ifa->ifa_mask;
1079                         inet_del_ifa(in_dev, ifap, 0);
1080                         ifa->ifa_mask = sin->sin_addr.s_addr;
1081                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1082
1083                         /* See if current broadcast address matches
1084                          * with current netmask, then recalculate
1085                          * the broadcast address. Otherwise it's a
1086                          * funny address, so don't touch it since
1087                          * the user seems to know what (s)he's doing...
1088                          */
1089                         if ((dev->flags & IFF_BROADCAST) &&
1090                             (ifa->ifa_prefixlen < 31) &&
1091                             (ifa->ifa_broadcast ==
1092                              (ifa->ifa_local|~old_mask))) {
1093                                 ifa->ifa_broadcast = (ifa->ifa_local |
1094                                                       ~sin->sin_addr.s_addr);
1095                         }
1096                         inet_insert_ifa(ifa);
1097                 }
1098                 break;
1099         }
1100 done:
1101         rtnl_unlock();
1102 out:
1103         return ret;
1104 rarok:
1105         rtnl_unlock();
1106         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1107         goto out;
1108 }
1109
1110 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1111 {
1112         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1113         struct in_ifaddr *ifa;
1114         struct ifreq ifr;
1115         int done = 0;
1116
1117         if (!in_dev)
1118                 goto out;
1119
1120         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1121                 if (!buf) {
1122                         done += sizeof(ifr);
1123                         continue;
1124                 }
1125                 if (len < (int) sizeof(ifr))
1126                         break;
1127                 memset(&ifr, 0, sizeof(struct ifreq));
1128                 if (ifa->ifa_label)
1129                         strcpy(ifr.ifr_name, ifa->ifa_label);
1130                 else
1131                         strcpy(ifr.ifr_name, dev->name);
1132
1133                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1134                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1135                                                                 ifa->ifa_local;
1136
1137                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1138                         done = -EFAULT;
1139                         break;
1140                 }
1141                 buf  += sizeof(struct ifreq);
1142                 len  -= sizeof(struct ifreq);
1143                 done += sizeof(struct ifreq);
1144         }
1145 out:
1146         return done;
1147 }
1148
1149 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1150 {
1151         __be32 addr = 0;
1152         struct in_device *in_dev;
1153         struct net *net = dev_net(dev);
1154
1155         rcu_read_lock();
1156         in_dev = __in_dev_get_rcu(dev);
1157         if (!in_dev)
1158                 goto no_in_dev;
1159
1160         for_primary_ifa(in_dev) {
1161                 if (ifa->ifa_scope > scope)
1162                         continue;
1163                 if (!dst || inet_ifa_match(dst, ifa)) {
1164                         addr = ifa->ifa_local;
1165                         break;
1166                 }
1167                 if (!addr)
1168                         addr = ifa->ifa_local;
1169         } endfor_ifa(in_dev);
1170
1171         if (addr)
1172                 goto out_unlock;
1173 no_in_dev:
1174
1175         /* Not loopback addresses on loopback should be preferred
1176            in this case. It is importnat that lo is the first interface
1177            in dev_base list.
1178          */
1179         for_each_netdev_rcu(net, dev) {
1180                 in_dev = __in_dev_get_rcu(dev);
1181                 if (!in_dev)
1182                         continue;
1183
1184                 for_primary_ifa(in_dev) {
1185                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1186                             ifa->ifa_scope <= scope) {
1187                                 addr = ifa->ifa_local;
1188                                 goto out_unlock;
1189                         }
1190                 } endfor_ifa(in_dev);
1191         }
1192 out_unlock:
1193         rcu_read_unlock();
1194         return addr;
1195 }
1196 EXPORT_SYMBOL(inet_select_addr);
1197
1198 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1199                               __be32 local, int scope)
1200 {
1201         int same = 0;
1202         __be32 addr = 0;
1203
1204         for_ifa(in_dev) {
1205                 if (!addr &&
1206                     (local == ifa->ifa_local || !local) &&
1207                     ifa->ifa_scope <= scope) {
1208                         addr = ifa->ifa_local;
1209                         if (same)
1210                                 break;
1211                 }
1212                 if (!same) {
1213                         same = (!local || inet_ifa_match(local, ifa)) &&
1214                                 (!dst || inet_ifa_match(dst, ifa));
1215                         if (same && addr) {
1216                                 if (local || !dst)
1217                                         break;
1218                                 /* Is the selected addr into dst subnet? */
1219                                 if (inet_ifa_match(addr, ifa))
1220                                         break;
1221                                 /* No, then can we use new local src? */
1222                                 if (ifa->ifa_scope <= scope) {
1223                                         addr = ifa->ifa_local;
1224                                         break;
1225                                 }
1226                                 /* search for large dst subnet for addr */
1227                                 same = 0;
1228                         }
1229                 }
1230         } endfor_ifa(in_dev);
1231
1232         return same ? addr : 0;
1233 }
1234
1235 /*
1236  * Confirm that local IP address exists using wildcards:
1237  * - in_dev: only on this interface, 0=any interface
1238  * - dst: only in the same subnet as dst, 0=any dst
1239  * - local: address, 0=autoselect the local address
1240  * - scope: maximum allowed scope value for the local address
1241  */
1242 __be32 inet_confirm_addr(struct in_device *in_dev,
1243                          __be32 dst, __be32 local, int scope)
1244 {
1245         __be32 addr = 0;
1246         struct net_device *dev;
1247         struct net *net;
1248
1249         if (scope != RT_SCOPE_LINK)
1250                 return confirm_addr_indev(in_dev, dst, local, scope);
1251
1252         net = dev_net(in_dev->dev);
1253         rcu_read_lock();
1254         for_each_netdev_rcu(net, dev) {
1255                 in_dev = __in_dev_get_rcu(dev);
1256                 if (in_dev) {
1257                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1258                         if (addr)
1259                                 break;
1260                 }
1261         }
1262         rcu_read_unlock();
1263
1264         return addr;
1265 }
1266 EXPORT_SYMBOL(inet_confirm_addr);
1267
1268 /*
1269  *      Device notifier
1270  */
1271
1272 int register_inetaddr_notifier(struct notifier_block *nb)
1273 {
1274         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1275 }
1276 EXPORT_SYMBOL(register_inetaddr_notifier);
1277
1278 int unregister_inetaddr_notifier(struct notifier_block *nb)
1279 {
1280         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1281 }
1282 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1283
1284 /* Rename ifa_labels for a device name change. Make some effort to preserve
1285  * existing alias numbering and to create unique labels if possible.
1286 */
1287 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1288 {
1289         struct in_ifaddr *ifa;
1290         int named = 0;
1291
1292         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1293                 char old[IFNAMSIZ], *dot;
1294
1295                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1296                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1297                 if (named++ == 0)
1298                         goto skip;
1299                 dot = strchr(old, ':');
1300                 if (dot == NULL) {
1301                         sprintf(old, ":%d", named);
1302                         dot = old;
1303                 }
1304                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1305                         strcat(ifa->ifa_label, dot);
1306                 else
1307                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1308 skip:
1309                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1310         }
1311 }
1312
/* Return true when @mtu is at least 68, the smallest MTU accepted here. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1317
1318 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1319                                         struct in_device *in_dev)
1320
1321 {
1322         struct in_ifaddr *ifa;
1323
1324         for (ifa = in_dev->ifa_list; ifa;
1325              ifa = ifa->ifa_next) {
1326                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1327                          ifa->ifa_local, dev,
1328                          ifa->ifa_local, NULL,
1329                          dev->dev_addr, NULL);
1330         }
1331 }
1332
1333 /* Called only under RTNL semaphore */
1334
1335 static int inetdev_event(struct notifier_block *this, unsigned long event,
1336                          void *ptr)
1337 {
1338         struct net_device *dev = ptr;
1339         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1340
1341         ASSERT_RTNL();
1342
1343         if (!in_dev) {
1344                 if (event == NETDEV_REGISTER) {
1345                         in_dev = inetdev_init(dev);
1346                         if (!in_dev)
1347                                 return notifier_from_errno(-ENOMEM);
1348                         if (dev->flags & IFF_LOOPBACK) {
1349                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1350                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1351                         }
1352                 } else if (event == NETDEV_CHANGEMTU) {
1353                         /* Re-enabling IP */
1354                         if (inetdev_valid_mtu(dev->mtu))
1355                                 in_dev = inetdev_init(dev);
1356                 }
1357                 goto out;
1358         }
1359
1360         switch (event) {
1361         case NETDEV_REGISTER:
1362                 pr_debug("%s: bug\n", __func__);
1363                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1364                 break;
1365         case NETDEV_UP:
1366                 if (!inetdev_valid_mtu(dev->mtu))
1367                         break;
1368                 if (dev->flags & IFF_LOOPBACK) {
1369                         struct in_ifaddr *ifa = inet_alloc_ifa();
1370
1371                         if (ifa) {
1372                                 INIT_HLIST_NODE(&ifa->hash);
1373                                 ifa->ifa_local =
1374                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1375                                 ifa->ifa_prefixlen = 8;
1376                                 ifa->ifa_mask = inet_make_mask(8);
1377                                 in_dev_hold(in_dev);
1378                                 ifa->ifa_dev = in_dev;
1379                                 ifa->ifa_scope = RT_SCOPE_HOST;
1380                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1381                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1382                                                  INFINITY_LIFE_TIME);
1383                                 inet_insert_ifa(ifa);
1384                         }
1385                 }
1386                 ip_mc_up(in_dev);
1387                 /* fall through */
1388         case NETDEV_CHANGEADDR:
1389                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1390                         break;
1391                 /* fall through */
1392         case NETDEV_NOTIFY_PEERS:
1393                 /* Send gratuitous ARP to notify of link change */
1394                 inetdev_send_gratuitous_arp(dev, in_dev);
1395                 break;
1396         case NETDEV_DOWN:
1397                 ip_mc_down(in_dev);
1398                 break;
1399         case NETDEV_PRE_TYPE_CHANGE:
1400                 ip_mc_unmap(in_dev);
1401                 break;
1402         case NETDEV_POST_TYPE_CHANGE:
1403                 ip_mc_remap(in_dev);
1404                 break;
1405         case NETDEV_CHANGEMTU:
1406                 if (inetdev_valid_mtu(dev->mtu))
1407                         break;
1408                 /* disable IP when MTU is not enough */
1409         case NETDEV_UNREGISTER:
1410                 inetdev_destroy(in_dev);
1411                 break;
1412         case NETDEV_CHANGENAME:
1413                 /* Do not notify about label change, this event is
1414                  * not interesting to applications using netlink.
1415                  */
1416                 inetdev_changename(dev, in_dev);
1417
1418                 devinet_sysctl_unregister(in_dev);
1419                 devinet_sysctl_register(in_dev);
1420                 break;
1421         }
1422 out:
1423         return NOTIFY_DONE;
1424 }
1425
1426 static struct notifier_block ip_netdev_notifier = {
1427         .notifier_call = inetdev_event,
1428 };
1429
1430 static size_t inet_nlmsg_size(void)
1431 {
1432         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1433                + nla_total_size(4) /* IFA_ADDRESS */
1434                + nla_total_size(4) /* IFA_LOCAL */
1435                + nla_total_size(4) /* IFA_BROADCAST */
1436                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1437 }
1438
1439 static inline u32 cstamp_delta(unsigned long cstamp)
1440 {
1441         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1442 }
1443
1444 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1445                          unsigned long tstamp, u32 preferred, u32 valid)
1446 {
1447         struct ifa_cacheinfo ci;
1448
1449         ci.cstamp = cstamp_delta(cstamp);
1450         ci.tstamp = cstamp_delta(tstamp);
1451         ci.ifa_prefered = preferred;
1452         ci.ifa_valid = valid;
1453
1454         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1455 }
1456
1457 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1458                             u32 portid, u32 seq, int event, unsigned int flags)
1459 {
1460         struct ifaddrmsg *ifm;
1461         struct nlmsghdr  *nlh;
1462         u32 preferred, valid;
1463
1464         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1465         if (nlh == NULL)
1466                 return -EMSGSIZE;
1467
1468         ifm = nlmsg_data(nlh);
1469         ifm->ifa_family = AF_INET;
1470         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1471         ifm->ifa_flags = ifa->ifa_flags;
1472         ifm->ifa_scope = ifa->ifa_scope;
1473         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1474
1475         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1476                 preferred = ifa->ifa_preferred_lft;
1477                 valid = ifa->ifa_valid_lft;
1478                 if (preferred != INFINITY_LIFE_TIME) {
1479                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1480
1481                         if (preferred > tval)
1482                                 preferred -= tval;
1483                         else
1484                                 preferred = 0;
1485                         if (valid != INFINITY_LIFE_TIME) {
1486                                 if (valid > tval)
1487                                         valid -= tval;
1488                                 else
1489                                         valid = 0;
1490                         }
1491                 }
1492         } else {
1493                 preferred = INFINITY_LIFE_TIME;
1494                 valid = INFINITY_LIFE_TIME;
1495         }
1496         if ((ifa->ifa_address &&
1497              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1498             (ifa->ifa_local &&
1499              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1500             (ifa->ifa_broadcast &&
1501              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1502             (ifa->ifa_label[0] &&
1503              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1504             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1505                           preferred, valid))
1506                 goto nla_put_failure;
1507
1508         return nlmsg_end(skb, nlh);
1509
1510 nla_put_failure:
1511         nlmsg_cancel(skb, nlh);
1512         return -EMSGSIZE;
1513 }
1514
1515 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1516 {
1517         struct net *net = sock_net(skb->sk);
1518         int h, s_h;
1519         int idx, s_idx;
1520         int ip_idx, s_ip_idx;
1521         struct net_device *dev;
1522         struct in_device *in_dev;
1523         struct in_ifaddr *ifa;
1524         struct hlist_head *head;
1525
1526         s_h = cb->args[0];
1527         s_idx = idx = cb->args[1];
1528         s_ip_idx = ip_idx = cb->args[2];
1529
1530         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1531                 idx = 0;
1532                 head = &net->dev_index_head[h];
1533                 rcu_read_lock();
1534                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1535                           net->dev_base_seq;
1536                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1537                         if (idx < s_idx)
1538                                 goto cont;
1539                         if (h > s_h || idx > s_idx)
1540                                 s_ip_idx = 0;
1541                         in_dev = __in_dev_get_rcu(dev);
1542                         if (!in_dev)
1543                                 goto cont;
1544
1545                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1546                              ifa = ifa->ifa_next, ip_idx++) {
1547                                 if (ip_idx < s_ip_idx)
1548                                         continue;
1549                                 if (inet_fill_ifaddr(skb, ifa,
1550                                              NETLINK_CB(cb->skb).portid,
1551                                              cb->nlh->nlmsg_seq,
1552                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1553                                         rcu_read_unlock();
1554                                         goto done;
1555                                 }
1556                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1557                         }
1558 cont:
1559                         idx++;
1560                 }
1561                 rcu_read_unlock();
1562         }
1563
1564 done:
1565         cb->args[0] = h;
1566         cb->args[1] = idx;
1567         cb->args[2] = ip_idx;
1568
1569         return skb->len;
1570 }
1571
1572 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1573                       u32 portid)
1574 {
1575         struct sk_buff *skb;
1576         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1577         int err = -ENOBUFS;
1578         struct net *net;
1579
1580         net = dev_net(ifa->ifa_dev->dev);
1581         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1582         if (skb == NULL)
1583                 goto errout;
1584
1585         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1586         if (err < 0) {
1587                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1588                 WARN_ON(err == -EMSGSIZE);
1589                 kfree_skb(skb);
1590                 goto errout;
1591         }
1592         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1593         return;
1594 errout:
1595         if (err < 0)
1596                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1597 }
1598
1599 static size_t inet_get_link_af_size(const struct net_device *dev)
1600 {
1601         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1602
1603         if (!in_dev)
1604                 return 0;
1605
1606         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1607 }
1608
1609 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1610 {
1611         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1612         struct nlattr *nla;
1613         int i;
1614
1615         if (!in_dev)
1616                 return -ENODATA;
1617
1618         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1619         if (nla == NULL)
1620                 return -EMSGSIZE;
1621
1622         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1623                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1624
1625         return 0;
1626 }
1627
1628 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1629         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1630 };
1631
1632 static int inet_validate_link_af(const struct net_device *dev,
1633                                  const struct nlattr *nla)
1634 {
1635         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1636         int err, rem;
1637
1638         if (dev && !__in_dev_get_rtnl(dev))
1639                 return -EAFNOSUPPORT;
1640
1641         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1642         if (err < 0)
1643                 return err;
1644
1645         if (tb[IFLA_INET_CONF]) {
1646                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1647                         int cfgid = nla_type(a);
1648
1649                         if (nla_len(a) < 4)
1650                                 return -EINVAL;
1651
1652                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1653                                 return -EINVAL;
1654                 }
1655         }
1656
1657         return 0;
1658 }
1659
1660 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1661 {
1662         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1663         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1664         int rem;
1665
1666         if (!in_dev)
1667                 return -EAFNOSUPPORT;
1668
1669         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1670                 BUG();
1671
1672         if (tb[IFLA_INET_CONF]) {
1673                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1674                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1675         }
1676
1677         return 0;
1678 }
1679
1680 static int inet_netconf_msgsize_devconf(int type)
1681 {
1682         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1683                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1684
1685         /* type -1 is used for ALL */
1686         if (type == -1 || type == NETCONFA_FORWARDING)
1687                 size += nla_total_size(4);
1688         if (type == -1 || type == NETCONFA_RP_FILTER)
1689                 size += nla_total_size(4);
1690         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1691                 size += nla_total_size(4);
1692
1693         return size;
1694 }
1695
1696 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1697                                      struct ipv4_devconf *devconf, u32 portid,
1698                                      u32 seq, int event, unsigned int flags,
1699                                      int type)
1700 {
1701         struct nlmsghdr  *nlh;
1702         struct netconfmsg *ncm;
1703
1704         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1705                         flags);
1706         if (nlh == NULL)
1707                 return -EMSGSIZE;
1708
1709         ncm = nlmsg_data(nlh);
1710         ncm->ncm_family = AF_INET;
1711
1712         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1713                 goto nla_put_failure;
1714
1715         /* type -1 is used for ALL */
1716         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1717             nla_put_s32(skb, NETCONFA_FORWARDING,
1718                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1719                 goto nla_put_failure;
1720         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1721             nla_put_s32(skb, NETCONFA_RP_FILTER,
1722                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1723                 goto nla_put_failure;
1724         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1725             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1726                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1727                 goto nla_put_failure;
1728
1729         return nlmsg_end(skb, nlh);
1730
1731 nla_put_failure:
1732         nlmsg_cancel(skb, nlh);
1733         return -EMSGSIZE;
1734 }
1735
1736 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1737                                  struct ipv4_devconf *devconf)
1738 {
1739         struct sk_buff *skb;
1740         int err = -ENOBUFS;
1741
1742         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1743         if (skb == NULL)
1744                 goto errout;
1745
1746         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1747                                         RTM_NEWNETCONF, 0, type);
1748         if (err < 0) {
1749                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1750                 WARN_ON(err == -EMSGSIZE);
1751                 kfree_skb(skb);
1752                 goto errout;
1753         }
1754         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1755         return;
1756 errout:
1757         if (err < 0)
1758                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1759 }
1760
1761 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1762         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1763         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1764         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1765 };
1766
1767 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1768                                     struct nlmsghdr *nlh)
1769 {
1770         struct net *net = sock_net(in_skb->sk);
1771         struct nlattr *tb[NETCONFA_MAX+1];
1772         struct netconfmsg *ncm;
1773         struct sk_buff *skb;
1774         struct ipv4_devconf *devconf;
1775         struct in_device *in_dev;
1776         struct net_device *dev;
1777         int ifindex;
1778         int err;
1779
1780         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1781                           devconf_ipv4_policy);
1782         if (err < 0)
1783                 goto errout;
1784
1785         err = EINVAL;
1786         if (!tb[NETCONFA_IFINDEX])
1787                 goto errout;
1788
1789         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1790         switch (ifindex) {
1791         case NETCONFA_IFINDEX_ALL:
1792                 devconf = net->ipv4.devconf_all;
1793                 break;
1794         case NETCONFA_IFINDEX_DEFAULT:
1795                 devconf = net->ipv4.devconf_dflt;
1796                 break;
1797         default:
1798                 dev = __dev_get_by_index(net, ifindex);
1799                 if (dev == NULL)
1800                         goto errout;
1801                 in_dev = __in_dev_get_rtnl(dev);
1802                 if (in_dev == NULL)
1803                         goto errout;
1804                 devconf = &in_dev->cnf;
1805                 break;
1806         }
1807
1808         err = -ENOBUFS;
1809         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1810         if (skb == NULL)
1811                 goto errout;
1812
1813         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1814                                         NETLINK_CB(in_skb).portid,
1815                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1816                                         -1);
1817         if (err < 0) {
1818                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1819                 WARN_ON(err == -EMSGSIZE);
1820                 kfree_skb(skb);
1821                 goto errout;
1822         }
1823         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1824 errout:
1825         return err;
1826 }
1827
1828 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1829                                      struct netlink_callback *cb)
1830 {
1831         struct net *net = sock_net(skb->sk);
1832         int h, s_h;
1833         int idx, s_idx;
1834         struct net_device *dev;
1835         struct in_device *in_dev;
1836         struct hlist_head *head;
1837
1838         s_h = cb->args[0];
1839         s_idx = idx = cb->args[1];
1840
1841         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1842                 idx = 0;
1843                 head = &net->dev_index_head[h];
1844                 rcu_read_lock();
1845                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1846                           net->dev_base_seq;
1847                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1848                         if (idx < s_idx)
1849                                 goto cont;
1850                         in_dev = __in_dev_get_rcu(dev);
1851                         if (!in_dev)
1852                                 goto cont;
1853
1854                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1855                                                       &in_dev->cnf,
1856                                                       NETLINK_CB(cb->skb).portid,
1857                                                       cb->nlh->nlmsg_seq,
1858                                                       RTM_NEWNETCONF,
1859                                                       NLM_F_MULTI,
1860                                                       -1) <= 0) {
1861                                 rcu_read_unlock();
1862                                 goto done;
1863                         }
1864                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1865 cont:
1866                         idx++;
1867                 }
1868                 rcu_read_unlock();
1869         }
1870         if (h == NETDEV_HASHENTRIES) {
1871                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1872                                               net->ipv4.devconf_all,
1873                                               NETLINK_CB(cb->skb).portid,
1874                                               cb->nlh->nlmsg_seq,
1875                                               RTM_NEWNETCONF, NLM_F_MULTI,
1876                                               -1) <= 0)
1877                         goto done;
1878                 else
1879                         h++;
1880         }
1881         if (h == NETDEV_HASHENTRIES + 1) {
1882                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1883                                               net->ipv4.devconf_dflt,
1884                                               NETLINK_CB(cb->skb).portid,
1885                                               cb->nlh->nlmsg_seq,
1886                                               RTM_NEWNETCONF, NLM_F_MULTI,
1887                                               -1) <= 0)
1888                         goto done;
1889                 else
1890                         h++;
1891         }
1892 done:
1893         cb->args[0] = h;
1894         cb->args[1] = idx;
1895
1896         return skb->len;
1897 }
1898
1899 #ifdef CONFIG_SYSCTL
1900
1901 static void devinet_copy_dflt_conf(struct net *net, int i)
1902 {
1903         struct net_device *dev;
1904
1905         rcu_read_lock();
1906         for_each_netdev_rcu(net, dev) {
1907                 struct in_device *in_dev;
1908
1909                 in_dev = __in_dev_get_rcu(dev);
1910                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1911                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1912         }
1913         rcu_read_unlock();
1914 }
1915
1916 /* called with RTNL locked */
1917 static void inet_forward_change(struct net *net)
1918 {
1919         struct net_device *dev;
1920         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1921
1922         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1923         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1924         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1925                                     NETCONFA_IFINDEX_ALL,
1926                                     net->ipv4.devconf_all);
1927         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1928                                     NETCONFA_IFINDEX_DEFAULT,
1929                                     net->ipv4.devconf_dflt);
1930
1931         for_each_netdev(net, dev) {
1932                 struct in_device *in_dev;
1933                 if (on)
1934                         dev_disable_lro(dev);
1935                 rcu_read_lock();
1936                 in_dev = __in_dev_get_rcu(dev);
1937                 if (in_dev) {
1938                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1939                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1940                                                     dev->ifindex, &in_dev->cnf);
1941                 }
1942                 rcu_read_unlock();
1943         }
1944 }
1945
1946 static int devinet_conf_proc(ctl_table *ctl, int write,
1947                              void __user *buffer,
1948                              size_t *lenp, loff_t *ppos)
1949 {
1950         int old_value = *(int *)ctl->data;
1951         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1952         int new_value = *(int *)ctl->data;
1953
1954         if (write) {
1955                 struct ipv4_devconf *cnf = ctl->extra1;
1956                 struct net *net = ctl->extra2;
1957                 int i = (int *)ctl->data - cnf->data;
1958
1959                 set_bit(i, cnf->state);
1960
1961                 if (cnf == net->ipv4.devconf_dflt)
1962                         devinet_copy_dflt_conf(net, i);
1963                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1964                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1965                         if ((new_value == 0) && (old_value != 0))
1966                                 rt_cache_flush(net);
1967                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1968                     new_value != old_value) {
1969                         int ifindex;
1970
1971                         if (cnf == net->ipv4.devconf_dflt)
1972                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1973                         else if (cnf == net->ipv4.devconf_all)
1974                                 ifindex = NETCONFA_IFINDEX_ALL;
1975                         else {
1976                                 struct in_device *idev =
1977                                         container_of(cnf, struct in_device,
1978                                                      cnf);
1979                                 ifindex = idev->dev->ifindex;
1980                         }
1981                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1982                                                     ifindex, cnf);
1983                 }
1984         }
1985
1986         return ret;
1987 }
1988
1989 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1990                                   void __user *buffer,
1991                                   size_t *lenp, loff_t *ppos)
1992 {
1993         int *valp = ctl->data;
1994         int val = *valp;
1995         loff_t pos = *ppos;
1996         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1997
1998         if (write && *valp != val) {
1999                 struct net *net = ctl->extra2;
2000
2001                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2002                         if (!rtnl_trylock()) {
2003                                 /* Restore the original values before restarting */
2004                                 *valp = val;
2005                                 *ppos = pos;
2006                                 return restart_syscall();
2007                         }
2008                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2009                                 inet_forward_change(net);
2010                         } else {
2011                                 struct ipv4_devconf *cnf = ctl->extra1;
2012                                 struct in_device *idev =
2013                                         container_of(cnf, struct in_device, cnf);
2014                                 if (*valp)
2015                                         dev_disable_lro(idev->dev);
2016                                 inet_netconf_notify_devconf(net,
2017                                                             NETCONFA_FORWARDING,
2018                                                             idev->dev->ifindex,
2019                                                             cnf);
2020                         }
2021                         rtnl_unlock();
2022                         rt_cache_flush(net);
2023                 } else
2024                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2025                                                     NETCONFA_IFINDEX_DEFAULT,
2026                                                     net->ipv4.devconf_dflt);
2027         }
2028
2029         return ret;
2030 }
2031
2032 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2033                                 void __user *buffer,
2034                                 size_t *lenp, loff_t *ppos)
2035 {
2036         int *valp = ctl->data;
2037         int val = *valp;
2038         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2039         struct net *net = ctl->extra2;
2040
2041         if (write && *valp != val)
2042                 rt_cache_flush(net);
2043
2044         return ret;
2045 }
2046
/*
 * Build one ctl_table initializer for devconf slot IPV4_DEVCONF_<id>.
 * The entry points into the global ipv4_devconf template:
 *   fname   - procname of the entry
 *   perm    - file mode
 *   handler - proc_handler to install
 */
#define DEVINET_SYSCTL_ENTRY(id, fname, perm, handler) \
	{ \
		.procname	= fname, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## id - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(id, fname, handler) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, handler)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(id, fname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(id, fname, ipv4_doint_and_flush)
2069
2070 static struct devinet_sysctl_table {
2071         struct ctl_table_header *sysctl_header;
2072         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2073 } devinet_sysctl = {
2074         .devinet_vars = {
2075                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2076                                              devinet_sysctl_forward),
2077                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2078
2079                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2080                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2081                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2082                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2083                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2084                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2085                                         "accept_source_route"),
2086                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2087                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2088                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2089                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2090                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2091                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2092                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2093                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2094                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2095                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2096                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2097                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2098                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2099
2100                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2101                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2102                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2103                                               "force_igmp_version"),
2104                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2105                                               "promote_secondaries"),
2106                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2107                                               "route_localnet"),
2108         },
2109 };
2110
2111 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2112                                         struct ipv4_devconf *p)
2113 {
2114         int i;
2115         struct devinet_sysctl_table *t;
2116         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2117
2118         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2119         if (!t)
2120                 goto out;
2121
2122         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2123                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2124                 t->devinet_vars[i].extra1 = p;
2125                 t->devinet_vars[i].extra2 = net;
2126         }
2127
2128         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2129
2130         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2131         if (!t->sysctl_header)
2132                 goto free;
2133
2134         p->sysctl = t;
2135         return 0;
2136
2137 free:
2138         kfree(t);
2139 out:
2140         return -ENOBUFS;
2141 }
2142
2143 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2144 {
2145         struct devinet_sysctl_table *t = cnf->sysctl;
2146
2147         if (t == NULL)
2148                 return;
2149
2150         cnf->sysctl = NULL;
2151         unregister_net_sysctl_table(t->sysctl_header);
2152         kfree(t);
2153 }
2154
2155 static void devinet_sysctl_register(struct in_device *idev)
2156 {
2157         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2158         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2159                                         &idev->cnf);
2160 }
2161
2162 static void devinet_sysctl_unregister(struct in_device *idev)
2163 {
2164         __devinet_sysctl_unregister(&idev->cnf);
2165         neigh_sysctl_unregister(idev->arp_parms);
2166 }
2167
2168 static struct ctl_table ctl_forward_entry[] = {
2169         {
2170                 .procname       = "ip_forward",
2171                 .data           = &ipv4_devconf.data[
2172                                         IPV4_DEVCONF_FORWARDING - 1],
2173                 .maxlen         = sizeof(int),
2174                 .mode           = 0644,
2175                 .proc_handler   = devinet_sysctl_forward,
2176                 .extra1         = &ipv4_devconf,
2177                 .extra2         = &init_net,
2178         },
2179         { },
2180 };
2181 #endif
2182
2183 static __net_init int devinet_init_net(struct net *net)
2184 {
2185         int err;
2186         struct ipv4_devconf *all, *dflt;
2187 #ifdef CONFIG_SYSCTL
2188         struct ctl_table *tbl = ctl_forward_entry;
2189         struct ctl_table_header *forw_hdr;
2190 #endif
2191
2192         err = -ENOMEM;
2193         all = &ipv4_devconf;
2194         dflt = &ipv4_devconf_dflt;
2195
2196         if (!net_eq(net, &init_net)) {
2197                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2198                 if (all == NULL)
2199                         goto err_alloc_all;
2200
2201                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2202                 if (dflt == NULL)
2203                         goto err_alloc_dflt;
2204
2205 #ifdef CONFIG_SYSCTL
2206                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2207                 if (tbl == NULL)
2208                         goto err_alloc_ctl;
2209
2210                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2211                 tbl[0].extra1 = all;
2212                 tbl[0].extra2 = net;
2213 #endif
2214         }
2215
2216 #ifdef CONFIG_SYSCTL
2217         err = __devinet_sysctl_register(net, "all", all);
2218         if (err < 0)
2219                 goto err_reg_all;
2220
2221         err = __devinet_sysctl_register(net, "default", dflt);
2222         if (err < 0)
2223                 goto err_reg_dflt;
2224
2225         err = -ENOMEM;
2226         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2227         if (forw_hdr == NULL)
2228                 goto err_reg_ctl;
2229         net->ipv4.forw_hdr = forw_hdr;
2230 #endif
2231
2232         net->ipv4.devconf_all = all;
2233         net->ipv4.devconf_dflt = dflt;
2234         return 0;
2235
2236 #ifdef CONFIG_SYSCTL
2237 err_reg_ctl:
2238         __devinet_sysctl_unregister(dflt);
2239 err_reg_dflt:
2240         __devinet_sysctl_unregister(all);
2241 err_reg_all:
2242         if (tbl != ctl_forward_entry)
2243                 kfree(tbl);
2244 err_alloc_ctl:
2245 #endif
2246         if (dflt != &ipv4_devconf_dflt)
2247                 kfree(dflt);
2248 err_alloc_dflt:
2249         if (all != &ipv4_devconf)
2250                 kfree(all);
2251 err_alloc_all:
2252         return err;
2253 }
2254
2255 static __net_exit void devinet_exit_net(struct net *net)
2256 {
2257 #ifdef CONFIG_SYSCTL
2258         struct ctl_table *tbl;
2259
2260         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2261         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2262         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2263         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2264         kfree(tbl);
2265 #endif
2266         kfree(net->ipv4.devconf_dflt);
2267         kfree(net->ipv4.devconf_all);
2268 }
2269
2270 static __net_initdata struct pernet_operations devinet_ops = {
2271         .init = devinet_init_net,
2272         .exit = devinet_exit_net,
2273 };
2274
2275 static struct rtnl_af_ops inet_af_ops = {
2276         .family           = AF_INET,
2277         .fill_link_af     = inet_fill_link_af,
2278         .get_link_af_size = inet_get_link_af_size,
2279         .validate_link_af = inet_validate_link_af,
2280         .set_link_af      = inet_set_link_af,
2281 };
2282
2283 void __init devinet_init(void)
2284 {
2285         int i;
2286
2287         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2288                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2289
2290         register_pernet_subsys(&devinet_ops);
2291
2292         register_gifconf(PF_INET, inet_gifconf);
2293         register_netdevice_notifier(&ip_netdev_notifier);
2294
2295         schedule_delayed_work(&check_lifetime_work, 0);
2296
2297         rtnl_af_register(&inet_af_ops);
2298
2299         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2300         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2301         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2302         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2303                       inet_netconf_dump_devconf, NULL);
2304 }
2305