Merge branch android-common-3.10
[firefly-linux-kernel-4.4.55.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
42 #define DEBUG
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...)              \
45 do {                                            \
46         if (level <= NEIGH_DEBUG)               \
47                 pr_debug(fmt, ##__VA_ARGS__);   \
48 } while (0)
49
50 #define PNEIGH_HASHMASK         0xF
51
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61
62 /*
63    Neighbour hash table buckets are protected with rwlock tbl->lock.
64
65    - All the scans/updates to hash buckets MUST be made under this lock.
66    - NOTHING clever should be made under this lock: no callbacks
67      to protocol backends, no attempts to send something to network.
68      It will result in deadlocks, if backend/driver wants to use neighbour
69      cache.
70    - If the entry requires some non-trivial actions, increase
71      its reference count and release table lock.
72
73    Neighbour entries are protected:
74    - with reference count.
75    - with rwlock neigh->lock
76
77    Reference count prevents destruction.
78
79    neigh->lock mainly serializes ll address data and its validity state.
80    However, the same lock is used to protect another entry fields:
81     - timer
82     - resolution queue
83
84    Again, nothing clever shall be made under neigh->lock,
85    the most complicated procedure, which we allow is dev->hard_header.
86    It is supposed, that dev->hard_header is simplistic and does
87    not make callbacks to neighbour tables.
88
89    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
90    list of neighbour tables. This list is used only in process context,
91  */
92
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94
/* Output handler installed on dead or unusable entries: drop the skb
 * and report the link as down to the caller.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
100
/* Final teardown of an entry already unlinked from the hash: run the
 * optional per-protocol cleanup hook, notify userspace with
 * RTM_DELNEIGH, then drop the table's reference (which may free it).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
109
110 /*
111  * It is random distribution in the interval (1/2)*base...(3/2)*base.
112  * It corresponds to default IPv6 settings and is not overridable,
113  * because it is really reasonable choice.
114  */
115
/* Pick a random reachable time uniformly in (base/2, 3*base/2), per the
 * comment above (default IPv6 behaviour).  A base of 0 yields 0.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half = base >> 1;

	if (!base)
		return 0;
	return half + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
/* Synchronously shrink the cache under table pressure: walk every
 * bucket and unlink entries that are unreferenced (refcnt == 1, i.e.
 * only the table holds them) and not NUD_PERMANENT.  Runs with
 * tbl->lock held for writing; returns 1 if anything was reclaimed.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink first (RCU-safe), then mark dead
				 * before releasing the entry lock.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Record the flush time so neigh_alloc() can rate-limit how often
	 * forced GC is attempted.
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
168
/* Arm the entry's state-machine timer for @when, taking a reference
 * that the timer path later releases.  mod_timer() returning nonzero
 * means a timer was already pending, which must never happen here -
 * report it loudly instead of silently leaking the reference.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
/* Unlink every entry belonging to @dev (or all entries when @dev is
 * NULL) from the hash table.  Entries still referenced elsewhere are
 * neutralised in place: queue purged, output forced to the blackhole
 * handler, state downgraded - they are freed when the last user drops
 * its reference.  Caller must hold tbl->lock for writing.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink from the chain before taking n->lock. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
248
/* Flush all cached entries for @dev (e.g. after its link-layer address
 * changed), taking the table write lock around the flush.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
256
/* Device is going down: flush its neighbour and proxy entries, then
 * stop the proxy timer and drop any queued proxy packets.  Always
 * returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Timer sync and queue purge happen outside tbl->lock. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
269
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * May run a synchronous forced GC when the table crosses its
 * gc_thresh2/gc_thresh3 watermarks; returns NULL if the table stays
 * full or the allocation fails.  The entry is created with dead = 1
 * and only becomes live once __neigh_create() links it into the hash.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		/* GC freed nothing and we are at the hard limit:
		 * refuse the allocation.
		 */
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/* entry_size covers the protocol part; neigh_priv_len is extra
	 * room the device requested for its private data.
	 */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not yet visible in the hash table */
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
310
311 static void neigh_get_hash_rnd(u32 *x)
312 {
313         get_random_bytes(x, sizeof(*x));
314         *x |= 1;
315 }
316
/* Allocate a bucket table of 2^shift RCU-protected chain heads plus
 * fresh hash randomisation.  Small bucket arrays come from kmalloc,
 * larger ones straight from the page allocator (freed symmetrically in
 * neigh_hash_free_rcu()).  Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
343
344 static void neigh_hash_free_rcu(struct rcu_head *head)
345 {
346         struct neigh_hash_table *nht = container_of(head,
347                                                     struct neigh_hash_table,
348                                                     rcu);
349         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350         struct neighbour __rcu **buckets = nht->hash_buckets;
351
352         if (size <= PAGE_SIZE)
353                 kfree(buckets);
354         else
355                 free_pages((unsigned long)buckets, get_order(size));
356         kfree(nht);
357 }
358
/* Replace the hash table with one of 2^new_shift buckets, rehashing
 * every entry into the new table, then publish it via RCU and schedule
 * the old table for RCU-deferred freeing.  On allocation failure the
 * old table is returned unchanged.  Caller holds tbl->lock for writing.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Rehash with the new table's random seeds; the
			 * top hash_shift bits select the bucket.
			 */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
399
/* RCU lookup of the entry matching (@pkey, @dev) in @tbl.  On a hit a
 * reference is taken; if the entry's refcount already dropped to zero
 * (it is being destroyed) NULL is returned instead.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			/* atomic_inc_not_zero() fails only for an entry
			 * concurrently going away.
			 */
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
429
/* Like neigh_lookup() but matches by (@pkey, @net) only, ignoring the
 * device (the key is hashed with dev == NULL).  Takes a reference on a
 * hit; returns NULL on miss or if the entry is concurrently dying.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
460
/* Create (or find) the entry for (@pkey, @dev) in @tbl.  A new entry
 * is allocated and run through the protocol constructor, the device's
 * ndo_neigh_construct and the per-parms neigh_setup hook before being
 * linked into the hash under tbl->lock.  If a concurrent creator wins
 * the race, the existing entry is returned instead and the new one is
 * released.  When @want_ref is true the returned entry carries an
 * extra reference.  Returns an ERR_PTR on failure.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Start "confirmed" far enough in the past that the entry is
	 * immediately considered stale.
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Grow the hash when entries outnumber buckets. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* The parms block went away while we were constructing. */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-check for a concurrent creator now that we hold the lock. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576                 struct net *net, const void *pkey, struct net_device *dev)
577 {
578         int key_len = tbl->key_len;
579         u32 hash_val = pneigh_hash(pkey, key_len);
580
581         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582                                  net, pkey, key_len, dev);
583 }
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
/* Look up a proxy entry for (@net, @pkey, @dev); when @creat is nonzero
 * and nothing is found, allocate one (GFP_KERNEL, so process context
 * with the RTNL held), run the protocol's pconstructor, and insert it.
 * Returns NULL on miss (with !creat) or on any failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	/* Creation path relies on the RTNL to serialise against
	 * concurrent creators (the hash lock was dropped above).
	 */
	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the refs we took above. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
631
632
/* Remove and free the proxy entry exactly matching (@net, @pkey, @dev).
 * Unlike lookup, a wildcard (NULL-dev) entry does NOT match here - the
 * device pointers must be equal.  Returns 0 on success, -ENOENT if no
 * such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			/* Drop the lock before calling out to the
			 * destructor and releasing references.
			 */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
659
/* Remove every proxy entry bound to @dev (all entries when @dev is
 * NULL).  Caller holds tbl->lock for writing (see neigh_ifdown()).
 * NOTE: unconditionally returns -ENOENT; the sole caller ignores the
 * return value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
/* Drop one reference on @parms; destroy it when the count reaches 0. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
691
692 /*
693  *      neighbour must already be out of the table;
694  *
695  */
/* Free a neighbour entry whose last reference has been dropped.  The
 * entry must already be unlinked (dead != 0) - destroying a live entry
 * is reported and skipped.  Purges queues, runs the device's destroy
 * hook, releases the device and parms references, and frees the entry
 * after an RCU grace period.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A pending timer here would mean a reference we never saw. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
728
729 /* Neighbour state is suspicious;
730    disable fast path.
731
732    Called with write_locked neigh.
733  */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* Route packets through the resolving slow path again. */
	neigh->output = neigh->ops->output;
}
740
741 /* Neighbour state is OK;
742    enable fast path.
743
744    Called with write_locked neigh.
745  */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* Switch to the fast path for entries with a valid address. */
	neigh->output = neigh->ops->connected_output;
}
752
/* Periodic garbage collector (delayed work): re-randomises
 * reachable_time every 300s, and - when the table exceeds gc_thresh1 -
 * evicts unreferenced entries that are failed or idle past
 * gc_staletime.  Re-arms itself every base_reachable_time/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	/* Below the low watermark there is nothing worth reclaiming. */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			/* Permanent entries and entries with an armed
			 * timer are never reclaimed here.
			 */
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			/* Only the table holds a reference and the entry
			 * is failed or idle: unlink and release it.
			 */
			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}
833
834 static __inline__ int neigh_max_probes(struct neighbour *n)
835 {
836         struct neigh_parms *p = n->parms;
837         return (n->nud_state & NUD_PROBE) ?
838                 p->ucast_probes :
839                 p->ucast_probes + p->app_probes + p->mcast_probes;
840 }
841
/* Entry has transitioned to NUD_FAILED: report unreachability for each
 * queued skb via the protocol's error_report callback (temporarily
 * dropping neigh->lock around each call, per the sparse annotations),
 * then discard whatever remains in the queue.  Called with
 * neigh->lock held for writing.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
866
/* Emit one solicitation for this entry and count it against
 * neigh_max_probes().  A copy of the head-of-queue skb is handed to
 * ops->solicit() (which may receive NULL if the queue is empty or the
 * copy fails) so the queued original survives a queue flush.
 *
 * Called with neigh->lock write-held; the lock is released here before
 * calling into the protocol's solicit routine.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
879
/* Called when a timer expires for a neighbour entry.
 *
 * Advances the NUD state machine (REACHABLE -> DELAY -> STALE,
 * DELAY -> REACHABLE/PROBE, and PROBE/INCOMPLETE -> FAILED once the
 * probe budget is exhausted) and re-arms the timer while the entry
 * stays in a timed state.  Runs with a reference held on the entry,
 * released at the end.
 */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Entry left the timed states while this handler was pending. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Confirmed recently enough: stay REACHABLE. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Unconfirmed but recently used: wait a little
			 * for an implicit confirmation before probing.
			 */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived while we were delaying. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget spent without an answer: declare failure and
	 * error out anything still queued on the entry.
	 */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/2 from now. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* drops neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
964
/* Slow path of neigh_event_send(): decide whether @skb can go out now.
 *
 * Returns 0 when the entry is usable and the caller may transmit
 * immediately; returns 1 when the packet was consumed here — either
 * queued on arp_queue pending resolution, or freed because the entry
 * went straight to NUD_FAILED (no probes configured).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		/* Entry has no usable state: start resolution if any
		 * multicast/app probes are configured, else fail fast.
		 */
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale entry being used again: let the caller transmit
		 * (rc stays 0) and start the delay-probe timer.
		 */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Make room within the per-entry byte budget by
			 * dropping the oldest queued packets.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases neigh->lock itself; BH was disabled by
	 * write_lock_bh() above and is re-enabled here on both paths.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1030
1031 static void neigh_update_hhs(struct neighbour *neigh)
1032 {
1033         struct hh_cache *hh;
1034         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1035                 = NULL;
1036
1037         if (neigh->dev->header_ops)
1038                 update = neigh->dev->header_ops->cache_update;
1039
1040         if (update) {
1041                 hh = &neigh->hh;
1042                 if (hh->hh_len) {
1043                         write_seqlock_bh(&hh->hh_lock);
1044                         update(hh, neigh->dev, neigh->ha);
1045                         write_sequnlock_bh(&hh->hh_lock);
1046                 }
1047         }
1048 }
1049
1050
1051
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
				a router.

   The caller MUST hold a reference count on the entry.
 */
1072
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	/* Transition to a non-valid state (NONE/INCOMPLETE/FAILED/...):
	 * stop the timer, mark suspect if we were connected, and flush
	 * queued packets on an INCOMPLETE/PROBE -> FAILED transition.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* From here on, lladdr == neigh->ha means "address unchanged". */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the old address but demote to STALE. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under ha_lock and refresh the
		 * cached hardware header to match.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	/* Entry just became resolvable: flush packets that were queued
	 * while it was unresolved.
	 */
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1235
1236 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1237                                  u8 *lladdr, void *saddr,
1238                                  struct net_device *dev)
1239 {
1240         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1241                                                  lladdr || !dev->addr_len);
1242         if (neigh)
1243                 neigh_update(neigh, lladdr, NUD_STALE,
1244                              NEIGH_UPDATE_F_OVERRIDE);
1245         return neigh;
1246 }
1247 EXPORT_SYMBOL(neigh_event_ns);
1248
1249 /* called with read_lock_bh(&n->lock); */
1250 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1251 {
1252         struct net_device *dev = dst->dev;
1253         __be16 prot = dst->ops->protocol;
1254         struct hh_cache *hh = &n->hh;
1255
1256         write_lock_bh(&n->lock);
1257
1258         /* Only one thread can come in here and initialize the
1259          * hh_cache entry.
1260          */
1261         if (!hh->hh_len)
1262                 dev->header_ops->cache(n, hh, prot);
1263
1264         write_unlock_bh(&n->lock);
1265 }
1266
1267 /* This function can be used in contexts, where only old dev_queue_xmit
1268  * worked, f.e. if you want to override normal output path (eql, shaper),
1269  * but resolution is not made yet.
1270  */
1271
1272 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1273 {
1274         struct net_device *dev = skb->dev;
1275
1276         __skb_pull(skb, skb_network_offset(skb));
1277
1278         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1279                             skb->len) < 0 &&
1280             dev_rebuild_header(skb))
1281                 return 0;
1282
1283         return dev_queue_xmit(skb);
1284 }
1285 EXPORT_SYMBOL(neigh_compat_output);
1286
/* Slow and careful output path, used while the neighbour may not be
 * resolved yet.  neigh_event_send() either allows immediate transmit
 * (returns 0) or consumes the skb — queued for later (it is re-sent
 * from neigh_update() once resolution completes) or dropped on
 * failure — in which case rc stays 0 here.
 */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Build the cached hardware header on first use. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Retry the header build if the link-layer address is
		 * updated concurrently (ha_lock seqlock).
		 */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1327
1328 /* As fast as possible without hh cache */
1329
1330 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1331 {
1332         struct net_device *dev = neigh->dev;
1333         unsigned int seq;
1334         int err;
1335
1336         do {
1337                 __skb_pull(skb, skb_network_offset(skb));
1338                 seq = read_seqbegin(&neigh->ha_lock);
1339                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1340                                       neigh->ha, NULL, skb->len);
1341         } while (read_seqretry(&neigh->ha_lock, seq));
1342
1343         if (err >= 0)
1344                 err = dev_queue_xmit(skb);
1345         else {
1346                 err = -EINVAL;
1347                 kfree_skb(skb);
1348         }
1349         return err;
1350 }
1351 EXPORT_SYMBOL(neigh_connected_output);
1352
/* Output path that needs no resolution and no cached hardware header:
 * hand the packet straight to the queueing layer.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1358
/* Proxy timer handler: walk the table's proxy queue, re-process every
 * skb whose scheduled time has arrived (via tbl->proxy_redo, under
 * RCU), and re-arm the timer for the earliest remaining deadline.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* <= 0 means this skb's deadline has passed. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the ref taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1392
/* Queue @skb for delayed proxy processing, scheduled at a random point
 * within p->proxy_delay from now (random jitter avoids synchronized
 * proxy replies).  The skb is dropped if the queue already exceeds
 * p->proxy_qlen.  A device reference is held until neigh_proxy_process()
 * consumes the skb.
 *
 * NOTE(review): net_random() % p->proxy_delay divides by zero if
 * proxy_delay is configured as 0 — confirm the sysctl path guarantees
 * a non-zero value.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* Keep the earlier deadline if the timer was already pending. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1419
1420 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1421                                                       struct net *net, int ifindex)
1422 {
1423         struct neigh_parms *p;
1424
1425         for (p = &tbl->parms; p; p = p->next) {
1426                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1427                     (!p->dev && !ifindex))
1428                         return p;
1429         }
1430
1431         return NULL;
1432 }
1433
/* Allocate per-device neigh_parms for @dev in @tbl, cloned from the
 * netns default parms, and link them into the table's list.  Returns
 * NULL on allocation failure or if the driver's ndo_neigh_setup()
 * callback rejects the parms.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* ifindex 0 finds the default (device-less) parms to clone. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl            = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			/* Driver veto: undo the refs taken above. */
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next         = tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1471
1472 static void neigh_rcu_free_parms(struct rcu_head *head)
1473 {
1474         struct neigh_parms *parms =
1475                 container_of(head, struct neigh_parms, rcu_head);
1476
1477         neigh_parms_put(parms);
1478 }
1479
/* Unlink @parms from @tbl's list, mark it dead and defer the final put
 * to an RCU callback so concurrent readers of the list can finish.
 * The table's built-in default parms (&tbl->parms) are never released
 * this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			/* Drop the device ref from neigh_parms_alloc(). */
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
1502
/* Final destructor for a neigh_parms block: drop the netns reference
 * taken in neigh_parms_alloc() before freeing the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1508
1509 static struct lock_class_key neigh_table_proxy_queue_class;
1510
/* One-time initialisation of a neighbour table: default parms, percpu
 * statistics, /proc stats entry, neighbour and proxy hash tables,
 * locks, the periodic GC work and the proxy timer/queue.  Panics on
 * allocation failure.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	/* Randomize reachable_time around its base value. */
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial neighbour hash (shift 3). */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	/* Callers may pre-set entry_size; otherwise derive it from the
	 * key length, keeping NEIGH_PRIV_ALIGN alignment either way.
	 */
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
1555
1556 void neigh_table_init(struct neigh_table *tbl)
1557 {
1558         struct neigh_table *tmp;
1559
1560         neigh_table_init_no_netlink(tbl);
1561         write_lock(&neigh_tbl_lock);
1562         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1563                 if (tmp->family == tbl->family)
1564                         break;
1565         }
1566         tbl->next       = neigh_tables;
1567         neigh_tables    = tbl;
1568         write_unlock(&neigh_tbl_lock);
1569
1570         if (unlikely(tmp)) {
1571                 pr_err("Registering multiple tables for family %d\n",
1572                        tbl->family);
1573                 dump_stack();
1574         }
1575 }
1576 EXPORT_SYMBOL(neigh_table_init);
1577
/* Tear down a neighbour table (protocol/module unload): stop the GC
 * work and proxy timer, drop all entries, unlink the table from the
 * global list, and free its hashes, /proc entry and statistics.
 * Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the neighbour hash via RCU; lockless readers may still
	 * be traversing it.
	 */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1613
/* RTM_DELNEIGH handler: delete the neighbour (or proxy) entry described
 * by the netlink message.  Deleting a regular entry is implemented as
 * an administrative neigh_update() to NUD_FAILED.  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the table for this family; drop the list lock
		 * before the per-entry work.  The loop never resumes
		 * after this point — every path below ends at "out".
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1677
/* Handler for RTM_NEWNEIGH: create or update a neighbour (or proxy)
 * entry described by a netlink request.  Runs under the RTNL lock.
 * Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	/* A destination address attribute is mandatory. */
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* Reject link-layer addresses shorter than the device's. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the table for this family; every path below leaves
		 * the loop, so drop the table-list lock here.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			/* last arg 1 = create the proxy entry if missing */
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* Non-proxy entries always need a device (err is -EINVAL). */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE an existing entry may only
			 * be refreshed, never overridden.
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: just kick resolution, don't force state */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1775
1776 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1777 {
1778         struct nlattr *nest;
1779
1780         nest = nla_nest_start(skb, NDTA_PARMS);
1781         if (nest == NULL)
1782                 return -ENOBUFS;
1783
1784         if ((parms->dev &&
1785              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1786             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1787             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1788             /* approximative value for deprecated QUEUE_LEN (in packets) */
1789             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1790                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1791             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1792             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1793             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1794             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1795             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1796             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1797                           parms->base_reachable_time) ||
1798             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1799             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1800                           parms->delay_probe_time) ||
1801             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1802             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1803             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1804             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1805                 goto nla_put_failure;
1806         return nla_nest_end(skb, nest);
1807
1808 nla_put_failure:
1809         nla_nest_cancel(skb, nest);
1810         return -EMSGSIZE;
1811 }
1812
/* Fill a full RTM_NEWNEIGHTBL message for one neigh_table: name, gc
 * settings, runtime configuration, summed per-cpu statistics, and the
 * table's default (device-less) parameter set.
 * Returns the message length on success or -EMSGSIZE on overflow.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold tbl->lock so we serialise a consistent snapshot. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table is RCU-managed (it can be resized
		 * concurrently); sample it under rcu_read_lock_bh.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-cpu counters into one ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The default parameter set must never be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1898
1899 static int neightbl_fill_param_info(struct sk_buff *skb,
1900                                     struct neigh_table *tbl,
1901                                     struct neigh_parms *parms,
1902                                     u32 pid, u32 seq, int type,
1903                                     unsigned int flags)
1904 {
1905         struct ndtmsg *ndtmsg;
1906         struct nlmsghdr *nlh;
1907
1908         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1909         if (nlh == NULL)
1910                 return -EMSGSIZE;
1911
1912         ndtmsg = nlmsg_data(nlh);
1913
1914         read_lock_bh(&tbl->lock);
1915         ndtmsg->ndtm_family = tbl->family;
1916         ndtmsg->ndtm_pad1   = 0;
1917         ndtmsg->ndtm_pad2   = 0;
1918
1919         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1920             neightbl_fill_parms(skb, parms) < 0)
1921                 goto errout;
1922
1923         read_unlock_bh(&tbl->lock);
1924         return nlmsg_end(skb, nlh);
1925 errout:
1926         read_unlock_bh(&tbl->lock);
1927         nlmsg_cancel(skb, nlh);
1928         return -EMSGSIZE;
1929 }
1930
/* Netlink attribute policy for table-level (NDTA_*) attributes of
 * RTM_SETNEIGHTBL requests; NDTA_GC_INTERVAL is u64 milliseconds.
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1939
/* Policy for the NDTPA_* attributes nested inside NDTA_PARMS:
 * counters are u32, all time values are u64 milliseconds.
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1955
/* Handler for RTM_SETNEIGHTBL: update a neighbour table's gc thresholds
 * and interval, and/or one of its parameter sets, from a netlink request.
 * The target table is selected by NDTA_NAME (plus optional family filter).
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	/* The table is identified by name; NDTA_NAME is mandatory. */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parameter set. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every attribute present in the nest. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count; convert to the
				 * internal byte-based limit
				 */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2084
/* Dump all neighbour tables (and their per-device parameter sets) as a
 * multipart RTM_NEWNEIGHTBL stream.  Resume state across dump calls
 * lives in cb->args[0] (table index) and cb->args[1] (parms index).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		/* Skip tables already dumped and unrequested families. */
		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms itself was dumped above; walk only the
		 * per-device parameter sets chained off it.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Skip count only applies to the table we resumed in. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2133
/* Fill one RTM_NEWNEIGH/RTM_DELNEIGH message for a neighbour entry:
 * header fields, destination, link-layer address (if valid), probe
 * count and cache timing info.
 * Returns the message length on success or -EMSGSIZE on overflow.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* Snapshot the mutable state fields under the entry's lock. */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* Ages are reported as clock ticks since each event. */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2185
2186 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2187                             u32 pid, u32 seq, int type, unsigned int flags,
2188                             struct neigh_table *tbl)
2189 {
2190         struct nlmsghdr *nlh;
2191         struct ndmsg *ndm;
2192
2193         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2194         if (nlh == NULL)
2195                 return -EMSGSIZE;
2196
2197         ndm = nlmsg_data(nlh);
2198         ndm->ndm_family  = tbl->family;
2199         ndm->ndm_pad1    = 0;
2200         ndm->ndm_pad2    = 0;
2201         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2202         ndm->ndm_type    = NDA_DST;
2203         ndm->ndm_ifindex = pn->dev->ifindex;
2204         ndm->ndm_state   = NUD_NONE;
2205
2206         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2207                 goto nla_put_failure;
2208
2209         return nlmsg_end(skb, nlh);
2210
2211 nla_put_failure:
2212         nlmsg_cancel(skb, nlh);
2213         return -EMSGSIZE;
2214 }
2215
/* Propagate a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink broadcast.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2221
/* Dump one table's neighbour entries into an RTM_NEWNEIGH multipart
 * stream.  The hash table is walked under RCU.  Resume state lives in
 * cb->args[1] (hash bucket) and cb->args[2] (index within bucket).
 * Returns skb->len on success, -1 if the skb filled up mid-dump.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* Intra-bucket skip only applies to the resumed bucket. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Save resume position for the next dump call. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2262
2263 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2264                              struct netlink_callback *cb)
2265 {
2266         struct pneigh_entry *n;
2267         struct net *net = sock_net(skb->sk);
2268         int rc, h, s_h = cb->args[3];
2269         int idx, s_idx = idx = cb->args[4];
2270
2271         read_lock_bh(&tbl->lock);
2272
2273         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2274                 if (h > s_h)
2275                         s_idx = 0;
2276                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2277                         if (dev_net(n->dev) != net)
2278                                 continue;
2279                         if (idx < s_idx)
2280                                 goto next;
2281                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2282                                             cb->nlh->nlmsg_seq,
2283                                             RTM_NEWNEIGH,
2284                                             NLM_F_MULTI, tbl) <= 0) {
2285                                 read_unlock_bh(&tbl->lock);
2286                                 rc = -1;
2287                                 goto out;
2288                         }
2289                 next:
2290                         idx++;
2291                 }
2292         }
2293
2294         read_unlock_bh(&tbl->lock);
2295         rc = skb->len;
2296 out:
2297         cb->args[3] = h;
2298         cb->args[4] = idx;
2299         return rc;
2300
2301 }
2302
/* Top-level RTM_GETNEIGH dump handler: iterate all neighbour tables of
 * the requested family, dumping either real neighbours or (when the
 * request carries NTF_PROXY) proxy entries.  cb->args[0] holds the
 * table index to resume from; args[1..] belong to the per-table dumps.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: clear the sub-dump resume state. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2341
/* Invoke cb(n, cookie) for every neighbour entry in tbl.  The walk runs
 * under rcu_read_lock_bh plus tbl->lock (read) so the hash table cannot
 * be resized concurrently; cb therefore must not sleep or take locks
 * that conflict with tbl->lock.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2363
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every neighbour in tbl and unlink+release each entry for which
 * cb(n) returns non-zero; entries cb rejects are kept in place.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Unlink n; np stays put so it now points
				 * at n's successor for the next iteration.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Release outside n->lock: cleanup may notify/free. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2398
2399 #ifdef CONFIG_PROC_FS
2400
/* seq_file iterator helper: return the first neighbour entry visible to
 * this namespace (honouring NEIGH_SEQ_SKIP_NOARP and any sub-iterator),
 * recording the bucket it was found in.  Caller holds rcu_read_lock_bh
 * (taken in the seq start op).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	/* NOTE(review): this initializer is dead — the loop below always
	 * restarts at bucket 0; state->bucket is only written back.
	 */
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* Protocol sub-iterator can veto entries. */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2439
/* seq_file iterator helper: advance from entry n to the next visible
 * neighbour, crossing hash buckets as needed.  When pos is non-NULL it
 * is decremented once an entry is found (seq_file position accounting).
 * Caller holds rcu_read_lock_bh.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* Sub-iterator may still have positions within n itself. */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Current bucket exhausted; move on to the next one. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2487
2488 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2489 {
2490         struct neighbour *n = neigh_get_first(seq);
2491
2492         if (n) {
2493                 --(*pos);
2494                 while (*pos) {
2495                         n = neigh_get_next(seq, n, pos);
2496                         if (!n)
2497                                 break;
2498                 }
2499         }
2500         return *pos ? NULL : n;
2501 }
2502
2503 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2504 {
2505         struct neigh_seq_state *state = seq->private;
2506         struct net *net = seq_file_net(seq);
2507         struct neigh_table *tbl = state->tbl;
2508         struct pneigh_entry *pn = NULL;
2509         int bucket = state->bucket;
2510
2511         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2512         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2513                 pn = tbl->phash_buckets[bucket];
2514                 while (pn && !net_eq(pneigh_net(pn), net))
2515                         pn = pn->next;
2516                 if (pn)
2517                         break;
2518         }
2519         state->bucket = bucket;
2520
2521         return pn;
2522 }
2523
2524 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2525                                             struct pneigh_entry *pn,
2526                                             loff_t *pos)
2527 {
2528         struct neigh_seq_state *state = seq->private;
2529         struct net *net = seq_file_net(seq);
2530         struct neigh_table *tbl = state->tbl;
2531
2532         do {
2533                 pn = pn->next;
2534         } while (pn && !net_eq(pneigh_net(pn), net));
2535
2536         while (!pn) {
2537                 if (++state->bucket > PNEIGH_HASHMASK)
2538                         break;
2539                 pn = tbl->phash_buckets[state->bucket];
2540                 while (pn && !net_eq(pneigh_net(pn), net))
2541                         pn = pn->next;
2542                 if (pn)
2543                         break;
2544         }
2545
2546         if (pn && pos)
2547                 --(*pos);
2548
2549         return pn;
2550 }
2551
2552 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2553 {
2554         struct pneigh_entry *pn = pneigh_get_first(seq);
2555
2556         if (pn) {
2557                 --(*pos);
2558                 while (*pos) {
2559                         pn = pneigh_get_next(seq, pn, pos);
2560                         if (!pn)
2561                                 break;
2562                 }
2563         }
2564         return *pos ? NULL : pn;
2565 }
2566
2567 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2568 {
2569         struct neigh_seq_state *state = seq->private;
2570         void *rc;
2571         loff_t idxpos = *pos;
2572
2573         rc = neigh_get_idx(seq, &idxpos);
2574         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2575                 rc = pneigh_get_idx(seq, &idxpos);
2576
2577         return rc;
2578 }
2579
2580 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2581         __acquires(rcu_bh)
2582 {
2583         struct neigh_seq_state *state = seq->private;
2584
2585         state->tbl = tbl;
2586         state->bucket = 0;
2587         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2588
2589         rcu_read_lock_bh();
2590         state->nht = rcu_dereference_bh(tbl->nht);
2591
2592         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2593 }
2594 EXPORT_SYMBOL(neigh_seq_start);
2595
2596 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2597 {
2598         struct neigh_seq_state *state;
2599         void *rc;
2600
2601         if (v == SEQ_START_TOKEN) {
2602                 rc = neigh_get_first(seq);
2603                 goto out;
2604         }
2605
2606         state = seq->private;
2607         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2608                 rc = neigh_get_next(seq, v, NULL);
2609                 if (rc)
2610                         goto out;
2611                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2612                         rc = pneigh_get_first(seq);
2613         } else {
2614                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2615                 rc = pneigh_get_next(seq, v, NULL);
2616         }
2617 out:
2618         ++(*pos);
2619         return rc;
2620 }
2621 EXPORT_SYMBOL(neigh_seq_next);
2622
/* seq_file ->stop() counterpart of neigh_seq_start(): drops the
 * RCU-BH read lock taken there.
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2629
2630 /* statistics via seq_file */
2631
2632 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2633 {
2634         struct neigh_table *tbl = seq->private;
2635         int cpu;
2636
2637         if (*pos == 0)
2638                 return SEQ_START_TOKEN;
2639
2640         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2641                 if (!cpu_possible(cpu))
2642                         continue;
2643                 *pos = cpu+1;
2644                 return per_cpu_ptr(tbl->stats, cpu);
2645         }
2646         return NULL;
2647 }
2648
2649 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2650 {
2651         struct neigh_table *tbl = seq->private;
2652         int cpu;
2653
2654         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2655                 if (!cpu_possible(cpu))
2656                         continue;
2657                 *pos = cpu+1;
2658                 return per_cpu_ptr(tbl->stats, cpu);
2659         }
2660         return NULL;
2661 }
2662
/* Nothing to release: the statistics walk takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2667
2668 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2669 {
2670         struct neigh_table *tbl = seq->private;
2671         struct neigh_statistics *st = v;
2672
2673         if (v == SEQ_START_TOKEN) {
2674                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2675                 return 0;
2676         }
2677
2678         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2679                         "%08lx %08lx  %08lx %08lx %08lx\n",
2680                    atomic_read(&tbl->entries),
2681
2682                    st->allocs,
2683                    st->destroys,
2684                    st->hash_grows,
2685
2686                    st->lookups,
2687                    st->hits,
2688
2689                    st->res_failed,
2690
2691                    st->rcv_probes_mcast,
2692                    st->rcv_probes_ucast,
2693
2694                    st->periodic_gc_runs,
2695                    st->forced_gc_runs,
2696                    st->unres_discards
2697                    );
2698
2699         return 0;
2700 }
2701
/* seq_file operations backing the per-table statistics proc file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start  = neigh_stat_seq_start,
	.next   = neigh_stat_seq_next,
	.stop   = neigh_stat_seq_stop,
	.show   = neigh_stat_seq_show,
};
2708
2709 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2710 {
2711         int ret = seq_open(file, &neigh_stat_seq_ops);
2712
2713         if (!ret) {
2714                 struct seq_file *sf = file->private_data;
2715                 sf->private = PDE_DATA(inode);
2716         }
2717         return ret;
2718 };
2719
/* proc file_operations for the statistics file, seq_file backed. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = neigh_stat_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2727
2728 #endif /* CONFIG_PROC_FS */
2729
2730 static inline size_t neigh_nlmsg_size(void)
2731 {
2732         return NLMSG_ALIGN(sizeof(struct ndmsg))
2733                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2734                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2735                + nla_total_size(sizeof(struct nda_cacheinfo))
2736                + nla_total_size(4); /* NDA_PROBES */
2737 }
2738
2739 static void __neigh_notify(struct neighbour *n, int type, int flags)
2740 {
2741         struct net *net = dev_net(n->dev);
2742         struct sk_buff *skb;
2743         int err = -ENOBUFS;
2744
2745         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2746         if (skb == NULL)
2747                 goto errout;
2748
2749         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2750         if (err < 0) {
2751                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2752                 WARN_ON(err == -EMSGSIZE);
2753                 kfree_skb(skb);
2754                 goto errout;
2755         }
2756         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2757         return;
2758 errout:
2759         if (err < 0)
2760                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2761 }
2762
2763 #ifdef CONFIG_ARPD
/* Ask the userspace ARP daemon to resolve @n by multicasting an
 * RTM_GETNEIGH request on the neighbour notification group.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2769 #endif /* CONFIG_ARPD */
2770
2771 #ifdef CONFIG_SYSCTL
/* Lower bound for the qlen sysctls. */
static int zero;
/* Cap unres_qlen so converting packets back to bytes can't overflow
 * an int (see proc_unres_qlen()). */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2774
2775 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2776                            size_t *lenp, loff_t *ppos)
2777 {
2778         int size, ret;
2779         ctl_table tmp = *ctl;
2780
2781         tmp.extra1 = &zero;
2782         tmp.extra2 = &unres_qlen_max;
2783         tmp.data = &size;
2784
2785         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2786         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2787
2788         if (write && !ret)
2789                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2790         return ret;
2791 }
2792
/* Indices into neigh_sysctl_template.neigh_vars[]; must stay in sync
 * with the initializer below.  The *_MS entries are pointed at the
 * same neigh_parms fields as their jiffies counterparts by
 * neigh_sysctl_register(); the GC_* entries are zeroed out for
 * per-device tables and only appear under "default".
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2815
/* Template for the per-parms sysctl table.  neigh_sysctl_register()
 * kmemdup()s it and then points each entry's .data at the matching
 * neigh_parms field, so .data is intentionally left unset here.
 * Indexed by the NEIGH_VAR_* enum above; NULL-terminated.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			/* packet-count view of queue_len_bytes */
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* Table-wide GC knobs; removed for per-device tables. */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2939
/* Register the sysctl directory net/<p_name>/neigh/<dev|default>
 * backed by @p.
 * @dev:     the device the parms belong to, or NULL for the table
 *           default directory (which also carries the gc_* knobs)
 * @p:       neigh_parms supplying the values
 * @p_name:  protocol directory name ("ipv4", "ipv6", ...)
 * @handler: optional proc handler overriding the four time entries
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	/* Private copy of the template so each registration can point
	 * .data at its own neigh_parms fields. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* "unres_qlen" and "unres_qlen_bytes" share the same storage;
	 * proc_unres_qlen() converts between packets and bytes. */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	/* The *_ms entries expose the same fields in a different unit. */
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		/* NOTE(review): assumes four ints follow the parms in the
		 * table-wide allocation — confirm against the allocator. */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3016
/* Tear down what neigh_sysctl_register() set up for @p; a no-op when
 * nothing was registered.  p->sysctl_table is cleared before the
 * header is unregistered and the table freed.
 */
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);
3027
3028 #endif  /* CONFIG_SYSCTL */
3029
/* Register the PF_UNSPEC rtnetlink handlers for neighbour entries
 * (RTM_{NEW,DEL,GET}NEIGH) and neighbour tables
 * (RTM_{GET,SET}NEIGHTBL).  Always succeeds.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3044