neighbour: fix a race in neigh_destroy()
[firefly-linux-kernel-4.4.55.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
42 #define DEBUG
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...)              \
45 do {                                            \
46         if (level <= NEIGH_DEBUG)               \
47                 pr_debug(fmt, ##__VA_ARGS__);   \
48 } while (0)
49
50 #define PNEIGH_HASHMASK         0xF
51
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61
62 /*
63    Neighbour hash table buckets are protected with rwlock tbl->lock.
64
65    - All the scans/updates to hash buckets MUST be made under this lock.
66    - NOTHING clever should be made under this lock: no callbacks
67      to protocol backends, no attempts to send something to network.
68      It will result in deadlocks, if backend/driver wants to use neighbour
69      cache.
70    - If the entry requires some non-trivial actions, increase
71      its reference count and release table lock.
72
73    Neighbour entries are protected:
74    - with reference count.
75    - with rwlock neigh->lock
76
77    Reference count prevents destruction.
78
79    neigh->lock mainly serializes ll address data and its validity state.
80    However, the same lock is used to protect another entry fields:
81     - timer
82     - resolution queue
83
84    Again, nothing clever shall be made under neigh->lock,
85    the most complicated procedure, which we allow is dev->hard_header.
86    It is supposed, that dev->hard_header is simplistic and does
87    not make callbacks to neighbour tables.
88
89    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
90    list of neighbour tables. This list is used only in process context,
91  */
92
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97         kfree_skb(skb);
98         return -ENETDOWN;
99 }
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0);
107         neigh_release(neigh);
108 }
109
/*
 * Pick a random value uniformly distributed over
 * [base/2, 3*base/2).  This matches the default IPv6 behaviour and
 * is deliberately not configurable.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half = base >> 1;

	if (!base)
		return 0;
	return half + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
/* Synchronous garbage collection: walk every hash bucket and unlink
 * each entry that is unreferenced (refcnt == 1, i.e. only the table's
 * own reference) and not NUD_PERMANENT.  Runs under tbl->lock with BHs
 * disabled.  Returns 1 if anything was reclaimed, 0 otherwise.
 * Called from neigh_alloc() when the table is over its gc thresholds.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink from the chain; RCU readers may
				 * still traverse the old pointer safely.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Remembered so neigh_alloc() can rate-limit forced GC runs. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
168
/* Arm the entry's state-machine timer for @when, taking a reference
 * that the timer handler (or neigh_del_timer()) later releases.
 * mod_timer() returning nonzero means a timer was already pending,
 * which the state machine never allows — report it loudly.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
/* Unhash every entry bound to @dev (or every entry, if @dev is NULL).
 * Caller holds tbl->lock (write, BH-disabled).  Entries still held by
 * other users cannot be freed yet; they are neutered instead — queue
 * purged, output redirected to neigh_blackhole — and freed when the
 * last reference goes away.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink first, then mark dead under n->lock. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			/* Drops the table's reference on the entry. */
			neigh_cleanup_and_release(n);
		}
	}
}
248
249 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
250 {
251         write_lock_bh(&tbl->lock);
252         neigh_flush_dev(tbl, dev);
253         write_unlock_bh(&tbl->lock);
254 }
255 EXPORT_SYMBOL(neigh_changeaddr);
256
/* Device is going down: flush its neighbour entries and proxy
 * entries, then stop the proxy timer and drain the proxy queue.
 * The timer/queue teardown happens outside tbl->lock because
 * del_timer_sync() may sleep waiting for a running handler.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
269
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * May trigger a synchronous forced GC when the table is over
 * gc_thresh2 (and the last flush is >5s old) or gc_thresh3; returns
 * NULL if GC cannot make room past gc_thresh3 or allocation fails.
 * The entry starts with refcnt 1 and dead = 1 — it only becomes live
 * once hashed into the table by __neigh_create().
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Optimistically count ourselves in; undone on failure. */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/* Room for driver-private data follows the struct proper. */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
310
311 static void neigh_get_hash_rnd(u32 *x)
312 {
313         get_random_bytes(x, sizeof(*x));
314         *x |= 1;
315 }
316
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319         size_t size = (1 << shift) * sizeof(struct neighbour *);
320         struct neigh_hash_table *ret;
321         struct neighbour __rcu **buckets;
322         int i;
323
324         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
325         if (!ret)
326                 return NULL;
327         if (size <= PAGE_SIZE)
328                 buckets = kzalloc(size, GFP_ATOMIC);
329         else
330                 buckets = (struct neighbour __rcu **)
331                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
332                                            get_order(size));
333         if (!buckets) {
334                 kfree(ret);
335                 return NULL;
336         }
337         ret->hash_buckets = buckets;
338         ret->hash_shift = shift;
339         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
340                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
341         return ret;
342 }
343
344 static void neigh_hash_free_rcu(struct rcu_head *head)
345 {
346         struct neigh_hash_table *nht = container_of(head,
347                                                     struct neigh_hash_table,
348                                                     rcu);
349         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350         struct neighbour __rcu **buckets = nht->hash_buckets;
351
352         if (size <= PAGE_SIZE)
353                 kfree(buckets);
354         else
355                 free_pages((unsigned long)buckets, get_order(size));
356         kfree(nht);
357 }
358
/* Replace the table's hash with one of 2^new_shift buckets, rehashing
 * every entry.  Caller holds tbl->lock.  On allocation failure the
 * old table is kept and returned.  The old table is freed after an
 * RCU grace period, so lockless readers traversing it stay safe.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Rehash with the NEW table's seeds; the bucket
			 * index is taken from the top hash bits.
			 */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
399
/* Lockless (RCU-bh) lookup of the entry for @pkey on @dev.
 * Returns the entry with its refcount taken, or NULL if absent.
 * atomic_inc_not_zero() skips entries whose refcount already hit
 * zero (i.e. concurrently being destroyed).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			/* Counted as a hit even if the refcount grab
			 * just failed on a dying entry.
			 */
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
429
/* Like neigh_lookup(), but matches on key and network namespace only,
 * ignoring the device (the hash is computed with dev == NULL).
 * Returns a referenced entry or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
460
/* Create (or find) the entry for @pkey on @dev.  Allocation and the
 * protocol/driver constructors run outside tbl->lock; the hash insert
 * happens under it, so a concurrent creator may win — in that case
 * the already-hashed duplicate is returned and ours is released.
 * On success returns the entry (referenced iff @want_ref); on failure
 * returns an ERR_PTR.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation so the entry starts out unconfirmed. */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Grow the hash when entries outnumber buckets. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms went away under us (e.g. device teardown): bail out. */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-check for a duplicate inserted while we were unlocked. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576                 struct net *net, const void *pkey, struct net_device *dev)
577 {
578         int key_len = tbl->key_len;
579         u32 hash_val = pneigh_hash(pkey, key_len);
580
581         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582                                  net, pkey, key_len, dev);
583 }
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
/* Look up a proxy entry; when absent and @creat is set, allocate and
 * insert one.  The creation path requires the RTNL (asserted below),
 * which is what serializes concurrent creators — the lookup is not
 * repeated after the lock is dropped.  Returns the entry or NULL.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	/* Key bytes are stored inline after the struct. */
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the refs we took and bail. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
631
632
/* Remove and free the proxy entry matching key/dev/netns exactly
 * (no wildcard-dev match here, unlike lookup).  The destructor and
 * frees run after tbl->lock is dropped.  Returns 0 on success,
 * -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
659
/* Drop every proxy entry bound to @dev (or all of them when @dev is
 * NULL).  Caller holds tbl->lock — destructors run under it here.
 * Always returns -ENOENT; the sole caller (neigh_ifdown) ignores it.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
686 static inline void neigh_parms_put(struct neigh_parms *parms)
687 {
688         if (atomic_dec_and_test(&parms->refcnt))
689                 neigh_parms_destroy(parms);
690 }
691
/*
 *	neighbour must already be out of the table
 *	(n->dead set, unhashed); we get here from the final
 *	neigh_release().
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* Destroying a live entry would be a refcounting bug elsewhere;
	 * complain and leak rather than corrupt the table.
	 */
	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	/* Purge under neigh->lock: per the fix this file carries,
	 * concurrent paths may still touch arp_queue at this point.
	 */
	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	/* Memory is reclaimed after an RCU grace period so lockless
	 * hash-chain walkers never touch freed memory.
	 */
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
728
729 /* Neighbour state is suspicious;
730    disable fast path.
731
732    Called with write_locked neigh.
733  */
734 static void neigh_suspect(struct neighbour *neigh)
735 {
736         neigh_dbg(2, "neigh %p is suspected\n", neigh);
737
738         neigh->output = neigh->ops->output;
739 }
740
741 /* Neighbour state is OK;
742    enable fast path.
743
744    Called with write_locked neigh.
745  */
746 static void neigh_connect(struct neighbour *neigh)
747 {
748         neigh_dbg(2, "neigh %p is connected\n", neigh);
749
750         neigh->output = neigh->ops->connected_output;
751 }
752
/* Periodic GC work: refresh each parms' randomized reachable_time
 * (every 300s) and evict unreferenced entries that are NUD_FAILED or
 * idle past gc_staletime.  The table lock is dropped between buckets
 * to bound lock hold time; re-reads tbl->nht afterwards in case the
 * hash grew meanwhile.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Below gc_thresh1 the cache is small enough to leave alone. */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			/* Permanent entries and entries with a running
			 * timer are never reclaimed here.
			 */
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				/* NOTE(review): raw store to an __rcu
				 * pointer; later kernels use
				 * rcu_assign_pointer() here — confirm
				 * against upstream.
				 */
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}
833
834 static __inline__ int neigh_max_probes(struct neighbour *n)
835 {
836         struct neigh_parms *p = n->parms;
837         return (n->nud_state & NUD_PROBE) ?
838                 p->ucast_probes :
839                 p->ucast_probes + p->app_probes + p->mcast_probes;
840 }
841
/* Resolution failed: report unreachability for every queued packet
 * and empty the queue.  neigh->lock is dropped around each
 * error_report() call (see annotations) since that callback is
 * arbitrarily complex; the loop re-checks nud_state each iteration
 * because the state may change while unlocked.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* Whatever remains (state changed mid-loop) is simply dropped. */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
866
/* Transmit one solicitation (e.g. ARP request) for @neigh and bump the
 * probe counter.
 *
 * Entered with neigh->lock write-held; the lock is dropped before
 * calling into ->solicit() so the output path may take it again.
 * Note the caller is responsible for any bh re-enable (see
 * __neigh_event_send()).
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
879
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine: REACHABLE -> DELAY -> STALE/PROBE ->
 * FAILED, re-arming the timer as long as the entry stays in a
 * timer-driven state.  Runs in softirq context with a reference held
 * by whoever armed the timer (released at the bottom).
 */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* The entry may have left all timer-driven states since the
	 * timer was armed; nothing to do then.
	 */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived while we were delaying. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget exhausted: declare the neighbour unreachable. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		/* If the timer was not already pending we now hold an
		 * extra reference on behalf of the re-armed timer.
		 */
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() releases neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
963
/* Kick off (or continue) address resolution for @neigh, queueing @skb
 * while the lookup is in flight.
 *
 * Returns 0 if the caller may transmit immediately, 1 if the skb was
 * consumed (queued for later transmission, or dropped because
 * resolution cannot be attempted).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Entry already usable or already being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to probe at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Keep the arp queue within its byte budget,
			 * discarding the oldest packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock itself but does not re-enable
	 * BHs, hence the split write_unlock()/local_bh_enable() below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1029
1030 static void neigh_update_hhs(struct neighbour *neigh)
1031 {
1032         struct hh_cache *hh;
1033         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1034                 = NULL;
1035
1036         if (neigh->dev->header_ops)
1037                 update = neigh->dev->header_ops->cache_update;
1038
1039         if (update) {
1040                 hh = &neigh->hh;
1041                 if (hh->hh_len) {
1042                         write_seqlock_bh(&hh->hh_lock);
1043                         update(hh, neigh->dev, neigh->ha);
1044                         write_sequnlock_bh(&hh->hh_lock);
1045                 }
1046         }
1047 }
1048
1049
1050
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				to be a router.

   Caller MUST hold a reference count on the entry.
 */
1071
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Non-administrative updates may not touch static entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the known address but suspect it. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under ha_lock so readers see
		 * a consistent copy, then refresh cached headers.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	/* update_isrouter stays zero on all early-error paths, so the
	 * NTF_ROUTER flag is only touched after validation succeeded.
	 */
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1232
1233 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1234                                  u8 *lladdr, void *saddr,
1235                                  struct net_device *dev)
1236 {
1237         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1238                                                  lladdr || !dev->addr_len);
1239         if (neigh)
1240                 neigh_update(neigh, lladdr, NUD_STALE,
1241                              NEIGH_UPDATE_F_OVERRIDE);
1242         return neigh;
1243 }
1244 EXPORT_SYMBOL(neigh_event_ns);
1245
/* Initialize the cached hard header for @n via the device's ->cache()
 * hook.  Takes n->lock itself (the historic "called with
 * read_lock_bh(&n->lock)" note here was stale: taking write_lock_bh
 * below while holding the read lock would deadlock).
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1263
1264 /* This function can be used in contexts, where only old dev_queue_xmit
1265  * worked, f.e. if you want to override normal output path (eql, shaper),
1266  * but resolution is not made yet.
1267  */
1268
1269 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1270 {
1271         struct net_device *dev = skb->dev;
1272
1273         __skb_pull(skb, skb_network_offset(skb));
1274
1275         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1276                             skb->len) < 0 &&
1277             dev->header_ops->rebuild(skb))
1278                 return 0;
1279
1280         return dev_queue_xmit(skb);
1281 }
1282 EXPORT_SYMBOL(neigh_compat_output);
1283
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	/* neigh_event_send() returns 0 when the entry is usable now;
	 * otherwise it has consumed (queued or dropped) the skb.
	 */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Rebuild the header until a stable snapshot of the
		 * hardware address was read (seqlock retry loop).
		 */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1324
1325 /* As fast as possible without hh cache */
1326
1327 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1328 {
1329         struct net_device *dev = neigh->dev;
1330         unsigned int seq;
1331         int err;
1332
1333         do {
1334                 __skb_pull(skb, skb_network_offset(skb));
1335                 seq = read_seqbegin(&neigh->ha_lock);
1336                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1337                                       neigh->ha, NULL, skb->len);
1338         } while (read_seqretry(&neigh->ha_lock, seq));
1339
1340         if (err >= 0)
1341                 err = dev_queue_xmit(skb);
1342         else {
1343                 err = -EINVAL;
1344                 kfree_skb(skb);
1345         }
1346         return err;
1347 }
1348 EXPORT_SYMBOL(neigh_connected_output);
1349
/* No resolution or header needed: hand the skb straight to the device
 * queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1355
/* Proxy-queue timer callback: hand every due skb to ->proxy_redo()
 * (or drop it if the device went down), then re-arm the timer for the
 * earliest remaining entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1389
/* Queue @skb for delayed proxy processing after a random fraction of
 * p->proxy_delay.
 *
 * NOTE(review): the modulo below assumes p->proxy_delay != 0 (division
 * by zero otherwise) — callers are expected to guarantee this; confirm
 * at the call sites.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		/* Keep the earlier of the pending expiry and ours. */
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	/* Hold the device until neigh_proxy_process() releases it. */
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1416
1417 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1418                                                       struct net *net, int ifindex)
1419 {
1420         struct neigh_parms *p;
1421
1422         for (p = &tbl->parms; p; p = p->next) {
1423                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1424                     (!p->dev && !ifindex))
1425                         return p;
1426         }
1427
1428         return NULL;
1429 }
1430
/* Clone the table's default parameters for @dev and link the copy onto
 * tbl->parms.  Returns the new parms, or NULL on allocation failure or
 * if the driver's ndo_neigh_setup() rejects them.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* ifindex 0 selects the table's device-less default parms. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		/* References dropped again in neigh_parms_release()/
		 * neigh_parms_destroy().
		 */
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1466
1467 static void neigh_rcu_free_parms(struct rcu_head *head)
1468 {
1469         struct neigh_parms *parms =
1470                 container_of(head, struct neigh_parms, rcu_head);
1471
1472         neigh_parms_put(parms);
1473 }
1474
/* Unlink @parms from @tbl and drop the initial reference after an RCU
 * grace period.  The table's built-in default parms are never released
 * this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			/* Defer the final put: readers may still hold
			 * a pointer obtained before the unlink.
			 */
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
1497
/* Final teardown of a parms block: release its netns reference, then
 * free the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1503
1504 static struct lock_class_key neigh_table_proxy_queue_class;
1505
/* Core table initialisation (no registration on the global table
 * list): stats, hash tables, periodic GC work and the proxy timer.
 * Panics on allocation failure, since the stack cannot operate
 * without these.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Allocate the initial neighbour hash (grown on demand later). */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
1550
/* Initialise @tbl and register it on the global neigh_tables list.
 * Only one table per address family is expected; a duplicate is
 * reported loudly but still linked (historic behaviour).
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Scan for an existing table of the same family (diagnostic
	 * only; the new table is prepended regardless).
	 */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
1572
/* Tear down @tbl: stop GC work and timers, flush all entries, unlink
 * from the global list and free the hashes and stats.  Counterpart of
 * neigh_table_init().  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* RCU readers may still hold the hash table; free it only
	 * after a grace period.
	 */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1608
/* RTM_DELNEIGH handler: remove a neighbour (or proxy) entry.
 * Runs under RTNL; returns 0 or a negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching family found: drop the list lock before
		 * operating on the table.  Every path from here leaves
		 * the loop, so the lock is not retaken.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		/* Force the entry to NUD_FAILED; GC reaps it later. */
		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1672
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * described by a netlink request.  Runs under RTNL (ASSERT_RTNL below).
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL        missing NDA_DST, or NDA_DST/NDA_LLADDR payload too short
 *   -ENODEV        ndm_ifindex does not resolve to a device
 *   -ENOENT        entry absent and NLM_F_CREATE not given
 *   -EEXIST        entry present and NLM_F_EXCL given
 *   -ENOBUFS       proxy entry allocation failed
 *   -EAFNOSUPPORT  no neigh_table registered for ndm_family
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* a link-layer address shorter than the device's is invalid */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* table found: drop the list lock before lookups/updates.
		 * All paths below leave the loop via goto out.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			/* creat=1: allocate the proxy entry if missing */
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* non-proxy entries are always bound to a device */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* without NLM_F_REPLACE an existing entry is only
			 * updated, not overridden
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE just marks the entry used, kicking
			 * resolution if needed
			 */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1770
1771 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1772 {
1773         struct nlattr *nest;
1774
1775         nest = nla_nest_start(skb, NDTA_PARMS);
1776         if (nest == NULL)
1777                 return -ENOBUFS;
1778
1779         if ((parms->dev &&
1780              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1781             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1782             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1783             /* approximative value for deprecated QUEUE_LEN (in packets) */
1784             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1785                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1786             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1787             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1788             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1789             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1790             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1791             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1792                           parms->base_reachable_time) ||
1793             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1794             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1795                           parms->delay_probe_time) ||
1796             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1797             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1798             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1799             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1800                 goto nla_put_failure;
1801         return nla_nest_end(skb, nest);
1802
1803 nla_put_failure:
1804         nla_nest_cancel(skb, nest);
1805         return -EMSGSIZE;
1806 }
1807
/* Build one RTM_NEWNEIGHTBL message describing @tbl: GC thresholds and
 * interval, an NDTA_CONFIG snapshot, per-CPU statistics summed into
 * NDTA_STATS, and the table's default parms nested via
 * neightbl_fill_parms().  Returns the value of nlmsg_end() on success
 * or -EMSGSIZE if the skb runs out of room.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* hold tbl->lock so all values copied below are consistent */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* hash_rnd/hash_mask live in the RCU-managed hash table */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* sum the per-CPU counters into a single ndt_stats blob */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* this path only dumps the table default parms, never a
	 * per-device copy
	 */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1893
1894 static int neightbl_fill_param_info(struct sk_buff *skb,
1895                                     struct neigh_table *tbl,
1896                                     struct neigh_parms *parms,
1897                                     u32 pid, u32 seq, int type,
1898                                     unsigned int flags)
1899 {
1900         struct ndtmsg *ndtmsg;
1901         struct nlmsghdr *nlh;
1902
1903         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1904         if (nlh == NULL)
1905                 return -EMSGSIZE;
1906
1907         ndtmsg = nlmsg_data(nlh);
1908
1909         read_lock_bh(&tbl->lock);
1910         ndtmsg->ndtm_family = tbl->family;
1911         ndtmsg->ndtm_pad1   = 0;
1912         ndtmsg->ndtm_pad2   = 0;
1913
1914         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1915             neightbl_fill_parms(skb, parms) < 0)
1916                 goto errout;
1917
1918         read_unlock_bh(&tbl->lock);
1919         return nlmsg_end(skb, nlh);
1920 errout:
1921         read_unlock_bh(&tbl->lock);
1922         nlmsg_cancel(skb, nlh);
1923         return -EMSGSIZE;
1924 }
1925
/* Netlink attribute policy for top-level RTM_SETNEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1934
/* Policy for the attributes nested inside NDTA_PARMS.  Note that
 * NDTPA_QUEUE_LENBYTES is intentionally absent: it is accepted without
 * a type constraint (see the switch in neightbl_set()).
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1950
/* RTM_SETNEIGHTBL handler: update a neigh_table's GC settings and/or one
 * of its neigh_parms instances (selected by NDTPA_IFINDEX, 0 = default).
 *
 * Locking: neigh_tbl_lock (read) protects the table list walk; tbl->lock
 * (write, BH off) is then taken so timers see a consistent value set.
 *
 * Returns 0 on success, -EINVAL if NDTA_NAME is missing, -ENOENT if no
 * matching table/parms exists, or a nlmsg/nla parse error.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	/* find the table by name (and family, if one was given) */
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every present NDTPA_* attribute to the parms */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count; convert to an
				 * approximate byte limit
				 */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2079
/* RTM_GETNEIGHTBL dump callback: emit one message per table plus one per
 * per-device parms instance in this netns.  Resume state lives in
 * cb->args[0] (table index) and cb->args[1] (parms index) so a partial
 * dump can continue in the next invocation.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms.next starts the list of per-device clones;
		 * the default parms was already dumped above
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* finished this table: do not skip parms of the next one */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2128
/* Fill an RTM_NEWNEIGH (or similar) message for one neighbour entry.
 * neigh->lock is taken while nud_state, the hardware address and the
 * cacheinfo timestamps are copied so they form a consistent snapshot.
 * Returns the value of nlmsg_end() on success, -EMSGSIZE otherwise.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* snapshot under the lock so the address can't change
		 * mid-copy
		 */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* exclude the reference held by this dump itself */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2180
2181 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2182                             u32 pid, u32 seq, int type, unsigned int flags,
2183                             struct neigh_table *tbl)
2184 {
2185         struct nlmsghdr *nlh;
2186         struct ndmsg *ndm;
2187
2188         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2189         if (nlh == NULL)
2190                 return -EMSGSIZE;
2191
2192         ndm = nlmsg_data(nlh);
2193         ndm->ndm_family  = tbl->family;
2194         ndm->ndm_pad1    = 0;
2195         ndm->ndm_pad2    = 0;
2196         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2197         ndm->ndm_type    = NDA_DST;
2198         ndm->ndm_ifindex = pn->dev->ifindex;
2199         ndm->ndm_state   = NUD_NONE;
2200
2201         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2202                 goto nla_put_failure;
2203
2204         return nlmsg_end(skb, nlh);
2205
2206 nla_put_failure:
2207         nlmsg_cancel(skb, nlh);
2208         return -EMSGSIZE;
2209 }
2210
/* Announce a neighbour change to both in-kernel netevent listeners and
 * netlink (RTM_NEWNEIGH) subscribers.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2216
/* Dump all neighbour entries of @tbl belonging to the requesting netns.
 * The hash table is walked under rcu_read_lock_bh(); resume state is kept
 * in cb->args[1] (bucket) and cb->args[2] (index within bucket).
 * Returns skb->len when the table is exhausted, -1 when the skb filled up
 * (the dump will be continued on the next callback invocation).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;	/* only skip inside the resume bucket */
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2257
/* Dump all proxy (pneigh) entries of @tbl belonging to the requesting
 * netns, under tbl->lock.  Resume state: cb->args[3] (hash bucket) and
 * cb->args[4] (index within bucket).  Returns skb->len when done, -1
 * when the skb filled up and the dump must continue later.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;	/* only skip inside the resume bucket */
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (dev_net(n->dev) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) <= 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2297
/* RTM_GETNEIGH dump callback: iterate over all tables matching the
 * requested family and dump either the regular neighbour entries or,
 * when ndm_flags == NTF_PROXY was requested, the proxy entries.
 * cb->args[0] records the table index; args[1..4] belong to the
 * per-table sub-dumps and are reset when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			/* new table: clear the sub-dump resume state */
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2336
/* Invoke cb(n, cookie) for every neighbour entry in @tbl.
 * Entries are walked under rcu_read_lock_bh(); tbl->lock is additionally
 * read-held so the hash table cannot be resized during the walk.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2358
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain and ask cb(n) whether each neighbour should be
 * released.  Entries selected for release are unlinked from the chain,
 * marked dead under n->lock, and then cleaned up and released outside
 * that lock.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* unlink and mark dead while n->lock is held */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2393
2394 #ifdef CONFIG_PROC_FS
2395
/* seq_file helper: return the first neighbour entry visible to this
 * iteration (same netns, optionally filtered by neigh_sub_iter and by
 * NEIGH_SEQ_SKIP_NOARP), scanning buckets from 0.  Updates
 * state->bucket to where the entry was found.  Caller holds
 * rcu_read_lock_bh() (taken in neigh_seq_start, outside this view).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2434
/* seq_file helper: advance from @n to the next visible neighbour entry,
 * continuing into later hash buckets as needed.  *pos is decremented
 * each time an entry is found, so neigh_get_idx() can count down to the
 * requested position.  Caller holds rcu_read_lock_bh().
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* current bucket exhausted: move to the next one */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2482
2483 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2484 {
2485         struct neighbour *n = neigh_get_first(seq);
2486
2487         if (n) {
2488                 --(*pos);
2489                 while (*pos) {
2490                         n = neigh_get_next(seq, n, pos);
2491                         if (!n)
2492                                 break;
2493                 }
2494         }
2495         return *pos ? NULL : n;
2496 }
2497
2498 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2499 {
2500         struct neigh_seq_state *state = seq->private;
2501         struct net *net = seq_file_net(seq);
2502         struct neigh_table *tbl = state->tbl;
2503         struct pneigh_entry *pn = NULL;
2504         int bucket = state->bucket;
2505
2506         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2507         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2508                 pn = tbl->phash_buckets[bucket];
2509                 while (pn && !net_eq(pneigh_net(pn), net))
2510                         pn = pn->next;
2511                 if (pn)
2512                         break;
2513         }
2514         state->bucket = bucket;
2515
2516         return pn;
2517 }
2518
2519 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2520                                             struct pneigh_entry *pn,
2521                                             loff_t *pos)
2522 {
2523         struct neigh_seq_state *state = seq->private;
2524         struct net *net = seq_file_net(seq);
2525         struct neigh_table *tbl = state->tbl;
2526
2527         do {
2528                 pn = pn->next;
2529         } while (pn && !net_eq(pneigh_net(pn), net));
2530
2531         while (!pn) {
2532                 if (++state->bucket > PNEIGH_HASHMASK)
2533                         break;
2534                 pn = tbl->phash_buckets[state->bucket];
2535                 while (pn && !net_eq(pneigh_net(pn), net))
2536                         pn = pn->next;
2537                 if (pn)
2538                         break;
2539         }
2540
2541         if (pn && pos)
2542                 --(*pos);
2543
2544         return pn;
2545 }
2546
2547 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2548 {
2549         struct pneigh_entry *pn = pneigh_get_first(seq);
2550
2551         if (pn) {
2552                 --(*pos);
2553                 while (*pos) {
2554                         pn = pneigh_get_next(seq, pn, pos);
2555                         if (!pn)
2556                                 break;
2557                 }
2558         }
2559         return *pos ? NULL : pn;
2560 }
2561
2562 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2563 {
2564         struct neigh_seq_state *state = seq->private;
2565         void *rc;
2566         loff_t idxpos = *pos;
2567
2568         rc = neigh_get_idx(seq, &idxpos);
2569         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2570                 rc = pneigh_get_idx(seq, &idxpos);
2571
2572         return rc;
2573 }
2574
/*
 * seq_file ->start callback shared by the protocol /proc files (ARP,
 * NDISC).  Takes rcu_read_lock_bh() — released in neigh_seq_stop() —
 * and positions the walk at *pos; offset 0 yields SEQ_START_TOKEN so
 * the ->show callback can print a header line.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        /* IS_PNEIGH is internal cursor state; never accept it from callers. */
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2590
/*
 * seq_file ->next callback: step through neighbour entries first, then
 * (unless NEIGH_SEQ_NEIGH_ONLY) fall through to the proxy entries.
 * NEIGH_SEQ_IS_PNEIGH records which table the cursor v belongs to.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_seq_state *state;
        void *rc;

        if (v == SEQ_START_TOKEN) {
                /* Header was just emitted; move to the first real entry. */
                rc = neigh_get_first(seq);
                goto out;
        }

        state = seq->private;
        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
                rc = neigh_get_next(seq, v, NULL);
                if (rc)
                        goto out;
                /* Neigh table exhausted: switch over to proxy entries. */
                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
                        rc = pneigh_get_first(seq);
        } else {
                /* A pneigh cursor implies proxy walking was permitted. */
                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
                rc = pneigh_get_next(seq, v, NULL);
        }
out:
        ++(*pos);
        return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2617
/* seq_file ->stop callback: drop the lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(rcu_bh)
{
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2624
2625 /* statistics via seq_file */
2626
2627 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2628 {
2629         struct neigh_table *tbl = seq->private;
2630         int cpu;
2631
2632         if (*pos == 0)
2633                 return SEQ_START_TOKEN;
2634
2635         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2636                 if (!cpu_possible(cpu))
2637                         continue;
2638                 *pos = cpu+1;
2639                 return per_cpu_ptr(tbl->stats, cpu);
2640         }
2641         return NULL;
2642 }
2643
2644 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2645 {
2646         struct neigh_table *tbl = seq->private;
2647         int cpu;
2648
2649         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2650                 if (!cpu_possible(cpu))
2651                         continue;
2652                 *pos = cpu+1;
2653                 return per_cpu_ptr(tbl->stats, cpu);
2654         }
2655         return NULL;
2656 }
2657
/* seq_file ->stop for the stats file: nothing to release. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2662
/*
 * seq_file ->show for the stats file: print a header for the start
 * token, otherwise one line of per-CPU counters.  Note that "entries"
 * is the table-wide total, repeated on every CPU's line.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
        struct neigh_table *tbl = seq->private;
        struct neigh_statistics *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
                return 0;
        }

        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
                        "%08lx %08lx  %08lx %08lx %08lx\n",
                   atomic_read(&tbl->entries),

                   st->allocs,
                   st->destroys,
                   st->hash_grows,

                   st->lookups,
                   st->hits,

                   st->res_failed,

                   st->rcv_probes_mcast,
                   st->rcv_probes_ucast,

                   st->periodic_gc_runs,
                   st->forced_gc_runs,
                   st->unres_discards
                   );

        return 0;
}
2696
/* seq_file iterator for the per-table statistics /proc file. */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2703
2704 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2705 {
2706         int ret = seq_open(file, &neigh_stat_seq_ops);
2707
2708         if (!ret) {
2709                 struct seq_file *sf = file->private_data;
2710                 sf->private = PDE_DATA(inode);
2711         }
2712         return ret;
2713 };
2714
/* file_operations for the per-table statistics /proc file. */
static const struct file_operations neigh_stat_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = neigh_stat_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2722
2723 #endif /* CONFIG_PROC_FS */
2724
/*
 * Upper bound on the netlink message size needed to describe one
 * neighbour entry — used to size the skb in __neigh_notify().
 */
static inline size_t neigh_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ndmsg))
               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
               + nla_total_size(sizeof(struct nda_cacheinfo))
               + nla_total_size(4); /* NDA_PROBES */
}
2733
2734 static void __neigh_notify(struct neighbour *n, int type, int flags)
2735 {
2736         struct net *net = dev_net(n->dev);
2737         struct sk_buff *skb;
2738         int err = -ENOBUFS;
2739
2740         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2741         if (skb == NULL)
2742                 goto errout;
2743
2744         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2745         if (err < 0) {
2746                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2747                 WARN_ON(err == -EMSGSIZE);
2748                 kfree_skb(skb);
2749                 goto errout;
2750         }
2751         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2752         return;
2753 errout:
2754         if (err < 0)
2755                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2756 }
2757
#ifdef CONFIG_ARPD
/*
 * Ask a user-space ARP daemon to resolve n by emitting an
 * RTM_GETNEIGH request on the RTNLGRP_NEIGH multicast group.
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2765
2766 #ifdef CONFIG_SYSCTL
static int zero;        /* lower bound shared by the minmax sysctl handlers */
/* Cap unres_qlen so packets * SKB_TRUESIZE(ETH_FRAME_LEN) fits in an int. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2769
/*
 * Handler for the legacy "unres_qlen" sysctl: the backing field holds
 * bytes (queue_len_bytes) but user space reads and writes a packet
 * count, so convert via SKB_TRUESIZE(ETH_FRAME_LEN) in both
 * directions.  The round trip truncates: reads round down to whole
 * packets.
 */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
                           size_t *lenp, loff_t *ppos)
{
        int size, ret;
        ctl_table tmp = *ctl;

        /* Clamp the packet count so the byte conversion cannot overflow. */
        tmp.extra1 = &zero;
        tmp.extra2 = &unres_qlen_max;
        tmp.data = &size;

        /* Expose the stored byte count as a packet count. */
        size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

        if (write && !ret)
                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
        return ret;
}
2787
/*
 * Indices into neigh_sysctl_template.neigh_vars[].  The entries from
 * NEIGH_VAR_GC_INTERVAL onward are table-wide knobs that only exist in
 * the "default" directory (see neigh_sysctl_register()).
 */
enum {
        NEIGH_VAR_MCAST_PROBE,
        NEIGH_VAR_UCAST_PROBE,
        NEIGH_VAR_APP_PROBE,
        NEIGH_VAR_RETRANS_TIME,
        NEIGH_VAR_BASE_REACHABLE_TIME,
        NEIGH_VAR_DELAY_PROBE_TIME,
        NEIGH_VAR_GC_STALETIME,
        NEIGH_VAR_QUEUE_LEN,
        NEIGH_VAR_QUEUE_LEN_BYTES,
        NEIGH_VAR_PROXY_QLEN,
        NEIGH_VAR_ANYCAST_DELAY,
        NEIGH_VAR_PROXY_DELAY,
        NEIGH_VAR_LOCKTIME,
        NEIGH_VAR_RETRANS_TIME_MS,
        NEIGH_VAR_BASE_REACHABLE_TIME_MS,
        NEIGH_VAR_GC_INTERVAL,
        NEIGH_VAR_GC_THRESH1,
        NEIGH_VAR_GC_THRESH2,
        NEIGH_VAR_GC_THRESH3,
        NEIGH_VAR_MAX
};
2810
/*
 * Template for the per-device (and per-table default) neigh sysctl
 * directory.  neigh_sysctl_register() kmemdup()s this and fills in the
 * .data pointers; the trailing {} terminates the table.
 */
static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                [NEIGH_VAR_MCAST_PROBE] = {
                        .procname       = "mcast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_UCAST_PROBE] = {
                        .procname       = "ucast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_APP_PROBE] = {
                        .procname       = "app_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_RETRANS_TIME] = {
                        .procname       = "retrans_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_BASE_REACHABLE_TIME] = {
                        .procname       = "base_reachable_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_DELAY_PROBE_TIME] = {
                        .procname       = "delay_first_probe_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_STALETIME] = {
                        .procname       = "gc_stale_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                /* Legacy packet-count view of unres_qlen_bytes (see
                 * proc_unres_qlen()); both share the same .data field. */
                [NEIGH_VAR_QUEUE_LEN] = {
                        .procname       = "unres_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_unres_qlen,
                },
                [NEIGH_VAR_QUEUE_LEN_BYTES] = {
                        .procname       = "unres_qlen_bytes",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_PROXY_QLEN] = {
                        .procname       = "proxy_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_ANYCAST_DELAY] = {
                        .procname       = "anycast_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_PROXY_DELAY] = {
                        .procname       = "proxy_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_LOCKTIME] = {
                        .procname       = "locktime",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_RETRANS_TIME_MS] = {
                        .procname       = "retrans_time_ms",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_ms_jiffies,
                },
                [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
                        .procname       = "base_reachable_time_ms",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_ms_jiffies,
                },
                /* Table-wide knobs below only appear in the "default" dir. */
                [NEIGH_VAR_GC_INTERVAL] = {
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_THRESH1] = {
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_GC_THRESH2] = {
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_GC_THRESH3] = {
                        .procname       = "gc_thresh3",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                {},
        },
};
2934
/*
 * Create the net/<proto>/neigh/<dev|default> sysctl directory for the
 * given parms block.  @handler, when supplied by the protocol,
 * replaces the generic handlers for the four reachability-time knobs.
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          char *p_name, proc_handler *handler)
{
        struct neigh_sysctl_table *t;
        const char *dev_name_source = NULL;
        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto err;

        /* Point the duplicated template at this parms block's fields. */
        t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
        t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
        t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
        t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
        t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
        t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
        /* Both qlen knobs are views onto the same byte counter. */
        t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
        t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
        t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
        t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
        t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
        t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
        t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

        if (dev) {
                dev_name_source = dev->name;
                /* Terminate the table early */
                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
        } else {
                dev_name_source = "default";
                /* NOTE(review): assumes the four table-wide gc ints
                 * (gc_interval, gc_thresh1..3) sit immediately after
                 * the default parms inside struct neigh_table —
                 * confirm against the struct layout if it changes. */
                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
        }


        if (handler) {
                /* RetransTime */
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
                /* RetransTime (in milliseconds)*/
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
        }

        /* Don't export sysctls to unprivileged users */
        if (neigh_parms_net(p)->user_ns != &init_user_ns)
                t->neigh_vars[0].procname = NULL;       /* empties the table */

        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
                p_name, dev_name_source);
        t->sysctl_header =
                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
        if (!t->sysctl_header)
                goto free;

        p->sysctl_table = t;
        return 0;

free:
        kfree(t);
err:
        return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3011
/*
 * Tear down the sysctl directory created by neigh_sysctl_register().
 * The pointer is cleared before unregistering so p->sysctl_table never
 * refers to a table that is being freed.
 */
void neigh_sysctl_unregister(struct neigh_parms *p)
{
        if (p->sysctl_table) {
                struct neigh_sysctl_table *t = p->sysctl_table;
                p->sysctl_table = NULL;
                unregister_net_sysctl_table(t->sysctl_header);
                kfree(t);
        }
}
EXPORT_SYMBOL(neigh_sysctl_unregister);
3022
3023 #endif  /* CONFIG_SYSCTL */
3024
/*
 * Register the PF_UNSPEC rtnetlink handlers for neighbour entries and
 * neighbour tables.  Runs at subsys_initcall time.
 */
static int __init neigh_init(void)
{
        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
                      NULL);
        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

        return 0;
}

subsys_initcall(neigh_init);
3039