netfilter: nf_conntrack: Support expectations in different zones
net/netfilter/nf_conntrack_expect.c (firefly-linux-kernel-4.4.55.git)
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
                                u32 portid, int report)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;

        nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_conntrack_hash_rnd))
                init_nf_conntrack_hash_rnd();

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

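/* The multiply-shift above maps the 32-bit jhash value onto
 * [0, nf_ct_expect_hsize) without a modulo: the 64-bit product
 * hash * hsize carries the bucket in its top 32 bits. An illustration
 * (values made up, not from this file): with hsize = 64 and
 * hash = 0x80000000, the product is 0x2000000000, so
 * ((u64)hash * 64) >> 32 = 32, i.e. bucket 32.
 */
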
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
                    const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
                    nf_ct_zone(i->master) == zone)
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
                      const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, zone, tuple);
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

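/* Usage sketch (hypothetical caller, for illustration only): look up an
 * expectation under your own reference and drop it when done.
 *
 *      struct nf_conntrack_expect *exp;
 *
 *      exp = nf_ct_expect_find_get(net, zone, tuple);
 *      if (exp) {
 *              ... inspect exp->tuple, exp->master ...
 *              nf_ct_expect_put(exp);
 *      }
 *
 * The atomic_inc_not_zero() above makes the lookup safe against an
 * expectation whose last reference is being dropped concurrently under RCU.
 */
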
/* If an expectation for this connection is found, it gets deleted from
 * the global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
                       const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
                    nf_ct_zone(i->master) == zone) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If the master is not in the hash table yet (ie. the packet hasn't
           left this machine yet), how can the other end know about the
           expected connection? Hence these are not the droids you are
           looking for (if the master ct never got confirmed, we'd hold a
           reference to it and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* Part covered by intersection of masks must be unequal,
           otherwise they clash */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
               nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

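/* Worked example (illustrative, not from this file): if expectation A
 * wildcards the source port (a->mask.src.u.all == 0) while B matches it
 * exactly, the intersected mask keeps only the source-address bits. The
 * two then clash whenever their destination parts are identical and their
 * source addresses agree under that mask, provided both masters live in
 * the same conntrack zone.
 */
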
static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class &&
                nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
                nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
                nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

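/* Usage sketch (hypothetical helper code, for illustration only; daddr and
 * port stand for values the helper parsed from the control connection): a
 * conntrack helper would typically register an expectation like so:
 *
 *      struct nf_conntrack_expect *exp;
 *      int ret;
 *
 *      exp = nf_ct_expect_alloc(ct);   // ct: the confirmed master conntrack
 *      if (exp == NULL)
 *              return NF_DROP;
 *      // expect TCP to daddr:port, from any source address and port
 *      nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *                        NULL, &daddr, IPPROTO_TCP, NULL, &port);
 *      ret = nf_ct_expect_related(exp);
 *      nf_ct_expect_put(exp);          // drop the allocation reference
 *
 * nf_ct_expect_related() is the no-event wrapper around
 * nf_ct_expect_related_report() declared in nf_conntrack_expect.h.
 */
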
static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

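/* Reference counting, in brief: nf_ct_expect_alloc() returns an object with
 * use == 1 (the caller's reference); nf_ct_expect_insert() below adds two
 * more, one owned by the hash tables and one by the timer. Freeing is
 * deferred through call_rcu() so lockless readers traversing expect_hash
 * under rcu_read_lock() never see the memory recycled under them.
 */
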
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct nf_conntrack_helper *helper;
        struct net *net = nf_ct_exp_net(exp);
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

        /* two references: one for hash insert, one for the timer */
        atomic_add(2, &exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        helper = rcu_dereference_protected(master_help->helper,
                                           lockdep_is_held(&nf_conntrack_lock));
        if (helper) {
                exp->timeout.expires = jiffies +
                        helper->expect_policy[exp->class].timeout * HZ;
        }
        add_timer(&exp->timeout);

        NF_CT_STAT_INC(net, expect_create);
        return 0;
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;

        hlist_for_each_entry(exp, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_helper *helper;
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *next;
        unsigned int h;
        int ret = 1;

        if (!master_help) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        if (del_timer(&i->timeout)) {
                                nf_ct_unlink_expect(i);
                                nf_ct_expect_put(i);
                                break;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will be over limit? */
        helper = rcu_dereference_protected(master_help->helper,
                                           lockdep_is_held(&nf_conntrack_lock));
        if (helper) {
                p = &helper->expect_policy[expect->class];
                if (p->max_expected &&
                    master_help->expecting[expect->class] >= p->max_expected) {
                        evict_oldest_expect(master, expect);
                        if (master_help->expecting[expect->class] >=
                            p->max_expected) {
                                ret = -EMFILE;
                                goto out;
                        }
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                net_warn_ratelimited("nf_conntrack: expectation table full\n");
                ret = -EMFILE;
        }
out:
        return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
                                u32 portid, int report)
{
        int ret;

        spin_lock_bh(&nf_conntrack_lock);
        ret = __nf_ct_expect_check(expect);
        if (ret <= 0)
                goto out;

        ret = nf_ct_expect_insert(expect);
        if (ret < 0)
                goto out;
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
        return ret;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

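/* Note on return values: __nf_ct_expect_check() reports success as 1 and
 * failure as a negative errno, which is why the test above is "ret <= 0";
 * on the success path ret is then overwritten by nf_ct_expect_insert(), so
 * callers ultimately see 0 or a negative errno.
 */
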
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct nf_conntrack_helper *helper;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE) {
                seq_printf(s, "%sINACTIVE", delim);
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_USERSPACE)
                seq_printf(s, "%sUSERSPACE", delim);

        helper = rcu_dereference(nfct_help(expect->master)->helper);
        if (helper) {
                seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
                if (helper->expect_policy[expect->class].name)
                        seq_printf(s, "/%s",
                                   helper->expect_policy[expect->class].name);
        }

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next  = exp_seq_next,
        .stop  = exp_seq_stop,
        .show  = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &exp_seq_ops,
                            sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

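/* An illustrative /proc/net/nf_conntrack_expect line as produced by
 * exp_seq_show() above (all values made up):
 *
 *      297 l3proto = 2 proto=6 src=0.0.0.0 dst=192.0.2.10 sport=0 dport=40921 ftp
 *
 * i.e. seconds until timeout, the layer-3/4 protocol numbers, the expected
 * tuple as rendered by print_tuple(), optional flags and the helper name.
 */
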
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
        struct proc_dir_entry *proc;

        proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
                           &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
        return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
        remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

int nf_conntrack_expect_pernet_init(struct net *net)
{
        int err = -ENOMEM;

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
        if (net->ct.expect_hash == NULL)
                goto err1;

        err = exp_proc_init(net);
        if (err < 0)
                goto err2;

        return 0;
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_pernet_fini(struct net *net)
{
        exp_proc_remove(net);
        nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}

int nf_conntrack_expect_init(void)
{
        if (!nf_ct_expect_hsize) {
                nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
                if (!nf_ct_expect_hsize)
                        nf_ct_expect_hsize = 1;
        }
        nf_ct_expect_max = nf_ct_expect_hsize * 4;
        nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                sizeof(struct nf_conntrack_expect),
                                0, 0, NULL);
        if (!nf_ct_expect_cachep)
                return -ENOMEM;
        return 0;
}

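/* Sizing example (derived from the code above, sample value only): with
 * nf_conntrack_htable_size = 16384, the expectation hash gets
 * 16384 / 256 = 64 buckets and nf_ct_expect_max = 64 * 4 = 256 entries,
 * unless expect_hashsize was set explicitly via the module parameter.
 */
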
void nf_conntrack_expect_fini(void)
{
        rcu_barrier(); /* Wait for call_rcu() before destroy */
        kmem_cache_destroy(nf_ct_expect_cachep);
}