Merge remote-tracking branch 'remotes/aosp/android-3.0' into develop-3.0
[firefly-linux-kernel-4.4.55.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
/*
 * Compile-time debug switches.
 * Uncomment one of the defines below to route the matching *_DEBUG()
 * macro to pr_debug(); when a switch is left undefined the macro expands
 * to no_printk() instead.
 */
/* #define DEBUG */
/* #define IDEBUG */
/* #define MDEBUG */
/* #define RDEBUG */
/* #define CDEBUG */

/* Iface handling */
#ifdef IDEBUG
#define IF_DEBUG(...) pr_debug(__VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* Iptable Matching */
#ifdef MDEBUG
#define MT_DEBUG(...) pr_debug(__VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* Red-black tree handling */
#ifdef RDEBUG
#define RB_DEBUG(...) pr_debug(__VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
/* procfs ctrl/stats handling */
#ifdef CDEBUG
#define CT_DEBUG(...) pr_debug(__VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
41
42 #include <linux/file.h>
43 #include <linux/inetdevice.h>
44 #include <linux/module.h>
45 #include <linux/netfilter/x_tables.h>
46 #include <linux/netfilter/xt_qtaguid.h>
47 #include <linux/skbuff.h>
48 #include <linux/workqueue.h>
49 #include <net/addrconf.h>
50 #include <net/sock.h>
51 #include <net/tcp.h>
52 #include <net/udp.h>
53
54 #include <linux/netfilter/xt_socket.h>
/*
 * We only use the xt_socket funcs within a similar context to avoid unexpected
 * return values.
 * The mask has one bit per supported netfilter hook: PRE_ROUTING and
 * LOCAL_IN.
 */
#define XT_SOCKET_SUPPORTED_HOOKS \
	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
61
62
/* Name and handle of this module's procfs directory. */
static const char *module_procdirname = "xt_qtaguid";
static struct proc_dir_entry *xt_qtaguid_procdir;

/* Mode bits used for the per-interface proc entries ("iface_perms"). */
static unsigned int proc_iface_perms = S_IRUGO;
module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);

/* Handle and mode bits for the stats proc file ("stats_perms"). */
static struct proc_dir_entry *xt_qtaguid_stats_file;
static unsigned int proc_stats_perms = S_IRUGO;
module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);

/*
 * Handle and mode bits for the ctrl proc file ("ctrl_perms").
 * With CONFIG_ANDROID_PARANOID_NETWORK the default mode is writable by
 * everybody; otherwise only by the owner.
 */
static struct proc_dir_entry *xt_qtaguid_ctrl_file;
#ifdef CONFIG_ANDROID_PARANOID_NETWORK
static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
#else
static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
#endif
module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);

/*
 * Group ids exposed as the "stats_readall_gid" and "ctrl_write_gid"
 * parameters.
 * NOTE(review): presumably they gate reading all stats and writing to ctrl;
 * the code that consults them is not in this part of the file.
 */
#ifdef CONFIG_ANDROID_PARANOID_NETWORK
#include <linux/android_aid.h>
static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
#else
/* 0 means, don't limit anybody */
static gid_t proc_stats_readall_gid;
static gid_t proc_ctrl_write_gid;
#endif
module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
		   S_IRUGO | S_IWUSR);
module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
		   S_IRUGO | S_IWUSR);
94
/*
 * After the kernel has initialized this module, it is still possible
 * to make it passive:
 *  - do not register it via iptables.
 *   the matching code will not be invoked.
 *  - set passive to 0
 *   the iface stats handling will not act on notifications.
 * This is mostly useful when a bug is suspected.
 *
 * NOTE(review): module_passive is a bool that starts out false, so
 * "set passive to 0" reads as inverted; confirm against the code that
 * tests module_passive.
 */
static bool module_passive;
module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
106
107 /*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and can not be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against
 * the tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * There could be
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * (a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.)
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 *
 * The accessor functions in this file keep the uid_tag in the low 32 bits
 * and the acct_tag in the upper 32 bits.
 */
typedef uint64_t tag_t;  /* Only used via accessors */
137
/*
 * Name and handle of the proc directory under which one sub-directory per
 * tracked interface is created.
 */
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;


/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2
148
/* Traffic direction; used as the second index of data_counters.bpc. */
enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS	/* number of directions, not a direction */
};
154
/*
 * For now, TCP, UDP, the rest.
 * Used as the third index of data_counters.bpc.
 */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS		/* number of protocol slots, not a protocol */
};
162
/* One pair of running totals: bytes and packets. */
struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};
167
/*
 * Full set of counters for one tag, indexed by
 * [counter set][direction (enum ifs_tx_rx)][protocol (enum ifs_proto)].
 */
struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};
171
/*
 * Generic tag based node used as a base for rb_tree ops.
 * Structures that are kept in a tag-keyed tree embed it as their "tn"
 * member.
 */
struct tag_node {
	struct rb_node node;	/* linkage into the red-black tree */
	tag_t tag;		/* key the tree is ordered by */
};
177
/* Counters kept for one tag on one interface. */
struct tag_stat {
	struct tag_node tn;		/* tree linkage and the tag itself */
	struct data_counters counters;	/* traffic billed to this tag */
	/*
	 * If this tag is acct_tag based, we need to count against the
	 * matching parent uid_tag.
	 */
	struct data_counters *parent_counters;
};
187
/* Per-interface tracking entry, linked into iface_stat_list. */
struct iface_stat {
	struct list_head list;		/* linkage in iface_stat_list */
	char *ifname;			/* kstrdup()ed copy of the device name */
	/* Totals added from the device stats by iface_stat_update(). */
	uint64_t rx_bytes;
	uint64_t rx_packets;
	uint64_t tx_bytes;
	uint64_t tx_packets;
	bool active;			/* false for loopback or once updated */
	struct proc_dir_entry *proc_ptr; /* set by iface_create_proc_worker() */

	struct rb_root tag_stat_tree;	/* tag_stat entries for this iface */
	spinlock_t tag_stat_list_lock;
};

/* All tracked interfaces, and the lock callers take around list access. */
static LIST_HEAD(iface_stat_list);
static DEFINE_SPINLOCK(iface_stat_list_lock);
204
/*
 * This is needed to create proc_dir_entries from atomic context.
 * Allocated by iface_alloc() and freed by iface_create_proc_worker().
 */
struct iface_stat_work {
	struct work_struct iface_work;
	struct iface_stat *iface_entry;	/* interface needing proc entries */
};
210
/*
 * Track tag that this socket is transferring data for, and not necessarily
 * the uid that owns the socket.
 * This is the tag against which tag_stat.counters will be billed.
 * Entries live in sock_tag_tree and are keyed by the sk pointer value.
 */
struct sock_tag {
	struct rb_node sock_node;
	struct sock *sk;  /* Only used as a number, never dereferenced */
	/* The socket is needed for sockfd_put() */
	struct socket *socket;

	tag_t tag;
};
224
/* Module-wide event counters; the single instance is qtu_events. */
struct qtaguid_event_counts {
	/* Various successful events */
	atomic64_t sockets_tagged;
	atomic64_t sockets_untagged;
	atomic64_t counter_set_changes;
	atomic64_t delete_cmds;
	atomic64_t iface_events;  /* Number of NETDEV_* events handled */
	/*
	 * match_found_sk_*: numbers related to the netfilter matching
	 * function finding a sock for the sk_buff.
	 */
	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */
	/* The connection tracker had the sk. */
	atomic64_t match_found_sk_in_ct;
	/*
	 * No sk could be found. No apparent owner. Could happen with
	 * unsolicited traffic.
	 */
	atomic64_t match_found_sk_none;
};
static struct qtaguid_event_counts qtu_events;
246
/* Tree of struct sock_tag keyed by sk; get_sock_stat() locks around it. */
static struct rb_root sock_tag_tree = RB_ROOT;
static DEFINE_SPINLOCK(sock_tag_list_lock);

/* Track the set active_set for the given tag. */
struct tag_counter_set {
	struct tag_node tn;
	int active_set;		/* counter set index returned for this tag */
};

/* Tree of struct tag_counter_set; get_active_counter_set() locks around it. */
static struct rb_root tag_counter_set_tree = RB_ROOT;
static DEFINE_SPINLOCK(tag_counter_set_list_lock);
258
259 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);
260
261 /*----------------------------------------------*/
262 static inline int tag_compare(tag_t t1, tag_t t2)
263 {
264         return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
265 }
266
267 static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
268 {
269         return acct_tag | uid;
270 }
271 static inline tag_t make_tag_from_uid(uid_t uid)
272 {
273         return uid;
274 }
275 static inline uid_t get_uid_from_tag(tag_t tag)
276 {
277         return tag & 0xFFFFFFFFULL;
278 }
279 static inline tag_t get_utag_from_tag(tag_t tag)
280 {
281         return tag & 0xFFFFFFFFULL;
282 }
283 static inline tag_t get_atag_from_tag(tag_t tag)
284 {
285         return tag & ~0xFFFFFFFFULL;
286 }
287
288 static inline bool valid_atag(tag_t tag)
289 {
290         return !(tag & 0xFFFFFFFFULL);
291 }
292
293 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
294                                   enum ifs_tx_rx direction,
295                                   enum ifs_proto ifs_proto,
296                                   int bytes,
297                                   int packets)
298 {
299         counters->bpc[set][direction][ifs_proto].bytes += bytes;
300         counters->bpc[set][direction][ifs_proto].packets += packets;
301 }
302
303 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
304                                     int set,
305                                     enum ifs_tx_rx direction)
306 {
307         return counters->bpc[set][direction][IFS_TCP].bytes
308                 + counters->bpc[set][direction][IFS_UDP].bytes
309                 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
310 }
311
312 static inline uint64_t dc_sum_packets(struct data_counters *counters,
313                                       int set,
314                                       enum ifs_tx_rx direction)
315 {
316         return counters->bpc[set][direction][IFS_TCP].packets
317                 + counters->bpc[set][direction][IFS_UDP].packets
318                 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
319 }
320
321 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
322 {
323         struct rb_node *node = root->rb_node;
324
325         while (node) {
326                 struct tag_node *data = rb_entry(node, struct tag_node, node);
327                 int result = tag_compare(tag, data->tag);
328                 RB_DEBUG("qtaguid: tag_node_tree_search(): tag=0x%llx"
329                          " (uid=%d)\n",
330                          data->tag,
331                          get_uid_from_tag(data->tag));
332
333                 if (result < 0)
334                         node = node->rb_left;
335                 else if (result > 0)
336                         node = node->rb_right;
337                 else
338                         return data;
339         }
340         return NULL;
341 }
342
343 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
344 {
345         struct rb_node **new = &(root->rb_node), *parent = NULL;
346
347         /* Figure out where to put new node */
348         while (*new) {
349                 struct tag_node *this = rb_entry(*new, struct tag_node,
350                                                  node);
351                 int result = tag_compare(data->tag, this->tag);
352                 RB_DEBUG("qtaguid: tag_node_tree_insert(): tag=0x%llx"
353                          " (uid=%d)\n",
354                          this->tag,
355                          get_uid_from_tag(this->tag));
356                 parent = *new;
357                 if (result < 0)
358                         new = &((*new)->rb_left);
359                 else if (result > 0)
360                         new = &((*new)->rb_right);
361                 else
362                         BUG();
363         }
364
365         /* Add new node and rebalance tree. */
366         rb_link_node(&data->node, parent, new);
367         rb_insert_color(&data->node, root);
368 }
369
370 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
371 {
372         tag_node_tree_insert(&data->tn, root);
373 }
374
375 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
376 {
377         struct tag_node *node = tag_node_tree_search(root, tag);
378         if (!node)
379                 return NULL;
380         return rb_entry(&node->node, struct tag_stat, tn.node);
381 }
382
383 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
384                                         struct rb_root *root)
385 {
386         tag_node_tree_insert(&data->tn, root);
387 }
388
389 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
390                                                            tag_t tag)
391 {
392         struct tag_node *node = tag_node_tree_search(root, tag);
393         if (!node)
394                 return NULL;
395         return rb_entry(&node->node, struct tag_counter_set, tn.node);
396
397 }
398
399 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
400                                              const struct sock *sk)
401 {
402         struct rb_node *node = root->rb_node;
403
404         while (node) {
405                 struct sock_tag *data = rb_entry(node, struct sock_tag,
406                                                  sock_node);
407                 ptrdiff_t result = sk - data->sk;
408                 if (result < 0)
409                         node = node->rb_left;
410                 else if (result > 0)
411                         node = node->rb_right;
412                 else
413                         return data;
414         }
415         return NULL;
416 }
417
418 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
419 {
420         struct rb_node **new = &(root->rb_node), *parent = NULL;
421
422         /* Figure out where to put new node */
423         while (*new) {
424                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
425                                                  sock_node);
426                 ptrdiff_t result = data->sk - this->sk;
427                 parent = *new;
428                 if (result < 0)
429                         new = &((*new)->rb_left);
430                 else if (result > 0)
431                         new = &((*new)->rb_right);
432                 else
433                         BUG();
434         }
435
436         /* Add new node and rebalance tree. */
437         rb_link_node(&data->sock_node, parent, new);
438         rb_insert_color(&data->sock_node, root);
439 }
440
441 static int read_proc_u64(char *page, char **start, off_t off,
442                         int count, int *eof, void *data)
443 {
444         int len;
445         uint64_t value;
446         char *p = page;
447         uint64_t *iface_entry = data;
448
449         if (!data)
450                 return 0;
451
452         value = *iface_entry;
453         p += sprintf(p, "%llu\n", value);
454         len = (p - page) - off;
455         *eof = (len <= count) ? 1 : 0;
456         *start = page + off;
457         return len;
458 }
459
/*
 * proc read handler that prints the bool that @data points to as "0" or
 * "1" followed by a newline.
 *
 * @page:  buffer the text is formatted into
 * @start: set to the position in @page that corresponds to @off
 * @off:   offset into the text the reader asks for
 * @count: number of bytes the reader asks for
 * @eof:   set to 1 when the remaining text fits in @count, else 0
 * @data:  pointer to the value to print; NULL yields 0 without touching
 *         @start or @eof
 *
 * Returns the number of text bytes available from @off on. An offset at
 * or beyond the end of the text yields 0 rather than a negative length.
 */
static int read_proc_bool(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	const bool *bool_entry = data;
	off_t written;
	int len;

	if (!data)
		return 0;

	written = sprintf(page, "%u\n", (unsigned int)*bool_entry);
	if (off >= written) {
		/* Nothing left to hand out from this offset. */
		*start = page + written;
		len = 0;
	} else {
		*start = page + off;
		len = written - off;
	}
	*eof = (len <= count) ? 1 : 0;
	return len;
}
478
479 static int get_active_counter_set(tag_t tag)
480 {
481         int active_set = 0;
482         struct tag_counter_set *tcs;
483
484         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
485                  " (uid=%u)\n",
486                  tag, get_uid_from_tag(tag));
487         /* For now we only handle UID tags for active sets */
488         tag = get_utag_from_tag(tag);
489         spin_lock_bh(&tag_counter_set_list_lock);
490         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
491         if (tcs)
492                 active_set = tcs->active_set;
493         spin_unlock_bh(&tag_counter_set_list_lock);
494         return active_set;
495 }
496
497 /*
498  * Find the entry for tracking the specified interface.
499  * Caller must hold iface_stat_list_lock
500  */
501 static struct iface_stat *get_iface_entry(const char *ifname)
502 {
503         struct iface_stat *iface_entry;
504
505         /* Find the entry for tracking the specified tag within the interface */
506         if (ifname == NULL) {
507                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
508                 return NULL;
509         }
510
511         /* Iterate over interfaces */
512         list_for_each_entry(iface_entry, &iface_stat_list, list) {
513                 if (!strcmp(ifname, iface_entry->ifname))
514                         goto done;
515         }
516         iface_entry = NULL;
517 done:
518         return iface_entry;
519 }
520
521 static void iface_create_proc_worker(struct work_struct *work)
522 {
523         struct proc_dir_entry *proc_entry;
524         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
525                                                    iface_work);
526         struct iface_stat *new_iface  = isw->iface_entry;
527
528         /* iface_entries are not deleted, so safe to manipulate. */
529         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
530         if (IS_ERR_OR_NULL(proc_entry)) {
531                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
532                 kfree(isw);
533                 return;
534         }
535
536         new_iface->proc_ptr = proc_entry;
537
538         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
539                         read_proc_u64, &new_iface->tx_bytes);
540         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
541                         read_proc_u64, &new_iface->rx_bytes);
542         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
543                         read_proc_u64, &new_iface->tx_packets);
544         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
545                         read_proc_u64, &new_iface->rx_packets);
546         create_proc_read_entry("active", proc_iface_perms, proc_entry,
547                         read_proc_bool, &new_iface->active);
548
549         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
550                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
551         kfree(isw);
552 }
553
554 /* Caller must hold iface_stat_list_lock */
555 static struct iface_stat *iface_alloc(const char *ifname)
556 {
557         struct iface_stat *new_iface;
558         struct iface_stat_work *isw;
559
560         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
561         if (new_iface == NULL) {
562                 pr_err("qtaguid: iface_stat: create(%s): "
563                        "iface_stat alloc failed\n", ifname);
564                 return NULL;
565         }
566         new_iface->ifname = kstrdup(ifname, GFP_ATOMIC);
567         if (new_iface->ifname == NULL) {
568                 pr_err("qtaguid: iface_stat: create(%s): "
569                        "ifname alloc failed\n", ifname);
570                 kfree(new_iface);
571                 return NULL;
572         }
573         spin_lock_init(&new_iface->tag_stat_list_lock);
574         new_iface->active = true;
575         new_iface->tag_stat_tree = RB_ROOT;
576
577         /*
578          * ipv6 notifier chains are atomic :(
579          * No create_proc_read_entry() for you!
580          */
581         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
582         if (!isw) {
583                 pr_err("qtaguid: iface_stat: create(%s): "
584                        "work alloc failed\n", new_iface->ifname);
585                 kfree(new_iface->ifname);
586                 kfree(new_iface);
587                 return NULL;
588         }
589         isw->iface_entry = new_iface;
590         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
591         schedule_work(&isw->iface_work);
592         list_add(&new_iface->list, &iface_stat_list);
593         return new_iface;
594 }
595
596 /*
597  * Create a new entry for tracking the specified interface.
598  * Do nothing if the entry already exists.
599  * Called when an interface is configured with a valid IP address.
600  */
601 void iface_stat_create(const struct net_device *net_dev,
602                        struct in_ifaddr *ifa)
603 {
604         struct in_device *in_dev = NULL;
605         const char *ifname;
606         struct iface_stat *entry;
607         __be32 ipaddr = 0;
608         struct iface_stat *new_iface;
609
610         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
611                  net_dev ? net_dev->name : "?",
612                  ifa, net_dev);
613         if (!net_dev) {
614                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
615                 return;
616         }
617
618         ifname = net_dev->name;
619         if (!ifa) {
620                 in_dev = in_dev_get(net_dev);
621                 if (!in_dev) {
622                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
623                                ifname);
624                         return;
625                 }
626                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
627                          ifname, in_dev);
628                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
629                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
630                                  "ifa=%p ifa_label=%s\n",
631                                  ifname, ifa,
632                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
633                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
634                                 break;
635                 }
636         }
637
638         if (!ifa) {
639                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
640                          ifname);
641                 goto done_put;
642         }
643         ipaddr = ifa->ifa_local;
644
645         spin_lock_bh(&iface_stat_list_lock);
646         entry = get_iface_entry(ifname);
647         if (entry != NULL) {
648                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
649                          ifname, entry);
650                 if (ipv4_is_loopback(ipaddr)) {
651                         entry->active = false;
652                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
653                                  "disable tracking of loopback dev\n",
654                                  ifname);
655                 } else {
656                         entry->active = true;
657                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
658                                  "enable tracking. ip=%pI4\n",
659                                  ifname, &ipaddr);
660                 }
661                 goto done_unlock_put;
662         } else if (ipv4_is_loopback(ipaddr)) {
663                 IF_DEBUG("qtaguid: iface_stat: create(%s): "
664                          "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
665                 goto done_unlock_put;
666         }
667
668         new_iface = iface_alloc(ifname);
669         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
670                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
671
672 done_unlock_put:
673         spin_unlock_bh(&iface_stat_list_lock);
674 done_put:
675         if (in_dev)
676                 in_dev_put(in_dev);
677 }
678
679 void iface_stat_create_ipv6(const struct net_device *net_dev,
680                             struct inet6_ifaddr *ifa)
681 {
682         struct in_device *in_dev;
683         const char *ifname;
684         struct iface_stat *entry;
685         struct iface_stat *new_iface;
686         int addr_type;
687
688         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
689                  ifa, net_dev, net_dev ? net_dev->name : "");
690         if (!net_dev) {
691                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
692                 return;
693         }
694         ifname = net_dev->name;
695
696         in_dev = in_dev_get(net_dev);
697         if (!in_dev) {
698                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
699                        ifname);
700                 return;
701         }
702
703         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
704                  ifname, in_dev);
705
706         if (!ifa) {
707                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
708                          ifname);
709                 goto done_put;
710         }
711         addr_type = ipv6_addr_type(&ifa->addr);
712
713         spin_lock_bh(&iface_stat_list_lock);
714         entry = get_iface_entry(ifname);
715         if (entry != NULL) {
716                 IF_DEBUG("qtaguid: iface_stat: create6(%s): entry=%p\n",
717                          ifname, entry);
718                 if (addr_type & IPV6_ADDR_LOOPBACK) {
719                         entry->active = false;
720                         IF_DEBUG("qtaguid: iface_stat: create6(%s): "
721                                  "disable tracking of loopback dev\n",
722                                  ifname);
723                 } else {
724                         entry->active = true;
725                         IF_DEBUG("qtaguid: iface_stat: create6(%s): "
726                                  "enable tracking. ip=%pI6c\n",
727                                  ifname, &ifa->addr);
728                 }
729                 goto done_unlock_put;
730         } else if (addr_type & IPV6_ADDR_LOOPBACK) {
731                 IF_DEBUG("qtaguid: iface_stat: create6(%s): "
732                          "ignore loopback dev. ip=%pI6c\n",
733                          ifname, &ifa->addr);
734                 goto done_unlock_put;
735         }
736
737         new_iface = iface_alloc(ifname);
738         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
739                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
740
741 done_unlock_put:
742         spin_unlock_bh(&iface_stat_list_lock);
743 done_put:
744         in_dev_put(in_dev);
745 }
746
747 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
748 {
749         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
750         return sock_tag_tree_search(&sock_tag_tree, sk);
751 }
752
753 static struct sock_tag *get_sock_stat(const struct sock *sk)
754 {
755         struct sock_tag *sock_tag_entry;
756         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
757         if (!sk)
758                 return NULL;
759         spin_lock_bh(&sock_tag_list_lock);
760         sock_tag_entry = get_sock_stat_nl(sk);
761         spin_unlock_bh(&sock_tag_list_lock);
762         return sock_tag_entry;
763 }
764
765 static void
766 data_counters_update(struct data_counters *dc, int set,
767                      enum ifs_tx_rx direction, int proto, int bytes)
768 {
769         switch (proto) {
770         case IPPROTO_TCP:
771                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
772                 break;
773         case IPPROTO_UDP:
774                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
775                 break;
776         case IPPROTO_IP:
777         default:
778                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
779                                     1);
780                 break;
781         }
782 }
783
784 /*
785  * Update stats for the specified interface. Do nothing if the entry
786  * does not exist (when a device was never configured with an IP address).
787  * Called when an device is being unregistered.
788  */
789 static void iface_stat_update(struct net_device *dev)
790 {
791         struct rtnl_link_stats64 dev_stats, *stats;
792         struct iface_stat *entry;
793
794         stats = dev_get_stats(dev, &dev_stats);
795         spin_lock_bh(&iface_stat_list_lock);
796         entry = get_iface_entry(dev->name);
797         if (entry == NULL) {
798                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
799                          dev->name);
800                 spin_unlock_bh(&iface_stat_list_lock);
801                 return;
802         }
803         IF_DEBUG("qtaguid: iface_stat: update(%s): entry=%p\n",
804                  dev->name, entry);
805         if (entry->active) {
806                 entry->tx_bytes += stats->tx_bytes;
807                 entry->tx_packets += stats->tx_packets;
808                 entry->rx_bytes += stats->rx_bytes;
809                 entry->rx_packets += stats->rx_packets;
810                 entry->active = false;
811                 IF_DEBUG("qtaguid: iface_stat: update(%s): "
812                          " disable tracking. rx/tx=%llu/%llu\n",
813                          dev->name, stats->rx_bytes, stats->tx_bytes);
814         } else {
815                 IF_DEBUG("qtaguid: iface_stat: update(%s): disabled\n",
816                         dev->name);
817         }
818         spin_unlock_bh(&iface_stat_list_lock);
819 }
820
821 static void tag_stat_update(struct tag_stat *tag_entry,
822                         enum ifs_tx_rx direction, int proto, int bytes)
823 {
824         int active_set;
825         active_set = get_active_counter_set(tag_entry->tn.tag);
826         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
827                  "dir=%d proto=%d bytes=%d)\n",
828                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
829                  active_set, direction, proto, bytes);
830         data_counters_update(&tag_entry->counters, active_set, direction,
831                              proto, bytes);
832         if (tag_entry->parent_counters)
833                 data_counters_update(tag_entry->parent_counters, active_set,
834                                      direction, proto, bytes);
835 }
836
837 /*
838  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
839  * the interface.
840  * iface_entry->tag_stat_list_lock should be held.
841  */
842 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
843                                            tag_t tag)
844 {
845         struct tag_stat *new_tag_stat_entry = NULL;
846         IF_DEBUG("qtaguid: iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx"
847                  " (uid=%u)\n",
848                  iface_entry, tag, get_uid_from_tag(tag));
849         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
850         if (!new_tag_stat_entry) {
851                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
852                 goto done;
853         }
854         new_tag_stat_entry->tn.tag = tag;
855         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
856 done:
857         return new_tag_stat_entry;
858 }
859
860 static void if_tag_stat_update(const char *ifname, uid_t uid,
861                                const struct sock *sk, enum ifs_tx_rx direction,
862                                int proto, int bytes)
863 {
864         struct tag_stat *tag_stat_entry;
865         tag_t tag, acct_tag;
866         tag_t uid_tag;
867         struct data_counters *uid_tag_counters;
868         struct sock_tag *sock_tag_entry;
869         struct iface_stat *iface_entry;
870         struct tag_stat *new_tag_stat;
871         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
872                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
873                  ifname, uid, sk, direction, proto, bytes);
874
875
876         iface_entry = get_iface_entry(ifname);
877         if (!iface_entry) {
878                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
879                        ifname);
880                 return;
881         }
882         /* It is ok to process data when an iface_entry is inactive */
883
884         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
885                  ifname, iface_entry);
886
887         /*
888          * Look for a tagged sock.
889          * It will have an acct_uid.
890          */
891         sock_tag_entry = get_sock_stat(sk);
892         if (sock_tag_entry) {
893                 tag = sock_tag_entry->tag;
894                 acct_tag = get_atag_from_tag(tag);
895                 uid_tag = get_utag_from_tag(tag);
896         } else {
897                 uid_tag = make_tag_from_uid(uid);
898                 acct_tag = 0;
899                 tag = combine_atag_with_uid(acct_tag, uid);
900         }
901         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
902                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
903                  tag, get_uid_from_tag(tag), iface_entry);
904         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
905         spin_lock_bh(&iface_entry->tag_stat_list_lock);
906
907         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
908                                               tag);
909         if (tag_stat_entry) {
910                 /*
911                  * Updating the {acct_tag, uid_tag} entry handles both stats:
912                  * {0, uid_tag} will also get updated.
913                  */
914                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
915                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
916                 return;
917         }
918
919         /* Loop over tag list under this interface for {0,uid_tag} */
920         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
921                                               uid_tag);
922         if (!tag_stat_entry) {
923                 /* Here: the base uid_tag did not exist */
924                 /*
925                  * No parent counters. So
926                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
927                  */
928                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
929                 uid_tag_counters = &new_tag_stat->counters;
930         } else {
931                 uid_tag_counters = &tag_stat_entry->counters;
932         }
933
934         if (acct_tag) {
935                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
936                 new_tag_stat->parent_counters = uid_tag_counters;
937         }
938         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
939         tag_stat_update(new_tag_stat, direction, proto, bytes);
940 }
941
942 static int iface_netdev_event_handler(struct notifier_block *nb,
943                                       unsigned long event, void *ptr) {
944         struct net_device *dev = ptr;
945
946         if (unlikely(module_passive))
947                 return NOTIFY_DONE;
948
949         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
950                  "ev=0x%lx netdev=%p->name=%s\n",
951                  event, dev, dev ? dev->name : "");
952
953         switch (event) {
954         case NETDEV_UP:
955                 iface_stat_create(dev, NULL);
956                 break;
957         case NETDEV_DOWN:
958                 iface_stat_update(dev);
959                 break;
960         }
961         return NOTIFY_DONE;
962 }
963
964 static int iface_inet6addr_event_handler(struct notifier_block *nb,
965                                          unsigned long event, void *ptr)
966 {
967         struct inet6_ifaddr *ifa = ptr;
968         struct net_device *dev;
969
970         if (unlikely(module_passive))
971                 return NOTIFY_DONE;
972
973         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
974                  "ev=0x%lx ifa=%p\n",
975                  event, ifa);
976
977         switch (event) {
978         case NETDEV_UP:
979                 BUG_ON(!ifa || !ifa->idev);
980                 dev = (struct net_device *)ifa->idev->dev;
981                 iface_stat_create_ipv6(dev, ifa);
982                 atomic64_inc(&qtu_events.iface_events);
983                 break;
984         case NETDEV_DOWN:
985                 BUG_ON(!ifa || !ifa->idev);
986                 dev = (struct net_device *)ifa->idev->dev;
987                 iface_stat_update(dev);
988                 atomic64_inc(&qtu_events.iface_events);
989                 break;
990         }
991         return NOTIFY_DONE;
992 }
993
994 static int iface_inetaddr_event_handler(struct notifier_block *nb,
995                                         unsigned long event, void *ptr)
996 {
997         struct in_ifaddr *ifa = ptr;
998         struct net_device *dev;
999
1000         if (unlikely(module_passive))
1001                 return NOTIFY_DONE;
1002
1003         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1004                  "ev=0x%lx ifa=%p\n",
1005                  event, ifa);
1006
1007         switch (event) {
1008         case NETDEV_UP:
1009                 BUG_ON(!ifa || !ifa->ifa_dev);
1010                 dev = ifa->ifa_dev->dev;
1011                 iface_stat_create(dev, ifa);
1012                 atomic64_inc(&qtu_events.iface_events);
1013                 break;
1014         case NETDEV_DOWN:
1015                 BUG_ON(!ifa || !ifa->ifa_dev);
1016                 dev = ifa->ifa_dev->dev;
1017                 iface_stat_update(dev);
1018                 atomic64_inc(&qtu_events.iface_events);
1019                 break;
1020         }
1021         return NOTIFY_DONE;
1022 }
1023
1024 static struct notifier_block iface_netdev_notifier_blk = {
1025         .notifier_call = iface_netdev_event_handler,
1026 };
1027
1028 static struct notifier_block iface_inetaddr_notifier_blk = {
1029         .notifier_call = iface_inetaddr_event_handler,
1030 };
1031
1032 static struct notifier_block iface_inet6addr_notifier_blk = {
1033         .notifier_call = iface_inet6addr_event_handler,
1034 };
1035
1036 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1037 {
1038         int err;
1039
1040         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1041         if (!iface_stat_procdir) {
1042                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1043                 err = -1;
1044                 goto err;
1045         }
1046         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1047         if (err) {
1048                 pr_err("qtaguid: iface_stat: init "
1049                        "failed to register dev event handler\n");
1050                 goto err_zap_entry;
1051         }
1052         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1053         if (err) {
1054                 pr_err("qtaguid: iface_stat: init "
1055                        "failed to register ipv4 dev event handler\n");
1056                 goto err_unreg_nd;
1057         }
1058
1059         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1060         if (err) {
1061                 pr_err("qtaguid: iface_stat: init "
1062                        "failed to register ipv6 dev event handler\n");
1063                 goto err_unreg_ip4_addr;
1064         }
1065         return 0;
1066
1067 err_unreg_ip4_addr:
1068         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1069 err_unreg_nd:
1070         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1071 err_zap_entry:
1072         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1073 err:
1074         return err;
1075 }
1076
1077 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1078                                     struct xt_action_param *par)
1079 {
1080         struct sock *sk;
1081         unsigned int hook_mask = (1 << par->hooknum);
1082
1083         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1084                  par->hooknum, par->family);
1085
1086         /*
1087          * Let's not abuse the the xt_socket_get*_sk(), or else it will
1088          * return garbage SKs.
1089          */
1090         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1091                 return NULL;
1092
1093         switch (par->family) {
1094         case NFPROTO_IPV6:
1095                 sk = xt_socket_get6_sk(skb, par);
1096                 break;
1097         case NFPROTO_IPV4:
1098                 sk = xt_socket_get4_sk(skb, par);
1099                 break;
1100         default:
1101                 return NULL;
1102         }
1103
1104         /*
1105          * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1106          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1107          * Not fixed in 3.0-r3 :(
1108          */
1109         if (sk) {
1110                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1111                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1112                 if (sk->sk_state  == TCP_TIME_WAIT) {
1113                         xt_socket_put_sk(sk);
1114                         sk = NULL;
1115                 }
1116         }
1117         return sk;
1118 }
1119
1120 static void account_for_uid(const struct sk_buff *skb,
1121                             const struct sock *alternate_sk, uid_t uid,
1122                             struct xt_action_param *par)
1123 {
1124         const struct net_device *el_dev;
1125
1126         if (!skb->dev) {
1127                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1128                 el_dev = par->in ? : par->out;
1129         } else {
1130                 const struct net_device *other_dev;
1131                 el_dev = skb->dev;
1132                 other_dev = par->in ? : par->out;
1133                 if (el_dev != other_dev) {
1134                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1135                                 "par->(in/out)=%p %s\n",
1136                                 par->hooknum, el_dev, el_dev->name, other_dev,
1137                                 other_dev->name);
1138                 }
1139         }
1140
1141         if (unlikely(!el_dev)) {
1142                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1143         } else if (unlikely(!el_dev->name)) {
1144                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1145         } else {
1146                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1147                          par->hooknum,
1148                          el_dev->name,
1149                          el_dev->type);
1150
1151                 if_tag_stat_update(el_dev->name, uid,
1152                                 skb->sk ? skb->sk : alternate_sk,
1153                                 par->in ? IFS_RX : IFS_TX,
1154                                 ip_hdr(skb)->protocol, skb->len);
1155         }
1156 }
1157
1158 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1159 {
1160         const struct xt_qtaguid_match_info *info = par->matchinfo;
1161         const struct file *filp;
1162         bool got_sock = false;
1163         struct sock *sk;
1164         uid_t sock_uid;
1165         bool res;
1166
1167         if (unlikely(module_passive))
1168                 return (info->match ^ info->invert) == 0;
1169
1170         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1171                  par->hooknum, skb, par->in, par->out, par->family);
1172
1173         if (skb == NULL) {
1174                 res = (info->match ^ info->invert) == 0;
1175                 goto ret_res;
1176         }
1177
1178         sk = skb->sk;
1179
1180         if (sk == NULL) {
1181                 /*
1182                  * A missing sk->sk_socket happens when packets are in-flight
1183                  * and the matching socket is already closed and gone.
1184                  */
1185                 sk = qtaguid_find_sk(skb, par);
1186                 /*
1187                  * If we got the socket from the find_sk(), we will need to put
1188                  * it back, as nf_tproxy_get_sock_v4() got it.
1189                  */
1190                 got_sock = sk;
1191                 if (sk)
1192                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1193         } else {
1194                 atomic64_inc(&qtu_events.match_found_sk);
1195         }
1196         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1197                 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1198         if (sk != NULL) {
1199                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1200                         par->hooknum, sk, sk->sk_socket,
1201                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1202                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1203                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1204                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1205         }
1206
1207         if (sk == NULL || sk->sk_socket == NULL) {
1208                 /*
1209                  * Here, the qtaguid_find_sk() using connection tracking
1210                  * couldn't find the owner, so for now we just count them
1211                  * against the system.
1212                  */
1213                 /*
1214                  * TODO: unhack how to force just accounting.
1215                  * For now we only do iface stats when the uid-owner is not
1216                  * requested.
1217                  */
1218                 if (!(info->match & XT_QTAGUID_UID))
1219                         account_for_uid(skb, sk, 0, par);
1220                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1221                         par->hooknum,
1222                         sk ? sk->sk_socket : NULL);
1223                 res = (info->match ^ info->invert) == 0;
1224                 atomic64_inc(&qtu_events.match_found_sk_none);
1225                 goto put_sock_ret_res;
1226         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1227                 res = false;
1228                 goto put_sock_ret_res;
1229         }
1230         filp = sk->sk_socket->file;
1231         if (filp == NULL) {
1232                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1233                 res = ((info->match ^ info->invert) &
1234                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1235                 goto put_sock_ret_res;
1236         }
1237         sock_uid = filp->f_cred->fsuid;
1238         /*
1239          * TODO: unhack how to force just accounting.
1240          * For now we only do iface stats when the uid-owner is not requested
1241          */
1242         if (!(info->match & XT_QTAGUID_UID))
1243                 account_for_uid(skb, sk, sock_uid, par);
1244
1245         /*
1246          * The following two tests fail the match when:
1247          *    id not in range AND no inverted condition requested
1248          * or id     in range AND    inverted condition requested
1249          * Thus (!a && b) || (a && !b) == a ^ b
1250          */
1251         if (info->match & XT_QTAGUID_UID)
1252                 if ((filp->f_cred->fsuid >= info->uid_min &&
1253                      filp->f_cred->fsuid <= info->uid_max) ^
1254                     !(info->invert & XT_QTAGUID_UID)) {
1255                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1256                                  par->hooknum);
1257                         res = false;
1258                         goto put_sock_ret_res;
1259                 }
1260         if (info->match & XT_QTAGUID_GID)
1261                 if ((filp->f_cred->fsgid >= info->gid_min &&
1262                                 filp->f_cred->fsgid <= info->gid_max) ^
1263                         !(info->invert & XT_QTAGUID_GID)) {
1264                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1265                                 par->hooknum);
1266                         res = false;
1267                         goto put_sock_ret_res;
1268                 }
1269
1270         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1271         res = true;
1272
1273 put_sock_ret_res:
1274         if (got_sock)
1275                 xt_socket_put_sk(sk);
1276 ret_res:
1277         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1278         return res;
1279 }
1280
1281 /*
1282  * Procfs reader to get all active socket tags using style "1)" as described in
1283  * fs/proc/generic.c
1284  */
1285 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1286                                   off_t items_to_skip, int char_count, int *eof,
1287                                   void *data)
1288 {
1289         char *outp = page;
1290         int len;
1291         uid_t uid;
1292         struct sock_tag *sock_tag_entry;
1293         struct rb_node *node;
1294         int item_index = 0;
1295
1296         if (unlikely(module_passive)) {
1297                 *eof = 1;
1298                 return 0;
1299         }
1300
1301         /* TODO: support skipping num_items_returned on entry. */
1302         CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1303                 page, items_to_skip, char_count, *eof);
1304
1305         if (*eof)
1306                 return 0;
1307
1308         spin_lock_bh(&sock_tag_list_lock);
1309         for (node = rb_first(&sock_tag_tree);
1310              node;
1311              node = rb_next(node)) {
1312                 if (item_index++ < items_to_skip)
1313                         continue;
1314                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1315                 uid = get_uid_from_tag(sock_tag_entry->tag);
1316                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u)\n",
1317                          sock_tag_entry->sk,
1318                          sock_tag_entry->tag,
1319                          uid
1320                         );
1321                 len = snprintf(outp, char_count,
1322                                "sock=%p tag=0x%llx (uid=%u)\n",
1323                                sock_tag_entry->sk, sock_tag_entry->tag, uid);
1324                 if (len >= char_count) {
1325                         spin_unlock_bh(&sock_tag_list_lock);
1326                         *outp = '\0';
1327                         return outp - page;
1328                 }
1329                 outp += len;
1330                 char_count -= len;
1331                 (*num_items_returned)++;
1332         }
1333         spin_unlock_bh(&sock_tag_list_lock);
1334
1335         if (item_index++ >= items_to_skip) {
1336                 len = snprintf(outp, char_count,
1337                                "events: sockets_tagged=%llu "
1338                                "sockets_untagged=%llu "
1339                                "counter_set_changes=%llu "
1340                                "delete_cmds=%llu "
1341                                "iface_events=%llu "
1342                                "match_found_sk=%llu "
1343                                "match_found_sk_in_ct=%llu "
1344                                "match_found_sk_none=%llu\n",
1345                                atomic64_read(&qtu_events.sockets_tagged),
1346                                atomic64_read(&qtu_events.sockets_untagged),
1347                                atomic64_read(&qtu_events.counter_set_changes),
1348                                atomic64_read(&qtu_events.delete_cmds),
1349                                atomic64_read(&qtu_events.iface_events),
1350                                atomic64_read(&qtu_events.match_found_sk),
1351                                atomic64_read(&qtu_events.match_found_sk_in_ct),
1352                                atomic64_read(&qtu_events.match_found_sk_none));
1353                 if (len >= char_count) {
1354                         *outp = '\0';
1355                         return outp - page;
1356                 }
1357                 outp += len;
1358                 char_count -= len;
1359                 (*num_items_returned)++;
1360         }
1361
1362         *eof = 1;
1363         return outp - page;
1364 }
1365
1366 static bool can_manipulate_uids(void)
1367 {
1368         /* root pwnd */
1369         return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
1370                 || in_egroup_p(proc_ctrl_write_gid);
1371 }
1372
1373 static bool can_impersonate_uid(uid_t uid)
1374 {
1375         return uid == current_fsuid() || can_manipulate_uids();
1376 }
1377
1378 static bool can_read_other_uid_stats(uid_t uid)
1379 {
1380         /* root pwnd */
1381         return unlikely(!current_fsuid()) || uid == current_fsuid()
1382                 || unlikely(!proc_stats_readall_gid)
1383                 || in_egroup_p(proc_stats_readall_gid);
1384 }
1385
1386 /*
1387  * Delete socket tags, and stat tags associated with a given
1388  * accouting tag and uid.
1389  */
1390 static int ctrl_cmd_delete(const char *input)
1391 {
1392         char cmd;
1393         uid_t uid;
1394         uid_t entry_uid;
1395         tag_t acct_tag;
1396         tag_t tag;
1397         int res, argc;
1398         struct iface_stat *iface_entry;
1399         struct rb_node *node;
1400         struct sock_tag *st_entry;
1401         struct rb_root st_to_free_tree = RB_ROOT;
1402         struct tag_stat *ts_entry;
1403         struct tag_counter_set *tcs_entry;
1404
1405         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1406         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1407                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1408                  acct_tag, uid);
1409         if (argc < 2) {
1410                 res = -EINVAL;
1411                 goto err;
1412         }
1413         if (!valid_atag(acct_tag)) {
1414                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1415                 res = -EINVAL;
1416                 goto err;
1417         }
1418         if (argc < 3) {
1419                 uid = current_fsuid();
1420         } else if (!can_impersonate_uid(uid)) {
1421                 pr_info("qtaguid: ctrl_delete(%s): "
1422                         "insufficient priv from pid=%u uid=%u\n",
1423                         input, current->pid, current_fsuid());
1424                 res = -EPERM;
1425                 goto err;
1426         }
1427
1428         /* Delete socket tags */
1429         spin_lock_bh(&sock_tag_list_lock);
1430         node = rb_first(&sock_tag_tree);
1431         while (node) {
1432                 st_entry = rb_entry(node, struct sock_tag, sock_node);
1433                 entry_uid = get_uid_from_tag(st_entry->tag);
1434                 node = rb_next(node);
1435                 if (entry_uid != uid)
1436                         continue;
1437
1438                 if (!acct_tag || st_entry->tag == tag) {
1439                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
1440                         /* Can't sockfd_put() within spinlock, do it later. */
1441                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
1442                 }
1443         }
1444         spin_unlock_bh(&sock_tag_list_lock);
1445
1446         node = rb_first(&st_to_free_tree);
1447         while (node) {
1448                 st_entry = rb_entry(node, struct sock_tag, sock_node);
1449                 node = rb_next(node);
1450                 CT_DEBUG("qtaguid: ctrl_delete(): "
1451                          "erase st: sk=%p tag=0x%llx (uid=%u)\n",
1452                          st_entry->sk,
1453                          st_entry->tag,
1454                          entry_uid);
1455                 rb_erase(&st_entry->sock_node, &st_to_free_tree);
1456                 sockfd_put(st_entry->socket);
1457                 kfree(st_entry);
1458         }
1459
1460         tag = combine_atag_with_uid(acct_tag, uid);
1461
1462         /* Delete tag counter-sets */
1463         spin_lock_bh(&tag_counter_set_list_lock);
1464         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1465         if (tcs_entry) {
1466                 CT_DEBUG("qtaguid: ctrl_delete(): "
1467                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1468                          tcs_entry->tn.tag,
1469                          get_uid_from_tag(tcs_entry->tn.tag),
1470                          tcs_entry->active_set);
1471                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1472                 kfree(tcs_entry);
1473         }
1474         spin_unlock_bh(&tag_counter_set_list_lock);
1475
1476         /*
1477          * If acct_tag is 0, then all entries belonging to uid are
1478          * erased.
1479          */
1480         spin_lock_bh(&iface_stat_list_lock);
1481         list_for_each_entry(iface_entry, &iface_stat_list, list) {
1482                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1483                 node = rb_first(&iface_entry->tag_stat_tree);
1484                 while (node) {
1485                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
1486                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1487                         node = rb_next(node);
1488                         if (entry_uid != uid)
1489                                 continue;
1490                         if (!acct_tag || ts_entry->tn.tag == tag) {
1491                                 CT_DEBUG("qtaguid: ctrl_delete(): "
1492                                          "erase ts: %s 0x%llx %u\n",
1493                                          iface_entry->ifname,
1494                                          get_atag_from_tag(ts_entry->tn.tag),
1495                                          entry_uid);
1496                                 rb_erase(&ts_entry->tn.node,
1497                                          &iface_entry->tag_stat_tree);
1498                                 kfree(ts_entry);
1499                         }
1500                 }
1501                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1502         }
1503         spin_unlock_bh(&iface_stat_list_lock);
1504         atomic64_inc(&qtu_events.delete_cmds);
1505         res = 0;
1506
1507 err:
1508         return res;
1509 }
1510
1511 static int ctrl_cmd_counter_set(const char *input)
1512 {
1513         char cmd;
1514         uid_t uid = 0;
1515         tag_t tag;
1516         int res, argc;
1517         struct tag_counter_set *tcs;
1518         int counter_set;
1519
1520         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
1521         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
1522                  "set=%d uid=%u\n", input, argc, cmd,
1523                  counter_set, uid);
1524         if (argc != 3) {
1525                 res = -EINVAL;
1526                 goto err;
1527         }
1528         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
1529                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
1530                         input);
1531                 res = -EINVAL;
1532                 goto err;
1533         }
1534         if (!can_manipulate_uids()) {
1535                 pr_info("qtaguid: ctrl_counterset(%s): "
1536                         "insufficient priv from pid=%u uid=%u\n",
1537                         input, current->pid, current_fsuid());
1538                 res = -EPERM;
1539                 goto err;
1540         }
1541
1542         tag = make_tag_from_uid(uid);
1543         spin_lock_bh(&tag_counter_set_list_lock);
1544         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1545         if (!tcs) {
1546                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
1547                 if (!tcs) {
1548                         spin_unlock_bh(&tag_counter_set_list_lock);
1549                         pr_err("qtaguid: ctrl_counterset(%s): "
1550                                "failed to alloc counter set\n",
1551                                input);
1552                         res = -ENOMEM;
1553                         goto err;
1554                 }
1555                 tcs->tn.tag = tag;
1556                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
1557                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
1558                          "(uid=%u) set=%d\n",
1559                          input, tag, get_uid_from_tag(tag), counter_set);
1560         }
1561         tcs->active_set = counter_set;
1562         spin_unlock_bh(&tag_counter_set_list_lock);
1563         atomic64_inc(&qtu_events.counter_set_changes);
1564         res = 0;
1565
1566 err:
1567         return res;
1568 }
1569
1570 static int ctrl_cmd_tag(const char *input)
1571 {
1572         char cmd;
1573         int sock_fd = 0;
1574         uid_t uid = 0;
1575         tag_t acct_tag = 0;
1576         struct socket *el_socket;
1577         int refcnt = -1;
1578         int res, argc;
1579         struct sock_tag *sock_tag_entry;
1580
1581         /* Unassigned args will get defaulted later. */
1582         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
1583         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
1584                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
1585                  acct_tag, uid);
1586         if (argc < 2) {
1587                 res = -EINVAL;
1588                 goto err;
1589         }
1590         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
1591         if (!el_socket) {
1592                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
1593                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1594                 goto err;
1595         }
1596         refcnt = atomic_read(&el_socket->file->f_count);
1597         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%d\n",
1598                  input, refcnt);
1599         if (argc < 3) {
1600                 acct_tag = 0;
1601         } else if (!valid_atag(acct_tag)) {
1602                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
1603                 res = -EINVAL;
1604                 goto err_put;
1605         }
1606         CT_DEBUG("qtaguid: ctrl_tag(%s): "
1607                  "uid=%u euid=%u fsuid=%u "
1608                  "in_group=%d in_egroup=%d\n",
1609                  input, current_uid(), current_euid(), current_fsuid(),
1610                  in_group_p(proc_stats_readall_gid),
1611                  in_egroup_p(proc_stats_readall_gid));
1612         if (argc < 4) {
1613                 uid = current_fsuid();
1614         } else if (!can_impersonate_uid(uid)) {
1615                 pr_info("qtaguid: ctrl_tag(%s): "
1616                         "insufficient priv from pid=%u uid=%u\n",
1617                         input, current->pid, current_fsuid());
1618                 res = -EPERM;
1619                 goto err_put;
1620         }
1621
1622         spin_lock_bh(&sock_tag_list_lock);
1623         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1624         if (sock_tag_entry) {
1625                 /*
1626                  * This is a re-tagging, so release the sock_fd that was
1627                  * locked at the time of the 1st tagging.
1628                  */
1629                 sockfd_put(sock_tag_entry->socket);
1630                 refcnt--;
1631                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1632                                                             uid);
1633         } else {
1634                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
1635                                          GFP_ATOMIC);
1636                 if (!sock_tag_entry) {
1637                         pr_err("qtaguid: ctrl_tag(%s): "
1638                                "socket tag alloc failed\n",
1639                                input);
1640                         spin_unlock_bh(&sock_tag_list_lock);
1641                         res = -ENOMEM;
1642                         goto err_put;
1643                 }
1644                 sock_tag_entry->sk = el_socket->sk;
1645                 sock_tag_entry->socket = el_socket;
1646                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1647                                                             uid);
1648                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
1649                 atomic64_inc(&qtu_events.sockets_tagged);
1650         }
1651         spin_unlock_bh(&sock_tag_list_lock);
1652         /* We keep the ref to the socket (file) until it is untagged */
1653         CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
1654                  input,
1655                  el_socket ? atomic_read(&el_socket->file->f_count) : -1);
1656         return 0;
1657
1658 err_put:
1659         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1660         sockfd_put(el_socket);
1661         refcnt--;
1662 err:
1663         CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
1664                  input, refcnt);
1665         return res;
1666 }
1667
1668 static int ctrl_cmd_untag(const char *input)
1669 {
1670         char cmd;
1671         int sock_fd = 0;
1672         struct socket *el_socket;
1673         int refcnt = -1;
1674         int res, argc;
1675         struct sock_tag *sock_tag_entry;
1676
1677         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
1678         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
1679                  input, argc, cmd, sock_fd);
1680         if (argc < 2) {
1681                 res = -EINVAL;
1682                 goto err;
1683         }
1684         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
1685         if (!el_socket) {
1686                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
1687                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1688                 goto err;
1689         }
1690         refcnt = atomic_read(&el_socket->file->f_count);
1691         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%d\n",
1692                  input, refcnt);
1693         spin_lock_bh(&sock_tag_list_lock);
1694         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1695         if (!sock_tag_entry) {
1696                 spin_unlock_bh(&sock_tag_list_lock);
1697                 res = -EINVAL;
1698                 goto err_put;
1699         }
1700         /*
1701          * The socket already belongs to the current process
1702          * so it can do whatever it wants to it.
1703          */
1704         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
1705
1706         /*
1707          * Release the sock_fd that was grabbed at tag time,
1708          * and once more for the sockfd_lookup() here.
1709          */
1710         sockfd_put(sock_tag_entry->socket);
1711         spin_unlock_bh(&sock_tag_list_lock);
1712         sockfd_put(el_socket);
1713         refcnt -= 2;
1714         kfree(sock_tag_entry);
1715         atomic64_inc(&qtu_events.sockets_untagged);
1716         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
1717                  input, refcnt);
1718
1719         return 0;
1720
1721 err_put:
1722         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1723         sockfd_put(el_socket);
1724         refcnt--;
1725 err:
1726         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
1727                  input, refcnt);
1728         return res;
1729 }
1730
1731 static int qtaguid_ctrl_parse(const char *input, int count)
1732 {
1733         char cmd;
1734         int res;
1735
1736         cmd = input[0];
1737         /* Collect params for commands */
1738         switch (cmd) {
1739         case 'd':
1740                 res = ctrl_cmd_delete(input);
1741                 break;
1742
1743         case 's':
1744                 res = ctrl_cmd_counter_set(input);
1745                 break;
1746
1747         case 't':
1748                 res = ctrl_cmd_tag(input);
1749                 break;
1750
1751         case 'u':
1752                 res = ctrl_cmd_untag(input);
1753                 break;
1754
1755         default:
1756                 res = -EINVAL;
1757                 goto err;
1758         }
1759         if (!res)
1760                 res = count;
1761 err:
1762         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
1763         return res;
1764 }
1765
1766 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
1767 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
1768                         unsigned long count, void *data)
1769 {
1770         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
1771
1772         if (unlikely(module_passive))
1773                 return count;
1774
1775         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
1776                 return -EINVAL;
1777
1778         if (copy_from_user(input_buf, buffer, count))
1779                 return -EFAULT;
1780
1781         input_buf[count] = '\0';
1782         return qtaguid_ctrl_parse(input_buf, count);
1783 }
1784
/* State shared by the helpers that print the stats procfs file. */
struct proc_print_info {
	/* Where the next line gets written in the output page. */
	char *outp;
	/*
	 * Points at the counter of stats lines emitted so far; pp_sets()
	 * bumps the pointed-to value once per printed line.
	 */
	char **num_items_returned;
	/* Interface whose name goes into the stats line. */
	struct iface_stat *iface_entry;
	/* Tag stat whose tag and counters go into the stats line. */
	struct tag_stat *ts_entry;
	/* 0 makes pp_stats_line() print the header; printed as "idx" else. */
	int item_index;
	/* Space left at outp, as handed to snprintf(). */
	int char_count;
};
1793
1794 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
1795 {
1796         int len;
1797         struct data_counters *cnts;
1798         if (!ppi->item_index) {
1799                 len = snprintf(ppi->outp, ppi->char_count,
1800                                "idx iface acct_tag_hex uid_tag_int cnt_set "
1801                                "rx_bytes rx_packets "
1802                                "tx_bytes tx_packets "
1803                                "rx_tcp_packets rx_tcp_bytes "
1804                                "rx_udp_packets rx_udp_bytes "
1805                                "rx_other_packets rx_other_bytes "
1806                                "tx_tcp_packets tx_tcp_bytes "
1807                                "tx_udp_packets tx_udp_bytes "
1808                                "tx_other_packets tx_other_bytes\n");
1809         } else {
1810                 tag_t tag = ppi->ts_entry->tn.tag;
1811                 uid_t stat_uid = get_uid_from_tag(tag);
1812                 if (!can_read_other_uid_stats(stat_uid)) {
1813                         CT_DEBUG("qtaguid: stats line: "
1814                                  "%s 0x%llx %u: "
1815                                  "insufficient priv from pid=%u uid=%u\n",
1816                                  ppi->iface_entry->ifname,
1817                                  get_atag_from_tag(tag), stat_uid,
1818                                  current->pid, current_fsuid());
1819                         return 0;
1820                 }
1821                 cnts = &ppi->ts_entry->counters;
1822                 len = snprintf(
1823                         ppi->outp, ppi->char_count,
1824                         "%d %s 0x%llx %u %u "
1825                         "%llu %llu "
1826                         "%llu %llu "
1827                         "%llu %llu "
1828                         "%llu %llu "
1829                         "%llu %llu "
1830                         "%llu %llu "
1831                         "%llu %llu "
1832                         "%llu %llu\n",
1833                         ppi->item_index,
1834                         ppi->iface_entry->ifname,
1835                         get_atag_from_tag(tag),
1836                         stat_uid,
1837                         cnt_set,
1838                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
1839                         dc_sum_packets(cnts, cnt_set, IFS_RX),
1840                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
1841                         dc_sum_packets(cnts, cnt_set, IFS_TX),
1842                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
1843                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
1844                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
1845                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
1846                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
1847                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
1848                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
1849                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
1850                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
1851                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
1852                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
1853                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
1854         }
1855         return len;
1856 }
1857
1858 bool pp_sets(struct proc_print_info *ppi)
1859 {
1860         int len;
1861         int counter_set;
1862         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
1863              counter_set++) {
1864                 len = pp_stats_line(ppi, counter_set);
1865                 if (len >= ppi->char_count) {
1866                         *ppi->outp = '\0';
1867                         return false;
1868                 }
1869                 if (len) {
1870                         ppi->outp += len;
1871                         ppi->char_count -= len;
1872                         (*ppi->num_items_returned)++;
1873                 }
1874         }
1875         return true;
1876 }
1877
1878 /*
1879  * Procfs reader to get all tag stats using style "1)" as described in
1880  * fs/proc/generic.c
1881  * Groups all protocols tx/rx bytes.
1882  */
1883 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
1884                                 off_t items_to_skip, int char_count, int *eof,
1885                                 void *data)
1886 {
1887         struct proc_print_info ppi;
1888         int len;
1889
1890         ppi.outp = page;
1891         ppi.item_index = 0;
1892         ppi.char_count = char_count;
1893         ppi.num_items_returned = num_items_returned;
1894
1895         if (unlikely(module_passive)) {
1896                 len = pp_stats_line(&ppi, 0);
1897                 /* The header should always be shorter than the buffer. */
1898                 WARN_ON(len >= ppi.char_count);
1899                 *eof = 1;
1900                 return len;
1901         }
1902
1903         CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
1904                 "char_count=%d *eof=%d\n", page, *num_items_returned,
1905                 items_to_skip, char_count, *eof);
1906
1907         if (*eof)
1908                 return 0;
1909
1910         if (!items_to_skip) {
1911                 /* The idx is there to help debug when things go belly up. */
1912                 len = pp_stats_line(&ppi, 0);
1913                 /* Don't advance the outp unless the whole line was printed */
1914                 if (len >= ppi.char_count) {
1915                         *ppi.outp = '\0';
1916                         return ppi.outp - page;
1917                 }
1918                 ppi.outp += len;
1919                 ppi.char_count -= len;
1920         }
1921
1922         spin_lock_bh(&iface_stat_list_lock);
1923         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
1924                 struct rb_node *node;
1925                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
1926                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
1927                      node;
1928                      node = rb_next(node)) {
1929                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
1930                         if (ppi.item_index++ < items_to_skip)
1931                                 continue;
1932                         if (!pp_sets(&ppi)) {
1933                                 spin_unlock_bh(
1934                                         &ppi.iface_entry->tag_stat_list_lock);
1935                                 spin_unlock_bh(&iface_stat_list_lock);
1936                                 return ppi.outp - page;
1937                         }
1938                 }
1939                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
1940         }
1941         spin_unlock_bh(&iface_stat_list_lock);
1942
1943         *eof = 1;
1944         return ppi.outp - page;
1945 }
1946
1947 /*------------------------------------------*/
1948 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
1949 {
1950         int ret;
1951         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
1952         if (!*res_procdir) {
1953                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
1954                 ret = -ENOMEM;
1955                 goto no_dir;
1956         }
1957
1958         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
1959                                                 *res_procdir);
1960         if (!xt_qtaguid_ctrl_file) {
1961                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
1962                         " file\n");
1963                 ret = -ENOMEM;
1964                 goto no_ctrl_entry;
1965         }
1966         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
1967         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
1968
1969         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
1970                                                 *res_procdir);
1971         if (!xt_qtaguid_stats_file) {
1972                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
1973                         "file\n");
1974                 ret = -ENOMEM;
1975                 goto no_stats_entry;
1976         }
1977         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
1978         /*
1979          * TODO: add support counter hacking
1980          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
1981          */
1982         return 0;
1983
1984 no_stats_entry:
1985         remove_proc_entry("ctrl", *res_procdir);
1986 no_ctrl_entry:
1987         remove_proc_entry("xt_qtaguid", NULL);
1988 no_dir:
1989         return ret;
1990 }
1991
1992 static struct xt_match qtaguid_mt_reg __read_mostly = {
1993         /*
1994          * This module masquerades as the "owner" module so that iptables
1995          * tools can deal with it.
1996          */
1997         .name       = "owner",
1998         .revision   = 1,
1999         .family     = NFPROTO_UNSPEC,
2000         .match      = qtaguid_mt,
2001         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2002         .me         = THIS_MODULE,
2003 };
2004
2005 static int __init qtaguid_mt_init(void)
2006 {
2007         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2008             || iface_stat_init(xt_qtaguid_procdir)
2009             || xt_register_match(&qtaguid_mt_reg))
2010                 return -1;
2011         return 0;
2012 }
2013
2014 /*
2015  * TODO: allow unloading of the module.
2016  * For now stats are permanent.
2017  * Kconfig forces'y/n' and never an 'm'.
2018  */
2019
2020 module_init(qtaguid_mt_init);
2021 MODULE_AUTHOR("jpa <jpa@google.com>");
2022 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2023 MODULE_LICENSE("GPL");
2024 MODULE_ALIAS("ipt_owner");
2025 MODULE_ALIAS("ip6t_owner");
2026 MODULE_ALIAS("ipt_qtaguid");
2027 MODULE_ALIAS("ip6t_qtaguid");