netfilter: qtaguid: disable #define DEBUG
[firefly-linux-kernel-4.4.55.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /* TODO: support ipv6 for iface_stat.
12  * Currently if an iface is only v6 it will not have stats collected. */
13
14 #include <linux/file.h>
15 #include <linux/inetdevice.h>
16 #include <linux/module.h>
17 #include <linux/netfilter/x_tables.h>
18 #include <linux/netfilter/xt_qtaguid.h>
19 #include <linux/skbuff.h>
20 #include <linux/workqueue.h>
21 #include <net/sock.h>
22 #include <net/tcp.h>
23 #include <net/udp.h>
24
25 #include <linux/netfilter/xt_socket.h>
26 /* We only use the xt_socket funcs within a similar context to avoid unexpected
27  * return values. */
28 #define XT_SOCKET_SUPPORTED_HOOKS \
29         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
30
31
32 static const char *module_procdirname = "xt_qtaguid";
33 static struct proc_dir_entry *xt_qtaguid_procdir;
34
35 static unsigned int proc_iface_perms = S_IRUGO;
36 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
37
38 static struct proc_dir_entry *xt_qtaguid_stats_file;
39 static unsigned int proc_stats_perms = S_IRUGO;
40 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
41
42 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
43 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
44 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
45 #else
46 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
47 #endif
48 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
49
50 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
51 #include <linux/android_aid.h>
52 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
53 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
54 #else
55 /* 0 means, don't limit anybody */
56 static gid_t proc_stats_readall_gid;
57 static gid_t proc_ctrl_write_gid;
58 #endif
59 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
60                    S_IRUGO | S_IWUSR);
61 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
62                    S_IRUGO | S_IWUSR);
63
64 /* After the kernel has initiallized this module, it is still possible
65  * to make it passive:
66  *  - do not register it via iptables.
67  *   the matching code will not be invoked.
68  *  - set passive to 0
69  *   the iface stats handling will not be act on notifications.
70  * This is mostly usefull when a bug is suspected.
71  */
72 static bool module_passive;
73 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
74
75 /*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and cannot be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against
 * the tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * There could be
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * (a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.)
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
/* Opaque tag value; read and build it only through the accessor helpers. */
typedef uint64_t tag_t;

/* Name of, and handle to, the proc directory holding per-interface entries. */
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
108
/* Traffic direction; used as the first index into data_counters.bpc. */
enum ifs_tx_rx {
	IFS_TX = 0,
	IFS_RX = 1,
	IFS_MAX_DIRECTIONS = 2	/* number of directions, not a direction */
};
114
/*
 * Protocol buckets: TCP, UDP, and everything else.
 * Used as the second index into data_counters.bpc.
 */
enum ifs_proto {
	IFS_TCP = 0,
	IFS_UDP = 1,
	IFS_PROTO_OTHER = 2,
	IFS_MAX_PROTOS = 3	/* number of buckets, not a protocol */
};
122
/* One byte total and one packet total, kept together. */
struct byte_packet_counters {
	uint64_t bytes;		/* running byte count */
	uint64_t packets;	/* running packet count */
};
127
128 struct data_counters {
129         struct byte_packet_counters bpc[IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
130 };
131
132 struct tag_stat {
133         struct rb_node node;
134         tag_t tag;
135
136         struct data_counters counters;
137         /* If this tag is acct_tag based, we need to count against the
138          * matching parent uid_tag. */
139         struct data_counters *parent_counters;
140         struct proc_dir_entry *proc_ptr;
141 };
142
143 struct iface_stat {
144         struct list_head list;
145         char *ifname;
146         uint64_t rx_bytes;
147         uint64_t rx_packets;
148         uint64_t tx_bytes;
149         uint64_t tx_packets;
150         bool active;
151         struct proc_dir_entry *proc_ptr;
152
153         struct rb_root tag_stat_tree;
154         spinlock_t tag_stat_list_lock;
155 };
156
157 static LIST_HEAD(iface_stat_list);
158 static DEFINE_SPINLOCK(iface_stat_list_lock);
159
160 /*
161  * Track tag that this socket is transferring data for, and not necesseraly
162  * the uid that owns the socket.
163  * This is the tag against which tag_stat.counters will be billed.
164  */
165 struct sock_tag {
166         struct rb_node node;
167         struct sock *sk;
168         tag_t tag;
169 };
170
171 static struct rb_root sock_tag_tree = RB_ROOT;
172 static DEFINE_SPINLOCK(sock_tag_list_lock);
173
174 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);
175
176 /*----------------------------------------------*/
177 static inline int tag_compare(tag_t t1, tag_t t2)
178 {
179         return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
180 }
181
182
183 static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
184 {
185         return acct_tag | uid;
186 }
187 static inline tag_t make_tag_from_uid(uid_t uid)
188 {
189         return uid;
190 }
191 static inline uid_t get_uid_from_tag(tag_t tag)
192 {
193         return tag & 0xFFFFFFFFULL;
194 }
195 static inline tag_t get_utag_from_tag(tag_t tag)
196 {
197         return tag & 0xFFFFFFFFULL;
198 }
199 static inline tag_t get_atag_from_tag(tag_t tag)
200 {
201         return tag & ~0xFFFFFFFFULL;
202 }
203
204 static inline bool valid_atag(tag_t tag)
205 {
206         return !(tag & 0xFFFFFFFFULL);
207 }
208
209 static inline void dc_add_byte_packets(struct data_counters *counters,
210                                   enum ifs_tx_rx direction,
211                                   enum ifs_proto ifs_proto,
212                                   int bytes,
213                                   int packets)
214 {
215         counters->bpc[direction][ifs_proto].bytes += bytes;
216         counters->bpc[direction][ifs_proto].packets += packets;
217 }
218
219 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
220                                     enum ifs_tx_rx direction)
221 {
222         return counters->bpc[direction][IFS_TCP].bytes
223                 + counters->bpc[direction][IFS_UDP].bytes
224                 + counters->bpc[direction][IFS_PROTO_OTHER].bytes;
225 }
226
227 static inline uint64_t dc_sum_packets(struct data_counters *counters,
228                                       enum ifs_tx_rx direction)
229 {
230         return counters->bpc[direction][IFS_TCP].packets
231                 + counters->bpc[direction][IFS_UDP].packets
232                 + counters->bpc[direction][IFS_PROTO_OTHER].packets;
233 }
234
235 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
236 {
237         struct rb_node *node = root->rb_node;
238
239         while (node) {
240                 struct tag_stat *data = rb_entry(node, struct tag_stat, node);
241                 int result = tag_compare(tag, data->tag);
242                 pr_debug("qtaguid: tag_stat_tree_search(): tag=0x%llx"
243                          " (uid=%d)\n",
244                          data->tag,
245                          get_uid_from_tag(data->tag));
246
247                 if (result < 0)
248                         node = node->rb_left;
249                 else if (result > 0)
250                         node = node->rb_right;
251                 else
252                         return data;
253         }
254         return NULL;
255 }
256
257 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
258 {
259         struct rb_node **new = &(root->rb_node), *parent = NULL;
260
261         /* Figure out where to put new node */
262         while (*new) {
263                 struct tag_stat *this = rb_entry(*new, struct tag_stat,
264                                                  node);
265                 int result = tag_compare(data->tag, this->tag);
266                 pr_debug("qtaguid: tag_stat_tree_insert(): tag=0x%llx"
267                          " (uid=%d)\n",
268                          this->tag,
269                          get_uid_from_tag(this->tag));
270                 parent = *new;
271                 if (result < 0)
272                         new = &((*new)->rb_left);
273                 else if (result > 0)
274                         new = &((*new)->rb_right);
275                 else
276                         BUG();
277         }
278
279         /* Add new node and rebalance tree. */
280         rb_link_node(&data->node, parent, new);
281         rb_insert_color(&data->node, root);
282 }
283
284 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
285                                              const struct sock *sk)
286 {
287         struct rb_node *node = root->rb_node;
288
289         while (node) {
290                 struct sock_tag *data = rb_entry(node, struct sock_tag, node);
291                 ptrdiff_t result = sk - data->sk;
292                 if (result < 0)
293                         node = node->rb_left;
294                 else if (result > 0)
295                         node = node->rb_right;
296                 else
297                         return data;
298         }
299         return NULL;
300 }
301
302 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
303 {
304         struct rb_node **new = &(root->rb_node), *parent = NULL;
305
306         /* Figure out where to put new node */
307         while (*new) {
308                 struct sock_tag *this = rb_entry(*new, struct sock_tag, node);
309                 ptrdiff_t result = data->sk - this->sk;
310                 parent = *new;
311                 if (result < 0)
312                         new = &((*new)->rb_left);
313                 else if (result > 0)
314                         new = &((*new)->rb_right);
315                 else
316                         BUG();
317         }
318
319         /* Add new node and rebalance tree. */
320         rb_link_node(&data->node, parent, new);
321         rb_insert_color(&data->node, root);
322 }
323
/*
 * read_proc style callback that prints the uint64_t pointed to by @data
 * as a decimal number followed by a newline.
 *
 * @page:  buffer the text is formatted into
 * @start: set to @page + @off
 * @off:   offset into the text the caller wants to read from
 * @count: maximum number of bytes the caller wants
 * @eof:   set to 1 when everything from @off onwards fits in @count
 * @data:  pointer to the uint64_t to print; NULL yields 0 and leaves the
 *         outputs untouched
 *
 * Returns the number of bytes available at @off, clamped to [0, @count].
 * The clamp matters when @off lies beyond the formatted text: the raw
 * difference is negative there and must not be returned as if it were an
 * error code.
 */
static int read_proc_u64(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	const uint64_t *iface_entry = data;
	int len;

	if (!data)
		return 0;

	len = sprintf(page, "%llu\n", (unsigned long long)*iface_entry) - off;
	*eof = (len <= count) ? 1 : 0;
	*start = page + off;
	if (len < 0)
		return 0;
	if (len > count)
		return count;
	return len;
}
341
/*
 * read_proc style callback that prints the bool pointed to by @data as
 * "0\n" or "1\n".
 *
 * @page:  buffer the text is formatted into
 * @start: set to @page + @off
 * @off:   offset into the text the caller wants to read from
 * @count: maximum number of bytes the caller wants
 * @eof:   set to 1 when everything from @off onwards fits in @count
 * @data:  pointer to the bool to print; NULL yields 0 and leaves the
 *         outputs untouched
 *
 * Returns the number of bytes available at @off, clamped to [0, @count].
 * The clamp matters when @off lies beyond the formatted text: the raw
 * difference is negative there and must not be returned as if it were an
 * error code.
 */
static int read_proc_bool(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	const bool *bool_entry = data;
	int len;

	if (!data)
		return 0;

	len = sprintf(page, "%u\n", *bool_entry) - off;
	*eof = (len <= count) ? 1 : 0;
	*start = page + off;
	if (len < 0)
		return 0;
	if (len > count)
		return count;
	return len;
}
359
360 /* Find the entry for tracking the specified interface. */
361 static struct iface_stat *get_iface_stat(const char *ifname)
362 {
363         unsigned long flags;
364         struct iface_stat *iface_entry;
365         if (!ifname)
366                 return NULL;
367
368         spin_lock_irqsave(&iface_stat_list_lock, flags);
369         list_for_each_entry(iface_entry, &iface_stat_list, list) {
370                 if (!strcmp(iface_entry->ifname, ifname))
371                         goto done;
372         }
373         iface_entry = NULL;
374 done:
375         spin_unlock_irqrestore(&iface_stat_list_lock, flags);
376         return iface_entry;
377 }
378
379 /*
380  * Create a new entry for tracking the specified interface.
381  * Do nothing if the entry already exists.
382  * Called when an interface is configured with a valid IP address.
383  */
384 void iface_stat_create(const struct net_device *net_dev)
385 {
386         struct in_device *in_dev;
387         unsigned long flags;
388         struct iface_stat *new_iface;
389         struct proc_dir_entry *proc_entry;
390         const char *ifname;
391         struct iface_stat *entry;
392         __be32 ipaddr = 0;
393         struct in_ifaddr *ifa = NULL;
394
395         ASSERT_RTNL(); /* No need for separate locking */
396
397         pr_debug("iface_stat: create(): netdev=%p->name=%s\n",
398                  net_dev, net_dev ? net_dev->name : "");
399         if (!net_dev) {
400                 pr_err("iface_stat: create(): no net dev!\n");
401                 return;
402         }
403
404         in_dev = __in_dev_get_rtnl(net_dev);
405         if (!in_dev) {
406                 pr_err("iface_stat: create(): no inet dev!\n");
407                 return;
408         }
409
410         pr_debug("iface_stat: create(): in_dev=%p\n", in_dev);
411         ifname = net_dev->name;
412         pr_debug("iface_stat: create(): ifname=%p\n", ifname);
413         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
414                 pr_debug("iface_stat: create(): for(): ifa=%p ifname=%p\n",
415                          ifa, ifname);
416                 pr_debug("iface_stat: create(): ifname=%s ifa_label=%s\n",
417                          ifname, ifa->ifa_label ? ifa->ifa_label : "(null)");
418                 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
419                         break;
420         }
421
422         if (ifa) {
423                 ipaddr = ifa->ifa_local;
424         } else {
425                 pr_err("iface_stat: create(): dev %s has no matching IP\n",
426                        ifname);
427                 return;
428         }
429
430         entry = get_iface_stat(net_dev->name);
431         if (entry != NULL) {
432                 pr_debug("iface_stat: create(): dev %s entry=%p\n", ifname,
433                          entry);
434                 if (ipv4_is_loopback(ipaddr)) {
435                         entry->active = false;
436                         pr_debug("iface_stat: create(): disable tracking of "
437                                  "loopback dev %s\n", ifname);
438                 } else {
439                         entry->active = true;
440                         pr_debug("iface_stat: create(): enable tracking of "
441                                  "dev %s with ip=%pI4\n",
442                                  ifname, &ipaddr);
443                 }
444                 return;
445         } else if (ipv4_is_loopback(ipaddr)) {
446                 pr_debug("iface_stat: create(): ignore loopback dev %s"
447                          " ip=%pI4\n", ifname, &ipaddr);
448                 return;
449         }
450
451         new_iface = kzalloc(sizeof(*new_iface), GFP_KERNEL);
452         if (new_iface == NULL) {
453                 pr_err("iface_stat: create(): failed to alloc iface_stat\n");
454                 return;
455         }
456         new_iface->ifname = kstrdup(ifname, GFP_KERNEL);
457         if (new_iface->ifname == NULL) {
458                 pr_err("iface_stat: create(): failed to alloc ifname\n");
459                 kfree(new_iface);
460                 return;
461         }
462         spin_lock_init(&new_iface->tag_stat_list_lock);
463
464         new_iface->active = true;
465
466         new_iface->tag_stat_tree = RB_ROOT;
467         spin_lock_irqsave(&iface_stat_list_lock, flags);
468         list_add(&new_iface->list, &iface_stat_list);
469         spin_unlock_irqrestore(&iface_stat_list_lock, flags);
470
471         proc_entry = proc_mkdir(ifname, iface_stat_procdir);
472         new_iface->proc_ptr = proc_entry;
473
474         /* TODO: make root access only */
475         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
476                         read_proc_u64, &new_iface->tx_bytes);
477         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
478                         read_proc_u64, &new_iface->rx_bytes);
479         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
480                         read_proc_u64, &new_iface->tx_packets);
481         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
482                         read_proc_u64, &new_iface->rx_packets);
483         create_proc_read_entry("active", proc_iface_perms, proc_entry,
484                         read_proc_bool, &new_iface->active);
485
486         pr_debug("iface_stat: create(): done entry=%p dev=%s ip=%pI4\n",
487                  new_iface, ifname, &ipaddr);
488 }
489
490 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
491 {
492         pr_debug("xt_qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
493         return sock_tag_tree_search(&sock_tag_tree, sk);
494 }
495
496 static struct sock_tag *get_sock_stat(const struct sock *sk)
497 {
498         unsigned long flags;
499         struct sock_tag *sock_tag_entry;
500         pr_debug("xt_qtaguid: get_sock_stat(sk=%p)\n", sk);
501         if (!sk)
502                 return NULL;
503         spin_lock_irqsave(&sock_tag_list_lock, flags);
504         sock_tag_entry = get_sock_stat_nl(sk);
505         spin_unlock_irqrestore(&sock_tag_list_lock, flags);
506         return sock_tag_entry;
507 }
508
509 static void
510 data_counters_update(struct data_counters *dc,  enum ifs_tx_rx direction,
511                 int proto, int bytes)
512 {
513         switch (proto) {
514         case IPPROTO_TCP:
515                 dc_add_byte_packets(dc, direction, IFS_TCP, bytes, 1);
516                 break;
517         case IPPROTO_UDP:
518                 dc_add_byte_packets(dc, direction, IFS_UDP, bytes, 1);
519                 break;
520         case IPPROTO_IP:
521         default:
522                 dc_add_byte_packets(dc, direction, IFS_PROTO_OTHER, bytes, 1);
523                 break;
524         }
525 }
526
527
528 /*
529  * Update stats for the specified interface. Do nothing if the entry
530  * does not exist (when a device was never configured with an IP address).
531  * Called when an device is being unregistered.
532  */
533 void iface_stat_update(struct net_device *dev)
534 {
535         struct rtnl_link_stats64 dev_stats, *stats;
536         struct iface_stat *entry;
537         stats = dev_get_stats(dev, &dev_stats);
538         ASSERT_RTNL();
539
540         entry = get_iface_stat(dev->name);
541         if (entry == NULL) {
542                 pr_debug("iface_stat: dev %s monitor not found\n", dev->name);
543                 return;
544         }
545         if (entry->active) {
546                 entry->tx_bytes += stats->tx_bytes;
547                 entry->tx_packets += stats->tx_packets;
548                 entry->rx_bytes += stats->rx_bytes;
549                 entry->rx_packets += stats->rx_packets;
550                 entry->active = false;
551                 pr_debug("iface_stat: Updating stats for "
552                         "dev %s which went down\n", dev->name);
553         } else {
554                 pr_debug("iface_stat: Did not update stats for "
555                         "dev %s which went down\n", dev->name);
556         }
557 }
558
559
560 static void tag_stat_update(struct tag_stat *tag_entry,
561                         enum ifs_tx_rx direction, int proto, int bytes)
562 {
563         pr_debug("xt_qtaguid: tag_stat_update(tag=0x%llx (uid=%d) dir=%d "
564                 "proto=%d bytes=%d)\n",
565                 tag_entry->tag, get_uid_from_tag(tag_entry->tag), direction,
566                 proto, bytes);
567         data_counters_update(&tag_entry->counters, direction, proto, bytes);
568         if (tag_entry->parent_counters)
569                 data_counters_update(tag_entry->parent_counters, direction,
570                                 proto, bytes);
571 }
572
573
574 /* Create a new entry for tracking the specified {acct_tag,uid_tag} within
575  * the interface.
576  * iface_entry->tag_stat_list_lock should be held. */
577 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
578                                            tag_t tag)
579 {
580         struct tag_stat *new_tag_stat_entry = NULL;
581         pr_debug("iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx"
582                  " (uid=%d)\n",
583                  iface_entry, tag, get_uid_from_tag(tag));
584         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
585         if (!new_tag_stat_entry) {
586                 pr_err("iface_stat: failed to alloc new tag entry\n");
587                 goto done;
588         }
589         new_tag_stat_entry->tag = tag;
590         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
591 done:
592         return new_tag_stat_entry;
593 }
594
/*
 * Find the tracking entry for the interface named @ifname.
 * A NULL name is logged and yields NULL. The list walk itself is
 * delegated to get_iface_stat(), which performs the same locked
 * strcmp() search this function used to duplicate.
 * Returns the entry, or NULL when no interface matches.
 */
static struct iface_stat *get_iface_entry(const char *ifname)
{
	if (ifname == NULL) {
		pr_info("iface_stat: NULL device name\n");
		return NULL;
	}

	return get_iface_stat(ifname);
}
618
619 static void if_tag_stat_update(const char *ifname, uid_t uid,
620                                const struct sock *sk, enum ifs_tx_rx direction,
621                                int proto, int bytes)
622 {
623         struct tag_stat *tag_stat_entry;
624         tag_t tag, acct_tag;
625         tag_t uid_tag;
626         struct data_counters *uid_tag_counters;
627         struct sock_tag *sock_tag_entry;
628         struct iface_stat *iface_entry;
629         unsigned long flags;
630         struct tag_stat *new_tag_stat;
631         pr_debug("xt_qtaguid: if_tag_stat_update(ifname=%s "
632                 "uid=%d sk=%p dir=%d proto=%d bytes=%d)\n",
633                  ifname, uid, sk, direction, proto, bytes);
634
635
636         iface_entry = get_iface_entry(ifname);
637         if (!iface_entry) {
638                 pr_err("iface_stat: interface %s not found\n", ifname);
639                 return;
640         }
641         /* else { If the iface_entry becomes inactive, it is still ok
642          * to process the data. } */
643
644         pr_debug("iface_stat: stat_update() got entry=%p\n", iface_entry);
645
646         /* Look for a tagged sock.
647          * It will have an acct_uid. */
648         sock_tag_entry = get_sock_stat(sk);
649         if (sock_tag_entry) {
650                 tag = sock_tag_entry->tag;
651                 acct_tag = get_atag_from_tag(tag);
652                 uid_tag = get_utag_from_tag(tag);
653         } else {
654                 uid_tag = make_tag_from_uid(uid);
655                 acct_tag = 0;
656                 tag = combine_atag_with_uid(acct_tag, uid);
657         }
658         pr_debug("iface_stat: stat_update(): looking for tag=0x%llx (uid=%d)"
659                  " in ife=%p\n",
660                  tag, get_uid_from_tag(tag), iface_entry);
661         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
662         spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags);
663
664         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
665                                               tag);
666         if (tag_stat_entry) {
667                 /* Updating the {acct_tag, uid_tag} entry handles both stats:
668                  * {0, uid_tag} will also get updated. */
669                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
670                 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, flags);
671                 return;
672         }
673
674         /* Loop over tag list under this interface for {0,uid_tag} */
675         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
676                                               uid_tag);
677         if (!tag_stat_entry) {
678                 /* Here: the base uid_tag did not exist */
679                 /*
680                  * No parent counters. So
681                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
682                  */
683                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
684                 uid_tag_counters = &new_tag_stat->counters;
685         } else {
686                 uid_tag_counters = &tag_stat_entry->counters;
687         }
688
689         if (acct_tag) {
690                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
691                 new_tag_stat->parent_counters = uid_tag_counters;
692         }
693         spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, flags);
694         tag_stat_update(new_tag_stat, direction, proto, bytes);
695 }
696
697 static int iface_netdev_event_handler(struct notifier_block *nb,
698                                       unsigned long event, void *ptr) {
699         struct net_device *dev = ptr;
700
701         if (unlikely(module_passive))
702                 return NOTIFY_DONE;
703
704         pr_debug("iface_stat: netdev_event(): ev=0x%lx netdev=%p->name=%s\n",
705                  event, dev, dev ? dev->name : "");
706
707         switch (event) {
708         case NETDEV_UP:
709         case NETDEV_REBOOT:
710         case NETDEV_CHANGE:
711         case NETDEV_REGISTER:  /* Most likely no IP */
712         case NETDEV_CHANGEADDR:  /* MAC addr change */
713         case NETDEV_CHANGENAME:
714         case NETDEV_FEAT_CHANGE:  /* Might be usefull when cell type changes */
715                 iface_stat_create(dev);
716                 break;
717         case NETDEV_UNREGISTER:
718                 iface_stat_update(dev);
719                 break;
720         }
721         return NOTIFY_DONE;
722 }
723
724 static int iface_inetaddr_event_handler(struct notifier_block *nb,
725                                         unsigned long event, void *ptr) {
726
727         struct in_ifaddr *ifa = ptr;
728         struct in_device *in_dev = ifa->ifa_dev;
729         struct net_device *dev = in_dev->dev;
730
731         if (unlikely(module_passive))
732                 return NOTIFY_DONE;
733
734         pr_debug("iface_stat: inetaddr_event(): ev=0x%lx netdev=%p->name=%s\n",
735                  event, dev, dev ? dev->name : "");
736
737         switch (event) {
738         case NETDEV_UP:
739                 iface_stat_create(dev);
740                 break;
741         }
742         return NOTIFY_DONE;
743 }
744
745 static struct notifier_block iface_netdev_notifier_blk = {
746         .notifier_call = iface_netdev_event_handler,
747 };
748
749 static struct notifier_block iface_inetaddr_notifier_blk = {
750         .notifier_call = iface_inetaddr_event_handler,
751 };
752
753 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
754 {
755         int err;
756
757         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
758         if (!iface_stat_procdir) {
759                 pr_err("iface_stat: failed to create proc entry\n");
760                 err = -1;
761                 goto err;
762         }
763         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
764         if (err) {
765                 pr_err("iface_stat: failed to register dev event handler\n");
766                 goto err_unreg_nd;
767         }
768         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
769         if (err) {
770                 pr_err("iface_stat: failed to register dev event handler\n");
771                 goto err_zap_entry;
772         }
773         return 0;
774
775 err_unreg_nd:
776         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
777 err_zap_entry:
778         remove_proc_entry(iface_stat_procdirname, parent_procdir);
779 err:
780         return err;
781 }
782
783 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
784                                     struct xt_action_param *par)
785 {
786         struct sock *sk;
787         unsigned int hook_mask = (1 << par->hooknum);
788
789         pr_debug("xt_qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
790                  par->hooknum, par->family);
791
792         /* Let's not abuse the the xt_socket_get*_sk(), or else it will
793          * return garbage SKs. */
794         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
795                 return NULL;
796
797         switch (par->family) {
798         case NFPROTO_IPV6:
799                 sk = xt_socket_get6_sk(skb, par);
800                 break;
801         case NFPROTO_IPV4:
802                 sk = xt_socket_get4_sk(skb, par);
803                 break;
804         default:
805                 return NULL;
806         }
807
808         /* Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
809          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
810          * Not fixed in 3.0-r3 :(
811          */
812         if (sk) {
813                 pr_debug("xt_qtaguid: %p->sk_proto=%u "
814                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
815                 if (sk->sk_state  == TCP_TIME_WAIT) {
816                         xt_socket_put_sk(sk);
817                         sk = NULL;
818                 }
819         }
820         return sk;
821 }
822
823 static void account_for_uid(const struct sk_buff *skb,
824                             const struct sock *alternate_sk, uid_t uid,
825                             struct xt_action_param *par)
826 {
827         const struct net_device *el_dev;
828
829         if (!skb->dev) {
830                 pr_debug("xt_qtaguid[%d]: no skb->dev\n", par->hooknum);
831                 el_dev = par->in ? : par->out;
832         } else {
833                 const struct net_device *other_dev;
834                 el_dev = skb->dev;
835                 other_dev = par->in ? : par->out;
836                 if (el_dev != other_dev) {
837                         pr_debug("xt_qtaguid[%d]: skb->dev=%p %s vs "
838                                 "par->(in/out)=%p %s\n",
839                                 par->hooknum, el_dev, el_dev->name, other_dev,
840                                 other_dev->name);
841                 }
842         }
843
844         if (unlikely(!el_dev)) {
845                 pr_info("xt_qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
846         } else if (unlikely(!el_dev->name)) {
847                 pr_info("xt_qtaguid[%d]: no dev->name?!!\n", par->hooknum);
848         } else {
849                 pr_debug("xt_qtaguid[%d]: dev name=%s type=%d\n",
850                         par->hooknum,
851                         el_dev->name,
852                         el_dev->type);
853
854                 if_tag_stat_update(el_dev->name, uid,
855                                 skb->sk ? skb->sk : alternate_sk,
856                                 par->in ? IFS_RX : IFS_TX,
857                                 ip_hdr(skb)->protocol, skb->len);
858         }
859 }
860
861 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
862 {
863         const struct xt_qtaguid_match_info *info = par->matchinfo;
864         const struct file *filp;
865         bool got_sock = false;
866         struct sock *sk;
867         uid_t sock_uid;
868         bool res;
869
870         pr_debug("xt_qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
871                  par->hooknum, skb, par->in, par->out, par->family);
872
873         if (skb == NULL) {
874                 res = (info->match ^ info->invert) == 0;
875                 goto ret_res;
876         }
877
878         sk = skb->sk;
879
880         if (sk == NULL) {
881                 /*  A missing sk->sk_socket happens when packets are in-flight
882                  * and the matching socket is already closed and gone.
883                  */
884                 sk = qtaguid_find_sk(skb, par);
885                 /* If we got the socket from the find_sk(), we will need to put
886                  * it back, as nf_tproxy_get_sock_v4() got it. */
887                 got_sock = sk;
888         }
889         pr_debug("xt_qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
890                 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
891         if (sk != NULL) {
892                 pr_debug("xt_qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
893                         par->hooknum, sk, sk->sk_socket,
894                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
895                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
896                 pr_debug("xt_qtaguid[%d]: filp...uid=%d\n",
897                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
898         }
899
900         if (sk == NULL || sk->sk_socket == NULL) {
901                 /* Here, the qtaguid_find_sk() using connection tracking
902                  * couldn't find the owner, so for now we just count them
903                  * against the system. */
904                 /* TODO: unhack how to force just accounting.
905                  * For now we only do iface stats when the uid-owner is not
906                  * requested */
907                 if (!(info->match & XT_QTAGUID_UID))
908                         account_for_uid(skb, sk, 0, par);
909                 pr_debug("xt_qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
910                         par->hooknum,
911                         sk ? sk->sk_socket : NULL);
912                 res = (info->match ^ info->invert) == 0;
913                 goto put_sock_ret_res;
914         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
915                 res = false;
916                 goto put_sock_ret_res;
917         }
918         filp = sk->sk_socket->file;
919         if (filp == NULL) {
920                 pr_debug("xt_qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
921                 res = ((info->match ^ info->invert) &
922                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
923                 goto put_sock_ret_res;
924         }
925         sock_uid = filp->f_cred->fsuid;
926         /* TODO: unhack how to force just accounting.
927          * For now we only do iface stats when the uid-owner is not requested */
928         if (!(info->match & XT_QTAGUID_UID))
929                 account_for_uid(skb, sk, sock_uid, par);
930
931         /* The following two tests fail the match when:
932          *    id not in range AND no inverted condition requested
933          * or id     in range AND    inverted condition requested
934          * Thus (!a && b) || (a && !b) == a ^ b
935          */
936         if (info->match & XT_QTAGUID_UID)
937                 if ((filp->f_cred->fsuid >= info->uid_min &&
938                      filp->f_cred->fsuid <= info->uid_max) ^
939                     !(info->invert & XT_QTAGUID_UID)) {
940                         pr_debug("xt_qtaguid[%d]: leaving uid not matching\n",
941                                  par->hooknum);
942                         res = false;
943                         goto put_sock_ret_res;
944                 }
945         if (info->match & XT_QTAGUID_GID)
946                 if ((filp->f_cred->fsgid >= info->gid_min &&
947                                 filp->f_cred->fsgid <= info->gid_max) ^
948                         !(info->invert & XT_QTAGUID_GID)) {
949                         pr_debug("xt_qtaguid[%d]: leaving gid not matching\n",
950                                 par->hooknum);
951                         res = false;
952                         goto put_sock_ret_res;
953                 }
954
955         pr_debug("xt_qtaguid[%d]: leaving matched\n", par->hooknum);
956         res = true;
957
958 put_sock_ret_res:
959         if (got_sock)
960                 xt_socket_put_sk(sk);
961 ret_res:
962         pr_debug("xt_qtaguid[%d]: left %d\n", par->hooknum, res);
963         return res;
964 }
965
966 /*
967  * Procfs reader to get all active socket tags using style "1)" as described in
968  * fs/proc/generic.c
969  */
970 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
971                                   off_t items_to_skip, int char_count, int *eof,
972                                   void *data)
973 {
974         char *outp = page;
975         int len;
976         unsigned long flags;
977         uid_t uid;
978         struct sock_tag *sock_tag_entry;
979         struct rb_node *node;
980         int item_index = 0;
981
982         if (unlikely(module_passive)) {
983                 *eof = 1;
984                 return 0;
985         }
986
987         pr_debug("xt_qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
988                 page, items_to_skip, char_count, *eof);
989
990         if (*eof)
991                 return 0;
992
993         spin_lock_irqsave(&sock_tag_list_lock, flags);
994         for (node = rb_first(&sock_tag_tree);
995              node;
996              node = rb_next(node)) {
997                 if (item_index++ < items_to_skip)
998                         continue;
999                 sock_tag_entry = rb_entry(node, struct sock_tag, node);
1000                 uid = get_uid_from_tag(sock_tag_entry->tag);
1001                 pr_debug("xt_qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%d)\n",
1002                         sock_tag_entry->sk,
1003                         sock_tag_entry->tag,
1004                         uid);
1005                 len = snprintf(outp, char_count,
1006                                "sock=%p tag=0x%llx (uid=%u)\n",
1007                                sock_tag_entry->sk, sock_tag_entry->tag, uid);
1008                 if (len >= char_count) {
1009                         spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1010                         *outp = '\0';
1011                         return outp - page;
1012                 }
1013                 outp += len;
1014                 char_count -= len;
1015                 (*num_items_returned)++;
1016         }
1017         spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1018         *eof = 1;
1019         return outp - page;
1020 }
1021
1022 int can_impersonate_uid(uid_t uid)
1023 {
1024         return uid == current_fsuid()
1025                 || !proc_ctrl_write_gid
1026                 || in_egroup_p(proc_ctrl_write_gid);
1027 }
1028
1029 int can_read_other_uid_stats(uid_t uid)
1030 {
1031         return uid == current_fsuid()
1032                 || !proc_ctrl_write_gid
1033                 || in_egroup_p(proc_stats_readall_gid);
1034 }
1035
1036 /* Delete socket tags, and stat tags associated with a given
1037  * accouting tag and uid. */
1038 static int ctrl_cmd_delete(const char *input)
1039 {
1040         char cmd;
1041         uid_t uid = 0;
1042         uid_t entry_uid;
1043         tag_t acct_tag = 0;
1044         tag_t tag;
1045         int res, argc;
1046         unsigned long flags, flags2;
1047         struct iface_stat *iface_entry;
1048         struct rb_node *node;
1049         struct sock_tag *st_entry;
1050         struct tag_stat *ts_entry;
1051
1052         pr_debug("xt_qtaguid: ctrl_delete(%s): entered\n", input);
1053         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1054         pr_debug("xt_qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1055                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd,
1056                  acct_tag, uid);
1057         if (argc < 2) {
1058                 res = -EINVAL;
1059                 goto err;
1060         }
1061         if (!valid_atag(acct_tag)) {
1062                 pr_info("xt_qtaguid: ctrl_delete(%s): invalid tag\n", input);
1063                 res = -EINVAL;
1064                 goto err;
1065         }
1066         if (argc < 3) {
1067                 uid = current_fsuid();
1068         } else if (!can_impersonate_uid(uid)) {
1069                 pr_info("xt_qtaguid: ctrl_delete(%s): insuficient priv\n",
1070                         input);
1071                 res = -EPERM;
1072                 goto err;
1073         }
1074
1075         spin_lock_irqsave(&sock_tag_list_lock, flags);
1076         node = rb_first(&sock_tag_tree);
1077         while (node) {
1078                 st_entry = rb_entry(node, struct sock_tag, node);
1079                 entry_uid = get_uid_from_tag(st_entry->tag);
1080                 node = rb_next(node);
1081                 if (entry_uid != uid)
1082                         continue;
1083
1084                 if (!acct_tag || st_entry->tag == tag) {
1085                         pr_debug("xt_qtaguid: ctrl_delete(): "
1086                                  "erase sk=%p tag=0x%llx (uid=%d)\n",
1087                                  st_entry->sk,
1088                                  st_entry->tag,
1089                                  entry_uid);
1090                         rb_erase(&ts_entry->node, &sock_tag_tree);
1091                         kfree(st_entry);
1092                 }
1093         }
1094         spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1095
1096         /* If acct_tag is 0, then all entries belonging to uid are
1097          * erased. */
1098         tag = combine_atag_with_uid(acct_tag, uid);
1099         spin_lock_irqsave(&iface_stat_list_lock, flags);
1100         list_for_each_entry(iface_entry, &iface_stat_list, list) {
1101
1102                 spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags2);
1103                 node = rb_first(&iface_entry->tag_stat_tree);
1104                 while (node) {
1105                         ts_entry = rb_entry(node, struct tag_stat, node);
1106                         entry_uid = get_uid_from_tag(ts_entry->tag);
1107                         node = rb_next(node);
1108                         if (entry_uid != uid)
1109                                 continue;
1110                         if (!acct_tag || ts_entry->tag == tag) {
1111                                 pr_debug("xt_qtaguid: ctrl_delete(): erase "
1112                                          "%s 0x%llx %u\n",
1113                                          iface_entry->ifname,
1114                                          get_atag_from_tag(ts_entry->tag),
1115                                          entry_uid);
1116                                 rb_erase(&ts_entry->node,
1117                                          &iface_entry->tag_stat_tree);
1118                                 kfree(ts_entry);
1119                         }
1120                 }
1121                 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock,
1122                                        flags2);
1123
1124         }
1125         spin_unlock_irqrestore(&iface_stat_list_lock, flags);
1126
1127         res = 0;
1128
1129 err:
1130         pr_debug("xt_qtaguid: ctrl_delete(%s) res=%d\n", input, res);
1131         return res;
1132 }
1133
1134
1135 static int ctrl_cmd_tag(const char *input)
1136 {
1137         char cmd;
1138         int sock_fd = 0;
1139         uid_t uid = 0;
1140         tag_t acct_tag = 0;
1141         struct socket *el_socket;
1142         int res, argc;
1143         struct sock_tag *sock_tag_entry;
1144         unsigned long flags;
1145
1146         /* Unassigned args will get defaulted later. */
1147         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
1148         pr_debug("xt_qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
1149                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
1150                  acct_tag, uid);
1151         if (argc < 2) {
1152                 res = -EINVAL;
1153                 goto err;
1154         }
1155         el_socket = sockfd_lookup(sock_fd, &res);
1156         if (!el_socket) {
1157                 pr_info("xt_qtaguid: ctrl_tag(%s): failed to lookup"
1158                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1159                 goto err;
1160         }
1161         if (argc < 3) {
1162                 acct_tag = 0;
1163         } else if (!valid_atag(acct_tag)) {
1164                 pr_info("xt_qtaguid: ctrl_tag(%s): invalid tag\n", input);
1165                 res = -EINVAL;
1166                 goto err;
1167         }
1168         if (argc < 4) {
1169                 uid = current_fsuid();
1170         } else if (!can_impersonate_uid(uid)) {
1171                 pr_info("xt_qtaguid: ctrl_tag(%s): insuficient priv\n",
1172                         input);
1173                 res = -EPERM;
1174                 goto err;
1175         }
1176
1177         spin_lock_irqsave(&sock_tag_list_lock, flags);
1178         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1179         if (sock_tag_entry) {
1180                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1181                                                             uid);
1182         } else {
1183                 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1184                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
1185                                          GFP_KERNEL);
1186                 if (!sock_tag_entry) {
1187                         res = -ENOMEM;
1188                         goto err;
1189                 }
1190                 sock_tag_entry->sk = el_socket->sk;
1191                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1192                                                             uid);
1193                 spin_lock_irqsave(&sock_tag_list_lock, flags);
1194                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
1195         }
1196         spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1197
1198         pr_debug("xt_qtaguid: tag: sock_tag_entry->sk=%p "
1199                  "...->tag=0x%llx (uid=%u)\n",
1200                  sock_tag_entry->sk, sock_tag_entry->tag,
1201                  get_uid_from_tag(sock_tag_entry->tag));
1202         res = 0;
1203
1204 err:
1205         pr_debug("xt_qtaguid: ctrl_tag(%s) res=%d\n", input, res);
1206         return res;
1207 }
1208
1209
1210 static int ctrl_cmd_untag(const char *input)
1211 {
1212         char cmd;
1213         int sock_fd = 0;
1214         struct socket *el_socket;
1215         int res, argc;
1216         struct sock_tag *sock_tag_entry;
1217         unsigned long flags;
1218
1219         pr_debug("xt_qtaguid: ctrl_untag(%s): entered\n", input);
1220         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
1221         pr_debug("xt_qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
1222                  input, argc, cmd, sock_fd);
1223         if (argc < 2) {
1224                 res = -EINVAL;
1225                 goto err;
1226         }
1227         el_socket = sockfd_lookup(sock_fd, &res);
1228         if (!el_socket) {
1229                 pr_info("xt_qtaguid: ctrl_untag(%s): failed to lookup"
1230                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1231                 goto err;
1232         }
1233         spin_lock_irqsave(&sock_tag_list_lock, flags);
1234         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1235         if (!sock_tag_entry) {
1236                 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1237                 res = -EINVAL;
1238                 goto err;
1239         }
1240         /* The socket already belongs to the current process
1241          * so it can do whatever it wants to it. */
1242         rb_erase(&sock_tag_entry->node, &sock_tag_tree);
1243         spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1244         kfree(sock_tag_entry);
1245
1246         res = 0;
1247 err:
1248         pr_debug("xt_qtaguid: ctrl_untag(%s): res=%d\n", input, res);
1249         return res;
1250 }
1251
1252 static int qtaguid_ctrl_parse(const char *input, int count)
1253 {
1254         char cmd;
1255         int res;
1256
1257         pr_debug("xt_qtaguid: ctrl(%s): entered\n", input);
1258         cmd = input[0];
1259         /* Collect params for commands */
1260         switch (cmd) {
1261         case 'd':
1262                 res = ctrl_cmd_delete(input);
1263                 break;
1264
1265         case 't':
1266                 res = ctrl_cmd_tag(input);
1267                 break;
1268
1269         case 'u':
1270                 res = ctrl_cmd_untag(input);
1271                 break;
1272
1273         default:
1274                 res = -EINVAL;
1275                 goto err;
1276         }
1277         if (!res)
1278                 res = count;
1279 err:
1280         pr_debug("xt_qtaguid: ctrl(%s): res=%d\n", input, res);
1281         return res;
1282 }
1283
1284 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
1285 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
1286                         unsigned long count, void *data)
1287 {
1288         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
1289
1290         if (unlikely(module_passive))
1291                 return count;
1292
1293         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
1294                 return -EINVAL;
1295
1296         if (copy_from_user(input_buf, buffer, count))
1297                 return -EFAULT;
1298
1299         input_buf[count] = '\0';
1300         return qtaguid_ctrl_parse(input_buf, count);
1301 }
1302
1303 static int print_stats_line(char *outp, int char_count, int item_index,
1304                             char *ifname, tag_t tag,
1305                             struct data_counters *counters)
1306 {
1307         int len;
1308         if (!item_index) {
1309                 len = snprintf(outp, char_count,
1310                                "idx iface acct_tag_hex uid_tag_int "
1311                                "rx_bytes rx_packets "
1312                                "tx_bytes tx_packets "
1313                                "rx_tcp_packets rx_tcp_bytes "
1314                                "rx_udp_packets rx_udp_bytes "
1315                                "rx_other_packets rx_other_bytes "
1316                                "tx_tcp_packets tx_tcp_bytes "
1317                                "tx_udp_packets tx_udp_bytes "
1318                                "tx_other_packets tx_other_bytes\n");
1319         } else {
1320                 uid_t stat_uid = get_uid_from_tag(tag);
1321                 if (!can_read_other_uid_stats(stat_uid)) {
1322                         pr_debug("xt_qtaguid: insufficient priv for stat line:"
1323                                  "%s 0x%llx %u\n",
1324                                  ifname, get_atag_from_tag(tag), stat_uid);
1325                         return 0;
1326                 }
1327                 len = snprintf(outp, char_count,
1328                                "%d %s 0x%llx %u "
1329                                "%llu %llu "
1330                                "%llu %llu "
1331                                "%llu %llu "
1332                                "%llu %llu "
1333                                "%llu %llu "
1334                                "%llu %llu "
1335                                "%llu %llu "
1336                                "%llu %llu\n",
1337                                item_index,
1338                                ifname,
1339                                get_atag_from_tag(tag),
1340                                stat_uid,
1341                                dc_sum_bytes(counters, IFS_RX),
1342                                dc_sum_packets(counters, IFS_RX),
1343                                dc_sum_bytes(counters, IFS_TX),
1344                                dc_sum_packets(counters, IFS_TX),
1345                                counters->bpc[IFS_RX][IFS_TCP].bytes,
1346                                counters->bpc[IFS_RX][IFS_TCP].packets,
1347                                counters->bpc[IFS_RX][IFS_UDP].bytes,
1348                                counters->bpc[IFS_RX][IFS_UDP].packets,
1349                                counters->bpc[IFS_RX][IFS_PROTO_OTHER].bytes,
1350                                counters->bpc[IFS_RX][IFS_PROTO_OTHER].packets,
1351                                counters->bpc[IFS_TX][IFS_TCP].bytes,
1352                                counters->bpc[IFS_TX][IFS_TCP].packets,
1353                                counters->bpc[IFS_TX][IFS_UDP].bytes,
1354                                counters->bpc[IFS_TX][IFS_UDP].packets,
1355                                counters->bpc[IFS_TX][IFS_PROTO_OTHER].bytes,
1356                                counters->bpc[IFS_TX][IFS_PROTO_OTHER].packets);
1357         }
1358         return len;
1359 }
1360
1361
1362 /*
1363  * Procfs reader to get all tag stats using style "1)" as described in
1364  * fs/proc/generic.c
1365  * Groups all protocols tx/rx bytes.
1366  */
1367 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
1368                                 off_t items_to_skip, int char_count, int *eof,
1369                                 void *data)
1370 {
1371         char *outp = page;
1372         int len;
1373         unsigned long flags, flags2;
1374         struct iface_stat *iface_entry;
1375         struct tag_stat *ts_entry;
1376         int item_index = 0;
1377
1378         if (unlikely(module_passive)) {
1379                 *eof = 1;
1380                 return 0;
1381         }
1382
1383         pr_debug("xt_qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
1384                 "char_count=%d *eof=%d\n", page, *num_items_returned,
1385                 items_to_skip, char_count, *eof);
1386
1387         if (*eof)
1388                 return 0;
1389
1390         if (!items_to_skip) {
1391                 /* The idx is there to help debug when things go belly up. */
1392                 len = print_stats_line(outp, char_count, /*index*/0, NULL,
1393                                        make_tag_from_uid(0), NULL);
1394                 /* Don't advance the outp unless the whole line was printed */
1395                 if (len >= char_count) {
1396                         *outp = '\0';
1397                         return outp - page;
1398                 }
1399                 outp += len;
1400                 char_count -= len;
1401         }
1402         spin_lock_irqsave(&iface_stat_list_lock, flags);
1403         list_for_each_entry(iface_entry, &iface_stat_list, list) {
1404                 struct rb_node *node;
1405                 spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags2);
1406                 for (node = rb_first(&iface_entry->tag_stat_tree);
1407                      node;
1408                      node = rb_next(node)) {
1409                         ts_entry = rb_entry(node, struct tag_stat, node);
1410                         if (item_index++ < items_to_skip)
1411                                 continue;
1412                         len = print_stats_line(outp, char_count,
1413                                                item_index,
1414                                                iface_entry->ifname,
1415                                                ts_entry->tag,
1416                                                &ts_entry->counters);
1417                         if (len >= char_count) {
1418                                 *outp = '\0';
1419                                 spin_unlock_irqrestore(
1420                                         &iface_entry->tag_stat_list_lock,
1421                                         flags2);
1422                                 spin_unlock_irqrestore(
1423                                         &iface_stat_list_lock, flags);
1424                                 return outp - page;
1425                         }
1426                         if (len) {
1427                                 outp += len;
1428                                 char_count -= len;
1429                                 (*num_items_returned)++;
1430                         }
1431                 }
1432                 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock,
1433                                 flags2);
1434         }
1435         spin_unlock_irqrestore(&iface_stat_list_lock, flags);
1436
1437         *eof = 1;
1438         return outp - page;
1439 }
1440
1441 /*------------------------------------------*/
1442 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
1443 {
1444         int ret;
1445         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
1446         if (!*res_procdir) {
1447                 pr_err("xt_qtaguid: failed to create proc/.../xt_qtaguid\n");
1448                 ret = -ENOMEM;
1449                 goto no_dir;
1450         }
1451
1452         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
1453                                                 *res_procdir);
1454         if (!xt_qtaguid_ctrl_file) {
1455                 pr_err("xt_qtaguid: failed to create xt_qtaguid/ctrl "
1456                         " file\n");
1457                 ret = -ENOMEM;
1458                 goto no_ctrl_entry;
1459         }
1460         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
1461         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
1462
1463         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
1464                                                 *res_procdir);
1465         if (!xt_qtaguid_stats_file) {
1466                 pr_err("xt_qtaguid: failed to create xt_qtaguid/stats "
1467                         "file\n");
1468                 ret = -ENOMEM;
1469                 goto no_stats_entry;
1470         }
1471         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
1472         /*
1473          * TODO: add support counter hacking
1474          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
1475          */
1476         return 0;
1477
1478 no_stats_entry:
1479         remove_proc_entry("ctrl", *res_procdir);
1480 no_ctrl_entry:
1481         remove_proc_entry("xt_qtaguid", NULL);
1482 no_dir:
1483         return ret;
1484 }
1485
1486 static struct xt_match qtaguid_mt_reg __read_mostly = {
1487         /*
1488          * This module masquerades as the "owner" module so that iptables
1489          * tools can deal with it.
1490          */
1491         .name       = "owner",
1492         .revision   = 1,
1493         .family     = NFPROTO_UNSPEC,
1494         .match      = qtaguid_mt,
1495         .matchsize  = sizeof(struct xt_qtaguid_match_info),
1496         .me         = THIS_MODULE,
1497 };
1498
1499 static int __init qtaguid_mt_init(void)
1500 {
1501         if (qtaguid_proc_register(&xt_qtaguid_procdir)
1502             || iface_stat_init(xt_qtaguid_procdir)
1503             || xt_register_match(&qtaguid_mt_reg))
1504                 return -1;
1505         return 0;
1506 }
1507
1508 /* TODO: allow unloading of the module.
1509  * For now stats are permanent.
1510  * Kconfig forces'y/n' and never an 'm'.
1511  */
1512
1513 module_init(qtaguid_mt_init);
1514 MODULE_AUTHOR("jpa <jpa@google.com>");
1515 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
1516 MODULE_LICENSE("GPL");
1517 MODULE_ALIAS("ipt_owner");
1518 MODULE_ALIAS("ip6t_owner");
1519 MODULE_ALIAS("ipt_qtaguid");
1520 MODULE_ALIAS("ip6t_qtaguid");