2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 /* TODO: support ipv6 for iface_stat.
12 * Currently if an iface is only v6 it will not have stats collected. */
14 #include <linux/file.h>
15 #include <linux/inetdevice.h>
16 #include <linux/module.h>
17 #include <linux/netfilter/x_tables.h>
18 #include <linux/netfilter/xt_qtaguid.h>
19 #include <linux/skbuff.h>
20 #include <linux/workqueue.h>
25 #include <linux/netfilter/xt_socket.h>
26 /* We only use the xt_socket funcs within a similar context to avoid unexpected
28 #define XT_SOCKET_SUPPORTED_HOOKS \
29 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
32 static const char *module_procdirname = "xt_qtaguid";
33 static struct proc_dir_entry *xt_qtaguid_procdir;
35 static unsigned int proc_iface_perms = S_IRUGO;
36 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
38 static struct proc_dir_entry *xt_qtaguid_stats_file;
39 static unsigned int proc_stats_perms = S_IRUGO;
40 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
42 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
43 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
44 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
46 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
48 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
50 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
51 #include <linux/android_aid.h>
52 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
53 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
55 /* 0 means, don't limit anybody */
56 static gid_t proc_stats_readall_gid;
57 static gid_t proc_ctrl_write_gid;
59 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
61 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
64 /* After the kernel has initialized this module, it is still possible
66 * - do not register it via iptables.
67 * the matching code will not be invoked.
69 * the iface stats handling will not act on notifications.
70 * This is mostly useful when a bug is suspected.
72 static bool module_passive;
73 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
75 /*---------------------------------------------------------------------------*/
79 * They represent what the data usage counters will be tracked against.
80 * By default a tag is just based on the UID.
81 * The UID is used as the base for policing, and cannot be ignored.
82 * So a tag will always at least represent a UID (uid_tag).
84 * A tag can be augmented with an "accounting tag" which is associated
86 * User space can set the acct_tag portion of the tag which is then used
87 * with sockets: all data belonging to that socket will be counted against the
88 * tag. The policing is then based on the tag's uid_tag portion,
89 * and stats are collected for the acct_tag portion separately.
92 * a: {acct_tag=1, uid_tag=10003}
93 * b: {acct_tag=2, uid_tag=10003}
94 * c: {acct_tag=3, uid_tag=10003}
95 * d: {acct_tag=0, uid_tag=10003}
96 * (a, b, and c represent tags associated with specific sockets.
97 * d is for the totals for that uid, including all untagged traffic.
98 * Typically d is used with policing/quota rules.
100 * We want tag_t big enough to distinguish uid_t and acct_tag.
101 * It might become a struct if needed.
102 * Nothing should be using it as an int.
104 typedef uint64_t tag_t; /* Only used via accessors */
106 static const char *iface_stat_procdirname = "iface_stat";
107 static struct proc_dir_entry *iface_stat_procdir;
115 /* For now, TCP, UDP, the rest */
123 struct byte_packet_counters {
128 struct data_counters {
129 struct byte_packet_counters bpc[IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
136 struct data_counters counters;
137 /* If this tag is acct_tag based, we need to count against the
138 * matching parent uid_tag. */
139 struct data_counters *parent_counters;
140 struct proc_dir_entry *proc_ptr;
144 struct list_head list;
151 struct proc_dir_entry *proc_ptr;
153 struct rb_root tag_stat_tree;
154 spinlock_t tag_stat_list_lock;
157 static LIST_HEAD(iface_stat_list);
158 static DEFINE_SPINLOCK(iface_stat_list_lock);
161 * Track the tag that this socket is transferring data for, and not necessarily
162 * the uid that owns the socket.
163 * This is the tag against which tag_stat.counters will be billed.
171 static struct rb_root sock_tag_tree = RB_ROOT;
172 static DEFINE_SPINLOCK(sock_tag_list_lock);
174 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);
176 /*----------------------------------------------*/
177 static inline int tag_compare(tag_t t1, tag_t t2)
179 return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
183 static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
185 return acct_tag | uid;
187 static inline tag_t make_tag_from_uid(uid_t uid)
191 static inline uid_t get_uid_from_tag(tag_t tag)
193 return tag & 0xFFFFFFFFULL;
195 static inline tag_t get_utag_from_tag(tag_t tag)
197 return tag & 0xFFFFFFFFULL;
199 static inline tag_t get_atag_from_tag(tag_t tag)
201 return tag & ~0xFFFFFFFFULL;
204 static inline bool valid_atag(tag_t tag)
206 return !(tag & 0xFFFFFFFFULL);
/* Add bytes and packets to the counter slot of counters that is selected
 * by direction and ifs_proto. */
209 static inline void dc_add_byte_packets(struct data_counters *counters,
210 enum ifs_tx_rx direction,
211 enum ifs_proto ifs_proto,
215 counters->bpc[direction][ifs_proto].bytes += bytes;
216 counters->bpc[direction][ifs_proto].packets += packets;
219 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
220 enum ifs_tx_rx direction)
222 return counters->bpc[direction][IFS_TCP].bytes
223 + counters->bpc[direction][IFS_UDP].bytes
224 + counters->bpc[direction][IFS_PROTO_OTHER].bytes;
227 static inline uint64_t dc_sum_packets(struct data_counters *counters,
228 enum ifs_tx_rx direction)
230 return counters->bpc[direction][IFS_TCP].packets
231 + counters->bpc[direction][IFS_UDP].packets
232 + counters->bpc[direction][IFS_PROTO_OTHER].packets;
/* Search the tag_stat rb-tree rooted at root for the node matching tag.
 * Starts at root->rb_node and descends through rb_left / rb_right, using
 * tag_compare() between the wanted tag and each visited node's tag.
 * (The branch conditions and the return statements are not shown in this
 * excerpt.) */
235 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
237 struct rb_node *node = root->rb_node;
240 struct tag_stat *data = rb_entry(node, struct tag_stat, node);
241 int result = tag_compare(tag, data->tag);
242 pr_debug("qtaguid: tag_stat_tree_search(): tag=0x%llx"
245 get_uid_from_tag(data->tag));
248 node = node->rb_left;
250 node = node->rb_right;
/* Insert data into the tag_stat rb-tree rooted at root, keyed by data->tag.
 * The walk compares data->tag with each visited node's tag through
 * tag_compare() and moves to rb_left / rb_right; the new node is then
 * linked under the last visited parent and the tree is rebalanced. */
257 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
259 struct rb_node **new = &(root->rb_node), *parent = NULL;
261 /* Figure out where to put new node */
263 struct tag_stat *this = rb_entry(*new, struct tag_stat,
265 int result = tag_compare(data->tag, this->tag);
266 pr_debug("qtaguid: tag_stat_tree_insert(): tag=0x%llx"
269 get_uid_from_tag(this->tag));
272 new = &((*new)->rb_left);
274 new = &((*new)->rb_right);
279 /* Add new node and rebalance tree. */
280 rb_link_node(&data->node, parent, new);
281 rb_insert_color(&data->node, root);
284 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
285 const struct sock *sk)
287 struct rb_node *node = root->rb_node;
290 struct sock_tag *data = rb_entry(node, struct sock_tag, node);
291 ptrdiff_t result = sk - data->sk;
293 node = node->rb_left;
295 node = node->rb_right;
302 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
304 struct rb_node **new = &(root->rb_node), *parent = NULL;
306 /* Figure out where to put new node */
308 struct sock_tag *this = rb_entry(*new, struct sock_tag, node);
309 ptrdiff_t result = data->sk - this->sk;
312 new = &((*new)->rb_left);
314 new = &((*new)->rb_right);
319 /* Add new node and rebalance tree. */
320 rb_link_node(&data->node, parent, new);
321 rb_insert_color(&data->node, root);
/* procfs read handler for a uint64_t counter.
 * data points at the counter; its value is written to page as "%llu\n".
 * len is the formatted length minus the read offset off, and *eof is set
 * to 1 when len fits within count.
 * NOTE(review): sprintf() is unbounded here; presumably page is a whole
 * procfs page, which is ample for one u64 -- confirm. */
324 static int read_proc_u64(char *page, char **start, off_t off,
325 int count, int *eof, void *data)
330 uint64_t *iface_entry = data;
334 value = *iface_entry;
335 p += sprintf(p, "%llu\n", value);
336 len = (p - page) - off;
337 *eof = (len <= count) ? 1 : 0;
/* procfs read handler for a bool flag.
 * data points at the bool; the output is a single "%u\n" line written to
 * page.  len is the formatted length minus off, and *eof is set to 1 when
 * len fits within count.  (The statement loading value is not shown in
 * this excerpt; presumably it reads *bool_entry.) */
342 static int read_proc_bool(char *page, char **start, off_t off,
343 int count, int *eof, void *data)
348 bool *bool_entry = data;
353 p += sprintf(p, "%u\n", value);
354 len = (p - page) - off;
355 *eof = (len <= count) ? 1 : 0;
360 /* Find the entry for tracking the specified interface. */
/* Scans iface_stat_list under iface_stat_list_lock (irqsave), comparing
 * each entry's ifname with the requested name via strcmp().
 * NOTE(review): the lock is released before this function ends, so any
 * entry handed back is used by callers without the list lock held. */
361 static struct iface_stat *get_iface_stat(const char *ifname)
364 struct iface_stat *iface_entry;
368 spin_lock_irqsave(&iface_stat_list_lock, flags);
369 list_for_each_entry(iface_entry, &iface_stat_list, list) {
370 if (!strcmp(iface_entry->ifname, ifname))
375 spin_unlock_irqrestore(&iface_stat_list_lock, flags);
380 * Create a new entry for tracking the specified interface.
381 * Do nothing if the entry already exists.
382 * Called when an interface is configured with a valid IP address.
/* Start (or re-enable) tracking of net_dev.
 * Runs under RTNL.  Steps visible below:
 *  - pick the IPv4 address whose ifa_label equals the device name;
 *  - if an iface_stat entry already exists for the name, only flip its
 *    active flag (false for a loopback address, true otherwise);
 *  - otherwise, unless the address is loopback, allocate a new entry,
 *    add it to iface_stat_list and publish its counters under a procfs
 *    directory named after the interface.
 * NOTE(review): the proc_mkdir() result is stored and used as parent of
 * the entries created afterwards; no NULL check is visible here. */
384 void iface_stat_create(const struct net_device *net_dev)
386 struct in_device *in_dev;
388 struct iface_stat *new_iface;
389 struct proc_dir_entry *proc_entry;
391 struct iface_stat *entry;
393 struct in_ifaddr *ifa = NULL;
395 ASSERT_RTNL(); /* No need for separate locking */
397 pr_debug("iface_stat: create(): netdev=%p->name=%s\n",
398 net_dev, net_dev ? net_dev->name : "");
400 pr_err("iface_stat: create(): no net dev!\n");
404 in_dev = __in_dev_get_rtnl(net_dev);
406 pr_err("iface_stat: create(): no inet dev!\n");
410 pr_debug("iface_stat: create(): in_dev=%p\n", in_dev);
411 ifname = net_dev->name;
412 pr_debug("iface_stat: create(): ifname=%p\n", ifname);
/* Walk the device's IPv4 address list looking for the label that matches
 * the device name. */
413 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
414 pr_debug("iface_stat: create(): for(): ifa=%p ifname=%p\n",
416 pr_debug("iface_stat: create(): ifname=%s ifa_label=%s\n",
417 ifname, ifa->ifa_label ? ifa->ifa_label : "(null)");
418 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
423 ipaddr = ifa->ifa_local;
425 pr_err("iface_stat: create(): dev %s has no matching IP\n",
430 entry = get_iface_stat(net_dev->name);
432 pr_debug("iface_stat: create(): dev %s entry=%p\n", ifname,
434 if (ipv4_is_loopback(ipaddr)) {
435 entry->active = false;
436 pr_debug("iface_stat: create(): disable tracking of "
437 "loopback dev %s\n", ifname);
439 entry->active = true;
440 pr_debug("iface_stat: create(): enable tracking of "
441 "dev %s with ip=%pI4\n",
445 } else if (ipv4_is_loopback(ipaddr)) {
446 pr_debug("iface_stat: create(): ignore loopback dev %s"
447 " ip=%pI4\n", ifname, &ipaddr);
451 new_iface = kzalloc(sizeof(*new_iface), GFP_KERNEL);
452 if (new_iface == NULL) {
453 pr_err("iface_stat: create(): failed to alloc iface_stat\n");
456 new_iface->ifname = kstrdup(ifname, GFP_KERNEL);
457 if (new_iface->ifname == NULL) {
458 pr_err("iface_stat: create(): failed to alloc ifname\n");
462 spin_lock_init(&new_iface->tag_stat_list_lock);
464 new_iface->active = true;
466 new_iface->tag_stat_tree = RB_ROOT;
/* Publish the new entry on the global list before creating its procfs
 * files. */
467 spin_lock_irqsave(&iface_stat_list_lock, flags);
468 list_add(&new_iface->list, &iface_stat_list);
469 spin_unlock_irqrestore(&iface_stat_list_lock, flags);
471 proc_entry = proc_mkdir(ifname, iface_stat_procdir);
472 new_iface->proc_ptr = proc_entry;
474 /* TODO: make root access only */
475 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
476 read_proc_u64, &new_iface->tx_bytes);
477 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
478 read_proc_u64, &new_iface->rx_bytes);
479 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
480 read_proc_u64, &new_iface->tx_packets);
481 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
482 read_proc_u64, &new_iface->rx_packets);
483 create_proc_read_entry("active", proc_iface_perms, proc_entry,
484 read_proc_bool, &new_iface->active);
486 pr_debug("iface_stat: create(): done entry=%p dev=%s ip=%pI4\n",
487 new_iface, ifname, &ipaddr);
490 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
492 pr_debug("xt_qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
493 return sock_tag_tree_search(&sock_tag_tree, sk);
/* Locked wrapper around get_sock_stat_nl(): holds sock_tag_list_lock
 * (irqsave) for the duration of the tree lookup and returns the entry
 * found for sk.
 * NOTE(review): the returned entry is used by the caller after the lock
 * has been dropped; nothing visible here pins it against a concurrent
 * untag/delete. */
496 static struct sock_tag *get_sock_stat(const struct sock *sk)
499 struct sock_tag *sock_tag_entry;
500 pr_debug("xt_qtaguid: get_sock_stat(sk=%p)\n", sk);
503 spin_lock_irqsave(&sock_tag_list_lock, flags);
504 sock_tag_entry = get_sock_stat_nl(sk);
505 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
506 return sock_tag_entry;
/* Bill one packet of the given size to dc for direction, into the IFS_TCP,
 * IFS_UDP or IFS_PROTO_OTHER slot.  Presumably the slot is selected from
 * proto; the selecting statement is not shown in this excerpt. */
510 data_counters_update(struct data_counters *dc, enum ifs_tx_rx direction,
511 int proto, int bytes)
515 dc_add_byte_packets(dc, direction, IFS_TCP, bytes, 1);
518 dc_add_byte_packets(dc, direction, IFS_UDP, bytes, 1);
522 dc_add_byte_packets(dc, direction, IFS_PROTO_OTHER, bytes, 1);
529 * Update stats for the specified interface. Do nothing if the entry
530 * does not exist (when a device was never configured with an IP address).
531 * Called when a device is being unregistered.
/* Fold the device's current counters, read through dev_get_stats(), into
 * the iface_stat entry looked up by dev->name, and mark that entry
 * inactive.  A debug message is logged when no entry is being monitored
 * for the device, and another when the stats were not updated (the
 * conditions themselves are not shown in this excerpt). */
533 void iface_stat_update(struct net_device *dev)
535 struct rtnl_link_stats64 dev_stats, *stats;
536 struct iface_stat *entry;
537 stats = dev_get_stats(dev, &dev_stats);
540 entry = get_iface_stat(dev->name);
542 pr_debug("iface_stat: dev %s monitor not found\n", dev->name);
546 entry->tx_bytes += stats->tx_bytes;
547 entry->tx_packets += stats->tx_packets;
548 entry->rx_bytes += stats->rx_bytes;
549 entry->rx_packets += stats->rx_packets;
550 entry->active = false;
551 pr_debug("iface_stat: Updating stats for "
552 "dev %s which went down\n", dev->name);
554 pr_debug("iface_stat: Did not update stats for "
555 "dev %s which went down\n", dev->name);
/* Account one packet of the given size against tag_entry's own counters
 * and, when parent_counters is set, against the parent's counters too. */
560 static void tag_stat_update(struct tag_stat *tag_entry,
561 enum ifs_tx_rx direction, int proto, int bytes)
563 pr_debug("xt_qtaguid: tag_stat_update(tag=0x%llx (uid=%d) dir=%d "
564 "proto=%d bytes=%d)\n",
565 tag_entry->tag, get_uid_from_tag(tag_entry->tag), direction,
567 data_counters_update(&tag_entry->counters, direction, proto, bytes);
568 if (tag_entry->parent_counters)
569 data_counters_update(tag_entry->parent_counters, direction,
574 /* Create a new entry for tracking the specified {acct_tag,uid_tag} within
576 * iface_entry->tag_stat_list_lock should be held. */
/* Allocates a zeroed tag_stat with GFP_ATOMIC, stores tag in it and
 * inserts it into iface_entry->tag_stat_tree.  An allocation failure is
 * logged with pr_err(); callers should be prepared for that case (the
 * value returned on failure is not shown in this excerpt). */
577 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
580 struct tag_stat *new_tag_stat_entry = NULL;
581 pr_debug("iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx"
583 iface_entry, tag, get_uid_from_tag(tag));
584 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
585 if (!new_tag_stat_entry) {
586 pr_err("iface_stat: failed to alloc new tag entry\n");
589 new_tag_stat_entry->tag = tag;
590 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
592 return new_tag_stat_entry;
/* Find the iface_stat entry whose ifname equals the given name.
 * A NULL ifname is reported with pr_info().  The list is walked under
 * iface_stat_list_lock (irqsave), which is released before the function
 * ends. */
595 static struct iface_stat *get_iface_entry(const char *ifname)
597 struct iface_stat *iface_entry;
600 /* Find the entry for tracking the specified tag within the interface */
601 if (ifname == NULL) {
602 pr_info("iface_stat: NULL device name\n");
607 /* Iterate over interfaces */
608 spin_lock_irqsave(&iface_stat_list_lock, flags);
609 list_for_each_entry(iface_entry, &iface_stat_list, list) {
610 if (!strcmp(ifname, iface_entry->ifname))
615 spin_unlock_irqrestore(&iface_stat_list_lock, flags);
/* Bill one packet (bytes, proto, direction) seen on interface ifname.
 * The tag comes from the socket's sock_tag entry when sk is tagged;
 * otherwise it is derived from uid.  The matching tag_stat in the
 * interface's tree is updated, and created first when missing, with the
 * {0, uid_tag} entry's counters acting as parent of an
 * {acct_tag, uid_tag} entry.
 * NOTE(review): the results of create_if_tag_stat() are dereferenced
 * below with no visible NULL check, although that helper has an
 * allocation-failure path.
 * NOTE(review): the final tag_stat_update() runs after
 * tag_stat_list_lock has been released. */
619 static void if_tag_stat_update(const char *ifname, uid_t uid,
620 const struct sock *sk, enum ifs_tx_rx direction,
621 int proto, int bytes)
623 struct tag_stat *tag_stat_entry;
626 struct data_counters *uid_tag_counters;
627 struct sock_tag *sock_tag_entry;
628 struct iface_stat *iface_entry;
630 struct tag_stat *new_tag_stat;
631 pr_debug("xt_qtaguid: if_tag_stat_update(ifname=%s "
632 "uid=%d sk=%p dir=%d proto=%d bytes=%d)\n",
633 ifname, uid, sk, direction, proto, bytes);
636 iface_entry = get_iface_entry(ifname);
638 pr_err("iface_stat: interface %s not found\n", ifname);
641 /* else { If the iface_entry becomes inactive, it is still ok
642 * to process the data. } */
644 pr_debug("iface_stat: stat_update() got entry=%p\n", iface_entry);
646 /* Look for a tagged sock.
647 * It will have an acct_uid. */
648 sock_tag_entry = get_sock_stat(sk);
649 if (sock_tag_entry) {
650 tag = sock_tag_entry->tag;
651 acct_tag = get_atag_from_tag(tag);
652 uid_tag = get_utag_from_tag(tag);
654 uid_tag = make_tag_from_uid(uid);
656 tag = combine_atag_with_uid(acct_tag, uid);
658 pr_debug("iface_stat: stat_update(): looking for tag=0x%llx (uid=%d)"
660 tag, get_uid_from_tag(tag), iface_entry);
661 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
662 spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags);
664 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
666 if (tag_stat_entry) {
667 /* Updating the {acct_tag, uid_tag} entry handles both stats:
668 * {0, uid_tag} will also get updated. */
669 tag_stat_update(tag_stat_entry, direction, proto, bytes);
670 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, flags);
674 /* Loop over tag list under this interface for {0,uid_tag} */
675 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
677 if (!tag_stat_entry) {
678 /* Here: the base uid_tag did not exist */
680 * No parent counters. So
681 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
683 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
684 uid_tag_counters = &new_tag_stat->counters;
686 uid_tag_counters = &tag_stat_entry->counters;
690 new_tag_stat = create_if_tag_stat(iface_entry, tag);
691 new_tag_stat->parent_counters = uid_tag_counters;
693 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock, flags);
694 tag_stat_update(new_tag_stat, direction, proto, bytes);
/* netdevice notifier callback; ptr is the struct net_device.
 * Skipped entirely when the module is in passive mode.  The events listed
 * first lead to iface_stat_create(); NETDEV_UNREGISTER leads to
 * iface_stat_update(). */
697 static int iface_netdev_event_handler(struct notifier_block *nb,
698 unsigned long event, void *ptr) {
699 struct net_device *dev = ptr;
701 if (unlikely(module_passive))
704 pr_debug("iface_stat: netdev_event(): ev=0x%lx netdev=%p->name=%s\n",
705 event, dev, dev ? dev->name : "");
711 case NETDEV_REGISTER: /* Most likely no IP */
712 case NETDEV_CHANGEADDR: /* MAC addr change */
713 case NETDEV_CHANGENAME:
714 case NETDEV_FEAT_CHANGE: /* Might be useful when cell type changes */
715 iface_stat_create(dev);
717 case NETDEV_UNREGISTER:
718 iface_stat_update(dev);
/* inetaddr notifier callback; ptr is the struct in_ifaddr that changed.
 * The owning net_device is reached through ifa->ifa_dev->dev and handed to
 * iface_stat_create().  Skipped entirely when the module is in passive
 * mode.
 * NOTE(review): ifa->ifa_dev and in_dev->dev are dereferenced in the
 * declarations, before the NULL-tolerant debug print of dev. */
724 static int iface_inetaddr_event_handler(struct notifier_block *nb,
725 unsigned long event, void *ptr) {
727 struct in_ifaddr *ifa = ptr;
728 struct in_device *in_dev = ifa->ifa_dev;
729 struct net_device *dev = in_dev->dev;
731 if (unlikely(module_passive))
734 pr_debug("iface_stat: inetaddr_event(): ev=0x%lx netdev=%p->name=%s\n",
735 event, dev, dev ? dev->name : "");
739 iface_stat_create(dev);
745 static struct notifier_block iface_netdev_notifier_blk = {
746 .notifier_call = iface_netdev_event_handler,
749 static struct notifier_block iface_inetaddr_notifier_blk = {
750 .notifier_call = iface_inetaddr_event_handler,
/* Module-init helper for the iface_stat part.
 * Creates the iface_stat procfs directory under parent_procdir, then
 * registers the netdevice notifier and the inetaddr notifier.  The tail of
 * the function unwinds in reverse: the netdevice notifier is unregistered
 * and the procfs directory removed.
 * NOTE(review): both registration failures log the same "dev event
 * handler" message, including the inetaddr one. */
753 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
757 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
758 if (!iface_stat_procdir) {
759 pr_err("iface_stat: failed to create proc entry\n");
763 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
765 pr_err("iface_stat: failed to register dev event handler\n");
768 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
770 pr_err("iface_stat: failed to register dev event handler\n");
776 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
778 remove_proc_entry(iface_stat_procdirname, parent_procdir);
/* Find the socket a packet belongs to by using the xt_socket lookup
 * helpers: xt_socket_get6_sk() or xt_socket_get4_sk() depending on
 * par->family.  The lookup is only attempted on hooks contained in
 * XT_SOCKET_SUPPORTED_HOOKS.  A socket found in TCP_TIME_WAIT state is
 * handed back with xt_socket_put_sk(). */
783 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
784 struct xt_action_param *par)
787 unsigned int hook_mask = (1 << par->hooknum);
789 pr_debug("xt_qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
790 par->hooknum, par->family);
792 /* Let's not abuse the xt_socket_get*_sk(), or else it will
793 * return garbage SKs. */
794 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
797 switch (par->family) {
799 sk = xt_socket_get6_sk(skb, par);
802 sk = xt_socket_get4_sk(skb, par);
808 /* Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
809 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
810 * Not fixed in 3.0-r3 :(
813 pr_debug("xt_qtaguid: %p->sk_proto=%u "
814 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
815 if (sk->sk_state == TCP_TIME_WAIT) {
816 xt_socket_put_sk(sk);
/* Bill skb to uid on the device it travels through.
 * The device is taken from par->in, falling back to par->out; a mismatch
 * with skb->dev is only logged.  The packet is then handed to
 * if_tag_stat_update() with skb->sk (or alternate_sk when skb->sk is not
 * set), as IFS_RX when par->in is set and IFS_TX otherwise.
 * NOTE(review): the protocol is always read through ip_hdr(skb); confirm
 * that this is what is wanted for IPv6 packets. */
823 static void account_for_uid(const struct sk_buff *skb,
824 const struct sock *alternate_sk, uid_t uid,
825 struct xt_action_param *par)
827 const struct net_device *el_dev;
830 pr_debug("xt_qtaguid[%d]: no skb->dev\n", par->hooknum);
831 el_dev = par->in ? : par->out;
833 const struct net_device *other_dev;
835 other_dev = par->in ? : par->out;
836 if (el_dev != other_dev) {
837 pr_debug("xt_qtaguid[%d]: skb->dev=%p %s vs "
838 "par->(in/out)=%p %s\n",
839 par->hooknum, el_dev, el_dev->name, other_dev,
844 if (unlikely(!el_dev)) {
845 pr_info("xt_qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
846 } else if (unlikely(!el_dev->name)) {
847 pr_info("xt_qtaguid[%d]: no dev->name?!!\n", par->hooknum);
849 pr_debug("xt_qtaguid[%d]: dev name=%s type=%d\n",
854 if_tag_stat_update(el_dev->name, uid,
855 skb->sk ? skb->sk : alternate_sk,
856 par->in ? IFS_RX : IFS_TX,
857 ip_hdr(skb)->protocol, skb->len);
/* xtables match callback for the qtaguid match.
 * Uses qtaguid_find_sk() to locate a socket for the packet, bills the
 * packet through account_for_uid() whenever the rule does not request a
 * uid match (to uid 0 when no usable socket is found, otherwise to the
 * socket file's f_cred->fsuid), and then evaluates the uid and gid range
 * tests from the match info, each combined with its invert flag.  A
 * socket obtained from the lookup is released with xt_socket_put_sk()
 * on the way out. */
861 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
863 const struct xt_qtaguid_match_info *info = par->matchinfo;
864 const struct file *filp;
865 bool got_sock = false;
870 pr_debug("xt_qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
871 par->hooknum, skb, par->in, par->out, par->family);
874 res = (info->match ^ info->invert) == 0;
881 /* A missing sk->sk_socket happens when packets are in-flight
882 * and the matching socket is already closed and gone.
884 sk = qtaguid_find_sk(skb, par);
885 /* If we got the socket from the find_sk(), we will need to put
886 * it back, as nf_tproxy_get_sock_v4() got it. */
889 pr_debug("xt_qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
890 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
892 pr_debug("xt_qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
893 par->hooknum, sk, sk->sk_socket,
894 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
895 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
896 pr_debug("xt_qtaguid[%d]: filp...uid=%d\n",
897 par->hooknum, filp ? filp->f_cred->fsuid : -1);
900 if (sk == NULL || sk->sk_socket == NULL) {
901 /* Here, the qtaguid_find_sk() using connection tracking
902 * couldn't find the owner, so for now we just count them
903 * against the system. */
904 /* TODO: unhack how to force just accounting.
905 * For now we only do iface stats when the uid-owner is not
907 if (!(info->match & XT_QTAGUID_UID))
908 account_for_uid(skb, sk, 0, par);
909 pr_debug("xt_qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
911 sk ? sk->sk_socket : NULL);
912 res = (info->match ^ info->invert) == 0;
913 goto put_sock_ret_res;
914 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
916 goto put_sock_ret_res;
918 filp = sk->sk_socket->file;
920 pr_debug("xt_qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
921 res = ((info->match ^ info->invert) &
922 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
923 goto put_sock_ret_res;
925 sock_uid = filp->f_cred->fsuid;
926 /* TODO: unhack how to force just accounting.
927 * For now we only do iface stats when the uid-owner is not requested */
928 if (!(info->match & XT_QTAGUID_UID))
929 account_for_uid(skb, sk, sock_uid, par);
931 /* The following two tests fail the match when:
932 * id not in range AND no inverted condition requested
933 * or id in range AND inverted condition requested
934 * Thus (!a && b) || (a && !b) == a ^ b
936 if (info->match & XT_QTAGUID_UID)
937 if ((filp->f_cred->fsuid >= info->uid_min &&
938 filp->f_cred->fsuid <= info->uid_max) ^
939 !(info->invert & XT_QTAGUID_UID)) {
940 pr_debug("xt_qtaguid[%d]: leaving uid not matching\n",
943 goto put_sock_ret_res;
945 if (info->match & XT_QTAGUID_GID)
946 if ((filp->f_cred->fsgid >= info->gid_min &&
947 filp->f_cred->fsgid <= info->gid_max) ^
948 !(info->invert & XT_QTAGUID_GID)) {
949 pr_debug("xt_qtaguid[%d]: leaving gid not matching\n",
952 goto put_sock_ret_res;
955 pr_debug("xt_qtaguid[%d]: leaving matched\n", par->hooknum);
960 xt_socket_put_sk(sk);
962 pr_debug("xt_qtaguid[%d]: left %d\n", par->hooknum, res);
967 * Procfs reader to get all active socket tags using style "1)" as described in
/* procfs reader for the ctrl file.
 * Emits one "sock=... tag=... (uid=...)" line per entry of sock_tag_tree,
 * skipping the first items_to_skip entries.  The tree walk happens under
 * sock_tag_list_lock.  A line that would not fit in char_count is
 * detected through the snprintf() length, and the lock is released on
 * that path as well.  *num_items_returned is incremented once per
 * emitted line, i.e. the char * slot is used as an item counter. */
970 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
971 off_t items_to_skip, int char_count, int *eof,
978 struct sock_tag *sock_tag_entry;
979 struct rb_node *node;
982 if (unlikely(module_passive)) {
987 pr_debug("xt_qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
988 page, items_to_skip, char_count, *eof);
993 spin_lock_irqsave(&sock_tag_list_lock, flags);
994 for (node = rb_first(&sock_tag_tree);
996 node = rb_next(node)) {
997 if (item_index++ < items_to_skip)
999 sock_tag_entry = rb_entry(node, struct sock_tag, node);
1000 uid = get_uid_from_tag(sock_tag_entry->tag);
1001 pr_debug("xt_qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%d)\n",
1003 sock_tag_entry->tag,
1005 len = snprintf(outp, char_count,
1006 "sock=%p tag=0x%llx (uid=%u)\n",
1007 sock_tag_entry->sk, sock_tag_entry->tag, uid);
1008 if (len >= char_count) {
1009 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1015 (*num_items_returned)++;
1017 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1022 int can_impersonate_uid(uid_t uid)
1024 return uid == current_fsuid()
1025 || !proc_ctrl_write_gid
1026 || in_egroup_p(proc_ctrl_write_gid);
1029 int can_read_other_uid_stats(uid_t uid)
1031 return uid == current_fsuid()
1032 || !proc_ctrl_write_gid
1033 || in_egroup_p(proc_stats_readall_gid);
1036 /* Delete socket tags, and stat tags associated with a given
1037 * accounting tag and uid. */
/* Handle the ctrl "delete" command; input is parsed with sscanf() as
 * "<cmd char> <acct_tag> <uid>".
 * An acct_tag that fails valid_atag() is rejected.  The uid either
 * defaults to current_fsuid() or must pass can_impersonate_uid().
 * Then sock_tag_tree and every interface's tag_stat_tree are walked, each
 * under its own lock, erasing the entries whose uid matches and, when
 * acct_tag is non-zero, whose full tag matches.
 * NOTE(review): in the sock_tag loop the node passed to rb_erase() is
 * ts_entry->node, but ts_entry is only assigned in the later tag_stat
 * loop; the node being visited there is st_entry.  This looks like it
 * should be st_entry->node.
 * NOTE(review): the sock_tag loop compares against tag, whose only
 * visible assignment (combine_atag_with_uid()) comes after that loop. */
1038 static int ctrl_cmd_delete(const char *input)
1046 unsigned long flags, flags2;
1047 struct iface_stat *iface_entry;
1048 struct rb_node *node;
1049 struct sock_tag *st_entry;
1050 struct tag_stat *ts_entry;
1052 pr_debug("xt_qtaguid: ctrl_delete(%s): entered\n", input);
1053 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1054 pr_debug("xt_qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1055 "acct_tag=0x%llx uid=%u\n", input, argc, cmd,
1061 if (!valid_atag(acct_tag)) {
1062 pr_info("xt_qtaguid: ctrl_delete(%s): invalid tag\n", input);
1067 uid = current_fsuid();
1068 } else if (!can_impersonate_uid(uid)) {
1069 pr_info("xt_qtaguid: ctrl_delete(%s): insuficient priv\n",
1075 spin_lock_irqsave(&sock_tag_list_lock, flags);
1076 node = rb_first(&sock_tag_tree);
1078 st_entry = rb_entry(node, struct sock_tag, node);
1079 entry_uid = get_uid_from_tag(st_entry->tag);
1080 node = rb_next(node);
1081 if (entry_uid != uid)
1084 if (!acct_tag || st_entry->tag == tag) {
1085 pr_debug("xt_qtaguid: ctrl_delete(): "
1086 "erase sk=%p tag=0x%llx (uid=%d)\n",
1090 rb_erase(&ts_entry->node, &sock_tag_tree);
1094 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1096 /* If acct_tag is 0, then all entries belonging to uid are
1098 tag = combine_atag_with_uid(acct_tag, uid);
1099 spin_lock_irqsave(&iface_stat_list_lock, flags);
1100 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1102 spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags2);
1103 node = rb_first(&iface_entry->tag_stat_tree);
1105 ts_entry = rb_entry(node, struct tag_stat, node);
1106 entry_uid = get_uid_from_tag(ts_entry->tag);
1107 node = rb_next(node);
1108 if (entry_uid != uid)
1110 if (!acct_tag || ts_entry->tag == tag) {
1111 pr_debug("xt_qtaguid: ctrl_delete(): erase "
1113 iface_entry->ifname,
1114 get_atag_from_tag(ts_entry->tag),
1116 rb_erase(&ts_entry->node,
1117 &iface_entry->tag_stat_tree);
1121 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock,
1125 spin_unlock_irqrestore(&iface_stat_list_lock, flags);
1130 pr_debug("xt_qtaguid: ctrl_delete(%s) res=%d\n", input, res);
/* Handle the ctrl "tag" command; input is parsed with sscanf() as
 * "<cmd char> <sock_fd> <acct_tag> <uid>".
 * The socket is looked up from the fd, acct_tag is validated with
 * valid_atag(), and the uid either defaults to current_fsuid() or must
 * pass can_impersonate_uid().  An existing sock_tag for the socket gets
 * its tag replaced; otherwise a new sock_tag is allocated and inserted
 * into sock_tag_tree.
 * NOTE(review): sockfd_lookup() takes a reference on the socket; no
 * matching sockfd_put() is visible in this excerpt -- confirm it is
 * released on every path.
 * NOTE(review): sock_tag_list_lock is dropped around the kzalloc() and
 * re-taken for the insert, so the "not tagged yet" lookup result may be
 * stale by the time of the insert. */
1135 static int ctrl_cmd_tag(const char *input)
1141 struct socket *el_socket;
1143 struct sock_tag *sock_tag_entry;
1144 unsigned long flags;
1146 /* Unassigned args will get defaulted later. */
1147 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
1148 pr_debug("xt_qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
1149 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
1155 el_socket = sockfd_lookup(sock_fd, &res);
1157 pr_info("xt_qtaguid: ctrl_tag(%s): failed to lookup"
1158 " sock_fd=%d err=%d\n", input, sock_fd, res);
1163 } else if (!valid_atag(acct_tag)) {
1164 pr_info("xt_qtaguid: ctrl_tag(%s): invalid tag\n", input);
1169 uid = current_fsuid();
1170 } else if (!can_impersonate_uid(uid)) {
1171 pr_info("xt_qtaguid: ctrl_tag(%s): insuficient priv\n",
1177 spin_lock_irqsave(&sock_tag_list_lock, flags);
1178 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1179 if (sock_tag_entry) {
1180 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1183 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1184 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
1186 if (!sock_tag_entry) {
1190 sock_tag_entry->sk = el_socket->sk;
1191 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1193 spin_lock_irqsave(&sock_tag_list_lock, flags);
1194 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
1196 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1198 pr_debug("xt_qtaguid: tag: sock_tag_entry->sk=%p "
1199 "...->tag=0x%llx (uid=%u)\n",
1200 sock_tag_entry->sk, sock_tag_entry->tag,
1201 get_uid_from_tag(sock_tag_entry->tag));
1205 pr_debug("xt_qtaguid: ctrl_tag(%s) res=%d\n", input, res);
/* Handle the ctrl "untag" command; input is parsed with sscanf() as
 * "<cmd char> <sock_fd>".
 * The socket is looked up from the fd and its sock_tag entry, when there
 * is one, is erased from sock_tag_tree under sock_tag_list_lock and then
 * freed after the lock is released.
 * NOTE(review): sockfd_lookup() takes a reference on the socket; no
 * matching sockfd_put() is visible in this excerpt -- confirm it is
 * released on every path. */
1210 static int ctrl_cmd_untag(const char *input)
1214 struct socket *el_socket;
1216 struct sock_tag *sock_tag_entry;
1217 unsigned long flags;
1219 pr_debug("xt_qtaguid: ctrl_untag(%s): entered\n", input);
1220 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
1221 pr_debug("xt_qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
1222 input, argc, cmd, sock_fd);
1227 el_socket = sockfd_lookup(sock_fd, &res);
1229 pr_info("xt_qtaguid: ctrl_untag(%s): failed to lookup"
1230 " sock_fd=%d err=%d\n", input, sock_fd, res);
1233 spin_lock_irqsave(&sock_tag_list_lock, flags);
1234 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1235 if (!sock_tag_entry) {
1236 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1240 /* The socket already belongs to the current process
1241 * so it can do whatever it wants to it. */
1242 rb_erase(&sock_tag_entry->node, &sock_tag_tree);
1243 spin_unlock_irqrestore(&sock_tag_list_lock, flags);
1244 kfree(sock_tag_entry);
1248 pr_debug("xt_qtaguid: ctrl_untag(%s): res=%d\n", input, res);
/* Dispatch one ctrl command line to ctrl_cmd_delete(), ctrl_cmd_tag() or
 * ctrl_cmd_untag() and log the result.  (How the command is selected from
 * input is not shown in this excerpt.) */
1252 static int qtaguid_ctrl_parse(const char *input, int count)
1257 pr_debug("xt_qtaguid: ctrl(%s): entered\n", input);
1259 /* Collect params for commands */
1262 res = ctrl_cmd_delete(input);
1266 res = ctrl_cmd_tag(input);
1270 res = ctrl_cmd_untag(input);
1280 pr_debug("xt_qtaguid: ctrl(%s): res=%d\n", input, res);
/* procfs write handler for the ctrl file.
 * Copies at most MAX_QTAGUID_CTRL_INPUT_LEN - 1 bytes of user data into a
 * stack buffer, NUL-terminates it and hands it to qtaguid_ctrl_parse(),
 * whose result is returned.  Because writes with
 * count >= MAX_QTAGUID_CTRL_INPUT_LEN are filtered out first, the
 * terminator at input_buf[count] always lands inside the buffer. */
1284 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
1285 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
1286 unsigned long count, void *data)
1288 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
1290 if (unlikely(module_passive))
1293 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
1296 if (copy_from_user(input_buf, buffer, count))
1299 input_buf[count] = '\0';
1300 return qtaguid_ctrl_parse(input_buf, count);
/* Format one line of the stats file into outp with snprintf(), bounded by
 * char_count.
 * Two formats are visible: a column-header line, and a data line for tag
 * on ifname built from counters (per-direction sums followed by the
 * per-protocol values).  Which one is produced is decided by a condition
 * not shown in this excerpt.  For a data line the caller must pass
 * can_read_other_uid_stats() for the uid contained in tag.
 * NOTE(review): the header names the per-protocol columns as packets
 * first, bytes second (e.g. rx_tcp_packets rx_tcp_bytes), while the data
 * line passes .bytes before .packets for each protocol -- confirm which
 * order consumers expect. */
1303 static int print_stats_line(char *outp, int char_count, int item_index,
1304 char *ifname, tag_t tag,
1305 struct data_counters *counters)
1309 len = snprintf(outp, char_count,
1310 "idx iface acct_tag_hex uid_tag_int "
1311 "rx_bytes rx_packets "
1312 "tx_bytes tx_packets "
1313 "rx_tcp_packets rx_tcp_bytes "
1314 "rx_udp_packets rx_udp_bytes "
1315 "rx_other_packets rx_other_bytes "
1316 "tx_tcp_packets tx_tcp_bytes "
1317 "tx_udp_packets tx_udp_bytes "
1318 "tx_other_packets tx_other_bytes\n");
1320 uid_t stat_uid = get_uid_from_tag(tag);
1321 if (!can_read_other_uid_stats(stat_uid)) {
1322 pr_debug("xt_qtaguid: insufficient priv for stat line:"
1324 ifname, get_atag_from_tag(tag), stat_uid);
1327 len = snprintf(outp, char_count,
1339 get_atag_from_tag(tag),
1341 dc_sum_bytes(counters, IFS_RX),
1342 dc_sum_packets(counters, IFS_RX),
1343 dc_sum_bytes(counters, IFS_TX),
1344 dc_sum_packets(counters, IFS_TX),
1345 counters->bpc[IFS_RX][IFS_TCP].bytes,
1346 counters->bpc[IFS_RX][IFS_TCP].packets,
1347 counters->bpc[IFS_RX][IFS_UDP].bytes,
1348 counters->bpc[IFS_RX][IFS_UDP].packets,
1349 counters->bpc[IFS_RX][IFS_PROTO_OTHER].bytes,
1350 counters->bpc[IFS_RX][IFS_PROTO_OTHER].packets,
1351 counters->bpc[IFS_TX][IFS_TCP].bytes,
1352 counters->bpc[IFS_TX][IFS_TCP].packets,
1353 counters->bpc[IFS_TX][IFS_UDP].bytes,
1354 counters->bpc[IFS_TX][IFS_UDP].packets,
1355 counters->bpc[IFS_TX][IFS_PROTO_OTHER].bytes,
1356 counters->bpc[IFS_TX][IFS_PROTO_OTHER].packets);
1363 * Procfs reader to get all tag stats using style "1)" as described in
1365 * Groups all protocols tx/rx bytes.
/*
 * procfs read handler for the "stats" entry.
 *
 * Emits a header line when items_to_skip is 0, then one line per tag_stat
 * entry of every interface on iface_stat_list, skipping the first
 * items_to_skip entries. Each line is produced by print_stats_line().
 * *num_items_returned is advanced once per emitted entry line.
 * NOTE(review): the remaining parameters, the handling of outp/char_count
 * after each line, and the return value are not visible in this excerpt.
 */
1367 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
1368 off_t items_to_skip, int char_count, int *eof,
1373 unsigned long flags, flags2;
1374 struct iface_stat *iface_entry;
1375 struct tag_stat *ts_entry;
1378 if (unlikely(module_passive)) {
1383 pr_debug("xt_qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
1384 "char_count=%d *eof=%d\n", page, *num_items_returned,
1385 items_to_skip, char_count, *eof);
1390 if (!items_to_skip) {
1391 /* The idx is there to help debug when things go belly up. */
1392 len = print_stats_line(outp, char_count, /*index*/0, NULL,
1393 make_tag_from_uid(0), NULL);
1394 /* Don't advance the outp unless the whole line was printed */
1395 if (len >= char_count) {
/* Outer lock: protects the walk over the list of interfaces. */
1402 spin_lock_irqsave(&iface_stat_list_lock, flags);
1403 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1404 struct rb_node *node;
/*
 * Inner, per-interface lock. It saves its IRQ state in flags2 so the
 * state saved by the outer lock in flags is not overwritten.
 */
1405 spin_lock_irqsave(&iface_entry->tag_stat_list_lock, flags2);
1406 for (node = rb_first(&iface_entry->tag_stat_tree);
1408 node = rb_next(node)) {
1409 ts_entry = rb_entry(node, struct tag_stat, node);
/* item_index counts every entry visited, including skipped ones. */
1410 if (item_index++ < items_to_skip)
1412 len = print_stats_line(outp, char_count,
1414 iface_entry->ifname,
1416 &ts_entry->counters);
1417 if (len >= char_count) {
/* Line did not fit: drop the inner lock, then the outer one. */
1419 spin_unlock_irqrestore(
1420 &iface_entry->tag_stat_list_lock,
1422 spin_unlock_irqrestore(
1423 &iface_stat_list_lock, flags);
1429 (*num_items_returned)++;
1432 spin_unlock_irqrestore(&iface_entry->tag_stat_list_lock,
1435 spin_unlock_irqrestore(&iface_stat_list_lock, flags);
1441 /*------------------------------------------*/
/*
 * Create the module's procfs directory and its "ctrl" and "stats" entries.
 *
 * res_procdir: out parameter; receives the directory created under
 *              init_net.proc_net with the name module_procdirname.
 *
 * "ctrl" is created with proc_ctrl_perms and gets both a read and a write
 * handler; "stats" is created with proc_stats_perms and gets a read
 * handler only. Each failure is reported with pr_err, and the trailing
 * remove_proc_entry() calls undo the entries created so far.
 * NOTE(review): the return values and the error labels are not visible in
 * this excerpt.
 */
1442 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
1445 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
1446 if (!*res_procdir) {
1447 pr_err("xt_qtaguid: failed to create proc/.../xt_qtaguid\n");
1452 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
1454 if (!xt_qtaguid_ctrl_file) {
1455 pr_err("xt_qtaguid: failed to create xt_qtaguid/ctrl "
1460 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
1461 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
1463 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
1465 if (!xt_qtaguid_stats_file) {
1466 pr_err("xt_qtaguid: failed to create xt_qtaguid/stats "
1469 goto no_stats_entry;
1471 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
1473 * TODO: add support for counter hacking
1474 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
1479 remove_proc_entry("ctrl", *res_procdir);
/*
 * The directory was created under init_net.proc_net, so it has to be
 * removed from that same parent; a NULL parent would search the procfs
 * root and leave the directory behind.
 */
1481 remove_proc_entry(module_procdirname, init_net.proc_net);
/*
 * xtables match descriptor handed to xt_register_match() at init time.
 * It is registered for NFPROTO_UNSPEC, uses qtaguid_mt as the match
 * callback, and declares struct xt_qtaguid_match_info as the per-rule
 * match data size.
 * NOTE(review): the name/revision fields are not visible in this excerpt.
 */
1486 static struct xt_match qtaguid_mt_reg __read_mostly = {
1488 * This module masquerades as the "owner" module so that iptables
1489 * tools can deal with it.
1493 .family = NFPROTO_UNSPEC,
1494 .match = qtaguid_mt,
1495 .matchsize = sizeof(struct xt_qtaguid_match_info),
/*
 * Module init entry point.
 *
 * Runs three setup steps in order: procfs registration, interface stats
 * initialization (given the procfs directory just created), and xtables
 * match registration. The steps are chained with ||, so the first one
 * returning non-zero prevents the later ones from running.
 * NOTE(review): the value returned on failure, and whether earlier steps
 * are rolled back, is not visible in this excerpt.
 */
1499 static int __init qtaguid_mt_init(void)
1501 if (qtaguid_proc_register(&xt_qtaguid_procdir)
1502 || iface_stat_init(xt_qtaguid_procdir)
1503 || xt_register_match(&qtaguid_mt_reg))
1508 /* TODO: allow unloading of the module.
1509 * For now stats are permanent.
1510 * Kconfig forces 'y/n' and never an 'm'.
/*
 * Module registration: qtaguid_mt_init is the init hook. Aliases are
 * declared for both the "owner" and "qtaguid" names, each with an ipt_
 * and an ip6t_ prefix.
 */
1513 module_init(qtaguid_mt_init);
1514 MODULE_AUTHOR("jpa <jpa@google.com>");
1515 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
1516 MODULE_LICENSE("GPL");
1517 MODULE_ALIAS("ipt_owner");
1518 MODULE_ALIAS("ip6t_owner");
1519 MODULE_ALIAS("ipt_qtaguid");
1520 MODULE_ALIAS("ip6t_qtaguid");