2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
19 #define IF_DEBUG(...) pr_debug(__VA_ARGS__)
21 #define IF_DEBUG(...) no_printk(__VA_ARGS__)
23 /* Iptable Matching */
25 #define MT_DEBUG(...) pr_debug(__VA_ARGS__)
27 #define MT_DEBUG(...) no_printk(__VA_ARGS__)
29 /* Red-black tree handling */
31 #define RB_DEBUG(...) pr_debug(__VA_ARGS__)
33 #define RB_DEBUG(...) no_printk(__VA_ARGS__)
35 /* procfs ctrl/stats handling */
37 #define CT_DEBUG(...) pr_debug(__VA_ARGS__)
39 #define CT_DEBUG(...) no_printk(__VA_ARGS__)
42 #include <linux/file.h>
43 #include <linux/inetdevice.h>
44 #include <linux/module.h>
45 #include <linux/netfilter/x_tables.h>
46 #include <linux/netfilter/xt_qtaguid.h>
47 #include <linux/skbuff.h>
48 #include <linux/workqueue.h>
49 #include <net/addrconf.h>
54 #include <linux/netfilter/xt_socket.h>
56 * We only use the xt_socket funcs within a similar context to avoid unexpected
59 #define XT_SOCKET_SUPPORTED_HOOKS \
60 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
63 static const char *module_procdirname = "xt_qtaguid";
64 static struct proc_dir_entry *xt_qtaguid_procdir;
66 static unsigned int proc_iface_perms = S_IRUGO;
67 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
69 static struct proc_dir_entry *xt_qtaguid_stats_file;
70 static unsigned int proc_stats_perms = S_IRUGO;
71 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
73 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
74 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
75 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
77 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
79 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
81 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
82 #include <linux/android_aid.h>
83 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
84 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
86 /* 0 means, don't limit anybody */
87 static gid_t proc_stats_readall_gid;
88 static gid_t proc_ctrl_write_gid;
90 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
92 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
96 * After the kernel has initialized this module, it is still possible
98 * - do not register it via iptables.
99 * the matching code will not be invoked.
101 * the iface stats handling will not act on notifications.
102 * This is mostly useful when a bug is suspected.
104 static bool module_passive;
105 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
107 /*---------------------------------------------------------------------------*/
111 * They represent what the data usage counters will be tracked against.
112 * By default a tag is just based on the UID.
113 * The UID is used as the base for policing, and can not be ignored.
114 * So a tag will always at least represent a UID (uid_tag).
116 * A tag can be augmented with an "accounting tag" which is associated
118 * User space can set the acct_tag portion of the tag which is then used
119 * with sockets: all data belonging to that socket will be counted against the
120 * tag. The policing is then based on the tag's uid_tag portion,
121 * and stats are collected for the acct_tag portion separately.
124 * a: {acct_tag=1, uid_tag=10003}
125 * b: {acct_tag=2, uid_tag=10003}
126 * c: {acct_tag=3, uid_tag=10003}
127 * d: {acct_tag=0, uid_tag=10003}
128 * (a, b, and c represent tags associated with specific sockets.
129 * d is for the totals for that uid, including all untagged traffic.
130 * Typically d is used with policing/quota rules.
132 * We want tag_t big enough to distinguish uid_t and acct_tag.
133 * It might become a struct if needed.
134 * Nothing should be using it as an int.
136 typedef uint64_t tag_t; /* Only used via accessors */
138 static const char *iface_stat_procdirname = "iface_stat";
139 static struct proc_dir_entry *iface_stat_procdir;
143 * For now we only track 2 sets of counters.
144 * The default set is 0.
145 * Userspace can activate another set for a given uid being tracked.
147 #define IFS_MAX_COUNTER_SETS 2
155 /* For now, TCP, UDP, the rest */
163 struct byte_packet_counters {
168 struct data_counters {
169 struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
172 /* Generic tag based node used as a base for rb_tree ops. */
180 struct data_counters counters;
182 * If this tag is acct_tag based, we need to count against the
183 * matching parent uid_tag.
185 struct data_counters *parent_counters;
189 struct list_head list;
196 struct proc_dir_entry *proc_ptr;
198 struct rb_root tag_stat_tree;
199 spinlock_t tag_stat_list_lock;
202 static LIST_HEAD(iface_stat_list);
203 static DEFINE_SPINLOCK(iface_stat_list_lock);
205 /* This is needed to create proc_dir_entries from atomic context. */
206 struct iface_stat_work {
207 struct work_struct iface_work;
208 struct iface_stat *iface_entry;
212 * Track tag that this socket is transferring data for, and not necessarily
213 * the uid that owns the socket.
214 * This is the tag against which tag_stat.counters will be billed.
217 struct rb_node sock_node;
218 struct sock *sk; /* Only used as a number, never dereferenced */
219 /* The socket is needed for sockfd_put() */
220 struct socket *socket;
225 struct qtaguid_event_counts {
226 /* Various successful events */
227 atomic64_t sockets_tagged;
228 atomic64_t sockets_untagged;
229 atomic64_t counter_set_changes;
230 atomic64_t delete_cmds;
231 atomic64_t iface_events; /* Number of NETDEV_* events handled */
233 * match_found_sk_*: numbers related to the netfilter matching
234 * function finding a sock for the sk_buff.
236 atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
237 /* The connection tracker had the sk. */
238 atomic64_t match_found_sk_in_ct;
240 * No sk could be found. No apparent owner. Could happen with
241 * unsolicited traffic.
243 atomic64_t match_found_sk_none;
245 static struct qtaguid_event_counts qtu_events;
247 static struct rb_root sock_tag_tree = RB_ROOT;
248 static DEFINE_SPINLOCK(sock_tag_list_lock);
250 /* Track the set active_set for the given tag. */
251 struct tag_counter_set {
256 static struct rb_root tag_counter_set_tree = RB_ROOT;
257 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
259 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);
261 /*----------------------------------------------*/
/*
 * Three-way comparison of two tags.
 * Yields -1 when t1 < t2, 0 when both are equal and 1 otherwise, which is
 * the ordering used by the tag-keyed red-black tree helpers.
 */
262 static inline int tag_compare(tag_t t1, tag_t t2)
264 return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
/*
 * Build a full tag by OR-ing an accounting tag with a uid.
 * The result is only meaningful when @acct_tag has its low 32 bits clear
 * (see valid_atag()), since the uid occupies those bits.
 */
267 static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
269 return acct_tag | uid;
/*
 * Turn a bare uid into a tag_t.
 * NOTE(review): the body is not visible in this excerpt; presumably the uid
 * is returned unchanged in the low 32 bits - confirm.
 */
271 static inline tag_t make_tag_from_uid(uid_t uid)
/* Extract the uid held in the low 32 bits of @tag. */
275 static inline uid_t get_uid_from_tag(tag_t tag)
277 return tag & 0xFFFFFFFFULL;
/*
 * Reduce @tag to its uid portion (low 32 bits), still typed as a tag_t so
 * it can be used as a lookup key.
 */
279 static inline tag_t get_utag_from_tag(tag_t tag)
281 return tag & 0xFFFFFFFFULL;
/*
 * Reduce @tag to its accounting-tag portion by clearing the low 32 bits
 * that carry the uid.
 */
283 static inline tag_t get_atag_from_tag(tag_t tag)
285 return tag & ~0xFFFFFFFFULL;
/*
 * Check that @tag is usable as an accounting tag: true only when none of
 * the low 32 (uid) bits are set.
 */
288 static inline bool valid_atag(tag_t tag)
290 return !(tag & 0xFFFFFFFFULL);
/*
 * Add @bytes and @packets to the counter slot of @counters selected by
 * counter set, traffic direction and protocol class.
 * No bounds checking is done on @set, @direction or @ifs_proto here.
 */
293 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
294 enum ifs_tx_rx direction,
295 enum ifs_proto ifs_proto,
299 counters->bpc[set][direction][ifs_proto].bytes += bytes;
300 counters->bpc[set][direction][ifs_proto].packets += packets;
/*
 * Total byte count for one counter set and direction: the sum of the
 * IFS_TCP, IFS_UDP and IFS_PROTO_OTHER slots.
 */
303 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
305 enum ifs_tx_rx direction)
307 return counters->bpc[set][direction][IFS_TCP].bytes
308 + counters->bpc[set][direction][IFS_UDP].bytes
309 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
/*
 * Total packet count for one counter set and direction: the sum of the
 * IFS_TCP, IFS_UDP and IFS_PROTO_OTHER slots.
 */
312 static inline uint64_t dc_sum_packets(struct data_counters *counters,
314 enum ifs_tx_rx direction)
316 return counters->bpc[set][direction][IFS_TCP].packets
317 + counters->bpc[set][direction][IFS_UDP].packets
318 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
/*
 * Look up the tag_node carrying @tag in the red-black tree @root.
 * Starting from the root, each visited node's tag is compared against @tag
 * with tag_compare() and the walk continues through rb_left or rb_right.
 * NOTE(review): the loop header, the branch conditions and the return
 * statements are not visible in this excerpt - confirm the not-found result.
 */
321 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
323 struct rb_node *node = root->rb_node;
326 struct tag_node *data = rb_entry(node, struct tag_node, node);
327 int result = tag_compare(tag, data->tag);
328 RB_DEBUG("qtaguid: tag_node_tree_search(): tag=0x%llx"
331 get_uid_from_tag(data->tag));
334 node = node->rb_left;
336 node = node->rb_right;
/*
 * Insert @data into the tag-keyed red-black tree @root.
 * The tree is descended using tag_compare() on data->tag to find the link
 * where the node belongs; the node is then attached with rb_link_node()
 * and the tree rebalanced with rb_insert_color().
 * NOTE(review): handling of an already-present tag is on lines not visible
 * in this excerpt.
 */
343 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
345 struct rb_node **new = &(root->rb_node), *parent = NULL;
347 /* Figure out where to put new node */
349 struct tag_node *this = rb_entry(*new, struct tag_node,
351 int result = tag_compare(data->tag, this->tag);
352 RB_DEBUG("qtaguid: tag_node_tree_insert(): tag=0x%llx"
355 get_uid_from_tag(this->tag));
358 new = &((*new)->rb_left);
360 new = &((*new)->rb_right);
365 /* Add new node and rebalance tree. */
366 rb_link_node(&data->node, parent, new);
367 rb_insert_color(&data->node, root);
/* Insert a tag_stat into @root, keyed by the tag in its embedded tag_node. */
370 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
372 tag_node_tree_insert(&data->tn, root);
/*
 * Find the tag_stat for @tag in @root.
 * Delegates to tag_node_tree_search() and converts the embedded tag_node
 * back into its containing tag_stat.
 * NOTE(review): the not-found check sits on a line not visible here.
 */
375 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
377 struct tag_node *node = tag_node_tree_search(root, tag);
380 return rb_entry(&node->node, struct tag_stat, tn.node);
/*
 * Insert a tag_counter_set into @root, keyed by the tag in its embedded
 * tag_node.
 */
383 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
384 struct rb_root *root)
386 tag_node_tree_insert(&data->tn, root);
/*
 * Find the tag_counter_set for a tag in @root.
 * Delegates to tag_node_tree_search() and converts the embedded tag_node
 * back into its containing tag_counter_set.
 * NOTE(review): the not-found check sits on a line not visible here.
 */
389 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
392 struct tag_node *node = tag_node_tree_search(root, tag);
395 return rb_entry(&node->node, struct tag_counter_set, tn.node);
/*
 * Look up the sock_tag for socket @sk in the tree @root.
 * Nodes are ordered by the numeric value of their sk pointer: the pointer
 * difference against each visited node decides whether to go left or right.
 * NOTE(review): the loop header, branch conditions and return statements
 * are not visible in this excerpt.
 */
399 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
400 const struct sock *sk)
402 struct rb_node *node = root->rb_node;
405 struct sock_tag *data = rb_entry(node, struct sock_tag,
407 ptrdiff_t result = sk - data->sk;
409 node = node->rb_left;
411 node = node->rb_right;
/*
 * Insert @data into the sock_tag tree @root, ordered by sk pointer value.
 * After locating the link with pointer-difference comparisons, the node is
 * attached with rb_link_node() and the tree rebalanced with
 * rb_insert_color().
 * NOTE(review): handling of an already-present sk is on lines not visible
 * in this excerpt.
 */
418 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
420 struct rb_node **new = &(root->rb_node), *parent = NULL;
422 /* Figure out where to put new node */
424 struct sock_tag *this = rb_entry(*new, struct sock_tag,
426 ptrdiff_t result = data->sk - this->sk;
429 new = &((*new)->rb_left);
431 new = &((*new)->rb_right);
436 /* Add new node and rebalance tree. */
437 rb_link_node(&data->sock_node, parent, new);
438 rb_insert_color(&data->sock_node, root);
/*
 * procfs read callback printing a 64-bit counter.
 * @data points at the uint64_t to report; it is formatted as an unsigned
 * decimal followed by a newline. The length is the number of formatted
 * characters past @off, and *eof is set to 1 when that length fits within
 * @count.
 * NOTE(review): initialisation of the output cursor and the return value
 * are on lines not visible in this excerpt.
 */
441 static int read_proc_u64(char *page, char **start, off_t off,
442 int count, int *eof, void *data)
447 uint64_t *iface_entry = data;
452 value = *iface_entry;
453 p += sprintf(p, "%llu\n", value);
454 len = (p - page) - off;
455 *eof = (len <= count) ? 1 : 0;
/*
 * procfs read callback printing a boolean flag.
 * @data points at the bool to report; the value is formatted with "%u"
 * followed by a newline. The length is the number of formatted characters
 * past @off, and *eof is set to 1 when that length fits within @count.
 * NOTE(review): the load of the flag into the printed value and the return
 * value are on lines not visible in this excerpt.
 */
460 static int read_proc_bool(char *page, char **start, off_t off,
461 int count, int *eof, void *data)
466 bool *bool_entry = data;
472 p += sprintf(p, "%u\n", value);
473 len = (p - page) - off;
474 *eof = (len <= count) ? 1 : 0;
/*
 * Return the counter set currently active for @tag.
 * Only the uid portion of the tag is used for the lookup. The search in
 * tag_counter_set_tree and the read of active_set both happen while holding
 * tag_counter_set_list_lock (bottom halves disabled).
 * NOTE(review): the default used when no entry exists and the return
 * statement are on lines not visible in this excerpt.
 */
479 static int get_active_counter_set(tag_t tag)
482 struct tag_counter_set *tcs;
484 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
486 tag, get_uid_from_tag(tag));
487 /* For now we only handle UID tags for active sets */
488 tag = get_utag_from_tag(tag);
489 spin_lock_bh(&tag_counter_set_list_lock);
490 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
492 active_set = tcs->active_set;
493 spin_unlock_bh(&tag_counter_set_list_lock);
498 * Find the entry for tracking the specified interface.
499 * Caller must hold iface_stat_list_lock
/*
 * Linear search of iface_stat_list for the entry whose ifname equals
 * @ifname (exact strcmp() match). A NULL @ifname is logged with pr_info().
 * This function takes no lock itself.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
501 static struct iface_stat *get_iface_entry(const char *ifname)
503 struct iface_stat *iface_entry;
505 /* Find the entry for tracking the specified tag within the interface */
506 if (ifname == NULL) {
507 pr_info("qtaguid: iface_stat: get() NULL device name\n");
511 /* Iterate over interfaces */
512 list_for_each_entry(iface_entry, &iface_stat_list, list) {
513 if (!strcmp(ifname, iface_entry->ifname))
/*
 * Workqueue handler that builds the procfs entries for one interface.
 * @work is embedded in a struct iface_stat_work, which names the iface_stat
 * to expose. A directory named after the interface is created under
 * iface_stat_procdir and populated with read-only entries "tx_bytes",
 * "rx_bytes", "tx_packets", "rx_packets" (via read_proc_u64) and "active"
 * (via read_proc_bool), all created with proc_iface_perms.
 * The return values of create_proc_read_entry() are not checked.
 * NOTE(review): release of the iface_stat_work item is not visible in this
 * excerpt - confirm it is freed on every path.
 */
521 static void iface_create_proc_worker(struct work_struct *work)
523 struct proc_dir_entry *proc_entry;
524 struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
526 struct iface_stat *new_iface = isw->iface_entry;
528 /* iface_entries are not deleted, so safe to manipulate. */
529 proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
530 if (IS_ERR_OR_NULL(proc_entry)) {
531 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
536 new_iface->proc_ptr = proc_entry;
538 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
539 read_proc_u64, &new_iface->tx_bytes);
540 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
541 read_proc_u64, &new_iface->rx_bytes);
542 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
543 read_proc_u64, &new_iface->tx_packets);
544 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
545 read_proc_u64, &new_iface->rx_packets);
546 create_proc_read_entry("active", proc_iface_perms, proc_entry,
547 read_proc_bool, &new_iface->active);
549 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
550 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
554 /* Caller must hold iface_stat_list_lock */
/*
 * Allocate and register a tracking entry for interface @ifname.
 * All allocations use GFP_ATOMIC. The new entry gets a private copy of the
 * name, an initialised tag_stat_list_lock, an empty tag_stat_tree and
 * starts out active. Creation of the procfs entries is deferred to
 * iface_create_proc_worker() through a scheduled work item, and the entry
 * is then added to iface_stat_list.
 * Each allocation failure is logged with pr_err().
 * NOTE(review): the error-path returns and the release of new_iface itself
 * are on lines not visible in this excerpt.
 */
555 static struct iface_stat *iface_alloc(const char *ifname)
557 struct iface_stat *new_iface;
558 struct iface_stat_work *isw;
560 new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
561 if (new_iface == NULL) {
562 pr_err("qtaguid: iface_stat: create(%s): "
563 "iface_stat alloc failed\n", ifname);
566 new_iface->ifname = kstrdup(ifname, GFP_ATOMIC);
567 if (new_iface->ifname == NULL) {
568 pr_err("qtaguid: iface_stat: create(%s): "
569 "ifname alloc failed\n", ifname);
573 spin_lock_init(&new_iface->tag_stat_list_lock);
574 new_iface->active = true;
575 new_iface->tag_stat_tree = RB_ROOT;
578 * ipv6 notifier chains are atomic :(
579 * No create_proc_read_entry() for you!
581 isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
583 pr_err("qtaguid: iface_stat: create(%s): "
584 "work alloc failed\n", new_iface->ifname);
585 kfree(new_iface->ifname);
589 isw->iface_entry = new_iface;
590 INIT_WORK(&isw->iface_work, iface_create_proc_worker);
591 schedule_work(&isw->iface_work);
592 list_add(&new_iface->list, &iface_stat_list);
597 * Create a new entry for tracking the specified interface.
598 * Do nothing if the entry already exists.
599 * Called when an interface is configured with a valid IP address.
/*
 * IPv4 entry point for starting (or resuming) tracking of @net_dev.
 * The inet device is obtained with in_dev_get(); its address list is
 * scanned for an address whose label equals the device name, and the local
 * address of the chosen @ifa is used to tell loopback from real interfaces.
 * Under iface_stat_list_lock:
 *  - an existing entry has its active flag updated according to whether
 *    the address is loopback (see the "disable"/"enable tracking" traces);
 *  - otherwise a loopback address is ignored and any other address gets a
 *    fresh entry from iface_alloc().
 * NOTE(review): the conditions guarding the address scan, the early exits
 * and the in_dev release are on lines not visible in this excerpt.
 */
601 void iface_stat_create(const struct net_device *net_dev,
602 struct in_ifaddr *ifa)
604 struct in_device *in_dev = NULL;
606 struct iface_stat *entry;
608 struct iface_stat *new_iface;
610 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
611 net_dev ? net_dev->name : "?",
614 pr_err("qtaguid: iface_stat: create(): no net dev\n");
618 ifname = net_dev->name;
620 in_dev = in_dev_get(net_dev);
622 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
626 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
628 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
629 IF_DEBUG("qtaguid: iface_stat: create(%s): "
630 "ifa=%p ifa_label=%s\n",
632 ifa->ifa_label ? ifa->ifa_label : "(null)");
633 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
639 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
643 ipaddr = ifa->ifa_local;
645 spin_lock_bh(&iface_stat_list_lock);
646 entry = get_iface_entry(ifname);
648 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
650 if (ipv4_is_loopback(ipaddr)) {
651 entry->active = false;
652 IF_DEBUG("qtaguid: iface_stat: create(%s): "
653 "disable tracking of loopback dev\n",
656 entry->active = true;
657 IF_DEBUG("qtaguid: iface_stat: create(%s): "
658 "enable tracking. ip=%pI4\n",
661 goto done_unlock_put;
662 } else if (ipv4_is_loopback(ipaddr)) {
663 IF_DEBUG("qtaguid: iface_stat: create(%s): "
664 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
665 goto done_unlock_put;
668 new_iface = iface_alloc(ifname);
669 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
670 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
673 spin_unlock_bh(&iface_stat_list_lock);
/*
 * IPv6 counterpart of iface_stat_create().
 * The address type of @ifa (ipv6_addr_type()) decides whether the address
 * is loopback. Under iface_stat_list_lock:
 *  - an existing entry has its active flag updated according to whether
 *    the address is loopback (see the "disable"/"enable tracking" traces);
 *  - otherwise a loopback address is ignored and any other address gets a
 *    fresh entry from iface_alloc().
 * NOTE(review): the NULL checks, early exits and the in_dev release are on
 * lines not visible in this excerpt.
 */
679 void iface_stat_create_ipv6(const struct net_device *net_dev,
680 struct inet6_ifaddr *ifa)
682 struct in_device *in_dev;
684 struct iface_stat *entry;
685 struct iface_stat *new_iface;
688 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
689 ifa, net_dev, net_dev ? net_dev->name : "");
691 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
694 ifname = net_dev->name;
696 in_dev = in_dev_get(net_dev);
698 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
703 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
707 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
711 addr_type = ipv6_addr_type(&ifa->addr);
713 spin_lock_bh(&iface_stat_list_lock);
714 entry = get_iface_entry(ifname);
716 IF_DEBUG("qtaguid: iface_stat: create6(%s): entry=%p\n",
718 if (addr_type & IPV6_ADDR_LOOPBACK) {
719 entry->active = false;
720 IF_DEBUG("qtaguid: iface_stat: create6(%s): "
721 "disable tracking of loopback dev\n",
724 entry->active = true;
725 IF_DEBUG("qtaguid: iface_stat: create6(%s): "
726 "enable tracking. ip=%pI6c\n",
729 goto done_unlock_put;
730 } else if (addr_type & IPV6_ADDR_LOOPBACK) {
731 IF_DEBUG("qtaguid: iface_stat: create6(%s): "
732 "ignore loopback dev. ip=%pI6c\n",
734 goto done_unlock_put;
737 new_iface = iface_alloc(ifname);
738 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
739 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
742 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Look up the sock_tag for @sk in sock_tag_tree without taking any lock
 * ("_nl"); get_sock_stat() is the variant that wraps this call in
 * sock_tag_list_lock.
 */
747 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
749 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
750 return sock_tag_tree_search(&sock_tag_tree, sk);
/*
 * Look up the sock_tag for @sk, taking sock_tag_list_lock around the tree
 * search. The lock is dropped before returning, so the returned pointer is
 * not protected against concurrent removal by this function.
 * NOTE(review): lines between the trace and the lock are not visible in
 * this excerpt (possibly an early exit for a NULL @sk) - confirm.
 */
753 static struct sock_tag *get_sock_stat(const struct sock *sk)
755 struct sock_tag *sock_tag_entry;
756 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
759 spin_lock_bh(&sock_tag_list_lock);
760 sock_tag_entry = get_sock_stat_nl(sk);
761 spin_unlock_bh(&sock_tag_list_lock);
762 return sock_tag_entry;
/*
 * Account one packet of @bytes bytes in @dc for the given counter set and
 * direction, in the IFS_TCP, IFS_UDP or IFS_PROTO_OTHER slot.
 * NOTE(review): the dispatch on @proto that selects the slot is on lines
 * not visible in this excerpt.
 */
766 data_counters_update(struct data_counters *dc, int set,
767 enum ifs_tx_rx direction, int proto, int bytes)
771 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
774 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
778 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
785 * Update stats for the specified interface. Do nothing if the entry
786 * does not exist (when a device was never configured with an IP address).
787 * Called when a device is being unregistered.
/*
 * Fold the device's current totals into its tracking entry and stop
 * tracking it.
 * The stats are read with dev_get_stats() before iface_stat_list_lock is
 * taken. An untracked device is only traced. For a tracked one, the tx/rx
 * byte and packet totals are added to the entry and the entry is marked
 * inactive.
 * NOTE(review): the condition separating the "disable tracking" path from
 * the "disabled" trace is on a line not visible in this excerpt.
 */
789 static void iface_stat_update(struct net_device *dev)
791 struct rtnl_link_stats64 dev_stats, *stats;
792 struct iface_stat *entry;
794 stats = dev_get_stats(dev, &dev_stats);
795 spin_lock_bh(&iface_stat_list_lock);
796 entry = get_iface_entry(dev->name);
798 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
800 spin_unlock_bh(&iface_stat_list_lock);
803 IF_DEBUG("qtaguid: iface_stat: update(%s): entry=%p\n",
806 entry->tx_bytes += stats->tx_bytes;
807 entry->tx_packets += stats->tx_packets;
808 entry->rx_bytes += stats->rx_bytes;
809 entry->rx_packets += stats->rx_packets;
810 entry->active = false;
811 IF_DEBUG("qtaguid: iface_stat: update(%s): "
812 " disable tracking. rx/tx=%llu/%llu\n",
813 dev->name, stats->rx_bytes, stats->tx_bytes);
815 IF_DEBUG("qtaguid: iface_stat: update(%s): disabled\n",
818 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Bill one packet of @bytes bytes to @tag_entry.
 * The counter set is the one currently active for the entry's tag (see
 * get_active_counter_set()). The entry's own counters are updated, and when
 * the entry has parent_counters those are updated with the same values.
 */
821 static void tag_stat_update(struct tag_stat *tag_entry,
822 enum ifs_tx_rx direction, int proto, int bytes)
825 active_set = get_active_counter_set(tag_entry->tn.tag);
826 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
827 "dir=%d proto=%d bytes=%d)\n",
828 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
829 active_set, direction, proto, bytes);
830 data_counters_update(&tag_entry->counters, active_set, direction,
832 if (tag_entry->parent_counters)
833 data_counters_update(tag_entry->parent_counters, active_set,
834 direction, proto, bytes);
838 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
840 * iface_entry->tag_stat_list_lock should be held.
/*
 * Allocate a zeroed tag_stat for a tag (GFP_ATOMIC) and insert it into
 * iface_entry->tag_stat_tree.
 * Returns the new entry. An allocation failure is logged with pr_err();
 * NOTE(review): the failure exit is on a line not visible here, presumably
 * returning NULL - callers should check the result.
 */
842 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
845 struct tag_stat *new_tag_stat_entry = NULL;
846 IF_DEBUG("qtaguid: iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx"
848 iface_entry, tag, get_uid_from_tag(tag));
849 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
850 if (!new_tag_stat_entry) {
851 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
854 new_tag_stat_entry->tn.tag = tag;
855 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
857 return new_tag_stat_entry;
/*
 * Bill one packet to the per-interface, per-tag statistics.
 *
 * @ifname:    interface the packet was seen on; must already be tracked,
 *             otherwise the event is logged with pr_err().
 * @uid:       owner uid used when the socket carries no tag.
 * @sk:        socket used to look up a tag via get_sock_stat().
 * @direction: IFS_RX or IFS_TX.
 * @proto:     IP protocol number.
 * @bytes:     packet length.
 *
 * If @sk is tagged, the tag's accounting and uid portions are used;
 * otherwise the tag is derived from @uid. With tag_stat_list_lock held the
 * {acct_tag, uid_tag} entry is looked up and updated if it exists. If not,
 * the {0, uid_tag} entry is looked up (and created when missing) to serve
 * as parent, then the {acct_tag, uid_tag} entry is created and linked to
 * those parent counters.
 *
 * NOTE(review): get_iface_entry() is documented as requiring
 * iface_stat_list_lock, but no acquisition is visible here - confirm.
 * NOTE(review): the results of create_if_tag_stat() are used with no
 * visible NULL check; an allocation failure would lead to a NULL
 * dereference when parent_counters is assigned - confirm.
 * NOTE(review): on the creation path tag_stat_update() runs after
 * tag_stat_list_lock has been released, unlike the found path - confirm
 * this is intended.
 */
860 static void if_tag_stat_update(const char *ifname, uid_t uid,
861 const struct sock *sk, enum ifs_tx_rx direction,
862 int proto, int bytes)
864 struct tag_stat *tag_stat_entry;
867 struct data_counters *uid_tag_counters;
868 struct sock_tag *sock_tag_entry;
869 struct iface_stat *iface_entry;
870 struct tag_stat *new_tag_stat;
871 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
872 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
873 ifname, uid, sk, direction, proto, bytes);
876 iface_entry = get_iface_entry(ifname);
878 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
882 /* It is ok to process data when an iface_entry is inactive */
884 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
885 ifname, iface_entry);
888 * Look for a tagged sock.
889 * It will have an acct_uid.
891 sock_tag_entry = get_sock_stat(sk);
892 if (sock_tag_entry) {
893 tag = sock_tag_entry->tag;
894 acct_tag = get_atag_from_tag(tag);
895 uid_tag = get_utag_from_tag(tag);
897 uid_tag = make_tag_from_uid(uid);
899 tag = combine_atag_with_uid(acct_tag, uid);
901 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
902 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
903 tag, get_uid_from_tag(tag), iface_entry);
904 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
905 spin_lock_bh(&iface_entry->tag_stat_list_lock);
907 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
909 if (tag_stat_entry) {
911 * Updating the {acct_tag, uid_tag} entry handles both stats:
912 * {0, uid_tag} will also get updated.
914 tag_stat_update(tag_stat_entry, direction, proto, bytes);
915 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
919 /* Loop over tag list under this interface for {0,uid_tag} */
920 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
922 if (!tag_stat_entry) {
923 /* Here: the base uid_tag did not exist */
925 * No parent counters. So
926 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
928 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
929 uid_tag_counters = &new_tag_stat->counters;
931 uid_tag_counters = &tag_stat_entry->counters;
935 new_tag_stat = create_if_tag_stat(iface_entry, tag);
936 new_tag_stat->parent_counters = uid_tag_counters;
938 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
939 tag_stat_update(new_tag_stat, direction, proto, bytes);
/*
 * Netdevice notifier callback.
 * Does nothing useful while the module is in passive mode. Otherwise @ptr
 * is treated as the struct net_device concerned, and depending on @event
 * the interface is either registered for tracking (iface_stat_create() with
 * no specific address) or has its totals folded in via iface_stat_update().
 * NOTE(review): the event values handled and the return codes are on lines
 * not visible in this excerpt.
 */
942 static int iface_netdev_event_handler(struct notifier_block *nb,
943 unsigned long event, void *ptr) {
944 struct net_device *dev = ptr;
946 if (unlikely(module_passive))
949 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
950 "ev=0x%lx netdev=%p->name=%s\n",
951 event, dev, dev ? dev->name : "");
955 iface_stat_create(dev, NULL);
958 iface_stat_update(dev);
/*
 * IPv6 address notifier callback.
 * Does nothing useful while the module is in passive mode. Otherwise @ptr
 * is the struct inet6_ifaddr concerned; a missing address or idev is
 * treated as a fatal bug (BUG_ON). Depending on @event the owning device is
 * passed to iface_stat_create_ipv6() or iface_stat_update(), and the
 * iface_events counter is incremented in both cases.
 * NOTE(review): the event values handled and the return codes are on lines
 * not visible in this excerpt.
 */
964 static int iface_inet6addr_event_handler(struct notifier_block *nb,
965 unsigned long event, void *ptr)
967 struct inet6_ifaddr *ifa = ptr;
968 struct net_device *dev;
970 if (unlikely(module_passive))
973 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
979 BUG_ON(!ifa || !ifa->idev);
980 dev = (struct net_device *)ifa->idev->dev;
981 iface_stat_create_ipv6(dev, ifa);
982 atomic64_inc(&qtu_events.iface_events);
985 BUG_ON(!ifa || !ifa->idev);
986 dev = (struct net_device *)ifa->idev->dev;
987 iface_stat_update(dev);
988 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv4 address notifier callback.
 * Does nothing useful while the module is in passive mode. Otherwise @ptr
 * is the struct in_ifaddr concerned; a missing address or ifa_dev is
 * treated as a fatal bug (BUG_ON). Depending on @event the owning device is
 * passed to iface_stat_create() or iface_stat_update(), and the
 * iface_events counter is incremented in both cases.
 * NOTE(review): the event values handled and the return codes are on lines
 * not visible in this excerpt.
 */
994 static int iface_inetaddr_event_handler(struct notifier_block *nb,
995 unsigned long event, void *ptr)
997 struct in_ifaddr *ifa = ptr;
998 struct net_device *dev;
1000 if (unlikely(module_passive))
1003 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1004 "ev=0x%lx ifa=%p\n",
1009 BUG_ON(!ifa || !ifa->ifa_dev);
1010 dev = ifa->ifa_dev->dev;
1011 iface_stat_create(dev, ifa);
1012 atomic64_inc(&qtu_events.iface_events);
1015 BUG_ON(!ifa || !ifa->ifa_dev);
1016 dev = ifa->ifa_dev->dev;
1017 iface_stat_update(dev);
1018 atomic64_inc(&qtu_events.iface_events);
/*
 * Notifier blocks registered by iface_stat_init(): one each for netdevice
 * events, IPv4 address events and IPv6 address events.
 */
1024 static struct notifier_block iface_netdev_notifier_blk = {
1025 .notifier_call = iface_netdev_event_handler,
1028 static struct notifier_block iface_inetaddr_notifier_blk = {
1029 .notifier_call = iface_inetaddr_event_handler,
1032 static struct notifier_block iface_inet6addr_notifier_blk = {
1033 .notifier_call = iface_inet6addr_event_handler,
/*
 * Set up interface tracking at module init.
 * Creates the iface_stat proc directory under @parent_procdir, then
 * registers the netdevice, IPv4 address and IPv6 address notifiers in that
 * order. Each failure is logged with pr_err(); the trailing calls undo the
 * earlier steps in reverse order (IPv4 notifier, netdevice notifier, proc
 * directory).
 * NOTE(review): the error checks, unwind labels and return values are on
 * lines not visible in this excerpt.
 */
1036 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1040 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1041 if (!iface_stat_procdir) {
1042 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1046 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1048 pr_err("qtaguid: iface_stat: init "
1049 "failed to register dev event handler\n");
1052 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1054 pr_err("qtaguid: iface_stat: init "
1055 "failed to register ipv4 dev event handler\n");
1059 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1061 pr_err("qtaguid: iface_stat: init "
1062 "failed to register ipv6 dev event handler\n");
1063 goto err_unreg_ip4_addr;
1068 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1070 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1072 remove_proc_entry(iface_stat_procdirname, parent_procdir);
/*
 * Try to find the socket owning @skb through the xt_socket helpers.
 * The lookup is only attempted when the current hook is one of
 * XT_SOCKET_SUPPORTED_HOOKS (PRE_ROUTING or LOCAL_IN); the IPv4 or IPv6
 * helper is chosen from par->family. A socket found in TCP_TIME_WAIT state
 * is released again with xt_socket_put_sk().
 * A socket returned from here holds a reference the caller must drop with
 * xt_socket_put_sk(), as qtaguid_mt() does.
 * NOTE(review): the return statements and the switch case labels are on
 * lines not visible in this excerpt.
 */
1077 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1078 struct xt_action_param *par)
1081 unsigned int hook_mask = (1 << par->hooknum);
1083 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1084 par->hooknum, par->family);
1087 * Let's not abuse the the xt_socket_get*_sk(), or else it will
1088 * return garbage SKs.
1090 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1093 switch (par->family) {
1095 sk = xt_socket_get6_sk(skb, par);
1098 sk = xt_socket_get4_sk(skb, par);
1105 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1106 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1107 * Not fixed in 3.0-r3 :(
1110 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1111 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1112 if (sk->sk_state == TCP_TIME_WAIT) {
1113 xt_socket_put_sk(sk);
/*
 * Bill @skb to @uid on the interface it travelled through.
 * The interface is taken from the packet or, failing that, from par->in
 * (falling back to par->out); a mismatch between the two is only traced.
 * A missing device or device name is logged with pr_info(). Otherwise
 * if_tag_stat_update() is called with the socket attached to the skb (or
 * @alternate_sk when there is none), direction IFS_RX when par->in is set
 * and IFS_TX otherwise, the IP protocol number and the skb length.
 * NOTE(review): the protocol is read with ip_hdr() regardless of
 * par->family; for IPv6 packets that field would not be the IPv6 next
 * header - confirm.
 */
1120 static void account_for_uid(const struct sk_buff *skb,
1121 const struct sock *alternate_sk, uid_t uid,
1122 struct xt_action_param *par)
1124 const struct net_device *el_dev;
1127 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1128 el_dev = par->in ? : par->out;
1130 const struct net_device *other_dev;
1132 other_dev = par->in ? : par->out;
1133 if (el_dev != other_dev) {
1134 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1135 "par->(in/out)=%p %s\n",
1136 par->hooknum, el_dev, el_dev->name, other_dev,
1141 if (unlikely(!el_dev)) {
1142 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1143 } else if (unlikely(!el_dev->name)) {
1144 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1146 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1151 if_tag_stat_update(el_dev->name, uid,
1152 skb->sk ? skb->sk : alternate_sk,
1153 par->in ? IFS_RX : IFS_TX,
1154 ip_hdr(skb)->protocol, skb->len);
/*
 * xtables match function for qtaguid rules; also drives the accounting.
 *
 * In passive mode no accounting is done and the result is simply
 * (match ^ invert) == 0.
 *
 * Otherwise a socket for the packet is located, through qtaguid_find_sk()
 * when needed; a socket obtained that way is released with
 * xt_socket_put_sk() before returning. The match_found_sk* event counters
 * record how the socket was (or was not) found.
 *
 *  - No socket, or a socket without sk_socket: the packet is billed to
 *    uid 0 unless the rule matches on uid, and the result is
 *    (match ^ invert) == 0.
 *  - Socket without an open file: the result depends only on the uid/gid
 *    bits of (match ^ invert).
 *  - Otherwise the packet is billed to the file owner's fsuid unless the
 *    rule matches on uid, and the owner's fsuid/fsgid are tested against
 *    the rule's uid/gid ranges, honouring the invert flags.
 *
 * NOTE(review): several conditions, result assignments and the exit labels
 * are on lines not visible in this excerpt.
 */
1158 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1160 const struct xt_qtaguid_match_info *info = par->matchinfo;
1161 const struct file *filp;
1162 bool got_sock = false;
1167 if (unlikely(module_passive))
1168 return (info->match ^ info->invert) == 0;
1170 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1171 par->hooknum, skb, par->in, par->out, par->family);
1174 res = (info->match ^ info->invert) == 0;
1182 * A missing sk->sk_socket happens when packets are in-flight
1183 * and the matching socket is already closed and gone.
1185 sk = qtaguid_find_sk(skb, par);
1187 * If we got the socket from the find_sk(), we will need to put
1188 * it back, as nf_tproxy_get_sock_v4() got it.
1192 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1194 atomic64_inc(&qtu_events.match_found_sk);
1196 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1197 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1199 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1200 par->hooknum, sk, sk->sk_socket,
1201 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1202 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1203 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1204 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1207 if (sk == NULL || sk->sk_socket == NULL) {
1209 * Here, the qtaguid_find_sk() using connection tracking
1210 * couldn't find the owner, so for now we just count them
1211 * against the system.
1214 * TODO: unhack how to force just accounting.
1215 * For now we only do iface stats when the uid-owner is not
1218 if (!(info->match & XT_QTAGUID_UID))
1219 account_for_uid(skb, sk, 0, par);
1220 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1222 sk ? sk->sk_socket : NULL);
1223 res = (info->match ^ info->invert) == 0;
1224 atomic64_inc(&qtu_events.match_found_sk_none);
1225 goto put_sock_ret_res;
1226 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1228 goto put_sock_ret_res;
1230 filp = sk->sk_socket->file;
1232 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1233 res = ((info->match ^ info->invert) &
1234 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1235 goto put_sock_ret_res;
1237 sock_uid = filp->f_cred->fsuid;
1239 * TODO: unhack how to force just accounting.
1240 * For now we only do iface stats when the uid-owner is not requested
1242 if (!(info->match & XT_QTAGUID_UID))
1243 account_for_uid(skb, sk, sock_uid, par);
1246 * The following two tests fail the match when:
1247 * id not in range AND no inverted condition requested
1248 * or id in range AND inverted condition requested
1249 * Thus (!a && b) || (a && !b) == a ^ b
1251 if (info->match & XT_QTAGUID_UID)
1252 if ((filp->f_cred->fsuid >= info->uid_min &&
1253 filp->f_cred->fsuid <= info->uid_max) ^
1254 !(info->invert & XT_QTAGUID_UID)) {
1255 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1258 goto put_sock_ret_res;
1260 if (info->match & XT_QTAGUID_GID)
1261 if ((filp->f_cred->fsgid >= info->gid_min &&
1262 filp->f_cred->fsgid <= info->gid_max) ^
1263 !(info->invert & XT_QTAGUID_GID)) {
1264 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1267 goto put_sock_ret_res;
1270 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1275 xt_socket_put_sk(sk);
1277 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1282 * Procfs reader to get all active socket tags using style "1)" as described in
/*
 * procfs read handler for the ctrl file.
 * The usual start/off read_proc arguments are used as an item cursor:
 * @items_to_skip entries are skipped and *num_items_returned is bumped for
 * every line produced.
 * With sock_tag_list_lock held, sock_tag_tree is walked in order and one
 * "sock=... tag=... (uid=...)" line is emitted per tagged socket; the walk
 * stops (after unlocking) as soon as a line would not fit in the remaining
 * @char_count. After the sockets, a single "events:" line reports the
 * qtu_events counters, again only if it fits.
 * NOTE(review): passive-mode handling, cursor/length bookkeeping and the
 * return values are on lines not visible in this excerpt.
 */
1285 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1286 off_t items_to_skip, int char_count, int *eof,
1292 struct sock_tag *sock_tag_entry;
1293 struct rb_node *node;
1296 if (unlikely(module_passive)) {
1301 /* TODO: support skipping num_items_returned on entry. */
1302 CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1303 page, items_to_skip, char_count, *eof);
1308 spin_lock_bh(&sock_tag_list_lock);
1309 for (node = rb_first(&sock_tag_tree);
1311 node = rb_next(node)) {
1312 if (item_index++ < items_to_skip)
1314 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1315 uid = get_uid_from_tag(sock_tag_entry->tag);
1316 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u)\n",
1318 sock_tag_entry->tag,
1321 len = snprintf(outp, char_count,
1322 "sock=%p tag=0x%llx (uid=%u)\n",
1323 sock_tag_entry->sk, sock_tag_entry->tag, uid);
1324 if (len >= char_count) {
1325 spin_unlock_bh(&sock_tag_list_lock);
1331 (*num_items_returned)++;
1333 spin_unlock_bh(&sock_tag_list_lock);
1335 if (item_index++ >= items_to_skip) {
1336 len = snprintf(outp, char_count,
1337 "events: sockets_tagged=%llu "
1338 "sockets_untagged=%llu "
1339 "counter_set_changes=%llu "
1341 "iface_events=%llu "
1342 "match_found_sk=%llu "
1343 "match_found_sk_in_ct=%llu "
1344 "match_found_sk_none=%llu\n",
1345 atomic64_read(&qtu_events.sockets_tagged),
1346 atomic64_read(&qtu_events.sockets_untagged),
1347 atomic64_read(&qtu_events.counter_set_changes),
1348 atomic64_read(&qtu_events.delete_cmds),
1349 atomic64_read(&qtu_events.iface_events),
1350 atomic64_read(&qtu_events.match_found_sk),
1351 atomic64_read(&qtu_events.match_found_sk_in_ct),
1352 atomic64_read(&qtu_events.match_found_sk_none));
1353 if (len >= char_count) {
1359 (*num_items_returned)++;
/*
 * Returns true when any of these holds: the caller's fsuid is 0,
 * proc_ctrl_write_gid is 0, or in_egroup_p() accepts proc_ctrl_write_gid
 * for the caller.
 */
1366 static bool can_manipulate_uids(void)
1369 return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
1370 || in_egroup_p(proc_ctrl_write_gid);
/*
 * Returns true when @uid is the caller's own fsuid, or when
 * can_manipulate_uids() grants the caller control over other uids.
 */
1373 static bool can_impersonate_uid(uid_t uid)
1375 return uid == current_fsuid() || can_manipulate_uids();
/*
 * Returns true when the caller may see stats belonging to @uid: the
 * caller's fsuid is 0 or equals @uid, proc_stats_readall_gid is 0, or
 * in_egroup_p() accepts proc_stats_readall_gid for the caller.
 */
1378 static bool can_read_other_uid_stats(uid_t uid)
1381 return unlikely(!current_fsuid()) || uid == current_fsuid()
1382 || unlikely(!proc_stats_readall_gid)
1383 || in_egroup_p(proc_stats_readall_gid);
1387 * Delete socket tags, and stat tags associated with a given
1388 * accounting tag and uid.
/*
 * Handler for the ctrl "delete" command.
 *
 * @input is scanned as "<cmd char> <acct_tag> <uid>". The function then
 * makes three passes:
 *   1. socket tags in sock_tag_tree,
 *   2. the tag counter-set entry in tag_counter_set_tree,
 *   3. tag stats in every interface's tag_stat_tree.
 * In passes 1 and 3 an entry is removed when its uid matches and either
 * acct_tag is 0 or the full tag matches. qtu_events.delete_cmds is
 * incremented at the end.
 */
1390 static int ctrl_cmd_delete(const char *input)
1398 struct iface_stat *iface_entry;
1399 struct rb_node *node;
1400 struct sock_tag *st_entry;
1401 struct rb_root st_to_free_tree = RB_ROOT;
1402 struct tag_stat *ts_entry;
1403 struct tag_counter_set *tcs_entry;
1405 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1406 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1407 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1413 if (!valid_atag(acct_tag)) {
1414 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
/*
 * The uid is either defaulted to the caller's fsuid or, when given, must
 * pass can_impersonate_uid().
 */
1419 uid = current_fsuid();
1420 } else if (!can_impersonate_uid(uid)) {
1421 pr_info("qtaguid: ctrl_delete(%s): "
1422 "insufficient priv from pid=%u uid=%u\n",
1423 input, current->pid, current_fsuid());
1428 /* Delete socket tags */
1429 spin_lock_bh(&sock_tag_list_lock);
1430 node = rb_first(&sock_tag_tree);
1432 st_entry = rb_entry(node, struct sock_tag, sock_node);
1433 entry_uid = get_uid_from_tag(st_entry->tag);
/* Advance before a possible rb_erase() of the current node. */
1434 node = rb_next(node);
1435 if (entry_uid != uid)
/*
 * NOTE(review): `tag` is compared here, but its only visible assignment
 * is the combine_atag_with_uid() call after the free loop further down.
 * Confirm that `tag` is initialized before this loop.
 */
1438 if (!acct_tag || st_entry->tag == tag) {
1439 rb_erase(&st_entry->sock_node, &sock_tag_tree);
1440 /* Can't sockfd_put() within spinlock, do it later. */
1441 sock_tag_tree_insert(st_entry, &st_to_free_tree);
1444 spin_unlock_bh(&sock_tag_list_lock);
/* Drain the temporary tree outside the lock and drop each socket ref. */
1446 node = rb_first(&st_to_free_tree);
1448 st_entry = rb_entry(node, struct sock_tag, sock_node);
1449 node = rb_next(node);
1450 CT_DEBUG("qtaguid: ctrl_delete(): "
1451 "erase st: sk=%p tag=0x%llx (uid=%u)\n",
1455 rb_erase(&st_entry->sock_node, &st_to_free_tree);
1456 sockfd_put(st_entry->socket);
1460 tag = combine_atag_with_uid(acct_tag, uid);
1462 /* Delete tag counter-sets */
1463 spin_lock_bh(&tag_counter_set_list_lock);
1464 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1466 CT_DEBUG("qtaguid: ctrl_delete(): "
1467 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1469 get_uid_from_tag(tcs_entry->tn.tag),
1470 tcs_entry->active_set);
1471 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1474 spin_unlock_bh(&tag_counter_set_list_lock);
1477 * If acct_tag is 0, then all entries belonging to uid are
/*
 * Lock nesting: iface_stat_list_lock is taken first, then each
 * interface's tag_stat_list_lock while its tree is walked.
 */
1480 spin_lock_bh(&iface_stat_list_lock);
1481 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1482 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1483 node = rb_first(&iface_entry->tag_stat_tree);
1485 ts_entry = rb_entry(node, struct tag_stat, tn.node);
1486 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1487 node = rb_next(node);
1488 if (entry_uid != uid)
1490 if (!acct_tag || ts_entry->tn.tag == tag) {
1491 CT_DEBUG("qtaguid: ctrl_delete(): "
1492 "erase ts: %s 0x%llx %u\n",
1493 iface_entry->ifname,
1494 get_atag_from_tag(ts_entry->tn.tag),
1496 rb_erase(&ts_entry->tn.node,
1497 &iface_entry->tag_stat_tree);
1501 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1503 spin_unlock_bh(&iface_stat_list_lock);
1504 atomic64_inc(&qtu_events.delete_cmds);
/*
 * Handler for the ctrl "counter set" command.
 *
 * @input is scanned as "<cmd char> <counter_set> <uid>". counter_set must
 * lie in [0, IFS_MAX_COUNTER_SETS) and the caller must pass
 * can_manipulate_uids(). The tag_counter_set entry for the uid-only tag
 * then has its active_set switched to counter_set, and
 * qtu_events.counter_set_changes is incremented.
 */
1511 static int ctrl_cmd_counter_set(const char *input)
1517 struct tag_counter_set *tcs;
1520 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
1521 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
1522 "set=%d uid=%u\n", input, argc, cmd,
1528 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
1529 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
1534 if (!can_manipulate_uids()) {
1535 pr_info("qtaguid: ctrl_counterset(%s): "
1536 "insufficient priv from pid=%u uid=%u\n",
1537 input, current->pid, current_fsuid());
1542 tag = make_tag_from_uid(uid);
1543 spin_lock_bh(&tag_counter_set_list_lock);
1544 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
/*
 * GFP_ATOMIC because tag_counter_set_list_lock is held across the
 * allocation. Presumably this is the "entry not found" path; confirm
 * against the guarding condition.
 */
1546 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
1548 spin_unlock_bh(&tag_counter_set_list_lock);
1549 pr_err("qtaguid: ctrl_counterset(%s): "
1550 "failed to alloc counter set\n",
1556 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
1557 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
1558 "(uid=%u) set=%d\n",
1559 input, tag, get_uid_from_tag(tag), counter_set);
1561 tcs->active_set = counter_set;
1562 spin_unlock_bh(&tag_counter_set_list_lock);
1563 atomic64_inc(&qtu_events.counter_set_changes);
/*
 * Handler for the ctrl "tag" command.
 *
 * @input is scanned as "<cmd char> <sock_fd> <acct_tag> <uid>". The fd is
 * resolved with sockfd_lookup(), the tag is checked with valid_atag(), and
 * the uid is either defaulted to the caller's fsuid or checked with
 * can_impersonate_uid(). The socket's entry in sock_tag_tree is then
 * updated (re-tag) or created and inserted (first tag). On the success
 * path the sockfd_lookup() reference is kept until the socket is untagged.
 */
1570 static int ctrl_cmd_tag(const char *input)
1576 struct socket *el_socket;
1579 struct sock_tag *sock_tag_entry;
1581 /* Unassigned args will get defaulted later. */
1582 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
1583 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
1584 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
1590 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
1592 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
1593 " sock_fd=%d err=%d\n", input, sock_fd, res);
1596 refcnt = atomic_read(&el_socket->file->f_count);
1597 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%d\n",
1601 } else if (!valid_atag(acct_tag)) {
1602 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
1606 CT_DEBUG("qtaguid: ctrl_tag(%s): "
1607 "uid=%u euid=%u fsuid=%u "
1608 "in_group=%d in_egroup=%d\n",
1609 input, current_uid(), current_euid(), current_fsuid(),
1610 in_group_p(proc_stats_readall_gid),
1611 in_egroup_p(proc_stats_readall_gid));
1613 uid = current_fsuid();
1614 } else if (!can_impersonate_uid(uid)) {
1615 pr_info("qtaguid: ctrl_tag(%s): "
1616 "insufficient priv from pid=%u uid=%u\n",
1617 input, current->pid, current_fsuid());
/* Lookup and update/insert of the socket's tag happen under the lock. */
1622 spin_lock_bh(&sock_tag_list_lock);
1623 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1624 if (sock_tag_entry) {
1626 * This is a re-tagging, so release the sock_fd that was
1627 * locked at the time of the 1st tagging.
/*
 * NOTE(review): this sockfd_put() sits between the visible lock and
 * unlock of sock_tag_list_lock, while ctrl_cmd_delete() carries the
 * comment "Can't sockfd_put() within spinlock". Confirm this is safe.
 */
1629 sockfd_put(sock_tag_entry->socket);
1631 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1634 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
1636 if (!sock_tag_entry) {
1637 pr_err("qtaguid: ctrl_tag(%s): "
1638 "socket tag alloc failed\n",
1640 spin_unlock_bh(&sock_tag_list_lock);
/* A new entry records both the sock and the struct socket. */
1644 sock_tag_entry->sk = el_socket->sk;
1645 sock_tag_entry->socket = el_socket;
1646 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1648 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
1649 atomic64_inc(&qtu_events.sockets_tagged);
1651 spin_unlock_bh(&sock_tag_list_lock);
1652 /* We keep the ref to the socket (file) until it is untagged */
1653 CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
1655 el_socket ? atomic_read(&el_socket->file->f_count) : -1);
1659 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1660 sockfd_put(el_socket);
1663 CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
/*
 * Handler for the ctrl "untag" command.
 *
 * @input is scanned as "<cmd char> <sock_fd>". The fd is resolved with
 * sockfd_lookup() and the socket's entry is looked up in sock_tag_tree.
 * When found, the entry is erased and freed, two socket references are
 * dropped (the one held since tagging and the one from this lookup), and
 * qtu_events.sockets_untagged is incremented.
 */
1668 static int ctrl_cmd_untag(const char *input)
1672 struct socket *el_socket;
1675 struct sock_tag *sock_tag_entry;
1677 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
1678 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
1679 input, argc, cmd, sock_fd);
1684 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
1686 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
1687 " sock_fd=%d err=%d\n", input, sock_fd, res);
1690 refcnt = atomic_read(&el_socket->file->f_count);
1691 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%d\n",
1693 spin_lock_bh(&sock_tag_list_lock);
1694 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
/* No tag recorded for this socket: the lock is dropped on this path. */
1695 if (!sock_tag_entry) {
1696 spin_unlock_bh(&sock_tag_list_lock);
1701 * The socket already belongs to the current process
1702 * so it can do whatever it wants to it.
1704 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
1707 * Release the sock_fd that was grabbed at tag time,
1708 * and once more for the sockfd_lookup() here.
/*
 * NOTE(review): the first sockfd_put() runs before the unlock, while
 * ctrl_cmd_delete() carries the comment "Can't sockfd_put() within
 * spinlock". Confirm this is safe.
 */
1710 sockfd_put(sock_tag_entry->socket);
1711 spin_unlock_bh(&sock_tag_list_lock);
1712 sockfd_put(el_socket);
1714 kfree(sock_tag_entry);
1715 atomic64_inc(&qtu_events.sockets_untagged);
1716 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
1722 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1723 sockfd_put(el_socket);
1726 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
/*
 * Dispatches one ctrl command string to ctrl_cmd_delete(),
 * ctrl_cmd_counter_set(), ctrl_cmd_tag() or ctrl_cmd_untag(), stores that
 * handler's result in res, and logs it.
 * NOTE(review): presumably the handler is chosen from the leading command
 * character of @input; confirm against the selection logic.
 */
1731 static int qtaguid_ctrl_parse(const char *input, int count)
1737 /* Collect params for commands */
1740 res = ctrl_cmd_delete(input);
1744 res = ctrl_cmd_counter_set(input);
1748 res = ctrl_cmd_tag(input);
1752 res = ctrl_cmd_untag(input);
1762 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
/* Size of the on-stack buffer that receives one ctrl command. */
1766 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
/*
 * Procfs write handler for the ctrl file.
 *
 * Copies @count bytes from the user buffer into a local buffer,
 * NUL-terminates it, and passes it to qtaguid_ctrl_parse(), whose result
 * is returned.
 */
1767 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
1768 unsigned long count, void *data)
1770 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
1772 if (unlikely(module_passive))
/*
 * ">=" rather than ">" keeps input_buf[count] in bounds for the
 * terminator written below.
 */
1775 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
1778 if (copy_from_user(input_buf, buffer, count))
1781 input_buf[count] = '\0';
1782 return qtaguid_ctrl_parse(input_buf, count);
/*
 * Cursor state shared by the stats printing helpers pp_stats_line(),
 * pp_sets() and qtaguid_stats_proc_read().
 */
1785 struct proc_print_info {
/* Caller's item counter; pp_sets() advances it once per printed line. */
1787 char **num_items_returned;
/* Interface and tag-stat entry currently being printed. */
1788 struct iface_stat *iface_entry;
1789 struct tag_stat *ts_entry;
/*
 * Formats one line of the stats file into ppi->outp, bounded by
 * ppi->char_count.
 *
 * When ppi->item_index is 0 the column-header line is produced.
 * Otherwise a data row is produced for ppi->ts_entry on
 * ppi->iface_entry, using the counters of counter set @cnt_set.
 * Rows are gated by can_read_other_uid_stats() on the row's uid.
 */
1794 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
1797 struct data_counters *cnts;
1798 if (!ppi->item_index) {
1799 len = snprintf(ppi->outp, ppi->char_count,
1800 "idx iface acct_tag_hex uid_tag_int cnt_set "
1801 "rx_bytes rx_packets "
1802 "tx_bytes tx_packets "
1803 "rx_tcp_packets rx_tcp_bytes "
1804 "rx_udp_packets rx_udp_bytes "
1805 "rx_other_packets rx_other_bytes "
1806 "tx_tcp_packets tx_tcp_bytes "
1807 "tx_udp_packets tx_udp_bytes "
1808 "tx_other_packets tx_other_bytes\n");
/* Data row: the uid is extracted from the entry's tag. */
1810 tag_t tag = ppi->ts_entry->tn.tag;
1811 uid_t stat_uid = get_uid_from_tag(tag);
1812 if (!can_read_other_uid_stats(stat_uid)) {
1813 CT_DEBUG("qtaguid: stats line: "
1815 "insufficient priv from pid=%u uid=%u\n",
1816 ppi->iface_entry->ifname,
1817 get_atag_from_tag(tag), stat_uid,
1818 current->pid, current_fsuid());
1821 cnts = &ppi->ts_entry->counters;
1823 ppi->outp, ppi->char_count,
1824 "%d %s 0x%llx %u %u "
1834 ppi->iface_entry->ifname,
1835 get_atag_from_tag(tag),
/*
 * NOTE(review): the header names the per-protocol columns
 * "*_packets *_bytes", but the arguments below pass .bytes before
 * .packets. Confirm which order consumers of this file expect.
 */
1838 dc_sum_bytes(cnts, cnt_set, IFS_RX),
1839 dc_sum_packets(cnts, cnt_set, IFS_RX),
1840 dc_sum_bytes(cnts, cnt_set, IFS_TX),
1841 dc_sum_packets(cnts, cnt_set, IFS_TX),
1842 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
1843 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
1844 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
1845 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
1846 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
1847 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
1848 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
1849 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
1850 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
1851 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
1852 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
1853 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
/*
 * Prints the current tag-stat entry once per counter set, from 0 up to
 * IFS_MAX_COUNTER_SETS - 1, by calling pp_stats_line() for each.
 * After a line that fits, the remaining character budget shrinks and the
 * caller's item counter advances. A length >= ppi->char_count means the
 * line did not fit.
 * NOTE(review): unlike its neighbours this helper is not declared static;
 * confirm whether external linkage is intended.
 */
1858 bool pp_sets(struct proc_print_info *ppi)
1862 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
1864 len = pp_stats_line(ppi, counter_set);
1865 if (len >= ppi->char_count) {
1871 ppi->char_count -= len;
1872 (*ppi->num_items_returned)++;
1879 * Procfs reader to get all tag stats using style "1)" as described in
1881 * Groups all protocols tx/rx bytes.
/*
 * Procfs read handler for the stats file.
 *
 * When items_to_skip is 0 the header line is printed first. Then, for
 * every interface in iface_stat_list and every tag_stat in its tree,
 * pp_sets() prints the entry's lines. Entries whose running index is
 * below items_to_skip are passed over. The return value on the visible
 * paths is the number of bytes placed in @page (ppi.outp - page).
 */
1883 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
1884 off_t items_to_skip, int char_count, int *eof,
1887 struct proc_print_info ppi;
1892 ppi.char_count = char_count;
1893 ppi.num_items_returned = num_items_returned;
/* Passive mode: only the header line is formatted. */
1895 if (unlikely(module_passive)) {
1896 len = pp_stats_line(&ppi, 0);
1897 /* The header should always be shorter than the buffer. */
1898 WARN_ON(len >= ppi.char_count);
1903 CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
1904 "char_count=%d *eof=%d\n", page, *num_items_returned,
1905 items_to_skip, char_count, *eof);
1910 if (!items_to_skip) {
1911 /* The idx is there to help debug when things go belly up. */
1912 len = pp_stats_line(&ppi, 0);
1913 /* Don't advance the outp unless the whole line was printed */
1914 if (len >= ppi.char_count) {
1916 return ppi.outp - page;
1919 ppi.char_count -= len;
/*
 * Lock nesting: iface_stat_list_lock first, then each interface's
 * tag_stat_list_lock. Both are released before the early return below.
 */
1922 spin_lock_bh(&iface_stat_list_lock);
1923 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
1924 struct rb_node *node;
1925 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
1926 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
1928 node = rb_next(node)) {
1929 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
1930 if (ppi.item_index++ < items_to_skip)
/* pp_sets() returning false ends the read with what was written so far. */
1932 if (!pp_sets(&ppi)) {
1934 &ppi.iface_entry->tag_stat_list_lock);
1935 spin_unlock_bh(&iface_stat_list_lock);
1936 return ppi.outp - page;
1939 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
1941 spin_unlock_bh(&iface_stat_list_lock);
1944 return ppi.outp - page;
1947 /*------------------------------------------*/
/*
 * Creates the module's procfs directory under init_net.proc_net and its
 * "ctrl" and "stats" entries, wiring up their read and write handlers.
 * The directory is handed back through @res_procdir. The
 * remove_proc_entry() calls at the end unwind a partial setup.
 */
1948 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
1951 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
1952 if (!*res_procdir) {
1953 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
1958 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
1960 if (!xt_qtaguid_ctrl_file) {
1961 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
/* ctrl is readable and writable. */
1966 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
1967 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
1969 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
1971 if (!xt_qtaguid_stats_file) {
1972 pr_err("qtaguid: failed to create xt_qtaguid/stats "
1975 goto no_stats_entry;
/* stats only gets a read handler here. */
1977 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
1979 * TODO: add support counter hacking
1980 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
1985 remove_proc_entry("ctrl", *res_procdir);
/*
 * NOTE(review): the directory was created with module_procdirname under
 * init_net.proc_net, but is removed here by literal name with a NULL
 * parent. Confirm this removes the intended entry.
 */
1987 remove_proc_entry("xt_qtaguid", NULL);
/*
 * xtables match descriptor: registered for NFPROTO_UNSPEC, with
 * qtaguid_mt() as the match callback and struct xt_qtaguid_match_info as
 * the per-rule match data.
 */
1992 static struct xt_match qtaguid_mt_reg __read_mostly = {
1994 * This module masquerades as the "owner" module so that iptables
1995 * tools can deal with it.
1999 .family = NFPROTO_UNSPEC,
2000 .match = qtaguid_mt,
2001 .matchsize = sizeof(struct xt_qtaguid_match_info),
/*
 * Module init: runs procfs registration, iface_stat_init() and
 * xt_register_match() in that order. The || chain stops at the first
 * step that returns non-zero, so later steps are not attempted.
 */
2005 static int __init qtaguid_mt_init(void)
2007 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2008 || iface_stat_init(xt_qtaguid_procdir)
2009 || xt_register_match(&qtaguid_mt_reg))
2015 * TODO: allow unloading of the module.
2016 * For now stats are permanent.
2017 * Kconfig forces 'y/n' and never an 'm'.
/* Entry point and module metadata. */
2020 module_init(qtaguid_mt_init);
2021 MODULE_AUTHOR("jpa <jpa@google.com>");
2022 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2023 MODULE_LICENSE("GPL");
/* Aliases for both the "owner" and "qtaguid" names, for IPv4 and IPv6. */
2024 MODULE_ALIAS("ipt_owner");
2025 MODULE_ALIAS("ip6t_owner");
2026 MODULE_ALIAS("ipt_qtaguid");
2027 MODULE_ALIAS("ip6t_qtaguid");