From bd8db05002753117d38ad9783d96f0b118f0f42b Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 27 Apr 2012 12:57:39 -0700 Subject: [PATCH] netfilter: xt_qtaguid: start tracking iface rx/tx at low level qtaguid tracks the device stats by monitoring when it goes up and down, then it gets the dev_stats(). But devs don't correctly report stats (either they don't count headers symmetrically between rx/tx, or they count internal control messages). Now qtaguid counts the rx/tx bytes/packets during raw:prerouting and mangle:postrouting (nat is not available in ipv6). The results are in /proc/net/xt_qtaguid/iface_stat_fmt which outputs a format line (bash expansion): ifname total_skb_{rx,tx}_{bytes,packets} Added event counters for pre/post handling. Added extra ctrl_*() pid/uid debugging. Change-Id: Id84345d544ad1dd5f63e3842cab229e71d339297 Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 277 ++++++++++++++++++++++------ net/netfilter/xt_qtaguid_internal.h | 5 +- net/netfilter/xt_qtaguid_print.c | 18 +- 3 files changed, 233 insertions(+), 67 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 2c1170f89d0f..9fd0ffa6c365 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -114,8 +114,15 @@ module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); /*---------------------------------------------------------------------------*/ static const char *iface_stat_procdirname = "iface_stat"; static struct proc_dir_entry *iface_stat_procdir; +/* + * The iface_stat_all* will go away once userspace gets use to the new fields + * that have a format line. + */ static const char *iface_stat_all_procfilename = "iface_stat_all"; static struct proc_dir_entry *iface_stat_all_procfile; +static const char *iface_stat_fmt_procfilename = "iface_stat_fmt"; +static struct proc_dir_entry *iface_stat_fmt_procfile; + /* * Ordering of locks: @@ -128,9 +135,9 @@ static struct proc_dir_entry *iface_stat_all_procfile; * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock * is acquired. * - * Call tree with all lock holders as of 2011-09-25: + * Call tree with all lock holders as of 2012-04-27: * - * iface_stat_all_proc_read() + * iface_stat_fmt_proc_read() * iface_stat_list_lock * (struct iface_stat) * @@ -781,13 +788,14 @@ done: return iface_entry; } -static int iface_stat_all_proc_read(char *page, char **num_items_returned, +static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, off_t items_to_skip, int char_count, int *eof, void *data) { char *outp = page; int item_index = 0; int len; + int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */ struct iface_stat *iface_entry; struct rtnl_link_stats64 dev_stats, *stats; struct rtnl_link_stats64 no_dev_stats = {0}; @@ -797,14 +805,32 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned, return 0; } - CT_DEBUG("qtaguid:proc iface_stat_all " + CT_DEBUG("qtaguid:proc iface_stat_fmt " + "pid=%u tgid=%u uid=%u " "page=%p *num_items_returned=%p off=%ld " - "char_count=%d *eof=%d\n", page, *num_items_returned, + "char_count=%d *eof=%d\n", + current->pid, current->tgid, current_fsuid(), + page, *num_items_returned, items_to_skip, char_count, *eof); if (*eof) return 0; + if (fmt == 2 && item_index++ >= items_to_skip) { + len = snprintf(outp, char_count, + "ifname " + "total_skb_rx_bytes total_skb_rx_packets " + "total_skb_tx_bytes total_skb_tx_packets\n" + ); + if (len >= char_count) { + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + /* * This lock will prevent iface_stat_update() from changing active, * and in turn prevent an interface from unregistering itself. @@ -820,18 +846,37 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned, } else { stats = &no_dev_stats; } - len = snprintf(outp, char_count, - "%s %d " - "%llu %llu %llu %llu " - "%llu %llu %llu %llu\n", - iface_entry->ifname, - iface_entry->active, - iface_entry->totals[IFS_RX].bytes, - iface_entry->totals[IFS_RX].packets, - iface_entry->totals[IFS_TX].bytes, - iface_entry->totals[IFS_TX].packets, - stats->rx_bytes, stats->rx_packets, - stats->tx_bytes, stats->tx_packets); + /* + * If the meaning of the data changes, then update the fmtX + * string. + */ + if (fmt == 1) { + len = snprintf( + outp, char_count, + "%s %d " + "%llu %llu %llu %llu " + "%llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->active, + iface_entry->totals_via_dev[IFS_RX].bytes, + iface_entry->totals_via_dev[IFS_RX].packets, + iface_entry->totals_via_dev[IFS_TX].bytes, + iface_entry->totals_via_dev[IFS_TX].packets, + stats->rx_bytes, stats->rx_packets, + stats->tx_bytes, stats->tx_packets + ); + } else { + len = snprintf( + outp, char_count, + "%s " + "%llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->totals_via_skb[IFS_RX].bytes, + iface_entry->totals_via_skb[IFS_RX].packets, + iface_entry->totals_via_skb[IFS_TX].bytes, + iface_entry->totals_via_skb[IFS_TX].packets + ); + } if (len >= char_count) { spin_unlock_bh(&iface_stat_list_lock); *outp = '\0'; @@ -865,13 +910,17 @@ static void iface_create_proc_worker(struct work_struct *work) new_iface->proc_ptr = proc_entry; create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_TX].bytes); + read_proc_u64, + &new_iface->totals_via_dev[IFS_TX].bytes); create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_RX].bytes); + read_proc_u64, + &new_iface->totals_via_dev[IFS_RX].bytes); create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_TX].packets); + read_proc_u64, + &new_iface->totals_via_dev[IFS_TX].packets); create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_RX].packets); + read_proc_u64, + &new_iface->totals_via_dev[IFS_RX].packets); create_proc_read_entry("active", proc_iface_perms, proc_entry, read_proc_bool, &new_iface->active); @@ -975,11 +1024,13 @@ static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, "iface reset its stats unexpectedly\n", __func__, net_dev->name); - iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; - iface->totals[IFS_TX].packets += + iface->totals_via_dev[IFS_TX].bytes += + iface->last_known[IFS_TX].bytes; + iface->totals_via_dev[IFS_TX].packets += iface->last_known[IFS_TX].packets; - iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; - iface->totals[IFS_RX].packets += + iface->totals_via_dev[IFS_RX].bytes += + iface->last_known[IFS_RX].bytes; + iface->totals_via_dev[IFS_RX].packets += iface->last_known[IFS_RX].packets; iface->last_known_valid = false; IF_DEBUG("qtaguid: %s(%s): iface=%p " @@ -1147,6 +1198,27 @@ static struct sock_tag *get_sock_stat(const struct sock *sk) return sock_tag_entry; } +static int ipx_proto(const struct sk_buff *skb, + struct xt_action_param *par) +{ + int thoff = 0, tproto; + + switch (par->family) { + case NFPROTO_IPV6: + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) + MT_DEBUG("%s(): transport header not found in ipv6" + " skb=%p\n", __func__, skb); + break; + case NFPROTO_IPV4: + tproto = ip_hdr(skb)->protocol; + break; + default: + tproto = IPPROTO_RAW; + } + return tproto; +} + static void data_counters_update(struct data_counters *dc, int set, enum ifs_tx_rx direction, int proto, int bytes) @@ -1207,10 +1279,10 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only) spin_unlock_bh(&iface_stat_list_lock); return; } - entry->totals[IFS_TX].bytes += stats->tx_bytes; - entry->totals[IFS_TX].packets += stats->tx_packets; - entry->totals[IFS_RX].bytes += stats->rx_bytes; - entry->totals[IFS_RX].packets += stats->rx_packets; + entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes; + entry->totals_via_dev[IFS_TX].packets += stats->tx_packets; + entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes; + entry->totals_via_dev[IFS_RX].packets += stats->rx_packets; /* We don't need the last_known[] anymore */ entry->last_known_valid = false; _iface_stat_set_active(entry, net_dev, false); @@ -1220,6 +1292,67 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only) spin_unlock_bh(&iface_stat_list_lock); } +/* + * Update stats for the specified interface from the skb. + * Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called on each sk. + */ +static void iface_stat_update_from_skb(const struct sk_buff *skb, + struct xt_action_param *par) +{ + struct iface_stat *entry; + const struct net_device *el_dev; + enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; + int bytes = skb->len; + + if (!skb->dev) { + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); + el_dev = par->in ? : par->out; + } else { + const struct net_device *other_dev; + el_dev = skb->dev; + other_dev = par->in ? : par->out; + if (el_dev != other_dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " + "par->(in/out)=%p %s\n", + par->hooknum, el_dev, el_dev->name, other_dev, + other_dev->name); + } + } + + if (unlikely(!el_dev)) { + pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", + par->hooknum, __func__); + BUG(); + } else if (unlikely(!el_dev->name)) { + pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", + par->hooknum, __func__); + BUG(); + } else { + int proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", + par->hooknum, el_dev->name, el_dev->type, + par->family, proto); + } + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(el_dev->name); + if (entry == NULL) { + IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n", + __func__, el_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + el_dev->name, entry); + + entry->totals_via_skb[direction].bytes += bytes; + entry->totals_via_skb[direction].packets++; + spin_unlock_bh(&iface_stat_list_lock); +} + static void tag_stat_update(struct tag_stat *tag_entry, enum ifs_tx_rx direction, int proto, int bytes) { @@ -1467,18 +1600,31 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) parent_procdir); if (!iface_stat_all_procfile) { pr_err("qtaguid: iface_stat: init " - " failed to create stat_all proc entry\n"); + " failed to create stat_old proc entry\n"); err = -1; goto err_zap_entry; } - iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; + iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read; + iface_stat_all_procfile->data = (void *)1; /* fmt1 */ + + iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename, + proc_iface_perms, + parent_procdir); + if (!iface_stat_fmt_procfile) { + pr_err("qtaguid: iface_stat: init " + " failed to create stat_all proc entry\n"); + err = -1; + goto err_zap_all_stats_entry; + } + iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read; + iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */ err = register_netdevice_notifier(&iface_netdev_notifier_blk); if (err) { pr_err("qtaguid: iface_stat: init " "failed to register dev event handler\n"); - goto err_zap_all_stats_entry; + goto err_zap_all_stats_entries; } err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); if (err) { @@ -1499,6 +1645,8 @@ err_unreg_ip4_addr: unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); err_unreg_nd: unregister_netdevice_notifier(&iface_netdev_notifier_blk); +err_zap_all_stats_entries: + remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir); err_zap_all_stats_entry: remove_proc_entry(iface_stat_all_procfilename, parent_procdir); err_zap_entry: @@ -1550,27 +1698,6 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, return sk; } -static int ipx_proto(const struct sk_buff *skb, - struct xt_action_param *par) -{ - int thoff = 0, tproto; - - switch (par->family) { - case NFPROTO_IPV6: - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) - MT_DEBUG("%s(): transport header not found in ipv6" - " skb=%p\n", __func__, skb); - break; - case NFPROTO_IPV4: - tproto = ip_hdr(skb)->protocol; - break; - default: - tproto = IPPROTO_RAW; - } - return tproto; -} - static void account_for_uid(const struct sk_buff *skb, const struct sock *alternate_sk, uid_t uid, struct xt_action_param *par) @@ -1630,8 +1757,22 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) goto ret_res; } - sk = skb->sk; + switch (par->hooknum) { + case NF_INET_PRE_ROUTING: + case NF_INET_POST_ROUTING: + atomic64_inc(&qtu_events.match_calls_prepost); + iface_stat_update_from_skb(skb, par); + /* + * We are done in pre/post. The skb will get processed + * further alter. + */ + res = (info->match ^ info->invert); + goto ret_res; + break; + /* default: Fall through and do UID releated work */ + } + sk = skb->sk; if (sk == NULL) { /* * A missing sk->sk_socket happens when packets are in-flight @@ -1806,8 +1947,10 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, if (*eof) return 0; - CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", - page, items_to_skip, char_count, *eof); + CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u " + "page=%p off=%ld char_count=%d *eof=%d\n", + current->pid, current->tgid, current_fsuid(), + page, items_to_skip, char_count, *eof); spin_lock_bh(&sock_tag_list_lock); for (node = rb_first(&sock_tag_tree); @@ -1851,6 +1994,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, "delete_cmds=%llu " "iface_events=%llu " "match_calls=%llu " + "match_calls_prepost=%llu " "match_found_sk=%llu " "match_found_sk_in_ct=%llu " "match_found_no_sk_in_ct=%llu " @@ -1862,6 +2006,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, atomic64_read(&qtu_events.delete_cmds), atomic64_read(&qtu_events.iface_events), atomic64_read(&qtu_events.match_calls), + atomic64_read(&qtu_events.match_calls_prepost), atomic64_read(&qtu_events.match_found_sk), atomic64_read(&qtu_events.match_found_sk_in_ct), atomic64_read( @@ -2135,7 +2280,9 @@ static int ctrl_cmd_tag(const char *input) el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ if (!el_socket) { pr_info("qtaguid: ctrl_tag(%s): failed to lookup" - " sock_fd=%d err=%d\n", input, sock_fd, res); + " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", + input, sock_fd, res, current->pid, current->tgid, + current_fsuid()); goto err; } CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", @@ -2280,7 +2427,9 @@ static int ctrl_cmd_untag(const char *input) el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ if (!el_socket) { pr_info("qtaguid: ctrl_untag(%s): failed to lookup" - " sock_fd=%d err=%d\n", input, sock_fd, res); + " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", + input, sock_fd, res, current->pid, current->tgid, + current_fsuid()); goto err; } CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", @@ -2356,6 +2505,9 @@ static int qtaguid_ctrl_parse(const char *input, int count) char cmd; int res; + CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + cmd = input[0]; /* Collect params for commands */ switch (cmd) { @@ -2532,9 +2684,12 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned, return len; } - CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " - "char_count=%d *eof=%d\n", page, *num_items_returned, - items_to_skip, char_count, *eof); + CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u " + "page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", + current->pid, current->tgid, current_fsuid(), + page, *num_items_returned, + items_to_skip, char_count, *eof); if (*eof) return 0; diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h index 02479d6d317d..d79f8383abf4 100644 --- a/net/netfilter/xt_qtaguid_internal.h +++ b/net/netfilter/xt_qtaguid_internal.h @@ -202,7 +202,8 @@ struct iface_stat { /* net_dev is only valid for active iface_stat */ struct net_device *net_dev; - struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; + struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; + struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS]; /* * We keep the last_known, because some devices reset their counters * just before NETDEV_UP, while some will reset just before @@ -254,6 +255,8 @@ struct qtaguid_event_counts { atomic64_t iface_events; /* Number of NETDEV_* events handled */ atomic64_t match_calls; /* Number of times iptables called mt */ + /* Number of times iptables called mt from pre or post routing hooks */ + atomic64_t match_calls_prepost; /* * match_found_sk_*: numbers related to the netfilter matching * function finding a sock for the sk_buff. diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c index 39176785c91f..8cbd8e42bcc4 100644 --- a/net/netfilter/xt_qtaguid_print.c +++ b/net/netfilter/xt_qtaguid_print.c @@ -183,7 +183,11 @@ char *pp_iface_stat(struct iface_stat *is) res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" "list=list_head{...}, " "ifname=%s, " - "total={rx={bytes=%llu, " + "total_dev={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "total_skb={rx={bytes=%llu, " "packets=%llu}, " "tx={bytes=%llu, " "packets=%llu}}, " @@ -198,10 +202,14 @@ char *pp_iface_stat(struct iface_stat *is) "tag_stat_tree=rb_root{...}}", is, is->ifname, - is->totals[IFS_RX].bytes, - is->totals[IFS_RX].packets, - is->totals[IFS_TX].bytes, - is->totals[IFS_TX].packets, + is->totals_via_dev[IFS_RX].bytes, + is->totals_via_dev[IFS_RX].packets, + is->totals_via_dev[IFS_TX].bytes, + is->totals_via_dev[IFS_TX].packets, + is->totals_via_skb[IFS_RX].bytes, + is->totals_via_skb[IFS_RX].packets, + is->totals_via_skb[IFS_TX].bytes, + is->totals_via_skb[IFS_TX].packets, is->last_known_valid, is->last_known[IFS_RX].bytes, is->last_known[IFS_RX].packets, -- 2.34.1