Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net...
author Pablo Neira Ayuso <pablo@netfilter.org>
Thu, 20 Aug 2015 23:10:19 +0000 (01:10 +0200)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Fri, 21 Aug 2015 04:09:05 +0000 (06:09 +0200)
Resolve conflicts with conntrack template fixes.

Conflicts:
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_synproxy_core.c
net/netfilter/xt_CT.c

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
44 files changed:
include/linux/netfilter/nfnetlink_acct.h
include/net/net_namespace.h
include/net/netfilter/ipv4/nf_dup_ipv4.h [new file with mode: 0644]
include/net/netfilter/ipv6/nf_dup_ipv6.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_core.h
include/net/netfilter/nf_conntrack_expect.h
include/net/netfilter/nf_conntrack_zones.h
include/net/netfilter/nft_dup.h [new file with mode: 0644]
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter/nfnetlink_conntrack.h
include/uapi/linux/netfilter/xt_CT.h
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/netfilter/nf_dup_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nft_dup_ipv4.c [new file with mode: 0644]
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/ipv6/netfilter/nf_dup_ipv6.c [new file with mode: 0644]
net/ipv6/netfilter/nft_dup_ipv6.c [new file with mode: 0644]
net/netfilter/Kconfig
net/netfilter/ipvs/ip_vs_nfct.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_pptp.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_nat_core.c
net/netfilter/nf_synproxy_core.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nft_counter.c
net/netfilter/nft_limit.c
net/netfilter/nft_payload.c
net/netfilter/xt_CT.c
net/netfilter/xt_TEE.c
net/netfilter/xt_connlimit.c
net/netfilter/xt_nfacct.c
net/sched/act_connmark.c

index 6ec975748742793fd51c274314a208ea5cb697db..80ca889b164e3eab5b42c7249d264f55650e44bf 100644 (file)
@@ -2,6 +2,7 @@
 #define _NFNL_ACCT_H_
 
 #include <uapi/linux/netfilter/nfnetlink_acct.h>
+#include <net/net_namespace.h>
 
 enum {
        NFACCT_NO_QUOTA         = -1,
@@ -11,7 +12,7 @@ enum {
 
 struct nf_acct;
 
-struct nf_acct *nfnl_acct_find_get(const char *filter_name);
+struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
 extern int nfnl_acct_overquota(const struct sk_buff *skb,
index e951453e0a2378caf405f62910dd91d7c768ea3b..2dcea635ecce3ead337ffa5fd2ba68096db9f997 100644 (file)
@@ -118,6 +118,9 @@ struct net {
 #endif
        struct sock             *nfnl;
        struct sock             *nfnl_stash;
+#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
+       struct list_head        nfnl_acct_list;
+#endif
 #endif
 #ifdef CONFIG_WEXT_CORE
        struct sk_buff_head     wext_nlevents;
diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h
new file mode 100644 (file)
index 0000000..42008f1
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _NF_DUP_IPV4_H_
+#define _NF_DUP_IPV4_H_
+
+/*
+ * Forward declarations: the prototype below only takes pointers to these
+ * types, so declaring the tags is enough to make this header usable on
+ * its own, regardless of include order.
+ */
+struct sk_buff;
+struct in_addr;
+
+/*
+ * nf_dup_ipv4 - send a copy of an IPv4 packet towards a gateway
+ * @skb:     packet to duplicate
+ * @hooknum: netfilter hook the caller is running in
+ * @gw:      address the copy is routed to
+ * @oif:     output interface index, or -1 for no interface constraint
+ */
+void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
+                const struct in_addr *gw, int oif);
+
+#endif /* _NF_DUP_IPV4_H_ */
diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h
new file mode 100644 (file)
index 0000000..ed6bd66
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _NF_DUP_IPV6_H_
+#define _NF_DUP_IPV6_H_
+
+/*
+ * Forward declarations: the prototype below only takes pointers to these
+ * types, so declaring the tags is enough to make this header usable on
+ * its own, regardless of include order.
+ */
+struct sk_buff;
+struct in6_addr;
+
+/*
+ * nf_dup_ipv6 - send a copy of an IPv6 packet towards a gateway
+ * @skb:     packet to duplicate
+ * @hooknum: netfilter hook the caller is running in
+ * @gw:      address the copy is routed to
+ * @oif:     output interface index, or -1 for no interface constraint
+ */
+void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
+                const struct in6_addr *gw, int oif);
+
+#endif /* _NF_DUP_IPV6_H_ */
index 37cd3911d5c59e97fe6328a2852ea17040f4dbc3..f5e23c6dee8bcbcc66705a4d5cefdaef311eb98b 100644 (file)
@@ -250,8 +250,12 @@ void nf_ct_untracked_status_or(unsigned long bits);
 void nf_ct_iterate_cleanup(struct net *net,
                           int (*iter)(struct nf_conn *i, void *data),
                           void *data, u32 portid, int report);
+
+struct nf_conntrack_zone;
+
 void nf_conntrack_free(struct nf_conn *ct);
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+                                  const struct nf_conntrack_zone *zone,
                                   const struct nf_conntrack_tuple *orig,
                                   const struct nf_conntrack_tuple *repl,
                                   gfp_t gfp);
@@ -291,7 +295,9 @@ extern unsigned int nf_conntrack_max;
 extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
-struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+                                const struct nf_conntrack_zone *zone,
+                                gfp_t flags);
 
 #define NF_CT_STAT_INC(net, count)       __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
index f2f0fa3bb15073edeb0af54087087539842e17f1..c03f9c42b3cd32be938e282e47cf66e63536a8c3 100644 (file)
@@ -52,7 +52,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
+nf_conntrack_find_get(struct net *net,
+                     const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple);
 
 int __nf_conntrack_confirm(struct sk_buff *skb);
index 3f3aecbc8632e935e16e6517bc23eb8e07a9b16d..dce56f09ac9aed9c0f7d4a1d99acc9602e33ea5f 100644 (file)
@@ -4,7 +4,9 @@
 
 #ifndef _NF_CONNTRACK_EXPECT_H
 #define _NF_CONNTRACK_EXPECT_H
+
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 extern unsigned int nf_ct_expect_hsize;
 extern unsigned int nf_ct_expect_max;
@@ -76,15 +78,18 @@ int nf_conntrack_expect_init(void);
 void nf_conntrack_expect_fini(void);
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, u16 zone,
+__nf_ct_expect_find(struct net *net,
+                   const struct nf_conntrack_zone *zone,
                    const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, u16 zone,
+nf_ct_expect_find_get(struct net *net,
+                     const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, u16 zone,
+nf_ct_find_expectation(struct net *net,
+                      const struct nf_conntrack_zone *zone,
                       const struct nf_conntrack_tuple *tuple);
 
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
index 034efe8d45a544ac9557724b4e40359398dd1279..5316c7b3a374db1aa0b724e4a133a8f656bbb8da 100644 (file)
 #ifndef _NF_CONNTRACK_ZONES_H
 #define _NF_CONNTRACK_ZONES_H
 
-#define NF_CT_DEFAULT_ZONE     0
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-#include <net/netfilter/nf_conntrack_extend.h>
+#define NF_CT_DEFAULT_ZONE_ID  0
+
+#define NF_CT_ZONE_DIR_ORIG    (1 << IP_CT_DIR_ORIGINAL)
+#define NF_CT_ZONE_DIR_REPL    (1 << IP_CT_DIR_REPLY)
+
+#define NF_CT_DEFAULT_ZONE_DIR (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
+
+#define NF_CT_FLAG_MARK                1
 
 struct nf_conntrack_zone {
        u16     id;
+       u8      flags;
+       u8      dir;
 };
 
-static inline u16 nf_ct_zone(const struct nf_conn *ct)
+extern const struct nf_conntrack_zone nf_ct_zone_dflt;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_extend.h>
+
+static inline const struct nf_conntrack_zone *
+nf_ct_zone(const struct nf_conn *ct)
 {
+       const struct nf_conntrack_zone *nf_ct_zone = NULL;
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
-       struct nf_conntrack_zone *nf_ct_zone;
        nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);
-       if (nf_ct_zone)
-               return nf_ct_zone->id;
 #endif
-       return NF_CT_DEFAULT_ZONE;
+       return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt;
+}
+
+/* Fill @zone with the given id, direction mask and flags; returns @zone. */
+static inline const struct nf_conntrack_zone *
+nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
+{
+       zone->dir = dir;
+       zone->flags = flags;
+       zone->id = id;
+
+       return zone;
+}
+
+/*
+ * Pick the zone to use for @skb given an optional template conntrack.
+ *
+ * Without a template the default zone is returned.  With a template whose
+ * zone has NF_CT_FLAG_MARK set, a zone is built in the caller-provided
+ * scratch storage @tmp: its id is taken from skb->mark (passed through the
+ * u16 id parameter of nf_ct_zone_init()), its direction is copied from the
+ * template zone and its flags are cleared.  Otherwise the template's own
+ * zone is returned.
+ */
+static inline const struct nf_conntrack_zone *
+nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
+               struct nf_conntrack_zone *tmp)
+{
+       const struct nf_conntrack_zone *tmpl_zone;
+
+       if (tmpl == NULL)
+               return &nf_ct_zone_dflt;
+
+       tmpl_zone = nf_ct_zone(tmpl);
+       if (!(tmpl_zone->flags & NF_CT_FLAG_MARK))
+               return tmpl_zone;
+
+       return nf_ct_zone_init(tmp, skb->mark, tmpl_zone->dir, 0);
+}
+
+/*
+ * Add a zone extension to @ct and initialise it from @info.
+ * Returns 0 on success (and always when CONFIG_NF_CONNTRACK_ZONES is not
+ * set), or -ENOMEM if the extension could not be added.
+ */
+static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags,
+                                const struct nf_conntrack_zone *info)
+{
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+       struct nf_conntrack_zone *ext;
+
+       ext = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags);
+       if (ext == NULL)
+               return -ENOMEM;
+
+       nf_ct_zone_init(ext, info->id, info->dir, info->flags);
+#endif
+       return 0;
 }
 
-#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */
+/* True if the bit for @dir is set in the zone's direction mask. */
+static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
+                                         enum ip_conntrack_dir dir)
+{
+       const int dir_bit = 1 << dir;
+
+       return (zone->dir & dir_bit) != 0;
+}
+
+/*
+ * Zone id that applies in direction @dir: the zone's own id when the zone
+ * covers that direction, NF_CT_DEFAULT_ZONE_ID otherwise.
+ */
+static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
+                               enum ip_conntrack_dir dir)
+{
+       if (!nf_ct_zone_matches_dir(zone, dir))
+               return NF_CT_DEFAULT_ZONE_ID;
+
+       return zone->id;
+}
+
+/*
+ * Compare the zone of conntrack @a with zone @b as seen in direction @dir,
+ * i.e. after each side has been resolved through nf_ct_zone_id().
+ */
+static inline bool nf_ct_zone_equal(const struct nf_conn *a,
+                                   const struct nf_conntrack_zone *b,
+                                   enum ip_conntrack_dir dir)
+{
+       const u16 id_a = nf_ct_zone_id(nf_ct_zone(a), dir);
+       const u16 id_b = nf_ct_zone_id(b, dir);
+
+       return id_a == id_b;
+}
+
+/* Compare raw zone ids of conntrack @a and zone @b, ignoring direction. */
+static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
+                                       const struct nf_conntrack_zone *b)
+{
+       const struct nf_conntrack_zone *zone_a = nf_ct_zone(a);
+
+       return zone_a->id == b->id;
+}
+#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
 #endif /* _NF_CONNTRACK_ZONES_H */
diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h
new file mode 100644 (file)
index 0000000..6b84cf6
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _NFT_DUP_H_
+#define _NFT_DUP_H_
+
+/*
+ * Pair of source registers for a dup expression.
+ * NOTE(review): no user of this struct is visible in this diff; the field
+ * layout matches the per-family nft_dup_ipv4/nft_dup_ipv6 private structs.
+ */
+struct nft_dup_inet {
+       enum nft_registers      sreg_addr:8;
+       enum nft_registers      sreg_dev:8;
+};
+
+#endif /* _NFT_DUP_H_ */
index a99e6a9971408014514c21cc338df70ea5421f34..d8c8a7c9d88a7068c00d2b1cebe251c3c3c0dde4 100644 (file)
@@ -756,16 +756,25 @@ enum nft_ct_attributes {
 };
 #define NFTA_CT_MAX            (__NFTA_CT_MAX - 1)
 
+/*
+ * Values carried by the NFTA_LIMIT_TYPE attribute.
+ * NOTE(review): presumably PKTS limits by packet count and PKT_BYTES by
+ * byte count; the consumer is not visible in this diff - confirm against
+ * nft_limit.c.
+ */
+enum nft_limit_type {
+       NFT_LIMIT_PKTS,
+       NFT_LIMIT_PKT_BYTES
+};
+
 /**
  * enum nft_limit_attributes - nf_tables limit expression netlink attributes
  *
  * @NFTA_LIMIT_RATE: refill rate (NLA_U64)
  * @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
+ * @NFTA_LIMIT_BURST: burst (NLA_U32)
+ * @NFTA_LIMIT_TYPE: type of limit (NLA_U32: enum nft_limit_type)
  */
 enum nft_limit_attributes {
        NFTA_LIMIT_UNSPEC,
        NFTA_LIMIT_RATE,
        NFTA_LIMIT_UNIT,
+       NFTA_LIMIT_BURST,
+       NFTA_LIMIT_TYPE,
        __NFTA_LIMIT_MAX
 };
 #define NFTA_LIMIT_MAX         (__NFTA_LIMIT_MAX - 1)
@@ -935,6 +944,20 @@ enum nft_redir_attributes {
 };
 #define NFTA_REDIR_MAX         (__NFTA_REDIR_MAX - 1)
 
+/**
+ * enum nft_dup_attributes - nf_tables dup expression netlink attributes
+ *
+ * @NFTA_DUP_SREG_ADDR: source register of address (NLA_U32: nft_registers)
+ * @NFTA_DUP_SREG_DEV: source register of output interface (NLA_U32: nft_registers)
+ */
+enum nft_dup_attributes {
+       NFTA_DUP_UNSPEC,
+       NFTA_DUP_SREG_ADDR,
+       NFTA_DUP_SREG_DEV,
+       __NFTA_DUP_MAX
+};
+#define NFTA_DUP_MAX           (__NFTA_DUP_MAX - 1)
+
 /**
  * enum nft_gen_attributes - nf_tables ruleset generation attributes
  *
index acad6c52a6521d0fe81d1078a95fc7bb032d796c..c1a4e1441a25416e960349414b6a71e2c4409189 100644 (file)
@@ -61,6 +61,7 @@ enum ctattr_tuple {
        CTA_TUPLE_UNSPEC,
        CTA_TUPLE_IP,
        CTA_TUPLE_PROTO,
+       CTA_TUPLE_ZONE,
        __CTA_TUPLE_MAX
 };
 #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
index 5a688c1ca4d78e1cab449b26f8f3d2afdd190161..9e520418b858d5f5a355ee8514f5f1d3ee4a827f 100644 (file)
@@ -6,7 +6,13 @@
 enum {
        XT_CT_NOTRACK           = 1 << 0,
        XT_CT_NOTRACK_ALIAS     = 1 << 1,
-       XT_CT_MASK              = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS,
+       XT_CT_ZONE_DIR_ORIG     = 1 << 2,
+       XT_CT_ZONE_DIR_REPL     = 1 << 3,
+       XT_CT_ZONE_MARK         = 1 << 4,
+
+       XT_CT_MASK              = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS |
+                                 XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL |
+                                 XT_CT_ZONE_MARK,
 };
 
 struct xt_ct_target_info {
index 2199a5db25e60412389d861a105d86c62100b396..690d27d3f2f90d99612de8ed4a32dec0596a680a 100644 (file)
@@ -58,6 +58,12 @@ config NFT_REJECT_IPV4
        default NFT_REJECT
        tristate
 
+config NFT_DUP_IPV4
+       tristate "IPv4 nf_tables packet duplication support"
+       select NF_DUP_IPV4
+       help
+         This module enables IPv4 packet duplication support for nf_tables.
+
 endif # NF_TABLES_IPV4
 
 config NF_TABLES_ARP
@@ -67,6 +73,12 @@ config NF_TABLES_ARP
 
 endif # NF_TABLES
 
+config NF_DUP_IPV4
+       tristate "Netfilter IPv4 packet duplication to alternate destination"
+       help
+         This option enables the nf_dup_ipv4 core, which duplicates an IPv4
+         packet to be rerouted to another destination.
+
 config NF_LOG_ARP
        tristate "ARP packet logging"
        default m if NETFILTER_ADVANCED=n
index 7fe6c703528f79f3ba6d355724c26f32e20a21c5..87b073da14c928df176e7f4163e5301edcafdd89 100644 (file)
@@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
 obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
+obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # generic IP tables 
@@ -70,3 +71,5 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
+
+obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o
index 30ad9554b5e9931ad37329f0ffda6a8aacdf55d1..8a2caaf3940bedaa9abba13352594a29341d287e 100644 (file)
@@ -280,7 +280,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
                return -EINVAL;
        }
 
-       h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+       h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
        if (h) {
                struct sockaddr_in sin;
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
index 80d5554b9a88da301a69db2df7d98f8a3a0a0348..cdde3ec496e94321c424d3dd37b31cb305e05451 100644 (file)
@@ -134,9 +134,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
        struct nf_conntrack_tuple innertuple, origtuple;
        const struct nf_conntrack_l4proto *innerproto;
        const struct nf_conntrack_tuple_hash *h;
-       u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+       const struct nf_conntrack_zone *zone;
+       struct nf_conntrack_zone tmp;
 
        NF_CT_ASSERT(skb->nfct == NULL);
+       zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
 
        /* Are they talking about one of our connections? */
        if (!nf_ct_get_tuplepr(skb,
index b69e82bda2159464b2eb0b0fd7c184c605947cc8..9306ec4fab41e9fa0c3c99fd6be78bcf8adfb397 100644 (file)
@@ -43,19 +43,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
                                              struct sk_buff *skb)
 {
-       u16 zone = NF_CT_DEFAULT_ZONE;
-
+       u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-       if (skb->nfct)
-               zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+       if (skb->nfct) {
+               enum ip_conntrack_info ctinfo;
+               const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+               zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+       }
 #endif
        if (nf_bridge_in_prerouting(skb))
-               return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+               return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
 
        if (hooknum == NF_INET_PRE_ROUTING)
-               return IP_DEFRAG_CONNTRACK_IN + zone;
+               return IP_DEFRAG_CONNTRACK_IN + zone_id;
        else
-               return IP_DEFRAG_CONNTRACK_OUT + zone;
+               return IP_DEFRAG_CONNTRACK_OUT + zone_id;
 }
 
 static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
new file mode 100644 (file)
index 0000000..b5bb375
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/*
+ * Work out which network namespace @skb belongs to.
+ *
+ * With CONFIG_NET_NS the namespace of skb->dev is preferred, then the one
+ * of the device attached to the skb's dst entry.  In every other case
+ * (including !CONFIG_NET_NS) &init_net is returned.
+ */
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+       const struct dst_entry *route;
+
+       if (skb->dev)
+               return dev_net(skb->dev);
+
+       route = skb_dst(skb);
+       if (route && route->dev)
+               return dev_net(route->dev);
+#endif
+       return &init_net;
+}
+
+/*
+ * Look up an output route towards @gw and attach it to @skb.
+ *
+ * @oif restricts the lookup to one output interface unless it is -1.
+ * On success the skb's previous dst is dropped, the new route is set,
+ * skb->dev/skb->protocol are updated and true is returned.  If the lookup
+ * fails the skb is left untouched and false is returned.
+ */
+static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
+                             int oif)
+{
+       struct net *net = pick_net(skb);
+       const struct iphdr *iph = ip_hdr(skb);
+       struct flowi4 fl4;
+       struct rtable *rt;
+
+       memset(&fl4, 0, sizeof(fl4));
+       fl4.daddr = gw->s_addr;
+       fl4.flowi4_tos = RT_TOS(iph->tos);
+       fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+       fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
+       if (oif != -1)
+               fl4.flowi4_oif = oif;
+
+       rt = ip_route_output_key(net, &fl4);
+       if (IS_ERR(rt))
+               return false;
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, &rt->dst);
+       skb->protocol = htons(ETH_P_IP);
+       skb->dev      = rt->dst.dev;
+
+       return true;
+}
+
+void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
+                const struct in_addr *gw, int oif)
+{
+       struct iphdr *iph;
+
+       if (this_cpu_read(nf_skb_duplicated))
+               return;
+       /*
+        * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+        * the original skb, which should continue on its way as if nothing has
+        * happened. The copy should be independently delivered to the gateway.
+        */
+       skb = pskb_copy(skb, GFP_ATOMIC);
+       if (skb == NULL)
+               return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       /* Avoid counting cloned packets towards the original connection. */
+       nf_conntrack_put(skb->nfct);
+       skb->nfct     = &nf_ct_untracked_get()->ct_general;
+       skb->nfctinfo = IP_CT_NEW;
+       nf_conntrack_get(skb->nfct);
+#endif
+       /*
+        * If we are in PREROUTING/INPUT, the checksum must be recalculated
+        * since the length could have changed as a result of defragmentation.
+        *
+        * We also decrease the TTL to mitigate potential loops between two
+        * hosts.
+        *
+        * Set %IP_DF so that the original source is notified of a potentially
+        * decreased MTU on the clone route. IPv6 does this too.
+        */
+       iph = ip_hdr(skb);
+       iph->frag_off |= htons(IP_DF);
+       if (hooknum == NF_INET_PRE_ROUTING ||
+           hooknum == NF_INET_LOCAL_IN)
+               --iph->ttl;
+       ip_send_check(iph);
+
+       if (nf_dup_ipv4_route(skb, gw, oif)) {
+               __this_cpu_write(nf_skb_duplicated, true);
+               ip_local_out(skb);
+               __this_cpu_write(nf_skb_duplicated, false);
+       } else {
+               kfree_skb(skb);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv4);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
new file mode 100644 (file)
index 0000000..25419fb
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+
+/* Private data of the IPv4 dup expression (see nft_expr_priv() users below). */
+struct nft_dup_ipv4 {
+       /* register holding the gateway address; mandatory */
+       enum nft_registers      sreg_addr:8;
+       /* register holding the output interface; only set if the attribute is given */
+       enum nft_registers      sreg_dev:8;
+};
+
+/*
+ * Evaluate the IPv4 dup expression: read the gateway address (and, if
+ * configured, the output interface) from the registers and duplicate the
+ * packet towards that gateway.
+ */
+static void nft_dup_ipv4_eval(const struct nft_expr *expr,
+                             struct nft_regs *regs,
+                             const struct nft_pktinfo *pkt)
+{
+       struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+       struct in_addr gw = {
+               .s_addr = regs->data[priv->sreg_addr],
+       };
+       /*
+        * NFTA_DUP_SREG_DEV is optional, so sreg_dev may never have been
+        * assigned by init.  In that case pass -1, which nf_dup_ipv4()
+        * treats as "no output interface constraint", instead of reading
+        * an unrelated register and using its value as an ifindex.
+        * NOTE(review): relies on the expression private area being zeroed
+        * at allocation and on 0 never being a valid load register - confirm.
+        */
+       int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
+
+       nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif);
+}
+
+/*
+ * Parse and validate the netlink attributes of an IPv4 dup expression.
+ *
+ * NFTA_DUP_SREG_ADDR is mandatory and must name a register that can be
+ * loaded as a struct in_addr; NFTA_DUP_SREG_DEV is optional and must be
+ * loadable as an int.  Returns 0 on success, -EINVAL if the address
+ * attribute is missing, or the error from register validation.
+ */
+static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr * const tb[])
+{
+       struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+       int ret;
+
+       if (!tb[NFTA_DUP_SREG_ADDR])
+               return -EINVAL;
+
+       priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
+       ret = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr));
+       if (ret < 0)
+               return ret;
+
+       /* The output interface register is optional. */
+       if (!tb[NFTA_DUP_SREG_DEV])
+               return 0;
+
+       priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+       return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+/*
+ * Dump the expression's configuration into @skb as netlink attributes.
+ * Returns 0 on success, -1 if an attribute could not be added.
+ */
+static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+
+       if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+               goto nla_put_failure;
+
+       /*
+        * NFTA_DUP_SREG_DEV is optional at init time; only report it when
+        * it was actually configured, so the dump mirrors what userspace
+        * sent.
+        * NOTE(review): relies on the expression private area being zeroed
+        * at allocation so that an unset sreg_dev reads as 0 - confirm.
+        */
+       if (priv->sreg_dev &&
+           nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+               goto nla_put_failure;
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/* Forward declaration: the ops table and the type below point at each other. */
+static struct nft_expr_type nft_dup_ipv4_type;
+/* Callbacks and private-data size of the IPv4 dup expression. */
+static const struct nft_expr_ops nft_dup_ipv4_ops = {
+       .type           = &nft_dup_ipv4_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)),
+       .eval           = nft_dup_ipv4_eval,
+       .init           = nft_dup_ipv4_init,
+       .dump           = nft_dup_ipv4_dump,
+};
+
+/* Netlink attribute policy: both attributes are 32-bit register numbers. */
+static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = {
+       [NFTA_DUP_SREG_ADDR]    = { .type = NLA_U32 },
+       [NFTA_DUP_SREG_DEV]     = { .type = NLA_U32 },
+};
+
+/* Expression type registered under the name "dup" for NFPROTO_IPV4. */
+static struct nft_expr_type nft_dup_ipv4_type __read_mostly = {
+       .family         = NFPROTO_IPV4,
+       .name           = "dup",
+       .ops            = &nft_dup_ipv4_ops,
+       .policy         = nft_dup_ipv4_policy,
+       .maxattr        = NFTA_DUP_MAX,
+       .owner          = THIS_MODULE,
+};
+
+/* Module load: register the IPv4 dup expression type; returns the registration result. */
+static int __init nft_dup_ipv4_module_init(void)
+{
+       return nft_register_expr(&nft_dup_ipv4_type);
+}
+
+/* Module unload: unregister the IPv4 dup expression type. */
+static void __exit nft_dup_ipv4_module_exit(void)
+{
+       nft_unregister_expr(&nft_dup_ipv4_type);
+}
+
+module_init(nft_dup_ipv4_module_init);
+module_exit(nft_dup_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup");
index b552cf0d6198c50308461d0d83ac09bdd8c21f82..96833e4b31939a191eaf7de297ac438d4aa41fa4 100644 (file)
@@ -47,9 +47,21 @@ config NFT_REJECT_IPV6
        default NFT_REJECT
        tristate
 
+config NFT_DUP_IPV6
+       tristate "IPv6 nf_tables packet duplication support"
+       select NF_DUP_IPV6
+       help
+         This module enables IPv6 packet duplication support for nf_tables.
+
 endif # NF_TABLES_IPV6
 endif # NF_TABLES
 
+config NF_DUP_IPV6
+       tristate "Netfilter IPv6 packet duplication to alternate destination"
+       help
+         This option enables the nf_dup_ipv6 core, which duplicates an IPv6
+         packet to be rerouted to another destination.
+
 config NF_REJECT_IPV6
        tristate "IPv6 packet rejection"
        default m if NETFILTER_ADVANCED=n
index c36e0a5490de10cd64f5c64571efa13628568199..b4f7d0b4e2afc630f7a5be2ae949dff676dc5985 100644 (file)
@@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
 # reject
 obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
 
+obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
+
 # nf_tables
 obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
@@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
 obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
 obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
 obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
+obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
 
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
index 4ba0c34c627b0e88d3a06fda6532c83a3936315e..7302900c321aff58fcb7dc21794b50e04b1942d8 100644 (file)
@@ -251,7 +251,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
        if (*len < 0 || (unsigned int) *len < sizeof(sin6))
                return -EINVAL;
 
-       h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+       h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
        if (!h) {
                pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
                         &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
index 90388d606483cbbd15e421b3e51f6d757cd05883..0e6fae103d33454f70fb5790b71d2529af969636 100644 (file)
@@ -150,7 +150,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
        struct nf_conntrack_tuple intuple, origtuple;
        const struct nf_conntrack_tuple_hash *h;
        const struct nf_conntrack_l4proto *inproto;
-       u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+       struct nf_conntrack_zone tmp;
 
        NF_CT_ASSERT(skb->nfct == NULL);
 
@@ -177,7 +177,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 
        *ctinfo = IP_CT_RELATED;
 
-       h = nf_conntrack_find_get(net, zone, &intuple);
+       h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+                                 &intuple);
        if (!h) {
                pr_debug("icmpv6_error: no match\n");
                return -NF_ACCEPT;
index 267fb8d5876e169f27e0e9a595dc89a20cfbea4e..6d9c0b3d5b8c49d111cca7bd70b9bc5229f0a263 100644 (file)
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
                                                struct sk_buff *skb)
 {
-       u16 zone = NF_CT_DEFAULT_ZONE;
-
+       u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-       if (skb->nfct)
-               zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+       if (skb->nfct) {
+               enum ip_conntrack_info ctinfo;
+               const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+               zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+       }
 #endif
        if (nf_bridge_in_prerouting(skb))
-               return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+               return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
 
        if (hooknum == NF_INET_PRE_ROUTING)
-               return IP6_DEFRAG_CONNTRACK_IN + zone;
+               return IP6_DEFRAG_CONNTRACK_IN + zone_id;
        else
-               return IP6_DEFRAG_CONNTRACK_OUT + zone;
-
+               return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
 }
 
 static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
new file mode 100644 (file)
index 0000000..d8ab654
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/*
+ * Work out which network namespace @skb belongs to.
+ *
+ * With CONFIG_NET_NS the namespace of skb->dev is preferred, then the one
+ * of the device attached to the skb's dst entry.  In every other case
+ * (including !CONFIG_NET_NS) &init_net is returned.
+ */
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+       const struct dst_entry *route;
+
+       if (skb->dev)
+               return dev_net(skb->dev);
+
+       route = skb_dst(skb);
+       if (route && route->dev)
+               return dev_net(route->dev);
+#endif
+       return &init_net;
+}
+
+/*
+ * Look up an output route towards @gw and attach it to @skb.
+ *
+ * @oif restricts the lookup to one output interface unless it is -1.
+ * The flow label for the lookup is assembled from the flow_lbl bytes of
+ * the packet's IPv6 header.  On success the skb's previous dst is
+ * dropped, the new one is set, skb->dev/skb->protocol are updated and
+ * true is returned.  If the returned dst carries an error it is released
+ * and false is returned with the skb left untouched.
+ */
+static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
+                             int oif)
+{
+       struct net *net = pick_net(skb);
+       const struct ipv6hdr *iph = ipv6_hdr(skb);
+       struct dst_entry *route;
+       struct flowi6 fl6;
+
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.daddr = *gw;
+       fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+                        (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+       if (oif != -1)
+               fl6.flowi6_oif = oif;
+
+       route = ip6_route_output(net, NULL, &fl6);
+       if (route->error) {
+               dst_release(route);
+               return false;
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, route);
+       skb->protocol = htons(ETH_P_IPV6);
+       skb->dev      = route->dev;
+
+       return true;
+}
+
+void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
+                const struct in6_addr *gw, int oif)
+{
+       if (this_cpu_read(nf_skb_duplicated))
+               return;
+       skb = pskb_copy(skb, GFP_ATOMIC);
+       if (skb == NULL)
+               return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       nf_conntrack_put(skb->nfct);
+       skb->nfct     = &nf_ct_untracked_get()->ct_general;
+       skb->nfctinfo = IP_CT_NEW;
+       nf_conntrack_get(skb->nfct);
+#endif
+       if (hooknum == NF_INET_PRE_ROUTING ||
+           hooknum == NF_INET_LOCAL_IN) {
+               struct ipv6hdr *iph = ipv6_hdr(skb);
+               --iph->hop_limit;
+       }
+       if (nf_dup_ipv6_route(skb, gw, oif)) {
+               __this_cpu_write(nf_skb_duplicated, true);
+               ip6_local_out(skb);
+               __this_cpu_write(nf_skb_duplicated, false);
+       } else {
+               kfree_skb(skb);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv6);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
new file mode 100644 (file)
index 0000000..0eaa4f6
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+
+/*
+ * Private data of the IPv6 "dup" expression.
+ * @sreg_addr: register holding the gateway address (struct in6_addr)
+ * @sreg_dev:  register holding the output interface index, or 0 when the
+ *             rule did not supply NFTA_DUP_SREG_DEV
+ */
+struct nft_dup_ipv6 {
+       enum nft_registers      sreg_addr:8;
+       enum nft_registers      sreg_dev:8;
+};
+
+/*
+ * Evaluate the expression: duplicate pkt->skb through nf_dup_ipv6().
+ *
+ * NFTA_DUP_SREG_DEV is optional.  When it was not given, sreg_dev is 0 and
+ * must not be used as a register index; -1 is passed instead, the value for
+ * which the routing code in nf_dup_ipv6.c leaves flowi6_oif unset.
+ */
+static void nft_dup_ipv6_eval(const struct nft_expr *expr,
+                             struct nft_regs *regs,
+                             const struct nft_pktinfo *pkt)
+{
+       struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+       struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
+       int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
+
+       nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif);
+}
+
+/*
+ * Parse the netlink attributes of a new "dup" expression.
+ *
+ * NFTA_DUP_SREG_ADDR is mandatory (-EINVAL if missing) and must name a
+ * register that can be loaded as a struct in6_addr.  NFTA_DUP_SREG_DEV is
+ * optional; if present it must name a register loadable as an int.
+ * Returns 0 on success or the error from nft_validate_register_load().
+ */
+static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr * const tb[])
+{
+       struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+       int err;
+
+       if (tb[NFTA_DUP_SREG_ADDR] == NULL)
+               return -EINVAL;
+
+       priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
+       err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
+       if (err < 0)
+               return err;
+
+       if (tb[NFTA_DUP_SREG_DEV] == NULL) {
+               /*
+                * 0 marks "not supplied" for eval and dump.
+                * NOTE(review): relies on nft_validate_register_load()
+                * never accepting register 0 for a load - confirm.
+                */
+               priv->sreg_dev = 0;
+               return 0;
+       }
+
+       priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+       return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+/*
+ * Dump the expression back to userspace.  The device register is only
+ * reported when the rule supplied one.  Returns 0 on success, -1 if an
+ * attribute could not be added to @skb.
+ */
+static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+
+       if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+               goto nla_put_failure;
+       if (priv->sreg_dev &&
+           nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+               goto nla_put_failure;
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+/* Netlink attribute policy: both register attributes are 32-bit values. */
+static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = {
+       [NFTA_DUP_SREG_DEV]     = { .type = NLA_U32 },
+       [NFTA_DUP_SREG_ADDR]    = { .type = NLA_U32 },
+};
+
+/* Declared ahead of its definition because the ops table points back at it. */
+static struct nft_expr_type nft_dup_ipv6_type;
+
+/* Callbacks and private-data size of the IPv6 "dup" expression. */
+static const struct nft_expr_ops nft_dup_ipv6_ops = {
+       .init           = nft_dup_ipv6_init,
+       .eval           = nft_dup_ipv6_eval,
+       .dump           = nft_dup_ipv6_dump,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)),
+       .type           = &nft_dup_ipv6_type,
+};
+
+/* Expression type registered at module load: "dup" for NFPROTO_IPV6. */
+static struct nft_expr_type nft_dup_ipv6_type __read_mostly = {
+       .name           = "dup",
+       .family         = NFPROTO_IPV6,
+       .owner          = THIS_MODULE,
+       .policy         = nft_dup_ipv6_policy,
+       .maxattr        = NFTA_DUP_MAX,
+       .ops            = &nft_dup_ipv6_ops,
+};
+
+/*
+ * Module load: register the IPv6 "dup" expression type.
+ * Returns whatever nft_register_expr() returns.
+ */
+static int __init nft_dup_ipv6_module_init(void)
+{
+       return nft_register_expr(&nft_dup_ipv6_type);
+}
+
+/* Module unload: unregister the expression type registered at load time. */
+static void __exit nft_dup_ipv6_module_exit(void)
+{
+       nft_unregister_expr(&nft_dup_ipv6_type);
+}
+
+module_init(nft_dup_ipv6_module_init);
+module_exit(nft_dup_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
index 6eae69a698ed8344fee68a46e767a2393e53148a..3e1b4abf1897a5bdeca9e5fa061bd06d9b858263 100644 (file)
@@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE
        depends on NETFILTER_ADVANCED
        depends on IPV6 || IPV6=n
        depends on !NF_CONNTRACK || NF_CONNTRACK
+       select NF_DUP_IPV4
+       select NF_DUP_IPV6 if IP6_NF_IPTABLES
        ---help---
        This option adds a "TEE" target with which a packet can be cloned and
        this clone be rerouted to another nexthop.
index 5882bbfd198c24b9e72d1d6ddc05f21ad38f53bc..136184572fc9d274a5ef9493852c57045119ce5c 100644 (file)
@@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
                " for conn " FMT_CONN "\n",
                __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
 
-       h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+       h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
                                  &tuple);
        if (h) {
                ct = nf_ct_tuplehash_to_ctrack(h);
index 3c20d02aee738c5293a5b449f28ebff596c7232d..ac3be9b0629b7aee7743d414bbffc994ecc9704f 100644 (file)
@@ -126,7 +126,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 unsigned int nf_conntrack_hash_rnd __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
 
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
 {
        unsigned int n;
 
@@ -135,7 +135,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
         * three bytes manually.
         */
        n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-       return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+       return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
                      (((__force __u16)tuple->dst.u.all << 16) |
                      tuple->dst.protonum));
 }
@@ -151,15 +151,15 @@ static u32 hash_bucket(u32 hash, const struct net *net)
 }
 
 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-                                 u16 zone, unsigned int size)
+                                 unsigned int size)
 {
-       return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
+       return __hash_bucket(hash_conntrack_raw(tuple), size);
 }
 
-static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
+static inline u_int32_t hash_conntrack(const struct net *net,
                                       const struct nf_conntrack_tuple *tuple)
 {
-       return __hash_conntrack(tuple, zone, net->ct.htable_size);
+       return __hash_conntrack(tuple, net->ct.htable_size);
 }
 
 bool
@@ -288,7 +288,9 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
 }
 
 /* Released via destroy_conntrack() */
-struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+                                const struct nf_conntrack_zone *zone,
+                                gfp_t flags)
 {
        struct nf_conn *tmpl;
 
@@ -299,24 +301,15 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
        tmpl->status = IPS_TEMPLATE;
        write_pnet(&tmpl->ct_net, net);
 
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-       if (zone) {
-               struct nf_conntrack_zone *nf_ct_zone;
+       if (nf_ct_zone_add(tmpl, flags, zone) < 0)
+               goto out_free;
 
-               nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags);
-               if (!nf_ct_zone)
-                       goto out_free;
-               nf_ct_zone->id = zone;
-       }
-#endif
        atomic_set(&tmpl->ct_general.use, 0);
 
        return tmpl;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
 out_free:
        kfree(tmpl);
        return NULL;
-#endif
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
 
@@ -373,7 +366,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 {
        struct net *net = nf_ct_net(ct);
        unsigned int hash, reply_hash;
-       u16 zone = nf_ct_zone(ct);
        unsigned int sequence;
 
        nf_ct_helper_destroy(ct);
@@ -381,9 +373,9 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
        local_bh_disable();
        do {
                sequence = read_seqcount_begin(&net->ct.generation);
-               hash = hash_conntrack(net, zone,
+               hash = hash_conntrack(net,
                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-               reply_hash = hash_conntrack(net, zone,
+               reply_hash = hash_conntrack(net,
                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
@@ -431,8 +423,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
 
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
-                       const struct nf_conntrack_tuple *tuple,
-                       u16 zone)
+               const struct nf_conntrack_tuple *tuple,
+               const struct nf_conntrack_zone *zone)
 {
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 
@@ -440,8 +432,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
         * so we need to check that the conntrack is confirmed
         */
        return nf_ct_tuple_equal(tuple, &h->tuple) &&
-               nf_ct_zone(ct) == zone &&
-               nf_ct_is_confirmed(ct);
+              nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
+              nf_ct_is_confirmed(ct);
 }
 
 /*
@@ -450,7 +442,7 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
  *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
  */
 static struct nf_conntrack_tuple_hash *
-____nf_conntrack_find(struct net *net, u16 zone,
+____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
        struct nf_conntrack_tuple_hash *h;
@@ -486,7 +478,7 @@ begin:
 
 /* Find a connection corresponding to a tuple. */
 static struct nf_conntrack_tuple_hash *
-__nf_conntrack_find_get(struct net *net, u16 zone,
+__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                        const struct nf_conntrack_tuple *tuple, u32 hash)
 {
        struct nf_conntrack_tuple_hash *h;
@@ -513,11 +505,11 @@ begin:
 }
 
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple)
 {
        return __nf_conntrack_find_get(net, zone, tuple,
-                                      hash_conntrack_raw(tuple, zone));
+                                      hash_conntrack_raw(tuple));
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
@@ -536,11 +528,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 int
 nf_conntrack_hash_check_insert(struct nf_conn *ct)
 {
+       const struct nf_conntrack_zone *zone;
        struct net *net = nf_ct_net(ct);
        unsigned int hash, reply_hash;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
-       u16 zone;
        unsigned int sequence;
 
        zone = nf_ct_zone(ct);
@@ -548,9 +540,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        local_bh_disable();
        do {
                sequence = read_seqcount_begin(&net->ct.generation);
-               hash = hash_conntrack(net, zone,
+               hash = hash_conntrack(net,
                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-               reply_hash = hash_conntrack(net, zone,
+               reply_hash = hash_conntrack(net,
                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
@@ -558,12 +550,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                      &h->tuple) &&
-                   zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+                                    NF_CT_DIRECTION(h)))
                        goto out;
        hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                      &h->tuple) &&
-                   zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+                                    NF_CT_DIRECTION(h)))
                        goto out;
 
        add_timer(&ct->timeout);
@@ -588,6 +582,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
 {
+       const struct nf_conntrack_zone *zone;
        unsigned int hash, reply_hash;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
@@ -596,7 +591,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        struct hlist_nulls_node *n;
        enum ip_conntrack_info ctinfo;
        struct net *net;
-       u16 zone;
        unsigned int sequence;
 
        ct = nf_ct_get(skb, &ctinfo);
@@ -617,7 +611,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                /* reuse the hash saved before */
                hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
                hash = hash_bucket(hash, net);
-               reply_hash = hash_conntrack(net, zone,
+               reply_hash = hash_conntrack(net,
                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -649,12 +643,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                      &h->tuple) &&
-                   zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+                                    NF_CT_DIRECTION(h)))
                        goto out;
        hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
                if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                      &h->tuple) &&
-                   zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+                                    NF_CT_DIRECTION(h)))
                        goto out;
 
        /* Timer relative to confirmation time, not original
@@ -707,11 +703,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
                         const struct nf_conn *ignored_conntrack)
 {
        struct net *net = nf_ct_net(ignored_conntrack);
+       const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        struct nf_conn *ct;
-       u16 zone = nf_ct_zone(ignored_conntrack);
-       unsigned int hash = hash_conntrack(net, zone, tuple);
+       unsigned int hash;
+
+       zone = nf_ct_zone(ignored_conntrack);
+       hash = hash_conntrack(net, tuple);
 
        /* Disable BHs the entire time since we need to disable them at
         * least once for the stats anyway.
@@ -721,7 +720,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
                ct = nf_ct_tuplehash_to_ctrack(h);
                if (ct != ignored_conntrack &&
                    nf_ct_tuple_equal(tuple, &h->tuple) &&
-                   nf_ct_zone(ct) == zone) {
+                   nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
                        NF_CT_STAT_INC(net, found);
                        rcu_read_unlock_bh();
                        return 1;
@@ -810,7 +809,8 @@ void init_nf_conntrack_hash_rnd(void)
 }
 
 static struct nf_conn *
-__nf_conntrack_alloc(struct net *net, u16 zone,
+__nf_conntrack_alloc(struct net *net,
+                    const struct nf_conntrack_zone *zone,
                     const struct nf_conntrack_tuple *orig,
                     const struct nf_conntrack_tuple *repl,
                     gfp_t gfp, u32 hash)
@@ -820,7 +820,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
        if (unlikely(!nf_conntrack_hash_rnd)) {
                init_nf_conntrack_hash_rnd();
                /* recompute the hash as nf_conntrack_hash_rnd is initialized */
-               hash = hash_conntrack_raw(orig, zone);
+               hash = hash_conntrack_raw(orig);
        }
 
        /* We don't want any race condition at early drop stage */
@@ -840,10 +840,9 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
         * SLAB_DESTROY_BY_RCU.
         */
        ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
-       if (ct == NULL) {
-               atomic_dec(&net->ct.count);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (ct == NULL)
+               goto out;
+
        spin_lock_init(&ct->lock);
        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -857,31 +856,24 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
        memset(&ct->__nfct_init_offset[0], 0,
               offsetof(struct nf_conn, proto) -
               offsetof(struct nf_conn, __nfct_init_offset[0]));
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-       if (zone) {
-               struct nf_conntrack_zone *nf_ct_zone;
 
-               nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
-               if (!nf_ct_zone)
-                       goto out_free;
-               nf_ct_zone->id = zone;
-       }
-#endif
+       if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
+               goto out_free;
+
        /* Because we use RCU lookups, we set ct_general.use to zero before
         * this is inserted in any list.
         */
        atomic_set(&ct->ct_general.use, 0);
        return ct;
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
 out_free:
-       atomic_dec(&net->ct.count);
        kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+out:
+       atomic_dec(&net->ct.count);
        return ERR_PTR(-ENOMEM);
-#endif
 }
 
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+                                  const struct nf_conntrack_zone *zone,
                                   const struct nf_conntrack_tuple *orig,
                                   const struct nf_conntrack_tuple *repl,
                                   gfp_t gfp)
@@ -923,8 +915,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        struct nf_conntrack_tuple repl_tuple;
        struct nf_conntrack_ecache *ecache;
        struct nf_conntrack_expect *exp = NULL;
-       u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+       const struct nf_conntrack_zone *zone;
        struct nf_conn_timeout *timeout_ext;
+       struct nf_conntrack_zone tmp;
        unsigned int *timeouts;
 
        if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
@@ -932,6 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
                return NULL;
        }
 
+       zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
        ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
                                  hash);
        if (IS_ERR(ct))
@@ -1026,10 +1020,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                  int *set_reply,
                  enum ip_conntrack_info *ctinfo)
 {
+       const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
+       struct nf_conntrack_zone tmp;
        struct nf_conn *ct;
-       u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
        u32 hash;
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
@@ -1040,7 +1035,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        }
 
        /* look for tuple match */
-       hash = hash_conntrack_raw(&tuple, zone);
+       zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+       hash = hash_conntrack_raw(&tuple);
        h = __nf_conntrack_find_get(net, zone, &tuple, hash);
        if (!h) {
                h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1290,6 +1286,13 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 }
 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
 
+/*
+ * Built-in default zone (NF_CT_DEFAULT_ZONE_ID, NF_CT_DEFAULT_ZONE_DIR).
+ * Exported so that code outside this file, e.g. modules, can pass
+ * &nf_ct_zone_dflt wherever a zone pointer is expected.
+ */
+const struct nf_conntrack_zone nf_ct_zone_dflt = {
+       .id     = NF_CT_DEFAULT_ZONE_ID,
+       .dir    = NF_CT_DEFAULT_ZONE_DIR,
+};
+EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
        .len    = sizeof(struct nf_conntrack_zone),
@@ -1596,8 +1599,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
                                        struct nf_conntrack_tuple_hash, hnnode);
                        ct = nf_ct_tuplehash_to_ctrack(h);
                        hlist_nulls_del_rcu(&h->hnnode);
-                       bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
-                                                 hashsize);
+                       bucket = __hash_conntrack(&h->tuple, hashsize);
                        hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
                }
        }
index b45a4223cb058a47ae2863a4166cd5085e587ca6..acf5c7b3f378c600ec983a0b92e7eb935c56b0c8 100644 (file)
@@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 }
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, u16 zone,
+__nf_ct_expect_find(struct net *net,
+                   const struct nf_conntrack_zone *zone,
                    const struct nf_conntrack_tuple *tuple)
 {
        struct nf_conntrack_expect *i;
@@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone,
        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
-                   nf_ct_zone(i->master) == zone)
+                   nf_ct_zone_equal_any(i->master, zone))
                        return i;
        }
        return NULL;
@@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, u16 zone,
+nf_ct_expect_find_get(struct net *net,
+                     const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple)
 {
        struct nf_conntrack_expect *i;
@@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, u16 zone,
+nf_ct_find_expectation(struct net *net,
+                      const struct nf_conntrack_zone *zone,
                       const struct nf_conntrack_tuple *tuple)
 {
        struct nf_conntrack_expect *i, *exp = NULL;
@@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone,
        hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
-                   nf_ct_zone(i->master) == zone) {
+                   nf_ct_zone_equal_any(i->master, zone)) {
                        exp = i;
                        break;
                }
@@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
        }
 
        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
-              nf_ct_zone(a->master) == nf_ct_zone(b->master);
+              nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
 static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
 {
        return a->master == b->master && a->class == b->class &&
-               nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
-               nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
-               nf_ct_zone(a->master) == nf_ct_zone(b->master);
+              nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+              nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+              nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
 /* Generally a bad idea to call this: could have matched already. */
index 6b8b0abbfab482280ae6a318f8bc58260e0b21c8..94a66541e0b76a1764ad6b2d73bf8f6cd1310a6b 100644 (file)
@@ -127,6 +127,20 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
        return ret;
 }
 
+/*
+ * Emit the zone id as a big-endian 16-bit attribute of type @attrtype.
+ * Nothing is emitted, and 0 is returned, when the zone has the default id
+ * or when its direction is not exactly @dir.  Returns -1 if the attribute
+ * could not be added to @skb, 0 otherwise.
+ */
+static inline int
+ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype,
+                      const struct nf_conntrack_zone *zone, int dir)
+{
+       bool wanted = zone->id != NF_CT_DEFAULT_ZONE_ID && zone->dir == dir;
+
+       if (wanted && nla_put_be16(skb, attrtype, htons(zone->id)))
+               return -1;
+
+       return 0;
+}
+
 static inline int
 ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
 {
@@ -458,6 +472,7 @@ static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
                    struct nf_conn *ct)
 {
+       const struct nf_conntrack_zone *zone;
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
        struct nlattr *nest_parms;
@@ -473,11 +488,16 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
        nfmsg->version      = NFNETLINK_V0;
        nfmsg->res_id       = 0;
 
+       zone = nf_ct_zone(ct);
+
        nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
        if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
                goto nla_put_failure;
+       if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+                                  NF_CT_ZONE_DIR_ORIG) < 0)
+               goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
        nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -485,10 +505,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
                goto nla_put_failure;
        if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
                goto nla_put_failure;
+       if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+                                  NF_CT_ZONE_DIR_REPL) < 0)
+               goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
-       if (nf_ct_zone(ct) &&
-           nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
+       if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+                                  NF_CT_DEFAULT_ZONE_DIR) < 0)
                goto nla_put_failure;
 
        if (ctnetlink_dump_status(skb, ct) < 0 ||
@@ -598,7 +621,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
               + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
 #endif
 #ifdef CONFIG_NF_CONNTRACK_ZONES
-              + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+              + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
 #endif
               + ctnetlink_proto_size(ct)
               + ctnetlink_label_size(ct)
@@ -609,6 +632,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 static int
 ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 {
+       const struct nf_conntrack_zone *zone;
        struct net *net;
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
@@ -655,11 +679,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
        nfmsg->res_id   = 0;
 
        rcu_read_lock();
+       zone = nf_ct_zone(ct);
+
        nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
        if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
                goto nla_put_failure;
+       if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+                                  NF_CT_ZONE_DIR_ORIG) < 0)
+               goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
        nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -667,10 +696,13 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
                goto nla_put_failure;
        if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
                goto nla_put_failure;
+       if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+                                  NF_CT_ZONE_DIR_REPL) < 0)
+               goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
-       if (nf_ct_zone(ct) &&
-           nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
+       if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+                                  NF_CT_DEFAULT_ZONE_DIR) < 0)
                goto nla_put_failure;
 
        if (ctnetlink_dump_id(skb, ct) < 0)
@@ -920,15 +952,54 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
        return ret;
 }
 
+/*
+ * Initialise @zone to the default id and direction, then, if @attr is
+ * present, take the zone id from it (big-endian 16 bit).  Without
+ * CONFIG_NF_CONNTRACK_ZONES a present @attr is refused with -EOPNOTSUPP.
+ * Returns 0 in every other case.
+ */
+static int
+ctnetlink_parse_zone(const struct nlattr *attr,
+                    struct nf_conntrack_zone *zone)
+{
+       nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID,
+                       NF_CT_DEFAULT_ZONE_DIR, 0);
+       if (!attr)
+               return 0;
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+       zone->id = ntohs(nla_get_be16(attr));
+       return 0;
+#else
+       return -EOPNOTSUPP;
+#endif
+}
+
+/*
+ * Parse a per-tuple zone attribute.  Fails with -EINVAL if @zone already
+ * carries a non-default id; otherwise @zone is re-parsed from @attr via
+ * ctnetlink_parse_zone() and its direction is set from @type: reply for
+ * CTA_TUPLE_REPLY, original for anything else.
+ */
+static int
+ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type,
+                          struct nf_conntrack_zone *zone)
+{
+       int err;
+
+       if (zone->id != NF_CT_DEFAULT_ZONE_ID)
+               return -EINVAL;
+
+       err = ctnetlink_parse_zone(attr, zone);
+       if (err < 0)
+               return err;
+
+       zone->dir = (type == CTA_TUPLE_REPLY) ? NF_CT_ZONE_DIR_REPL :
+                                               NF_CT_ZONE_DIR_ORIG;
+       return 0;
+}
+
 static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
        [CTA_TUPLE_IP]          = { .type = NLA_NESTED },
        [CTA_TUPLE_PROTO]       = { .type = NLA_NESTED },
+       [CTA_TUPLE_ZONE]        = { .type = NLA_U16 },
 };
 
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
                      struct nf_conntrack_tuple *tuple,
-                     enum ctattr_type type, u_int8_t l3num)
+                     enum ctattr_type type, u_int8_t l3num,
+                     struct nf_conntrack_zone *zone)
 {
        struct nlattr *tb[CTA_TUPLE_MAX+1];
        int err;
@@ -955,6 +1026,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
        if (err < 0)
                return err;
 
+       if (tb[CTA_TUPLE_ZONE]) {
+               if (!zone)
+                       return -EINVAL;
+
+               err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE],
+                                                type, zone);
+               if (err < 0)
+                       return err;
+       }
+
        /* orig and expect tuples get DIR_ORIGINAL */
        if (type == CTA_TUPLE_REPLY)
                tuple->dst.dir = IP_CT_DIR_REPLY;
@@ -964,21 +1045,6 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
        return 0;
 }
 
-static int
-ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
-{
-       if (attr)
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-               *zone = ntohs(nla_get_be16(attr));
-#else
-               return -EOPNOTSUPP;
-#endif
-       else
-               *zone = 0;
-
-       return 0;
-}
-
 static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
        [CTA_HELP_NAME]         = { .type = NLA_NUL_STRING,
                                    .len = NF_CT_HELPER_NAME_LEN - 1 },
@@ -1058,7 +1124,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conn *ct;
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
-       u16 zone;
+       struct nf_conntrack_zone zone;
        int err;
 
        err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1066,9 +1132,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
                return err;
 
        if (cda[CTA_TUPLE_ORIG])
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+                                           u3, &zone);
        else if (cda[CTA_TUPLE_REPLY])
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+                                           u3, &zone);
        else {
                return ctnetlink_flush_conntrack(net, cda,
                                                 NETLINK_CB(skb).portid,
@@ -1078,7 +1146,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
        if (err < 0)
                return err;
 
-       h = nf_conntrack_find_get(net, zone, &tuple);
+       h = nf_conntrack_find_get(net, &zone, &tuple);
        if (!h)
                return -ENOENT;
 
@@ -1112,7 +1180,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
        struct sk_buff *skb2 = NULL;
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
-       u16 zone;
+       struct nf_conntrack_zone zone;
        int err;
 
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -1138,16 +1206,18 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
                return err;
 
        if (cda[CTA_TUPLE_ORIG])
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+                                           u3, &zone);
        else if (cda[CTA_TUPLE_REPLY])
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+                                           u3, &zone);
        else
                return -EINVAL;
 
        if (err < 0)
                return err;
 
-       h = nf_conntrack_find_get(net, zone, &tuple);
+       h = nf_conntrack_find_get(net, &zone, &tuple);
        if (!h)
                return -ENOENT;
 
@@ -1645,7 +1715,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 }
 
 static struct nf_conn *
-ctnetlink_create_conntrack(struct net *net, u16 zone,
+ctnetlink_create_conntrack(struct net *net,
+                          const struct nf_conntrack_zone *zone,
                           const struct nlattr * const cda[],
                           struct nf_conntrack_tuple *otuple,
                           struct nf_conntrack_tuple *rtuple,
@@ -1761,7 +1832,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
                struct nf_conntrack_tuple_hash *master_h;
                struct nf_conn *master_ct;
 
-               err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3);
+               err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER,
+                                           u3, NULL);
                if (err < 0)
                        goto err2;
 
@@ -1804,7 +1876,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        struct nf_conn *ct;
        u_int8_t u3 = nfmsg->nfgen_family;
-       u16 zone;
+       struct nf_conntrack_zone zone;
        int err;
 
        err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1812,21 +1884,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
                return err;
 
        if (cda[CTA_TUPLE_ORIG]) {
-               err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
+               err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG,
+                                           u3, &zone);
                if (err < 0)
                        return err;
        }
 
        if (cda[CTA_TUPLE_REPLY]) {
-               err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
+               err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY,
+                                           u3, &zone);
                if (err < 0)
                        return err;
        }
 
        if (cda[CTA_TUPLE_ORIG])
-               h = nf_conntrack_find_get(net, zone, &otuple);
+               h = nf_conntrack_find_get(net, &zone, &otuple);
        else if (cda[CTA_TUPLE_REPLY])
-               h = nf_conntrack_find_get(net, zone, &rtuple);
+               h = nf_conntrack_find_get(net, &zone, &rtuple);
 
        if (h == NULL) {
                err = -ENOENT;
@@ -1836,7 +1910,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
                        if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
                                return -EINVAL;
 
-                       ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
+                       ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
                                                        &rtuple, u3);
                        if (IS_ERR(ct))
                                return PTR_ERR(ct);
@@ -2082,7 +2156,7 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
               + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
 #endif
 #ifdef CONFIG_NF_CONNTRACK_ZONES
-              + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+              + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
 #endif
               + ctnetlink_proto_size(ct)
               ;
@@ -2091,14 +2165,20 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
 static int
 ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
 {
+       const struct nf_conntrack_zone *zone;
        struct nlattr *nest_parms;
 
        rcu_read_lock();
+       zone = nf_ct_zone(ct);
+
        nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
        if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
                goto nla_put_failure;
+       if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+                                  NF_CT_ZONE_DIR_ORIG) < 0)
+               goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
        nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -2106,12 +2186,14 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
                goto nla_put_failure;
        if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
                goto nla_put_failure;
+       if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+                                  NF_CT_ZONE_DIR_REPL) < 0)
+               goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
-       if (nf_ct_zone(ct)) {
-               if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
-                       goto nla_put_failure;
-       }
+       if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+                                  NF_CT_DEFAULT_ZONE_DIR) < 0)
+               goto nla_put_failure;
 
        if (ctnetlink_dump_id(skb, ct) < 0)
                goto nla_put_failure;
@@ -2218,12 +2300,12 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
        int err;
 
        err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
-                                   nf_ct_l3num(ct));
+                                   nf_ct_l3num(ct), NULL);
        if (err < 0)
                return err;
 
        return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
-                                    nf_ct_l3num(ct));
+                                    nf_ct_l3num(ct), NULL);
 }
 
 static int
@@ -2612,23 +2694,22 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
-       u16 zone = 0;
+       struct nf_conntrack_zone zone;
        struct netlink_dump_control c = {
                .dump = ctnetlink_exp_ct_dump_table,
                .done = ctnetlink_exp_done,
        };
 
-       err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+       err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+                                   u3, NULL);
        if (err < 0)
                return err;
 
-       if (cda[CTA_EXPECT_ZONE]) {
-               err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
-               if (err < 0)
-                       return err;
-       }
+       err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+       if (err < 0)
+               return err;
 
-       h = nf_conntrack_find_get(net, zone, &tuple);
+       h = nf_conntrack_find_get(net, &zone, &tuple);
        if (!h)
                return -ENOENT;
 
@@ -2652,7 +2733,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
        struct sk_buff *skb2;
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
-       u16 zone;
+       struct nf_conntrack_zone zone;
        int err;
 
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -2672,16 +2753,18 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
                return err;
 
        if (cda[CTA_EXPECT_TUPLE])
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+                                           u3, NULL);
        else if (cda[CTA_EXPECT_MASTER])
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+                                           u3, NULL);
        else
                return -EINVAL;
 
        if (err < 0)
                return err;
 
-       exp = nf_ct_expect_find_get(net, zone, &tuple);
+       exp = nf_ct_expect_find_get(net, &zone, &tuple);
        if (!exp)
                return -ENOENT;
 
@@ -2732,8 +2815,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        struct hlist_node *next;
        u_int8_t u3 = nfmsg->nfgen_family;
+       struct nf_conntrack_zone zone;
        unsigned int i;
-       u16 zone;
        int err;
 
        if (cda[CTA_EXPECT_TUPLE]) {
@@ -2742,12 +2825,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
                if (err < 0)
                        return err;
 
-               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+               err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+                                           u3, NULL);
                if (err < 0)
                        return err;
 
                /* bump usage count to 2 */
-               exp = nf_ct_expect_find_get(net, zone, &tuple);
+               exp = nf_ct_expect_find_get(net, &zone, &tuple);
                if (!exp)
                        return -ENOENT;
 
@@ -2849,7 +2933,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
                return -EINVAL;
 
        err = ctnetlink_parse_tuple((const struct nlattr * const *)tb,
-                                       &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3);
+                                   &nat_tuple, CTA_EXPECT_NAT_TUPLE,
+                                   u3, NULL);
        if (err < 0)
                return err;
 
@@ -2937,7 +3022,8 @@ err_out:
 }
 
 static int
-ctnetlink_create_expect(struct net *net, u16 zone,
+ctnetlink_create_expect(struct net *net,
+                       const struct nf_conntrack_zone *zone,
                        const struct nlattr * const cda[],
                        u_int8_t u3, u32 portid, int report)
 {
@@ -2949,13 +3035,16 @@ ctnetlink_create_expect(struct net *net, u16 zone,
        int err;
 
        /* caller guarantees that those three CTA_EXPECT_* exist */
-       err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+       err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+                                   u3, NULL);
        if (err < 0)
                return err;
-       err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+       err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK,
+                                   u3, NULL);
        if (err < 0)
                return err;
-       err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+       err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER,
+                                   u3, NULL);
        if (err < 0)
                return err;
 
@@ -3011,7 +3100,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_expect *exp;
        struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
-       u16 zone;
+       struct nf_conntrack_zone zone;
        int err;
 
        if (!cda[CTA_EXPECT_TUPLE]
@@ -3023,19 +3112,18 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
        if (err < 0)
                return err;
 
-       err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+       err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+                                   u3, NULL);
        if (err < 0)
                return err;
 
        spin_lock_bh(&nf_conntrack_expect_lock);
-       exp = __nf_ct_expect_find(net, zone, &tuple);
-
+       exp = __nf_ct_expect_find(net, &zone, &tuple);
        if (!exp) {
                spin_unlock_bh(&nf_conntrack_expect_lock);
                err = -ENOENT;
                if (nlh->nlmsg_flags & NLM_F_CREATE) {
-                       err = ctnetlink_create_expect(net, zone, cda,
-                                                     u3,
+                       err = ctnetlink_create_expect(net, &zone, cda, u3,
                                                      NETLINK_CB(skb).portid,
                                                      nlmsg_report(nlh));
                }
index 825c3e3f83053582dba71c0128d706cb516c943a..5588c7ae1ac26740df91576867d5aff51aa961f9 100644 (file)
@@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
                                  const struct nf_conntrack_tuple *t)
 {
        const struct nf_conntrack_tuple_hash *h;
+       const struct nf_conntrack_zone *zone;
        struct nf_conntrack_expect *exp;
        struct nf_conn *sibling;
-       u16 zone = nf_ct_zone(ct);
 
        pr_debug("trying to timeout ct or exp for tuple ");
        nf_ct_dump_tuple(t);
 
+       zone = nf_ct_zone(ct);
        h = nf_conntrack_find_get(net, zone, t);
        if (h)  {
                sibling = nf_ct_tuplehash_to_ctrack(h);
index fc823fa5dcf53794bc8977cb5502d1dac92938e2..1fb3cacc04e16794ce27e9061893b9a90015fb82 100644 (file)
@@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
 }
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+/*
+ * Print the conntrack's zone id, but only from the call site whose @dir
+ * equals the direction stored in the zone; other call sites print nothing.
+ * The key is "zone", "zone-orig" or "zone-reply" depending on that direction.
+ */
+static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+                        int dir)
+{
+       const struct nf_conntrack_zone *ct_zone = nf_ct_zone(ct);
+
+       if (ct_zone->dir == dir) {
+               /* dir equals ct_zone->dir here, so dispatch on dir directly */
+               if (dir == NF_CT_DEFAULT_ZONE_DIR)
+                       seq_printf(s, "zone=%u ", ct_zone->id);
+               else if (dir == NF_CT_ZONE_DIR_ORIG)
+                       seq_printf(s, "zone-orig=%u ", ct_zone->id);
+               else if (dir == NF_CT_ZONE_DIR_REPL)
+                       seq_printf(s, "zone-reply=%u ", ct_zone->id);
+       }
+}
+#else
+/* Zones compiled out: nothing to print. */
+static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+                               int dir)
+{
+}
+#endif
+
 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
 static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
 {
@@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
        print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                    l3proto, l4proto);
 
+       ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
+
        if (seq_has_overflowed(s))
                goto release;
 
@@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
        print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                    l3proto, l4proto);
 
+       ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
+
        if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
                goto release;
 
@@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 #endif
 
        ct_show_secctx(s, ct);
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-       seq_printf(s, "zone=%u ", nf_ct_zone(ct));
-#endif
-
+       ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
        ct_show_delta_time(s, ct);
 
        seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
index 4e0b47831d43a25f021a1eeb2c62c2307b8630e1..5113dfd39df929967f247ca644a96664e6d72f1a 100644 (file)
@@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
-hash_by_src(const struct net *net, u16 zone,
-           const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
 {
        unsigned int hash;
 
        /* Original src, to ensure we map it consistently if poss. */
        hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
-                     tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
+                     tuple->dst.protonum ^ nf_conntrack_hash_rnd); /* zone is not hashed; find_appropriate_src() filters by zone instead */
 
        return reciprocal_scale(hash, net->ct.nat_htable_size);
 }
@@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct,
 
 /* Only called for SRC manip */
 static int
-find_appropriate_src(struct net *net, u16 zone,
+find_appropriate_src(struct net *net,
+                    const struct nf_conntrack_zone *zone,
                     const struct nf_nat_l3proto *l3proto,
                     const struct nf_nat_l4proto *l4proto,
                     const struct nf_conntrack_tuple *tuple,
                     struct nf_conntrack_tuple *result,
                     const struct nf_nat_range *range)
 {
-       unsigned int h = hash_by_src(net, zone, tuple);
+       unsigned int h = hash_by_src(net, tuple);
        const struct nf_conn_nat *nat;
        const struct nf_conn *ct;
 
        hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
                ct = nat->ct;
-               if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
+               if (same_src(ct, tuple) &&
+                   nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
                        /* Copy source part from reply tuple. */
                        nf_ct_invert_tuplepr(result,
                                       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone,
  * the ip with the lowest src-ip/dst-ip/proto usage.
  */
 static void
-find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
+find_best_ips_proto(const struct nf_conntrack_zone *zone,
+                   struct nf_conntrack_tuple *tuple,
                    const struct nf_nat_range *range,
                    const struct nf_conn *ct,
                    enum nf_nat_manip_type maniptype)
@@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
         */
        j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
                   range->flags & NF_NAT_RANGE_PERSISTENT ?
-                       0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
+                       0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);
 
        full_range = false;
        for (i = 0; i <= max; i++) {
@@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
                 struct nf_conn *ct,
                 enum nf_nat_manip_type maniptype)
 {
+       const struct nf_conntrack_zone *zone;
        const struct nf_nat_l3proto *l3proto;
        const struct nf_nat_l4proto *l4proto;
        struct net *net = nf_ct_net(ct);
-       u16 zone = nf_ct_zone(ct);
+
+       zone = nf_ct_zone(ct);
 
        rcu_read_lock();
        l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
@@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct,
        if (maniptype == NF_NAT_MANIP_SRC) {
                unsigned int srchash;
 
-               srchash = hash_by_src(net, nf_ct_zone(ct),
+               srchash = hash_by_src(net,
                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
                spin_lock_bh(&nf_nat_lock);
                /* nf_conntrack_alter_reply might re-allocate extension aera */
index 14f8b43ec5a7c736bbdafa6a8dd8b3f1fed2afa8..8fbbdb09826eefd29105b3e80f87c1e54dbb64a9 100644 (file)
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_tcpudp.h>
 #include <linux/netfilter/xt_SYNPROXY.h>
+
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 int synproxy_net_id;
 EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net)
        struct nf_conn *ct;
        int err = -ENOMEM;
 
-       ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
+       ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL);
        if (!ct)
                goto err1;
 
index c18af2f63eefb07e00be893190c35492232d4008..fefbf5f0b28d2f91e33db5e5d04182ff5a42db4b 100644 (file)
@@ -27,8 +27,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
 MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
 
-static LIST_HEAD(nfnl_acct_list);
-
 struct nf_acct {
        atomic64_t              pkts;
        atomic64_t              bytes;
@@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
             const struct nlmsghdr *nlh, const struct nlattr * const tb[])
 {
        struct nf_acct *nfacct, *matching = NULL;
+       struct net *net = sock_net(nfnl);
        char *acct_name;
        unsigned int size = 0;
        u32 flags = 0;
@@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
        if (strlen(acct_name) == 0)
                return -EINVAL;
 
-       list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+       list_for_each_entry(nfacct, &net->nfnl_acct_list, head) {
                if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
                        continue;
 
@@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
                             be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
        }
        atomic_set(&nfacct->refcnt, 1);
-       list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+       list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
        return 0;
 }
 
@@ -185,6 +184,7 @@ nla_put_failure:
 static int
 nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+       struct net *net = sock_net(skb->sk);
        struct nf_acct *cur, *last;
        const struct nfacct_filter *filter = cb->data;
 
@@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
                cb->args[1] = 0;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+       list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
                if (last) {
                        if (cur != last)
                                continue;
@@ -257,6 +257,7 @@ static int
 nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
             const struct nlmsghdr *nlh, const struct nlattr * const tb[])
 {
+       struct net *net = sock_net(nfnl);
        int ret = -ENOENT;
        struct nf_acct *cur;
        char *acct_name;
@@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
                return -EINVAL;
        acct_name = nla_data(tb[NFACCT_NAME]);
 
-       list_for_each_entry(cur, &nfnl_acct_list, head) {
+       list_for_each_entry(cur, &net->nfnl_acct_list, head) {
                struct sk_buff *skb2;
 
                if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
@@ -336,19 +337,20 @@ static int
 nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
             const struct nlmsghdr *nlh, const struct nlattr * const tb[])
 {
+       struct net *net = sock_net(nfnl);
        char *acct_name;
        struct nf_acct *cur;
        int ret = -ENOENT;
 
        if (!tb[NFACCT_NAME]) {
-               list_for_each_entry(cur, &nfnl_acct_list, head)
+               list_for_each_entry(cur, &net->nfnl_acct_list, head)
                        nfnl_acct_try_del(cur);
 
                return 0;
        }
        acct_name = nla_data(tb[NFACCT_NAME]);
 
-       list_for_each_entry(cur, &nfnl_acct_list, head) {
+       list_for_each_entry(cur, &net->nfnl_acct_list, head) {
                if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
                        continue;
 
@@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = {
 
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
 
-struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
 {
        struct nf_acct *cur, *acct = NULL;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+       list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
                if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
                        continue;
 
@@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
 
 void nfnl_acct_put(struct nf_acct *acct)
 {
-       atomic_dec(&acct->refcnt);
+       if (atomic_dec_and_test(&acct->refcnt)) /* true only when this was the last reference */
+               kfree_rcu(acct, rcu_head);      /* deferred free, so RCU list walkers are not pulled from under */
+
        module_put(THIS_MODULE);
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_put);
@@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
 
+/* Per-netns setup: start each namespace with an empty accounting list. */
+static int __net_init nfnl_acct_net_init(struct net *net)
+{
+       INIT_LIST_HEAD(&net->nfnl_acct_list);
+       return 0;
+}
+
+/* Per-netns teardown: unlink each object, free it if that was the last ref. */
+static void __net_exit nfnl_acct_net_exit(struct net *net)
+{
+       struct nf_acct *acct, *next;
+
+       list_for_each_entry_safe(acct, next, &net->nfnl_acct_list, head) {
+               list_del_rcu(&acct->head);
+               if (atomic_dec_and_test(&acct->refcnt))
+                       kfree_rcu(acct, rcu_head);
+       }
+}
+
+static struct pernet_operations nfnl_acct_ops = {
+        .init   = nfnl_acct_net_init, /* per-netns setup: empty accounting list */
+        .exit   = nfnl_acct_net_exit, /* per-netns teardown: unlink and release objects */
+};
+
 static int __init nfnl_acct_init(void)
 {
        int ret;
 
+       ret = register_pernet_subsys(&nfnl_acct_ops); /* registered before the nfnetlink subsystem below */
+       if (ret < 0) {
+               pr_err("nfnl_acct_init: failed to register pernet ops\n");
+               goto err_out;
+       }
+
        pr_info("nfnl_acct: registering with nfnetlink.\n");
        ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
        if (ret < 0) {
                pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
-               goto err_out;
+               goto cleanup_pernet; /* undo the pernet registration made above */
        }
        return 0;
+
+cleanup_pernet:
+       unregister_pernet_subsys(&nfnl_acct_ops);
 err_out:
        return ret;
 }
 
 static void __exit nfnl_acct_exit(void)
 {
-       struct nf_acct *cur, *tmp;
-
        pr_info("nfnl_acct: unregistering from nfnetlink.\n");
        nfnetlink_subsys_unregister(&nfnl_acct_subsys);
-
-       list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
-               list_del_rcu(&cur->head);
-               /* We are sure that our objects have no clients at this point,
-                * it's safe to release them all without checking refcnt. */
-               kfree_rcu(cur, rcu_head);
-       }
+       unregister_pernet_subsys(&nfnl_acct_ops); /* object cleanup now lives in nfnl_acct_net_exit() */
 }
 
 module_init(nfnl_acct_init);
index 17591239229f75564b944dc7db61e1dae6a2f1f1..1067fb4c1ffa2ec24988143fc7ec8d134dc9dda6 100644 (file)
 #include <net/netfilter/nf_tables.h>
 
 struct nft_counter {
-       seqlock_t       lock;
        u64             bytes;
        u64             packets;
 };
 
+struct nft_counter_percpu {
+       struct nft_counter      counter;        /* one CPU's byte/packet totals */
+       struct u64_stats_sync   syncp;          /* brackets writes in nft_counter_eval(); read-retried in nft_counter_dump() */
+};
+
+struct nft_counter_percpu_priv {
+       struct nft_counter_percpu __percpu *counter;    /* set in nft_counter_init(), freed in nft_counter_destroy() */
+};
+
 static void nft_counter_eval(const struct nft_expr *expr,
                             struct nft_regs *regs,
                             const struct nft_pktinfo *pkt)
 {
-       struct nft_counter *priv = nft_expr_priv(expr);
-
-       write_seqlock_bh(&priv->lock);
-       priv->bytes += pkt->skb->len;
-       priv->packets++;
-       write_sequnlock_bh(&priv->lock);
+       struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+       struct nft_counter_percpu *this_cpu;
+
+       local_bh_disable();     /* bottom halves stay off while this CPU's slot is updated */
+       this_cpu = this_cpu_ptr(priv->counter);
+       u64_stats_update_begin(&this_cpu->syncp);       /* pairs with the fetch/retry loop in nft_counter_dump() */
+       this_cpu->counter.bytes += pkt->skb->len;
+       this_cpu->counter.packets++;
+       u64_stats_update_end(&this_cpu->syncp);
+       local_bh_enable();
 }
 
 static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
-       struct nft_counter *priv = nft_expr_priv(expr);
+       struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+       struct nft_counter_percpu *cpu_stats;
+       struct nft_counter total;
+       u64 bytes, packets;
        unsigned int seq;
-       u64 bytes;
-       u64 packets;
-
-       do {
-               seq = read_seqbegin(&priv->lock);
-               bytes   = priv->bytes;
-               packets = priv->packets;
-       } while (read_seqretry(&priv->lock, seq));
-
-       if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
-               goto nla_put_failure;
-       if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+       int cpu;
+
+       memset(&total, 0, sizeof(total));
+       for_each_possible_cpu(cpu) {
+               cpu_stats = per_cpu_ptr(priv->counter, cpu);
+               do {
+                       seq     = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+                       bytes   = cpu_stats->counter.bytes;
+                       packets = cpu_stats->counter.packets;
+               } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+
+               total.packets += packets;
+               total.bytes += bytes;
+       }
+
+       if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
+           nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
                goto nla_put_failure;
        return 0;
 
@@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx,
                            const struct nft_expr *expr,
                            const struct nlattr * const tb[])
 {
-       struct nft_counter *priv = nft_expr_priv(expr);
+       struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+       struct nft_counter_percpu __percpu *cpu_stats;
+       struct nft_counter_percpu *this_cpu;
+
+       cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
+       if (cpu_stats == NULL)
+               return ENOMEM;
+
+       preempt_disable();
+       this_cpu = this_cpu_ptr(cpu_stats);
+       if (tb[NFTA_COUNTER_PACKETS]) {
+               this_cpu->counter.packets =
+                       be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+       }
+       if (tb[NFTA_COUNTER_BYTES]) {
+               this_cpu->counter.bytes =
+                       be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+       }
+       preempt_enable();
+       priv->counter = cpu_stats;
+       return 0;
+}
 
-       if (tb[NFTA_COUNTER_PACKETS])
-               priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
-       if (tb[NFTA_COUNTER_BYTES])
-               priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+/* Release the per-CPU counter storage that nft_counter_init() stored in
+ * priv->counter.
+ */
+static void nft_counter_destroy(const struct nft_ctx *ctx,
+                               const struct nft_expr *expr)
+{
+       struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
 
-       seqlock_init(&priv->lock);
-       return 0;
+       free_percpu(priv->counter);
 }
 
 static struct nft_expr_type nft_counter_type;
 static const struct nft_expr_ops nft_counter_ops = {
        .type           = &nft_counter_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
        .eval           = nft_counter_eval,
        .init           = nft_counter_init,
+       .destroy        = nft_counter_destroy,
        .dump           = nft_counter_dump,
 };
 
index 435c1ccd6c0e6a74266b2054a62f603d038ecb00..5d67938f8b2f27a090f47386fe4ec889c0507b13 100644 (file)
 static DEFINE_SPINLOCK(limit_lock);
 
 struct nft_limit {
+       u64             last;
        u64             tokens;
+       u64             tokens_max;
        u64             rate;
-       u64             unit;
-       unsigned long   stamp;
+       u64             nsecs;
+       u32             burst;
 };
 
-static void nft_limit_eval(const struct nft_expr *expr,
-                          struct nft_regs *regs,
-                          const struct nft_pktinfo *pkt)
+static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
 {
-       struct nft_limit *priv = nft_expr_priv(expr);
+       u64 now, tokens;
+       s64 delta;
 
        spin_lock_bh(&limit_lock);
-       if (time_after_eq(jiffies, priv->stamp)) {
-               priv->tokens = priv->rate;
-               priv->stamp = jiffies + priv->unit * HZ;
-       }
-
-       if (priv->tokens >= 1) {
-               priv->tokens--;
+       now = ktime_get_ns();
+       tokens = limit->tokens + now - limit->last;
+       if (tokens > limit->tokens_max)
+               tokens = limit->tokens_max;
+
+       limit->last = now;
+       delta = tokens - cost;
+       if (delta >= 0) {
+               limit->tokens = delta;
                spin_unlock_bh(&limit_lock);
-               return;
+               return false;
        }
+       limit->tokens = tokens;
        spin_unlock_bh(&limit_lock);
-
-       regs->verdict.code = NFT_BREAK;
+       return true;
 }
 
-static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
-       [NFTA_LIMIT_RATE]       = { .type = NLA_U64 },
-       [NFTA_LIMIT_UNIT]       = { .type = NLA_U64 },
-};
-
-static int nft_limit_init(const struct nft_ctx *ctx,
-                         const struct nft_expr *expr,
+static int nft_limit_init(struct nft_limit *limit,
                          const struct nlattr * const tb[])
 {
-       struct nft_limit *priv = nft_expr_priv(expr);
+       u64 unit;
 
        if (tb[NFTA_LIMIT_RATE] == NULL ||
            tb[NFTA_LIMIT_UNIT] == NULL)
                return -EINVAL;
 
-       priv->rate   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
-       priv->unit   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
-       priv->stamp  = jiffies + priv->unit * HZ;
-       priv->tokens = priv->rate;
+       limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+       unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+       limit->nsecs = unit * NSEC_PER_SEC;
+       if (limit->rate == 0 || limit->nsecs < unit)
+               return -EOVERFLOW;
+       limit->tokens = limit->tokens_max = limit->nsecs;
+
+       if (tb[NFTA_LIMIT_BURST]) {
+               u64 rate;
+
+               limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+
+               rate = limit->rate + limit->burst;
+               if (rate < limit->rate)
+                       return -EOVERFLOW;
+
+               limit->rate = rate;
+       }
+       limit->last = ktime_get_ns();
+
        return 0;
 }
 
-static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
+                         enum nft_limit_type type)
 {
-       const struct nft_limit *priv = nft_expr_priv(expr);
+       u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
+       u64 rate = limit->rate - limit->burst;
 
-       if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
-               goto nla_put_failure;
-       if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+       if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) ||
+           nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) ||
+           nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
+           nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)))
                goto nla_put_failure;
        return 0;
 
@@ -84,18 +100,114 @@ nla_put_failure:
        return -1;
 }
 
+/* Private data of the packet-based limit expression: the generic limiter
+ * state plus the token cost charged per packet (see nft_limit_pkts_init()).
+ */
+struct nft_limit_pkts {
+       struct nft_limit        limit;
+       u64                     cost;
+};
+
+/* Charge the precomputed per-packet cost against the limiter and break out
+ * of the rule when nft_limit_eval() reports that the tokens do not cover it.
+ */
+static void nft_limit_pkts_eval(const struct nft_expr *expr,
+                               struct nft_regs *regs,
+                               const struct nft_pktinfo *pkt)
+{
+       struct nft_limit_pkts *state = nft_expr_priv(expr);
+       bool over_limit;
+
+       over_limit = nft_limit_eval(&state->limit, state->cost);
+       if (!over_limit)
+               return;
+
+       regs->verdict.code = NFT_BREAK;
+}
+
+/* Netlink attribute policy for the limit expression: the expected type of
+ * each NFTA_LIMIT_* attribute.
+ */
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+       [NFTA_LIMIT_RATE]       = { .type = NLA_U64 },
+       [NFTA_LIMIT_UNIT]       = { .type = NLA_U64 },
+       [NFTA_LIMIT_BURST]      = { .type = NLA_U32 },
+       [NFTA_LIMIT_TYPE]       = { .type = NLA_U32 },
+};
+
+/* Set up a packet-based limit: initialise the shared limiter state from the
+ * netlink attributes, then precompute the token cost of one packet as
+ * nsecs / rate.
+ *
+ * Returns 0 on success or the negative error from nft_limit_init().
+ */
+static int nft_limit_pkts_init(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr,
+                              const struct nlattr * const tb[])
+{
+       struct nft_limit_pkts *priv = nft_expr_priv(expr);
+       int err;
+
+       err = nft_limit_init(&priv->limit, tb);
+       if (err < 0)
+               return err;
+
+       /* rate is a u64, but div_u64() takes a 32-bit divisor and would
+        * truncate it (to zero for rate == 1ULL << 32, which passes the
+        * rate == 0 check in nft_limit_init()). Use the 64-by-64 division.
+        */
+       priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+       return 0;
+}
+
+/* Dump the limiter configuration, tagged as a packet-based limit. */
+static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_limit_pkts *state = nft_expr_priv(expr);
+       const struct nft_limit *limit = &state->limit;
+
+       return nft_limit_dump(skb, limit, NFT_LIMIT_PKTS);
+}
+
 static struct nft_expr_type nft_limit_type;
-static const struct nft_expr_ops nft_limit_ops = {
+static const struct nft_expr_ops nft_limit_pkts_ops = {
+       .type           = &nft_limit_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+       .eval           = nft_limit_pkts_eval,
+       .init           = nft_limit_pkts_init,
+       .dump           = nft_limit_pkts_dump,
+};
+
+/* Byte-based limiting: charge a cost proportional to the packet length
+ * (nsecs * len / rate) and break out of the rule when nft_limit_eval()
+ * reports that the available tokens do not cover it.
+ */
+static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
+                                    struct nft_regs *regs,
+                                    const struct nft_pktinfo *pkt)
+{
+       struct nft_limit *priv = nft_expr_priv(expr);
+       /* rate is a u64; div_u64() only accepts a 32-bit divisor and would
+        * truncate it (possibly to zero), so divide 64-by-64 instead.
+        * NOTE(review): nsecs * len is a plain u64 multiply - confirm it
+        * cannot wrap for the largest unit accepted by nft_limit_init().
+        */
+       u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+       if (nft_limit_eval(priv, cost))
+               regs->verdict.code = NFT_BREAK;
+}
+
+/* Byte-based limits keep a bare struct nft_limit as private data, so the
+ * generic initialiser is all that is needed.
+ */
+static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
+                                   const struct nft_expr *expr,
+                                   const struct nlattr * const tb[])
+{
+       struct nft_limit *limit = nft_expr_priv(expr);
+       int ret;
+
+       ret = nft_limit_init(limit, tb);
+       return ret;
+}
+
+/* Dump the limiter configuration, tagged as a byte-based limit. */
+static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
+                                   const struct nft_expr *expr)
+{
+       const struct nft_limit *limit = nft_expr_priv(expr);
+       int ret;
+
+       ret = nft_limit_dump(skb, limit, NFT_LIMIT_PKT_BYTES);
+       return ret;
+}
+
+static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
        .type           = &nft_limit_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
-       .eval           = nft_limit_eval,
-       .init           = nft_limit_init,
-       .dump           = nft_limit_dump,
+       .eval           = nft_limit_pkt_bytes_eval,
+       .init           = nft_limit_pkt_bytes_init,
+       .dump           = nft_limit_pkt_bytes_dump,
 };
 
+/* Pick the expression ops from NFTA_LIMIT_TYPE. A missing attribute selects
+ * the packet-based ops; an unknown type yields ERR_PTR(-EOPNOTSUPP).
+ */
+static const struct nft_expr_ops *
+nft_limit_select_ops(const struct nft_ctx *ctx,
+                    const struct nlattr * const tb[])
+{
+       const struct nlattr *attr = tb[NFTA_LIMIT_TYPE];
+       u32 type;
+
+       if (attr == NULL)
+               return &nft_limit_pkts_ops;
+
+       type = ntohl(nla_get_be32(attr));
+       if (type == NFT_LIMIT_PKTS)
+               return &nft_limit_pkts_ops;
+       if (type == NFT_LIMIT_PKT_BYTES)
+               return &nft_limit_pkt_bytes_ops;
+
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
 static struct nft_expr_type nft_limit_type __read_mostly = {
        .name           = "limit",
-       .ops            = &nft_limit_ops,
+       .select_ops     = nft_limit_select_ops,
        .policy         = nft_limit_policy,
        .maxattr        = NFTA_LIMIT_MAX,
        .flags          = NFT_EXPR_STATEFUL,
index 94fb3b27a2c54393091602e0e96b2634ff8ceb1b..09b4b07eb67644fdc90ef357378c46d243b7a642 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/if_vlan.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/netlink.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
 
+/* Copy @len bytes starting at link-layer @offset into @d, adding the VLAN
+ * header to the copied data in case the tag was removed from the frame by
+ * offloads.
+ *
+ * The Ethernet/VLAN header is rebuilt in a local struct vlan_ethhdr from
+ * the skb's MAC header, skb->vlan_proto, skb_vlan_tag_get() and
+ * skb->protocol; bytes past that header are copied from the skb itself.
+ *
+ * Returns true on success, false if skb_copy_bits() fails.
+ */
+static bool
+nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+{
+       int mac_off = skb_mac_header(skb) - skb->data;
+       u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d;
+       struct vlan_ethhdr veth;
+
+       vlanh = (u8 *) &veth;
+       if (offset < ETH_HLEN) {
+               /* Request starts within the first ETH_HLEN bytes. */
+               u8 ethlen = min_t(u8, len, ETH_HLEN - offset);
+
+               if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN))
+                       return false;
+
+               /* Present the VLAN protocol in the rebuilt header. */
+               veth.h_vlan_proto = skb->vlan_proto;
+
+               memcpy(dst_u8, vlanh + offset, ethlen);
+
+               len -= ethlen;
+               if (len == 0)
+                       return true;
+
+               /* Continue with the VLAN part of the rebuilt header. */
+               dst_u8 += ethlen;
+               offset = ETH_HLEN;
+       } else if (offset >= VLAN_ETH_HLEN) {
+               /* Entirely past the rebuilt header: shift the offset back by
+                * VLAN_HLEN and copy straight from the skb.
+                */
+               offset -= VLAN_HLEN;
+               goto skip;
+       }
+
+       veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+       veth.h_vlan_encapsulated_proto = skb->protocol;
+
+       vlanh += offset;
+
+       vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset);
+       memcpy(dst_u8, vlanh, vlan_len);
+
+       len -= vlan_len;
+       if (!len)
+               return true;
+
+       dst_u8 += vlan_len;
+       /* NOTE(review): when the request started strictly inside the VLAN
+        * header (ETH_HLEN < offset < VLAN_ETH_HLEN), offset is not reset to
+        * ETH_HLEN before the copy below, so the remainder appears to be read
+        * from a later position in the skb than intended - confirm.
+        */
+ skip:
+       return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
+}
+
 static void nft_payload_eval(const struct nft_expr *expr,
                             struct nft_regs *regs,
                             const struct nft_pktinfo *pkt)
@@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr,
        u32 *dest = &regs->data[priv->dreg];
        int offset;
 
+       dest[priv->len / NFT_REG32_SIZE] = 0;
        switch (priv->base) {
        case NFT_PAYLOAD_LL_HEADER:
                if (!skb_mac_header_was_set(skb))
                        goto err;
+
+               if (skb_vlan_tag_present(skb)) {
+                       if (!nft_payload_copy_vlan(dest, skb,
+                                                  priv->offset, priv->len))
+                               goto err;
+                       return;
+               }
                offset = skb_mac_header(skb) - skb->data;
                break;
        case NFT_PAYLOAD_NETWORK_HEADER:
@@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr,
        }
        offset += priv->offset;
 
-       dest[priv->len / NFT_REG32_SIZE] = 0;
        if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
                goto err;
        return;
index 43ddeee404e91f97908fb9228c1e873931b75bcc..8e524898ccea234a2b5cae3bdfaf2cd72d023238 100644 (file)
@@ -181,9 +181,23 @@ out:
 #endif
 }
 
+/* Translate the XT_CT_ZONE_DIR_* bits of info->flags into a conntrack zone
+ * direction. Exactly one bit selects that direction; neither or both select
+ * NF_CT_DEFAULT_ZONE_DIR.
+ */
+static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info)
+{
+       unsigned int dirs = info->flags & (XT_CT_ZONE_DIR_ORIG |
+                                          XT_CT_ZONE_DIR_REPL);
+
+       if (dirs == XT_CT_ZONE_DIR_ORIG)
+               return NF_CT_ZONE_DIR_ORIG;
+       if (dirs == XT_CT_ZONE_DIR_REPL)
+               return NF_CT_ZONE_DIR_REPL;
+
+       return NF_CT_DEFAULT_ZONE_DIR;
+}
+
 static int xt_ct_tg_check(const struct xt_tgchk_param *par,
                          struct xt_ct_target_info_v1 *info)
 {
+       struct nf_conntrack_zone zone;
        struct nf_conn *ct;
        int ret = -EOPNOTSUPP;
 
@@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
        }
 
 #ifndef CONFIG_NF_CONNTRACK_ZONES
-       if (info->zone)
+       if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
+                                        XT_CT_ZONE_DIR_REPL |
+                                        XT_CT_ZONE_MARK))
                goto err1;
 #endif
 
@@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
        if (ret < 0)
                goto err1;
 
-       ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
+       memset(&zone, 0, sizeof(zone));
+       zone.id = info->zone;
+       zone.dir = xt_ct_flags_to_dir(info);
+       if (info->flags & XT_CT_ZONE_MARK)
+               zone.flags |= NF_CT_FLAG_MARK;
+
+       ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
        if (!ct) {
                ret = -ENOMEM;
                goto err2;
index c5d6556dbc5e407cffca198ac5fe66b97a0cb908..49fee6aa2c0aa03b8af6bf9c3358d300d8b9ac7c 100644 (file)
  *     modify it under the terms of the GNU General Public License
  *     version 2 or later, as published by the Free Software Foundation.
  */
-#include <linux/ip.h>
 #include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/route.h>
 #include <linux/skbuff.h>
-#include <linux/notifier.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/route.h>
+#include <linux/route.h>
 #include <linux/netfilter/x_tables.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
 #include <linux/netfilter/xt_TEE.h>
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#      define WITH_CONNTRACK 1
-#      include <net/netfilter/nf_conntrack.h>
-#endif
-
 struct xt_tee_priv {
        struct notifier_block   notifier;
        struct xt_tee_tginfo    *tginfo;
@@ -38,161 +27,24 @@ struct xt_tee_priv {
 
 static const union nf_inet_addr tee_zero_address;
 
-static struct net *pick_net(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_NS
-       const struct dst_entry *dst;
-
-       if (skb->dev != NULL)
-               return dev_net(skb->dev);
-       dst = skb_dst(skb);
-       if (dst != NULL && dst->dev != NULL)
-               return dev_net(dst->dev);
-#endif
-       return &init_net;
-}
-
-static bool
-tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
-{
-       const struct iphdr *iph = ip_hdr(skb);
-       struct net *net = pick_net(skb);
-       struct rtable *rt;
-       struct flowi4 fl4;
-
-       memset(&fl4, 0, sizeof(fl4));
-       if (info->priv) {
-               if (info->priv->oif == -1)
-                       return false;
-               fl4.flowi4_oif = info->priv->oif;
-       }
-       fl4.daddr = info->gw.ip;
-       fl4.flowi4_tos = RT_TOS(iph->tos);
-       fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
-       fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
-       rt = ip_route_output_key(net, &fl4);
-       if (IS_ERR(rt))
-               return false;
-
-       skb_dst_drop(skb);
-       skb_dst_set(skb, &rt->dst);
-       skb->dev      = rt->dst.dev;
-       skb->protocol = htons(ETH_P_IP);
-       return true;
-}
-
+/* IPv4 TEE target: hand the packet to nf_dup_ipv4() for duplication towards
+ * the configured gateway and let the original continue (XT_CONTINUE).
+ */
 static unsigned int
 tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_tee_tginfo *info = par->targinfo;
-       struct iphdr *iph;
+       /* info->priv is optional: the code this replaces only read
+        * priv->oif under "if (info->priv)" and otherwise left the flow's
+        * oif zeroed, so pass 0 instead of dereferencing NULL.
+        * NOTE(review): confirm nf_dup_ipv4() treats 0 as "no interface".
+        */
+       int oif = info->priv ? info->priv->oif : 0;
 
-       if (__this_cpu_read(nf_skb_duplicated))
-               return XT_CONTINUE;
-       /*
-        * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
-        * the original skb, which should continue on its way as if nothing has
-        * happened. The copy should be independently delivered to the TEE
-        * --gateway.
-        */
-       skb = pskb_copy(skb, GFP_ATOMIC);
-       if (skb == NULL)
-               return XT_CONTINUE;
-
-#ifdef WITH_CONNTRACK
-       /* Avoid counting cloned packets towards the original connection. */
-       nf_conntrack_put(skb->nfct);
-       skb->nfct     = &nf_ct_untracked_get()->ct_general;
-       skb->nfctinfo = IP_CT_NEW;
-       nf_conntrack_get(skb->nfct);
-#endif
-       /*
-        * If we are in PREROUTING/INPUT, the checksum must be recalculated
-        * since the length could have changed as a result of defragmentation.
-        *
-        * We also decrease the TTL to mitigate potential TEE loops
-        * between two hosts.
-        *
-        * Set %IP_DF so that the original source is notified of a potentially
-        * decreased MTU on the clone route. IPv6 does this too.
-        */
-       iph = ip_hdr(skb);
-       iph->frag_off |= htons(IP_DF);
-       if (par->hooknum == NF_INET_PRE_ROUTING ||
-           par->hooknum == NF_INET_LOCAL_IN)
-               --iph->ttl;
-       ip_send_check(iph);
+       nf_dup_ipv4(skb, par->hooknum, &info->gw.in, oif);
 
-       if (tee_tg_route4(skb, info)) {
-               __this_cpu_write(nf_skb_duplicated, true);
-               ip_local_out(skb);
-               __this_cpu_write(nf_skb_duplicated, false);
-       } else {
-               kfree_skb(skb);
-       }
        return XT_CONTINUE;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static bool
-tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
-{
-       const struct ipv6hdr *iph = ipv6_hdr(skb);
-       struct net *net = pick_net(skb);
-       struct dst_entry *dst;
-       struct flowi6 fl6;
-
-       memset(&fl6, 0, sizeof(fl6));
-       if (info->priv) {
-               if (info->priv->oif == -1)
-                       return false;
-               fl6.flowi6_oif = info->priv->oif;
-       }
-       fl6.daddr = info->gw.in6;
-       fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
-                          (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
-       fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
-       dst = ip6_route_output(net, NULL, &fl6);
-       if (dst->error) {
-               dst_release(dst);
-               return false;
-       }
-       skb_dst_drop(skb);
-       skb_dst_set(skb, dst);
-       skb->dev      = dst->dev;
-       skb->protocol = htons(ETH_P_IPV6);
-       return true;
-}
-
+/* IPv6 TEE target: hand the packet to nf_dup_ipv6() for duplication towards
+ * the configured gateway and let the original continue (XT_CONTINUE).
+ */
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_tee_tginfo *info = par->targinfo;
+       /* info->priv is optional: the code this replaces only read
+        * priv->oif under "if (info->priv)" and otherwise left the flow's
+        * oif zeroed, so pass 0 instead of dereferencing NULL.
+        * NOTE(review): confirm nf_dup_ipv6() treats 0 as "no interface".
+        */
+       int oif = info->priv ? info->priv->oif : 0;
 
-       if (__this_cpu_read(nf_skb_duplicated))
-               return XT_CONTINUE;
-       skb = pskb_copy(skb, GFP_ATOMIC);
-       if (skb == NULL)
-               return XT_CONTINUE;
+       nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, oif);
 
-#ifdef WITH_CONNTRACK
-       nf_conntrack_put(skb->nfct);
-       skb->nfct     = &nf_ct_untracked_get()->ct_general;
-       skb->nfctinfo = IP_CT_NEW;
-       nf_conntrack_get(skb->nfct);
-#endif
-       if (par->hooknum == NF_INET_PRE_ROUTING ||
-           par->hooknum == NF_INET_LOCAL_IN) {
-               struct ipv6hdr *iph = ipv6_hdr(skb);
-               --iph->hop_limit;
-       }
-       if (tee_tg_route6(skb, info)) {
-               __this_cpu_write(nf_skb_duplicated, true);
-               ip6_local_out(skb);
-               __this_cpu_write(nf_skb_duplicated, false);
-       } else {
-               kfree_skb(skb);
-       }
        return XT_CONTINUE;
 }
 #endif
index 29ba6218a820e7cc8e9363db91312cc27c09004e..075d89d94d28f4deb87f473dd787f61395fa2681 100644 (file)
@@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head,
 static unsigned int check_hlist(struct net *net,
                                struct hlist_head *head,
                                const struct nf_conntrack_tuple *tuple,
-                               u16 zone,
+                               const struct nf_conntrack_zone *zone,
                                bool *addit)
 {
        const struct nf_conntrack_tuple_hash *found;
@@ -201,7 +201,7 @@ static unsigned int
 count_tree(struct net *net, struct rb_root *root,
           const struct nf_conntrack_tuple *tuple,
           const union nf_inet_addr *addr, const union nf_inet_addr *mask,
-          u8 family, u16 zone)
+          u8 family, const struct nf_conntrack_zone *zone)
 {
        struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
        struct rb_node **rbnode, *parent;
@@ -290,7 +290,8 @@ static int count_them(struct net *net,
                      const struct nf_conntrack_tuple *tuple,
                      const union nf_inet_addr *addr,
                      const union nf_inet_addr *mask,
-                     u_int8_t family, u16 zone)
+                     u_int8_t family,
+                     const struct nf_conntrack_zone *zone)
 {
        struct rb_root *root;
        int count;
@@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
        union nf_inet_addr addr;
        struct nf_conntrack_tuple tuple;
        const struct nf_conntrack_tuple *tuple_ptr = &tuple;
+       const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
        enum ip_conntrack_info ctinfo;
        const struct nf_conn *ct;
        unsigned int connections;
-       u16 zone = NF_CT_DEFAULT_ZONE;
 
        ct = nf_ct_get(skb, &ctinfo);
        if (ct != NULL) {
index 8c646ed9c921bca1fbf507c1aa97c1dca60d8df1..3048a7e3a90a5a27887b7e4ff731d00098f2c928 100644 (file)
@@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
        struct xt_nfacct_match_info *info = par->matchinfo;
        struct nf_acct *nfacct;
 
-       nfacct = nfnl_acct_find_get(info->name);
+       nfacct = nfnl_acct_find_get(par->net, info->name);
        if (nfacct == NULL) {
                pr_info("xt_nfacct: accounting object with name `%s' "
                        "does not exists\n", info->name);
index f2b540220ad02f1f8e3b2add9c7477a334081c3d..5019a47b9270e758f65c346631becc99e933b0c6 100644 (file)
@@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
        struct nf_conntrack_tuple tuple;
        enum ip_conntrack_info ctinfo;
        struct tcf_connmark_info *ca = a->priv;
+       struct nf_conntrack_zone zone;
        struct nf_conn *c;
        int proto;
 
@@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
                               proto, &tuple))
                goto out;
 
-       thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+       zone.id = ca->zone;
+       zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+       thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple);
        if (!thash)
                goto out;