Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

author David S. Miller <davem@davemloft.net>

Mon, 29 Sep 2014 18:46:53 +0000 (14:46 -0400)

committer David S. Miller <davem@davemloft.net>

Mon, 29 Sep 2014 18:46:53 +0000 (14:46 -0400)
author David S. Miller <davem@davemloft.net>
Mon, 29 Sep 2014 18:46:53 +0000 (14:46 -0400)
committer David S. Miller <davem@davemloft.net>
Mon, 29 Sep 2014 18:46:53 +0000 (14:46 -0400)
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h

index 96afc29184bee810a1ec550933cfb15998f42efb..f1606fa6132d562b03ba11757393e1aeb4aa6e06 100644 (file)
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -57,6 +57,8 @@ enum ip_set_extension {
         IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
         IPSET_EXT_BIT_COMMENT = 2,
         IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
+       IPSET_EXT_BIT_SKBINFO = 3,
+       IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
         /* Mark set with an extension which needs to call destroy */
         IPSET_EXT_BIT_DESTROY = 7,
         IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -65,12 +67,14 @@ enum ip_set_extension {
  #define SET_WITH_TIMEOUT(s)    ((s)->extensions & IPSET_EXT_TIMEOUT)
  #define SET_WITH_COUNTER(s)    ((s)->extensions & IPSET_EXT_COUNTER)
  #define SET_WITH_COMMENT(s)    ((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_SKBINFO(s)    ((s)->extensions & IPSET_EXT_SKBINFO)
  #define SET_WITH_FORCEADD(s)   ((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
  
  /* Extension id, in size order */
  enum ip_set_ext_id {
         IPSET_EXT_ID_COUNTER = 0,
         IPSET_EXT_ID_TIMEOUT,
+       IPSET_EXT_ID_SKBINFO,
         IPSET_EXT_ID_COMMENT,
         IPSET_EXT_ID_MAX,
  };
@@ -92,6 +96,10 @@ struct ip_set_ext {
         u64 packets;
         u64 bytes;
         u32 timeout;
+       u32 skbmark;
+       u32 skbmarkmask;
+       u32 skbprio;
+       u16 skbqueue;
         char *comment;
  };
  
@@ -104,6 +112,13 @@ struct ip_set_comment {
         char *str;
  };
  
+struct ip_set_skbinfo {
+       u32 skbmark;
+       u32 skbmarkmask;
+       u32 skbprio;
+       u16 skbqueue;
+};
+
  struct ip_set;
  
  #define ext_timeout(e, s)      \
@@ -112,7 +127,8 @@ struct ip_set;
  (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
  #define ext_comment(e, s)      \
  (struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
-
+#define ext_skbinfo(e, s)      \
+(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
  
  typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
                            const struct ip_set_ext *ext,
@@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
                 cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
         if (SET_WITH_COMMENT(set))
                 cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+       if (SET_WITH_SKBINFO(set))
+               cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
         if (SET_WITH_FORCEADD(set))
                 cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
  
@@ -304,6 +322,43 @@ ip_set_update_counter(struct ip_set_counter *counter,
         }
  }
  
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+                     const struct ip_set_ext *ext,
+                     struct ip_set_ext *mext, u32 flags)
+{
+               mext->skbmark = skbinfo->skbmark;
+               mext->skbmarkmask = skbinfo->skbmarkmask;
+               mext->skbprio = skbinfo->skbprio;
+               mext->skbqueue = skbinfo->skbqueue;
+}
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+{
+       /* Send nonzero parameters only */
+       return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+               nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+                             cpu_to_be64((u64)skbinfo->skbmark << 32 |
+                                         skbinfo->skbmarkmask))) ||
+              (skbinfo->skbprio &&
+               nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+                             cpu_to_be32(skbinfo->skbprio))) ||
+              (skbinfo->skbqueue &&
+               nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+                            cpu_to_be16(skbinfo->skbqueue)));
+
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+                   const struct ip_set_ext *ext)
+{
+       skbinfo->skbmark = ext->skbmark;
+       skbinfo->skbmarkmask = ext->skbmarkmask;
+       skbinfo->skbprio = ext->skbprio;
+       skbinfo->skbqueue = ext->skbqueue;
+}
+
  static inline bool
  ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
  {
@@ -497,6 +552,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
         if (SET_WITH_COMMENT(set) &&
             ip_set_put_comment(skb, ext_comment(e, set)))
                 return -EMSGSIZE;
+       if (SET_WITH_SKBINFO(set) &&
+           ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+               return -EMSGSIZE;
         return 0;
  }
  
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h

index 8ab1c278b66da77229647e08ef8e0d948d4e6122..c755e4971fa3cc89e3000b669e6f3ba557f555b8 100644 (file)
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -15,7 +15,7 @@ enum nf_br_hook_priorities {
         NF_BR_PRI_LAST = INT_MAX,
  };
  
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  
  #define BRNF_PKT_TYPE                  0x01
  #define BRNF_BRIDGED_DNAT              0x02
@@ -24,16 +24,6 @@ enum nf_br_hook_priorities {
  #define BRNF_8021Q                     0x10
  #define BRNF_PPPoE                     0x20
  
-/* Only used in br_forward.c */
-int nf_bridge_copy_header(struct sk_buff *skb);
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
-       if (skb->nf_bridge &&
-           skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
-               return nf_bridge_copy_header(skb);
-       return 0;
-}
-
  static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
  {
         switch (skb->protocol) {
@@ -46,6 +36,44 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
         }
  }
  
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+       if (skb->nf_bridge->mask & BRNF_8021Q)
+               skb->protocol = htons(ETH_P_8021Q);
+       else if (skb->nf_bridge->mask & BRNF_PPPoE)
+               skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
+ *
+ * Only used in br_forward.c
+ */
+static inline int nf_bridge_copy_header(struct sk_buff *skb)
+{
+       int err;
+       unsigned int header_size;
+
+       nf_bridge_update_protocol(skb);
+       header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+       err = skb_cow_head(skb, header_size);
+       if (err)
+               return err;
+
+       skb_copy_to_linear_data_offset(skb, -header_size,
+                                      skb->nf_bridge->data, header_size);
+       __skb_push(skb, nf_bridge_encap_header_len(skb));
+       return 0;
+}
+
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+{
+       if (skb->nf_bridge &&
+           skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
+               return nf_bridge_copy_header(skb);
+       return 0;
+}
+
  static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
  {
         if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index b6cced304b266011fe2b7ece6407b05cd1b9ba1c..262efdbc346bdd9817a5fb496d9ee16341e81f68 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -156,7 +156,7 @@ struct nf_conntrack {
  };
  #endif
  
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  struct nf_bridge_info {
         atomic_t                use;
         unsigned int            mask;
@@ -534,7 +534,7 @@ struct sk_buff {
  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
         struct nf_conntrack     *nfct;
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         struct nf_bridge_info   *nf_bridge;
  #endif
         unsigned int            len,
@@ -556,8 +556,6 @@ struct sk_buff {
         /* one bit hole */
         kmemcheck_bitfield_end(flags1);
  
-
-
         /* fields enclosed in headers_start/headers_end are copied
          * using a single memcpy() in __copy_skb_header()
          */
@@ -3016,7 +3014,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
                 atomic_inc(&nfct->use);
  }
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
  {
         if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -3034,7 +3032,7 @@ static inline void nf_reset(struct sk_buff *skb)
         nf_conntrack_put(skb->nfct);
         skb->nfct = NULL;
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         nf_bridge_put(skb->nf_bridge);
         skb->nf_bridge = NULL;
  #endif
@@ -3057,7 +3055,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
         if (copy)
                 dst->nfctinfo = src->nfctinfo;
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         dst->nf_bridge  = src->nf_bridge;
         nf_bridge_get(src->nf_bridge);
  #endif
@@ -3072,7 +3070,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
         nf_conntrack_put(dst->nfct);
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         nf_bridge_put(dst->nf_bridge);
  #endif
         __nf_copy(dst, src, true);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h

index 624a8a54806d4c877cabcbefec9dd975e1204922..576d7f0bed5d2bf37ed54603307b5e8c70fa01fd 100644 (file)
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -535,6 +535,7 @@ struct ip_vs_conn {
         union nf_inet_addr      daddr;          /* destination address */
         volatile __u32          flags;          /* status flags */
         __u16                   protocol;       /* Which protocol (TCP/UDP) */
+       __u16                   daf;            /* Address family of the dest */
  #ifdef CONFIG_NET_NS
         struct net              *net;           /* Name space */
  #endif
@@ -648,6 +649,9 @@ struct ip_vs_dest_user_kern {
         /* thresholds for active connections */
         u32                     u_threshold;    /* upper threshold */
         u32                     l_threshold;    /* lower threshold */
+
+       /* Address family of addr */
+       u16                     af;
  };
  
  
@@ -986,6 +990,10 @@ struct netns_ipvs {
         char                    backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
         /* net name space ptr */
         struct net              *net;            /* Needed by timer routines */
+       /* Number of heterogeneous destinations, needed because
+        * heterogeneous destinations are not supported when
+        * synchronization is enabled */
+       unsigned int            mixed_address_family_dests;
  };
  
  #define DEFAULT_SYNC_THRESHOLD 3
@@ -1210,7 +1218,7 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
  void ip_vs_conn_put(struct ip_vs_conn *cp);
  void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
  
-struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
+struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
                                   const union nf_inet_addr *daddr,
                                   __be16 dport, unsigned int flags,
                                   struct ip_vs_dest *dest, __u32 fwmark);
@@ -1396,8 +1404,9 @@ void ip_vs_unregister_nl_ioctl(void);
  int ip_vs_control_init(void);
  void ip_vs_control_cleanup(void);
  struct ip_vs_dest *
-ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
-               __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+               const union nf_inet_addr *daddr, __be16 dport,
+               const union nf_inet_addr *vaddr, __be16 vport,
                 __u16 protocol, __u32 fwmark, __u32 flags);
  void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
  
diff --git a/include/net/neighbour.h b/include/net/neighbour.h

index 47f425464f847fd827719ac5da99cf2749824a14..f60558d0254ca1a482ab4eb2924ecbaf8d6c169c 100644 (file)
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -373,7 +373,7 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
         return 0;
  }
  
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
  {
         unsigned int seq, hh_alen;
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h

index 931fbf812171afa37c53f258213a77759cd43edb..f713b5a31d622b7e29542d40bbe6a099b8a96720 100644 (file)
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -98,7 +98,7 @@ static void nf_send_reset(struct sk_buff *oldskb, int hook)
  
         nf_ct_attach(nskb, oldskb);
  
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         /* If we use ip_local_out for bridged traffic, the MAC source on
          * the RST will be ours, instead of the destination's.  This confuses
          * some routers/firewalls, and they drop the packet.  So we need to
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h

index 710d17ed70b4c0ad6f2d28db779e12d976d10387..7a10cfcd8e33fd73e47e14877f31b1d5baf65b1f 100644 (file)
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -147,7 +147,7 @@ static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
  
         nf_ct_attach(nskb, oldskb);
  
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         /* If we use ip6_local_out for bridged traffic, the MAC source on
          * the RST will be ours, instead of the destination's.  This confuses
          * some routers/firewalls, and they drop the packet.  So we need to
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h

index a71dd333ac6869fdce096dce3a26be133e4d4aae..344b1ab19220c34ec14b99b112ef3f5699a846f4 100644 (file)
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -32,10 +32,8 @@ struct nf_conn_nat {
         struct hlist_node bysource;
         struct nf_conn *ct;
         union nf_conntrack_nat_help help;
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \
-    defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
         int masq_index;
  #endif
  };
@@ -68,8 +66,8 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
                                       struct nf_conn_nat *nat,
                                       const struct net_device *out)
  {
-#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
         return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
                CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
                nat->masq_index != out->ifindex;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h

index c4d86198d3d6542088ed6db8d4daed2adf191806..3d7292392fac91ca806b12945fd9eb190c21a031 100644 (file)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -241,6 +241,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
   *     @dtype: data type (verdict or numeric type defined by userspace)
   *     @size: maximum set size
   *     @nelems: number of elements
+ *     @policy: set parameterization (see enum nft_set_policies)
   *     @ops: set ops
   *     @flags: set flags
   *     @klen: key length
@@ -255,6 +256,7 @@ struct nft_set {
         u32                             dtype;
         u32                             size;
         u32                             nelems;
+       u16                             policy;
         /* runtime data below here */
         const struct nft_set_ops        *ops ____cacheline_aligned;
         u16                             flags;
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h

index fbcffe8041f7aff03046f0e901902f44cb094d0d..cabe95d5b4613c78af31f16be630ef03146a7d0b 100644 (file)
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -384,6 +384,9 @@ enum {
         IPVS_DEST_ATTR_PERSIST_CONNS,   /* persistent connections */
  
         IPVS_DEST_ATTR_STATS,           /* nested attribute for dest stats */
+
+       IPVS_DEST_ATTR_ADDR_FAMILY,     /* Address family of address */
+
         __IPVS_DEST_ATTR_MAX,
  };
  
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h

index 78c2f2e799208a467b75c8035b4d3362365362bb..ca03119111a2182dd5ff9b4f930153cf066e4ad3 100644 (file)
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -115,6 +115,9 @@ enum {
         IPSET_ATTR_BYTES,
         IPSET_ATTR_PACKETS,
         IPSET_ATTR_COMMENT,
+       IPSET_ATTR_SKBMARK,
+       IPSET_ATTR_SKBPRIO,
+       IPSET_ATTR_SKBQUEUE,
         __IPSET_ATTR_ADT_MAX,
  };
  #define IPSET_ATTR_ADT_MAX     (__IPSET_ATTR_ADT_MAX - 1)
@@ -147,6 +150,7 @@ enum ipset_errno {
         IPSET_ERR_COUNTER,
         IPSET_ERR_COMMENT,
         IPSET_ERR_INVALID_MARKMASK,
+       IPSET_ERR_SKBINFO,
  
         /* Type specific error codes */
         IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -170,6 +174,12 @@ enum ipset_cmd_flags {
         IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
         IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
         IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+       IPSET_FLAG_BIT_MAP_SKBMARK = 8,
+       IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK),
+       IPSET_FLAG_BIT_MAP_SKBPRIO = 9,
+       IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
+       IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
+       IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
         IPSET_FLAG_CMD_MAX = 15,
  };
  
@@ -187,6 +197,8 @@ enum ipset_cadt_flags {
         IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
         IPSET_FLAG_BIT_WITH_FORCEADD = 5,
         IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
+       IPSET_FLAG_BIT_WITH_SKBINFO = 6,
+       IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO),
         IPSET_FLAG_CADT_MAX     = 15,
  };
  
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h

index eeec0ae845ef3833ca89fb73fdf675b67600cb1f..b72ccfeaf86527e12b61230597798d1cb902f949 100644 (file)
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -51,6 +51,8 @@ enum nft_verdicts {
   * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
   * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
   * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
+ * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
   */
  enum nf_tables_msg_types {
         NFT_MSG_NEWTABLE,
@@ -68,6 +70,8 @@ enum nf_tables_msg_types {
         NFT_MSG_NEWSETELEM,
         NFT_MSG_GETSETELEM,
         NFT_MSG_DELSETELEM,
+       NFT_MSG_NEWGEN,
+       NFT_MSG_GETGEN,
         NFT_MSG_MAX,
  };
  
@@ -806,9 +810,22 @@ enum nft_nat_attributes {
   * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
   */
  enum nft_masq_attributes {
+       NFTA_MASQ_UNSPEC,
         NFTA_MASQ_FLAGS,
         __NFTA_MASQ_MAX
  };
  #define NFTA_MASQ_MAX          (__NFTA_MASQ_MAX - 1)
  
+/**
+ * enum nft_gen_attributes - nf_tables ruleset generation attributes
+ *
+ * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
+ */
+enum nft_gen_attributes {
+       NFTA_GEN_UNSPEC,
+       NFTA_GEN_ID,
+       __NFTA_GEN_MAX
+};
+#define NFTA_GEN_MAX           (__NFTA_GEN_MAX - 1)
+
  #endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h

index 964d3d42f8749d7e697aee47c42e3e31b67c5e48..d6a1df1f2947ba6aa0cc8680a2efb8f61a0ae178 100644 (file)
--- a/include/uapi/linux/netfilter/xt_set.h
+++ b/include/uapi/linux/netfilter/xt_set.h
@@ -71,4 +71,14 @@ struct xt_set_info_match_v3 {
         __u32 flags;
  };
  
+/* Revision 3 target */
+
+struct xt_set_info_target_v3 {
+       struct xt_set_info add_set;
+       struct xt_set_info del_set;
+       struct xt_set_info map_set;
+       __u32 flags;
+       __u32 timeout;
+};
+
  #endif /*_XT_SET_H*/
diff --git a/net/Kconfig b/net/Kconfig

index 4051fdfa43677ccf7142244c97e2e383515e77e8..dc5d700d05e70cc25aee44e511fc0a9890d113bd 100644 (file)
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -176,10 +176,11 @@ config NETFILTER_ADVANCED
           If unsure, say Y.
  
  config BRIDGE_NETFILTER
-       bool "Bridged IP/ARP packets filtering"
-       depends on BRIDGE && NETFILTER && INET
+       tristate "Bridged IP/ARP packets filtering"
+       depends on (BRIDGE || BRIDGE=n)
+       depends on NETFILTER && INET
         depends on NETFILTER_ADVANCED
-       default y
+       default m
         ---help---
           Enabling this option will let arptables resp. iptables see bridged
           ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/bridge/Makefile b/net/bridge/Makefile

index 8590b942bffa62a11d23c4e7a0666c9564d310c1..5e3eac5dc8b9006a6dc8d146b017380debc41bc2 100644 (file)
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -6,11 +6,12 @@ obj-$(CONFIG_BRIDGE) += bridge.o
  
  bridge-y       := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
                         br_ioctl.o br_stp.o br_stp_bpdu.o \
-                       br_stp_if.o br_stp_timer.o br_netlink.o
+                       br_stp_if.o br_stp_timer.o br_netlink.o \
+                       br_nf_core.o
  
  bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
  
-bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
+obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
  
  bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
  
diff --git a/net/bridge/br.c b/net/bridge/br.c

index 1a755a1e54101d924e88ea240a82c154dcb7bbe5..44425aff7cba15f93c659fd0502a7b650d050b5e 100644 (file)
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -161,7 +161,7 @@ static int __init br_init(void)
         if (err)
                 goto err_out1;
  
-       err = br_netfilter_init();
+       err = br_nf_core_init();
         if (err)
                 goto err_out2;
  
@@ -179,11 +179,16 @@ static int __init br_init(void)
         br_fdb_test_addr_hook = br_fdb_test_addr;
  #endif
  
+       pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
+               "deprecated. Update your scripts to load br_netfilter if you "
+               "need this.\n");
+
         return 0;
+
  err_out4:
         unregister_netdevice_notifier(&br_device_notifier);
  err_out3:
-       br_netfilter_fini();
+       br_nf_core_fini();
  err_out2:
         unregister_pernet_subsys(&br_net_ops);
  err_out1:
@@ -196,20 +201,17 @@ err_out:
  static void __exit br_deinit(void)
  {
         stp_proto_unregister(&br_stp_proto);
-
         br_netlink_fini();
         unregister_netdevice_notifier(&br_device_notifier);
         brioctl_set(NULL);
-
         unregister_pernet_subsys(&br_net_ops);
  
         rcu_barrier(); /* Wait for completion of call_rcu()'s */
  
-       br_netfilter_fini();
+       br_nf_core_fini();
  #if IS_ENABLED(CONFIG_ATM_LANE)
         br_fdb_test_addr_hook = NULL;
  #endif
-
         br_fdb_fini();
  }
  
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c

index 568cccd39a3d8a25716ef13e14b2f4e4850c1db5..659cac15c0dfd7a76176ffbfb8e88a0b1181e04a 100644 (file)
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
         u16 vid = 0;
  
         rcu_read_lock();
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
                 br_nf_pre_routing_finish_bridge_slow(skb);
                 rcu_read_unlock();
@@ -167,7 +167,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
  
         dev->mtu = new_mtu;
  
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         /* remember the MTU in the rtable for PMTU */
         dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
  #endif
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c

index 056b67b0e2778fdce7bd80a2bf4ede2d552c0e1d..992ec49a96aa7e289bd3c74bca9a606762c63614 100644 (file)
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -49,6 +49,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
  
         return 0;
  }
+EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
  
  int br_forward_finish(struct sk_buff *skb)
  {
@@ -56,6 +57,7 @@ int br_forward_finish(struct sk_buff *skb)
                        br_dev_queue_push_xmit);
  
  }
+EXPORT_SYMBOL_GPL(br_forward_finish);
  
  static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
  {
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c

index 366c43649079d9bdef66063af2ab848965ca8197..6fd5522df696ce744558a4db82803c34394eed6f 100644 (file)
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -140,6 +140,7 @@ drop:
         kfree_skb(skb);
         goto out;
  }
+EXPORT_SYMBOL_GPL(br_handle_frame_finish);
  
  /* note: already called with rcu_read_lock */
  static int br_handle_local_finish(struct sk_buff *skb)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c

index a615264cf01a950aafd894109d43f55cfd8dff91..97e43937aaca2b090bf883428ea3cfdce94103c2 100644 (file)
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,66 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
          pppoe_proto(skb) == htons(PPP_IPV6) && \
          brnf_filter_pppoe_tagged)
  
-static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
-                            struct sk_buff *skb, u32 mtu)
-{
-}
-
-static void fake_redirect(struct dst_entry *dst, struct sock *sk,
-                         struct sk_buff *skb)
-{
-}
-
-static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
-       return NULL;
-}
-
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
-                                          struct sk_buff *skb,
-                                          const void *daddr)
-{
-       return NULL;
-}
-
-static unsigned int fake_mtu(const struct dst_entry *dst)
-{
-       return dst->dev->mtu;
-}
-
-static struct dst_ops fake_dst_ops = {
-       .family =               AF_INET,
-       .protocol =             cpu_to_be16(ETH_P_IP),
-       .update_pmtu =          fake_update_pmtu,
-       .redirect =             fake_redirect,
-       .cow_metrics =          fake_cow_metrics,
-       .neigh_lookup =         fake_neigh_lookup,
-       .mtu =                  fake_mtu,
-};
-
-/*
- * Initialize bogus route table used to keep netfilter happy.
- * Currently, we fill in the PMTU entry because netfilter
- * refragmentation needs it, and the rt_flags entry because
- * ipt_REJECT needs it.  Future netfilter modules might
- * require us to fill additional fields.
- */
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
-       [RTAX_MTU - 1] = 1500,
-};
-
-void br_netfilter_rtable_init(struct net_bridge *br)
-{
-       struct rtable *rt = &br->fake_rtable;
-
-       atomic_set(&rt->dst.__refcnt, 1);
-       rt->dst.dev = br->dev;
-       rt->dst.path = &rt->dst;
-       dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
-       rt->dst.flags   = DST_NOXFRM | DST_FAKE_RTABLE;
-       rt->dst.ops = &fake_dst_ops;
-}
-
  static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
  {
         struct net_bridge_port *port;
@@ -245,14 +185,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
                                          skb->nf_bridge->data, header_size);
  }
  
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
-       if (skb->nf_bridge->mask & BRNF_8021Q)
-               skb->protocol = htons(ETH_P_8021Q);
-       else if (skb->nf_bridge->mask & BRNF_PPPoE)
-               skb->protocol = htons(ETH_P_PPP_SES);
-}
-
  /* When handing a packet over to the IP layer
   * check whether we have a skb that is in the
   * expected format
@@ -320,26 +252,6 @@ drop:
         return -1;
  }
  
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- */
-int nf_bridge_copy_header(struct sk_buff *skb)
-{
-       int err;
-       unsigned int header_size;
-
-       nf_bridge_update_protocol(skb);
-       header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-       err = skb_cow_head(skb, header_size);
-       if (err)
-               return err;
-
-       skb_copy_to_linear_data_offset(skb, -header_size,
-                                      skb->nf_bridge->data, header_size);
-       __skb_push(skb, nf_bridge_encap_header_len(skb));
-       return 0;
-}
-
  /* PF_BRIDGE/PRE_ROUTING *********************************************/
  /* Undo the changes made for ip6tables PREROUTING and continue the
   * bridge PRE_ROUTING hook. */
@@ -1059,38 +971,42 @@ static struct ctl_table brnf_table[] = {
  };
  #endif
  
-int __init br_netfilter_init(void)
+static int __init br_netfilter_init(void)
  {
         int ret;
  
-       ret = dst_entries_init(&fake_dst_ops);
+       ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
         if (ret < 0)
                 return ret;
  
-       ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-       if (ret < 0) {
-               dst_entries_destroy(&fake_dst_ops);
-               return ret;
-       }
  #ifdef CONFIG_SYSCTL
         brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
         if (brnf_sysctl_header == NULL) {
                 printk(KERN_WARNING
                        "br_netfilter: can't register to sysctl.\n");
-               nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-               dst_entries_destroy(&fake_dst_ops);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto err1;
         }
  #endif
         printk(KERN_NOTICE "Bridge firewalling registered\n");
         return 0;
+err1:
+       nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+       return ret;
  }
  
-void br_netfilter_fini(void)
+static void __exit br_netfilter_fini(void)
  {
         nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
  #ifdef CONFIG_SYSCTL
         unregister_net_sysctl_table(brnf_sysctl_header);
  #endif
-       dst_entries_destroy(&fake_dst_ops);
  }
+
+module_init(br_netfilter_init);
+module_exit(br_netfilter_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c

index 90a91e137acc3112004a4108abb68371919fa2be..0fa66b83685f44b824ec726c2cfae7aca4d11475 100644 (file)
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -602,7 +602,7 @@ out_af:
         return err;
  }
  
-void __exit br_netlink_fini(void)
+void br_netlink_fini(void)
  {
         br_mdb_uninit();
         rtnl_af_unregister(&br_af_ops);
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c

new file mode 100644 (file)

index 0000000..387cb3b
--- /dev/null
+++ b/net/bridge/br_nf_core.c
@@ -0,0 +1,96 @@
+/*
+ *     Handle firewalling core
+ *     Linux ethernet bridge
+ *
+ *     Authors:
+ *     Lennert Buytenhek               <buytenh@gnu.org>
+ *     Bart De Schuymer                <bdschuym@pandora.be>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ *     Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                            struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+                         struct sk_buff *skb)
+{
+}
+
+static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+       return NULL;
+}
+
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+                                          struct sk_buff *skb,
+                                          const void *daddr)
+{
+       return NULL;
+}
+
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+       return dst->dev->mtu;
+}
+
+static struct dst_ops fake_dst_ops = {
+       .family         = AF_INET,
+       .protocol       = cpu_to_be16(ETH_P_IP),
+       .update_pmtu    = fake_update_pmtu,
+       .redirect       = fake_redirect,
+       .cow_metrics    = fake_cow_metrics,
+       .neigh_lookup   = fake_neigh_lookup,
+       .mtu            = fake_mtu,
+};
+
+/*
+ * Initialize bogus route table used to keep netfilter happy.
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it.  Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static const u32 br_dst_default_metrics[RTAX_MAX] = {
+       [RTAX_MTU - 1] = 1500,
+};
+
+void br_netfilter_rtable_init(struct net_bridge *br)
+{
+       struct rtable *rt = &br->fake_rtable;
+
+       atomic_set(&rt->dst.__refcnt, 1);
+       rt->dst.dev = br->dev;
+       rt->dst.path = &rt->dst;
+       dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+       rt->dst.flags   = DST_NOXFRM | DST_FAKE_RTABLE;
+       rt->dst.ops = &fake_dst_ops;
+}
+
+int __init br_nf_core_init(void)
+{
+       return dst_entries_init(&fake_dst_ops);
+}
+
+void br_nf_core_fini(void)
+{
+       dst_entries_destroy(&fake_dst_ops);
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h

index b6c04cbcfdc5ee4aa8200677e50107014e0ce2cb..f53592fc3ef97048ae6e2da35bd03e7091070d6a 100644 (file)
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -221,7 +221,7 @@ struct net_bridge
         struct pcpu_sw_netstats         __percpu *stats;
         spinlock_t                      hash_lock;
         struct hlist_head               hash[BR_HASH_SIZE];
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         struct rtable                   fake_rtable;
         bool                            nf_call_iptables;
         bool                            nf_call_ip6tables;
@@ -754,13 +754,13 @@ static inline int br_vlan_enabled(struct net_bridge *br)
  #endif
  
  /* br_netfilter.c */
-#ifdef CONFIG_BRIDGE_NETFILTER
-int br_netfilter_init(void);
-void br_netfilter_fini(void);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_core_init(void);
+void br_nf_core_fini(void);
  void br_netfilter_rtable_init(struct net_bridge *);
  #else
-#define br_netfilter_init()    (0)
-#define br_netfilter_fini()    do { } while (0)
+static inline int br_nf_core_init(void) { return 0; }
+static inline void br_nf_core_fini(void) {}
  #define br_netfilter_rtable_init(x)
  #endif
  
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c

index c9e2572b15f400f9183606922a87f024ffe6570f..cb431c6016ee18935ddc20cb818391a98646c49e 100644 (file)
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -629,7 +629,7 @@ static ssize_t multicast_startup_query_interval_store(
  }
  static DEVICE_ATTR_RW(multicast_startup_query_interval);
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  static ssize_t nf_call_iptables_show(
         struct device *d, struct device_attribute *attr, char *buf)
  {
@@ -763,7 +763,7 @@ static struct attribute *bridge_attrs[] = {
         &dev_attr_multicast_query_response_interval.attr,
         &dev_attr_multicast_startup_query_interval.attr,
  #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
         &dev_attr_nf_call_iptables.attr,
         &dev_attr_nf_call_ip6tables.attr,
         &dev_attr_nf_call_arptables.attr,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig

index d189c5262bdbd3f44d77ca8cbba149ecdb8c809d..345242a79db6906fa8751f23b6c990efa8adb327 100644 (file)
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,16 +61,6 @@ config NFT_CHAIN_ROUTE_IPV4
           fields such as the source, destination, type of service and
           the packet mark.
  
-config NFT_CHAIN_NAT_IPV4
-       depends on NF_TABLES_IPV4
-       depends on NF_NAT_IPV4 && NFT_NAT
-       tristate "IPv4 nf_tables nat chain support"
-       help
-         This option enables the "nat" chain for IPv4 in nf_tables. This
-         chain type is used to perform Network Address Translation (NAT)
-         packet transformations such as the source, destination address and
-         source and destination ports.
-
  config NFT_REJECT_IPV4
         depends on NF_TABLES_IPV4
         default NFT_REJECT
@@ -94,6 +84,30 @@ config NF_NAT_IPV4
  
  if NF_NAT_IPV4
  
+config NFT_CHAIN_NAT_IPV4
+       depends on NF_TABLES_IPV4
+       tristate "IPv4 nf_tables nat chain support"
+       help
+         This option enables the "nat" chain for IPv4 in nf_tables. This
+         chain type is used to perform Network Address Translation (NAT)
+         packet transformations such as the source, destination address and
+         source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV4
+       tristate "IPv4 masquerade support"
+       help
+         This is the kernel functionality to provide NAT in the masquerade
+         flavour (automatic source address selection).
+
+config NFT_MASQ_IPV4
+       tristate "IPv4 masquerading support for nf_tables"
+       depends on NF_TABLES_IPV4
+       depends on NFT_MASQ
+       select NF_NAT_MASQUERADE_IPV4
+       help
+         This is the expression that provides IPv4 masquerading support for
+         nf_tables.
+
  config NF_NAT_SNMP_BASIC
         tristate "Basic SNMP-ALG support"
         depends on NF_CONNTRACK_SNMP
@@ -232,18 +246,6 @@ config IP_NF_NAT
  
  if IP_NF_NAT
  
-config NF_NAT_MASQUERADE_IPV4
-       tristate "IPv4 masquerade support"
-       help
-       This is the kernel functionality to provide NAT in the masquerade
-       flavour (automatic source address selection).
-
-config NFT_MASQ_IPV4
-       tristate "IPv4 masquerading support for nf_tables"
-       depends on NF_TABLES_IPV4
-       depends on NFT_MASQ
-       select NF_NAT_MASQUERADE_IPV4
-
  config IP_NF_TARGET_MASQUERADE
         tristate "MASQUERADE target support"
         select NF_NAT_MASQUERADE_IPV4
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig

index a8f25306a46a2457d07fb9ce12a90649d9dcfec7..bb1a40db7be1c647562678399945f51c4870372b 100644 (file)
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -40,16 +40,6 @@ config NFT_CHAIN_ROUTE_IPV6
           fields such as the source, destination, flowlabel, hop-limit and
           the packet mark.
  
-config NFT_CHAIN_NAT_IPV6
-       depends on NF_TABLES_IPV6
-       depends on NF_NAT_IPV6 && NFT_NAT
-       tristate "IPv6 nf_tables nat chain support"
-       help
-         This option enables the "nat" chain for IPv6 in nf_tables. This
-         chain type is used to perform Network Address Translation (NAT)
-         packet transformations such as the source, destination address and
-         source and destination ports.
-
  config NFT_REJECT_IPV6
         depends on NF_TABLES_IPV6
         default NFT_REJECT
@@ -70,6 +60,34 @@ config NF_NAT_IPV6
           forms of full Network Address Port Translation. This can be
           controlled by iptables or nft.
  
+if NF_NAT_IPV6
+
+config NFT_CHAIN_NAT_IPV6
+       depends on NF_TABLES_IPV6
+       tristate "IPv6 nf_tables nat chain support"
+       help
+         This option enables the "nat" chain for IPv6 in nf_tables. This
+         chain type is used to perform Network Address Translation (NAT)
+         packet transformations such as the source, destination address and
+         source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV6
+       tristate "IPv6 masquerade support"
+       help
+         This is the kernel functionality to provide NAT in the masquerade
+         flavour (automatic source address selection) for IPv6.
+
+config NFT_MASQ_IPV6
+       tristate "IPv6 masquerade support for nf_tables"
+       depends on NF_TABLES_IPV6
+       depends on NFT_MASQ
+       select NF_NAT_MASQUERADE_IPV6
+       help
+         This is the expression that provides IPv6 masquerading support for
+         nf_tables.
+
+endif # NF_NAT_IPV6
+
  config IP6_NF_IPTABLES
         tristate "IP6 tables support (required for filtering)"
         depends on INET && IPV6
@@ -258,18 +276,6 @@ config IP6_NF_NAT
  
  if IP6_NF_NAT
  
-config NF_NAT_MASQUERADE_IPV6
-       tristate "IPv6 masquerade support"
-       help
-        This is the kernel functionality to provide NAT in the masquerade
-        flavour (automatic source address selection) for IPv6.
-
-config NFT_MASQ_IPV6
-       tristate "IPv6 masquerade support for nf_tables"
-       depends on NF_TABLES_IPV6
-       depends on NFT_MASQ
-       select NF_NAT_MASQUERADE_IPV6
-
  config IP6_NF_TARGET_MASQUERADE
         tristate "MASQUERADE target support"
         select NF_NAT_MASQUERADE_IPV6
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig

index 2f7f5c32c6f90a0eb376d7921aecf167564329ce..234a8ec82076803a1c2ca75de9c7a2406363954b 100644 (file)
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
  
           To compile it as a module, choose M here.  If unsure, say N.
  
+config IP_SET_HASH_MAC
+       tristate "hash:mac set support"
+       depends on IP_SET
+       help
+         This option adds the hash:mac set type support, by which
+         one can store MAC (ethernet address) elements in a set.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
  config IP_SET_HASH_NETPORTNET
         tristate "hash:net,port,net set support"
         depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile

index 231f10196cb906fd4cbbe5f58263c5a3ba97886a..3dbd5e95848947230f80692cb26acb9437b3106c 100644 (file)
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
  obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
  obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
  obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o
  obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
  obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
  obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h

index f2c7d83dc23f46b83e4d24e665f2abc31317783b..6f024a8a1534a7552168c49a8007444c61af0bc0 100644 (file)
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -128,6 +128,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                 return 0;
         if (SET_WITH_COUNTER(set))
                 ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
+       if (SET_WITH_SKBINFO(set))
+               ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
         return 1;
  }
  
@@ -161,6 +163,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                 ip_set_init_counter(ext_counter(x, set), ext);
         if (SET_WITH_COMMENT(set))
                 ip_set_init_comment(ext_comment(x, set), ext);
+       if (SET_WITH_SKBINFO(set))
+               ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
         return 0;
  }
  
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c

index dafdb39ef042643cadc7a46f17c45604d56bf79f..55b083ec587a617109bc2b1d4b299f0f6fbe15e3 100644 (file)
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -27,7 +27,8 @@
  
  #define IPSET_TYPE_REV_MIN     0
  /*                             1          Counter support added */
-#define IPSET_TYPE_REV_MAX     2       /* Comment support added */
+/*                             2          Comment support added */
+#define IPSET_TYPE_REV_MAX     3       /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -139,7 +140,10 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
         if (unlikely(!tb[IPSET_ATTR_IP] ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -357,6 +361,9 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c

index dbad505e79e349ffc853c8c0ecdc335c0955478b..86104744b00ff67339f78db238f093874b70845a 100644 (file)
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -27,7 +27,8 @@
  
  #define IPSET_TYPE_REV_MIN     0
  /*                             1          Counter support added */
-#define IPSET_TYPE_REV_MAX     2       /* Comment support added */
+/*                             2          Comment support added */
+#define IPSET_TYPE_REV_MAX     3       /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -240,7 +241,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
         if (unlikely(!tb[IPSET_ATTR_IP] ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -394,6 +398,9 @@ static struct ip_set_type bitmap_ipmac_type = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c

index a4b65ae1986c5a1a3b52a788e2b3378d301859fa..005dd36444c3472b2257be6e29ada9882649c1c8 100644 (file)
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -22,7 +22,8 @@
  
  #define IPSET_TYPE_REV_MIN     0
  /*                             1          Counter support added */
-#define IPSET_TYPE_REV_MAX     2       /* Comment support added */
+/*                             2          Comment support added */
+#define IPSET_TYPE_REV_MAX     3       /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -139,7 +140,10 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -291,6 +295,9 @@ static struct ip_set_type bitmap_port_type = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c

index 5593e97426c45cfa47559b5b631f42211ef13cda..26c795e6b57f799b3690e4641b06bf7b888bd67b 100644 (file)
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -337,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
                 .len    = sizeof(unsigned long),
                 .align  = __alignof__(unsigned long),
         },
+       [IPSET_EXT_ID_SKBINFO] = {
+               .type   = IPSET_EXT_SKBINFO,
+               .flag   = IPSET_FLAG_WITH_SKBINFO,
+               .len    = sizeof(struct ip_set_skbinfo),
+               .align  = __alignof__(struct ip_set_skbinfo),
+       },
         [IPSET_EXT_ID_COMMENT] = {
                 .type    = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
                 .flag    = IPSET_FLAG_WITH_COMMENT,
@@ -382,6 +388,7 @@ int
  ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
                       struct ip_set_ext *ext)
  {
+       u64 fullmark;
         if (tb[IPSET_ATTR_TIMEOUT]) {
                 if (!(set->extensions & IPSET_EXT_TIMEOUT))
                         return -IPSET_ERR_TIMEOUT;
@@ -402,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
                         return -IPSET_ERR_COMMENT;
                 ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
         }
-
+       if (tb[IPSET_ATTR_SKBMARK]) {
+               if (!(set->extensions & IPSET_EXT_SKBINFO))
+                       return -IPSET_ERR_SKBINFO;
+               fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+               ext->skbmark = fullmark >> 32;
+               ext->skbmarkmask = fullmark & 0xffffffff;
+       }
+       if (tb[IPSET_ATTR_SKBPRIO]) {
+               if (!(set->extensions & IPSET_EXT_SKBINFO))
+                       return -IPSET_ERR_SKBINFO;
+               ext->skbprio = be32_to_cpu(nla_get_be32(
+                                           tb[IPSET_ATTR_SKBPRIO]));
+       }
+       if (tb[IPSET_ATTR_SKBQUEUE]) {
+               if (!(set->extensions & IPSET_EXT_SKBINFO))
+                       return -IPSET_ERR_SKBINFO;
+               ext->skbqueue = be16_to_cpu(nla_get_be16(
+                                           tb[IPSET_ATTR_SKBQUEUE]));
+       }
         return 0;
  }
  EXPORT_SYMBOL_GPL(ip_set_get_extensions);
@@ -1397,7 +1422,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
                 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
                 struct sk_buff *skb2;
                 struct nlmsgerr *errmsg;
-               size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
+               size_t payload = min(SIZE_MAX,
+                                    sizeof(*errmsg) + nlmsg_len(nlh));
                 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
                 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
                 struct nlattr *cmdattr;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h

index 8a38890cbe5eb283794b29ec629b4d94f919052e..fee7c64e4dd183e5e2fa9d312bbd5cf3134f59f8 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -720,6 +720,8 @@ reuse_slot:
                 ip_set_init_counter(ext_counter(data, set), ext);
         if (SET_WITH_COMMENT(set))
                 ip_set_init_comment(ext_comment(data, set), ext);
+       if (SET_WITH_SKBINFO(set))
+               ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
  
  out:
         rcu_read_unlock_bh();
@@ -797,6 +799,9 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
         if (SET_WITH_COUNTER(set))
                 ip_set_update_counter(ext_counter(data, set),
                                       ext, mext, flags);
+       if (SET_WITH_SKBINFO(set))
+               ip_set_get_skbinfo(ext_skbinfo(data, set),
+                                  ext, mext, flags);
         return mtype_do_data_match(data);
  }
  
@@ -1049,8 +1054,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
         struct HTYPE *h;
         struct htable *t;
  
+#ifndef IP_SET_PROTO_UNDEF
         if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
                 return -IPSET_ERR_INVALID_FAMILY;
+#endif
  
  #ifdef IP_SET_HASH_WITH_MARKMASK
         markmask = 0xffffffff;
@@ -1132,25 +1139,32 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
         rcu_assign_pointer(h->table, t);
  
         set->data = h;
+#ifndef IP_SET_PROTO_UNDEF
         if (set->family == NFPROTO_IPV4) {
+#endif
                 set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
                 set->dsize = ip_set_elem_len(set, tb,
                                 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+#ifndef IP_SET_PROTO_UNDEF
         } else {
                 set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
                 set->dsize = ip_set_elem_len(set, tb,
                                 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
         }
+#endif
         if (tb[IPSET_ATTR_TIMEOUT]) {
                 set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+#ifndef IP_SET_PROTO_UNDEF
                 if (set->family == NFPROTO_IPV4)
+#endif
                         IPSET_TOKEN(HTYPE, 4_gc_init)(set,
                                 IPSET_TOKEN(HTYPE, 4_gc));
+#ifndef IP_SET_PROTO_UNDEF
                 else
                         IPSET_TOKEN(HTYPE, 6_gc_init)(set,
                                 IPSET_TOKEN(HTYPE, 6_gc));
+#endif
         }
-
         pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
                  set->name, jhash_size(t->htable_bits),
                  t->htable_bits, h->maxelem, set->data, t);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c

index e52739938533066c9185cdd7be51be518d7eca95..76959d79e9d1f67e4618b494bd265ceb0acb57a6 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -26,7 +26,8 @@
  #define IPSET_TYPE_REV_MIN     0
  /*                             1          Counters support */
  /*                             2          Comments support */
-#define IPSET_TYPE_REV_MAX     3       /* Forceadd support */
+/*                             3          Forceadd support */
+#define IPSET_TYPE_REV_MAX     4       /* skbinfo support  */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -111,7 +112,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
         if (unlikely(!tb[IPSET_ATTR_IP] ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -247,6 +251,9 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
                      tb[IPSET_ATTR_IP_TO] ||
                      tb[IPSET_ATTR_CIDR]))
                 return -IPSET_ERR_PROTOCOL;
@@ -295,6 +302,9 @@ static struct ip_set_type hash_ip_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c

index 4eff0a29725498045703ac9fc2fc1af042c76386..7abf9788cfa850705bc2e5240751d8443a2a558e 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -25,7 +25,8 @@
  #include <linux/netfilter/ipset/ip_set_hash.h>
  
  #define IPSET_TYPE_REV_MIN     0
-#define IPSET_TYPE_REV_MAX     1       /* Forceadd support */
+/*                             1          Forceadd support */
+#define IPSET_TYPE_REV_MAX     2       /* skbinfo support  */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -113,7 +114,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +248,9 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
                      tb[IPSET_ATTR_IP_TO] ||
                      tb[IPSET_ATTR_CIDR]))
                 return -IPSET_ERR_PROTOCOL;
@@ -301,6 +308,9 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c

index f37a5ae8a5e00f2756afd6936b2be53a6bfa9c12..dcbcceb9a52feea746d2b9d88a9a80e79d579bdb 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -28,7 +28,8 @@
  /*                             1    SCTP and UDPLITE support added */
  /*                             2    Counters support added */
  /*                             3    Comments support added */
-#define IPSET_TYPE_REV_MAX     4 /* Forceadd support added */
+/*                             4    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     5 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -122,7 +123,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -287,6 +291,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
                      tb[IPSET_ATTR_IP_TO] ||
                      tb[IPSET_ATTR_CIDR]))
                 return -IPSET_ERR_PROTOCOL;
@@ -370,6 +377,9 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c

index 41ef00eda874e7fbe0cec0fb08313e1fa881f1d3..7ef93fc887a13b5e5d530a98a2bb3ae7f015d390 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -28,7 +28,8 @@
  /*                             1    SCTP and UDPLITE support added */
  /*                             2    Counters support added */
  /*                             3    Comments support added */
-#define IPSET_TYPE_REV_MAX     4 /* Forceadd support added */
+/*                             4    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     5 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -124,7 +125,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -295,6 +299,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
                      tb[IPSET_ATTR_IP_TO] ||
                      tb[IPSET_ATTR_CIDR]))
                 return -IPSET_ERR_PROTOCOL;
@@ -382,6 +389,9 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c

index 7308d84f9277813f16351b692f75218ff1a451b3..b6012ad9278113e43c98e8615a0694ef9c30c48b 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -30,7 +30,8 @@
  /*                             3    nomatch flag support added */
  /*                             4    Counters support added */
  /*                             5    Comments support added */
-#define IPSET_TYPE_REV_MAX     6 /* Forceadd support added */
+/*                             6    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     7 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -179,7 +180,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -432,6 +436,9 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
                      tb[IPSET_ATTR_IP_TO] ||
                      tb[IPSET_ATTR_CIDR]))
                 return -IPSET_ERR_PROTOCOL;
@@ -541,6 +548,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c

new file mode 100644 (file)

index 0000000..65690b5
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -0,0 +1,173 @@
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN     0
+#define IPSET_TYPE_REV_MAX     0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:mac");
+
+/* Type specific function prefix */
+#define HTYPE          hash_mac
+
+/* Member elements */
+struct hash_mac4_elem {
+       /* Zero valued MAC addresses cannot be stored */
+       union {
+               unsigned char ether[ETH_ALEN];
+               __be32 foo[2];
+       };
+};
+
+/* Common functions */
+
+static inline bool
+hash_mac4_data_equal(const struct hash_mac4_elem *e1,
+                    const struct hash_mac4_elem *e2,
+                    u32 *multi)
+{
+       return ether_addr_equal(e1->ether, e2->ether);
+}
+
+static inline bool
+hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
+{
+       return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+}
+
+static inline void
+hash_mac4_data_next(struct hash_mac4_elem *next,
+                   const struct hash_mac4_elem *e)
+{
+}
+
+#define MTYPE          hash_mac4
+#define PF             4
+#define HOST_MASK      32
+#define IP_SET_EMIT_CREATE
+#define IP_SET_PROTO_UNDEF
+#include "ip_set_hash_gen.h"
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+static int
+hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+              const struct xt_action_param *par,
+              enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+       struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+        /* MAC can be src only */
+       if (!(opt->flags & IPSET_DIM_ONE_SRC))
+               return 0;
+
+       if (skb_mac_header(skb) < skb->head ||
+            (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+               return -EINVAL;
+
+       memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+       if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+               return -EINVAL;
+       return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
+              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       int ret;
+
+       if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_LINENO])
+               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+       ret = ip_set_get_extensions(set, tb, &ext);
+       if (ret)
+               return ret;
+       memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+       if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+               return -IPSET_ERR_HASH_ELEM;
+
+       return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_mac_type __read_mostly = {
+       .name           = "hash:mac",
+       .protocol       = IPSET_PROTOCOL,
+       .features       = IPSET_TYPE_MAC,
+       .dimension      = IPSET_DIM_ONE,
+       .family         = NFPROTO_UNSPEC,
+       .revision_min   = IPSET_TYPE_REV_MIN,
+       .revision_max   = IPSET_TYPE_REV_MAX,
+       .create         = hash_mac_create,
+       .create_policy  = {
+               [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
+               [IPSET_ATTR_MAXELEM]    = { .type = NLA_U32 },
+               [IPSET_ATTR_PROBES]     = { .type = NLA_U8 },
+               [IPSET_ATTR_RESIZE]     = { .type = NLA_U8  },
+               [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+               [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+       },
+       .adt_policy     = {
+               [IPSET_ATTR_ETHER]      = { .type = NLA_BINARY,
+                                           .len  = ETH_ALEN },
+               [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+               [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
+               [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
+               [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
+               [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
+       },
+       .me             = THIS_MODULE,
+};
+
+static int __init
+hash_mac_init(void)
+{
+       return ip_set_type_register(&hash_mac_type);
+}
+
+static void __exit
+hash_mac_fini(void)
+{
+       ip_set_type_unregister(&hash_mac_type);
+}
+
+module_init(hash_mac_init);
+module_exit(hash_mac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c

index 4c7d495783a3aefa9447d4b37114bd3715928b0a..6b3ac10ac2f18718bda3b1b1f4108fa347eda377 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -27,7 +27,8 @@
  /*                             2    nomatch flag support added */
  /*                             3    Counters support added */
  /*                             4    Comments support added */
-#define IPSET_TYPE_REV_MAX     5 /* Forceadd support added */
+/*                             5    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     6 /* skbinfo mapping support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -150,7 +151,10 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -318,7 +322,10 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
         if (unlikely(tb[IPSET_ATTR_IP_TO]))
                 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -377,6 +384,9 @@ static struct ip_set_type hash_net_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c

index db2606805b3575b91a87cded299222c571fbf6ba..03cdb69ac9bfae4a691565d3d0bf015aaf0f1a06 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -28,7 +28,8 @@
  /*                             2    /0 support added */
  /*                             3    Counters support added */
  /*                             4    Comments support added */
-#define IPSET_TYPE_REV_MAX     5 /* Forceadd support added */
+/*                             5    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     6 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -281,7 +282,10 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -514,7 +518,10 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
         if (unlikely(tb[IPSET_ATTR_IP_TO]))
                 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -590,6 +597,9 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c

index 96b131366e7b6dfb717cba614c32c1459ebb6b7a..da00284b3571a748d843d0d79f6a193a876000d2 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,8 @@
  #include <linux/netfilter/ipset/ip_set_hash.h>
  
  #define IPSET_TYPE_REV_MIN     0
-#define IPSET_TYPE_REV_MAX     1       /* Forceadd support added */
+/*                             1          Forceadd support added */
+#define IPSET_TYPE_REV_MAX     2       /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -171,7 +172,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -394,7 +398,10 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
         if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
                 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -462,6 +469,9 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c

index 1c645fbd09c7d6bcb337b90977ae2536b2ec9ebd..c0ddb58d19dcca43818ea734cbc1ca64ef7eb1c9 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -29,7 +29,8 @@
  /*                             3    nomatch flag support added */
  /*                             4    Counters support added */
  /*                             5    Comments support added */
-#define IPSET_TYPE_REV_MAX     6 /* Forceadd support added */
+/*                             6    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     7 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -172,7 +173,10 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -389,7 +393,10 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
         if (unlikely(tb[IPSET_ATTR_IP_TO]))
                 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -489,6 +496,9 @@ static struct ip_set_type hash_netport_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c

index 2f00343471895079a8162ef02900f9ce96ebf5dc..b8053d675fc39ce0261dcf6270bc7616f383884c 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -26,7 +26,8 @@
  
  #define IPSET_TYPE_REV_MIN     0
  /*                             0    Comments support added */
-#define IPSET_TYPE_REV_MAX     1 /* Forceadd support added */
+/*                             1    Forceadd support added */
+#define IPSET_TYPE_REV_MAX     2 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -189,7 +190,10 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -460,7 +464,10 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
         if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
                 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -569,6 +576,9 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c

index f87adbad607696d12f01a6444378bcd5423abb2c..f8f682806e36df61fe6a606fcc94ed96660e2227 100644 (file)
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -17,7 +17,8 @@
  
  #define IPSET_TYPE_REV_MIN     0
  /*                             1    Counters support added */
-#define IPSET_TYPE_REV_MAX     2 /* Comments support added */
+/*                             2    Comments support added */
+#define IPSET_TYPE_REV_MAX     3 /* skbinfo support added */
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -73,6 +74,10 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
                                 ip_set_update_counter(ext_counter(e, set),
                                                       ext, &opt->ext,
                                                       cmdflags);
+                       if (SET_WITH_SKBINFO(set))
+                               ip_set_get_skbinfo(ext_skbinfo(e, set),
+                                                  ext, &opt->ext,
+                                                  cmdflags);
                         return ret;
                 }
         }
@@ -197,6 +202,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
                 ip_set_init_counter(ext_counter(e, set), ext);
         if (SET_WITH_COMMENT(set))
                 ip_set_init_comment(ext_comment(e, set), ext);
+       if (SET_WITH_SKBINFO(set))
+               ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
         return 0;
  }
  
@@ -307,6 +314,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                         ip_set_init_counter(ext_counter(e, set), ext);
                 if (SET_WITH_COMMENT(set))
                         ip_set_init_comment(ext_comment(e, set), ext);
+               if (SET_WITH_SKBINFO(set))
+                       ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
                 /* Set is already added to the list */
                 ip_set_put_byindex(map->net, d->id);
                 return 0;
@@ -378,7 +387,10 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
                      !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
-                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
                 return -IPSET_ERR_PROTOCOL;
  
         if (tb[IPSET_ATTR_LINENO])
@@ -667,6 +679,9 @@ static struct ip_set_type list_set_type __read_mostly = {
                 [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
                 [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
                 [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
         },
         .me             = THIS_MODULE,
  };
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig

index 0c3b1670b0d164cddf2e80717a89737821fd2b3b..3b6929dec7487a00630c1611892eabf6d4a14462 100644 (file)
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -152,6 +152,16 @@ config     IP_VS_WLC
           If you want to compile it in kernel, say Y. To compile it as a
           module, choose M here. If unsure, say N.
  
+config  IP_VS_FO
+               tristate "weighted failover scheduling"
+       ---help---
+         The weighted failover scheduling algorithm directs network
+         connections to the server with the highest weight that is
+         currently available.
+
+         If you want to compile it in kernel, say Y. To compile it as a
+         module, choose M here. If unsure, say N.
+
  config IP_VS_LBLC
         tristate "locality-based least-connection scheduling"
         ---help---
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile

index 34ee602ddb667d806691111c4f6cede4227a6872..38b2723b2e3d10211c6bd00aa0aef14f78c765c1 100644 (file)
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
  obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
  obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
  obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
  obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
  obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
  obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c

index 610e19c0e13fc82b15eb3fa1dd8328df29d9baa2..b0f7b626b56da755222c0a1bd9a3a7b276ba32c8 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -27,6 +27,7 @@
  
  #include <linux/interrupt.h>
  #include <linux/in.h>
+#include <linux/inet.h>
  #include <linux/net.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
@@ -77,6 +78,13 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
  #define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
  #define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
  
+/* Buffer size for a printed address: must fit "%pI6" with v6, else "%08X" */
+#ifdef CONFIG_IP_VS_IPV6
+#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
+#else
+#define IP_VS_ADDRSTRLEN (8+1)
+#endif
+
  struct ip_vs_aligned_lock
  {
         spinlock_t      l;
@@ -488,7 +496,12 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
                 break;
  
         case IP_VS_CONN_F_TUNNEL:
-               cp->packet_xmit = ip_vs_tunnel_xmit;
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->daf == AF_INET6)
+                       cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+               else
+#endif
+                       cp->packet_xmit = ip_vs_tunnel_xmit;
                 break;
  
         case IP_VS_CONN_F_DROUTE:
@@ -514,7 +527,10 @@ static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
                 break;
  
         case IP_VS_CONN_F_TUNNEL:
-               cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+               if (cp->daf == AF_INET6)
+                       cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+               else
+                       cp->packet_xmit = ip_vs_tunnel_xmit;
                 break;
  
         case IP_VS_CONN_F_DROUTE:
@@ -580,7 +596,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
                       ip_vs_proto_name(cp->protocol),
                       IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
-                     IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+                     IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
                       ip_vs_fwd_tag(cp), cp->state,
                       cp->flags, atomic_read(&cp->refcnt),
                       atomic_read(&dest->refcnt));
@@ -616,7 +632,13 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
         struct ip_vs_dest *dest;
  
         rcu_read_lock();
-       dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+
+       /* This function is only invoked by the synchronization code. We do
+        * not currently support heterogeneous pools with synchronization,
+        * so we can make the assumption that the svc_af is the same as the
+        * dest_af
+        */
+       dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
                                cp->dport, &cp->vaddr, cp->vport,
                                cp->protocol, cp->fwmark, cp->flags);
         if (dest) {
@@ -671,7 +693,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
                       ip_vs_proto_name(cp->protocol),
                       IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
-                     IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+                     IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
                       ip_vs_fwd_tag(cp), cp->state,
                       cp->flags, atomic_read(&cp->refcnt),
                       atomic_read(&dest->refcnt));
@@ -740,7 +762,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
                               ntohs(ct->cport),
                               IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
                               ntohs(ct->vport),
-                             IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+                             IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
                               ntohs(ct->dport));
  
                 /*
@@ -848,7 +870,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
   *     Create a new connection entry and hash it into the ip_vs_conn_tab
   */
  struct ip_vs_conn *
-ip_vs_conn_new(const struct ip_vs_conn_param *p,
+ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
                const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
                struct ip_vs_dest *dest, __u32 fwmark)
  {
@@ -867,6 +889,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
         setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
         ip_vs_conn_net_set(cp, p->net);
         cp->af             = p->af;
+       cp->daf            = dest_af;
         cp->protocol       = p->protocol;
         ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
         cp->cport          = p->cport;
@@ -874,7 +897,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
         ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
                        &cp->vaddr, p->vaddr);
         cp->vport          = p->vport;
-       ip_vs_addr_set(p->af, &cp->daddr, daddr);
+       ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
         cp->dport          = dport;
         cp->flags          = flags;
         cp->fwmark         = fwmark;
@@ -1036,6 +1059,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
                 struct net *net = seq_file_net(seq);
                 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
                 size_t len = 0;
+               char dbuf[IP_VS_ADDRSTRLEN];
  
                 if (!ip_vs_conn_net_eq(cp, net))
                         return 0;
@@ -1049,25 +1073,33 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
                 }
                 pe_data[len] = '\0';
  
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->daf == AF_INET6)
+                       snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+               else
+#endif
+                       snprintf(dbuf, sizeof(dbuf), "%08X",
+                                ntohl(cp->daddr.ip));
+
  #ifdef CONFIG_IP_VS_IPV6
                 if (cp->af == AF_INET6)
                         seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
-                               "%pI6 %04X %-11s %7lu%s\n",
+                               "%s %04X %-11s %7lu%s\n",
                                 ip_vs_proto_name(cp->protocol),
                                 &cp->caddr.in6, ntohs(cp->cport),
                                 &cp->vaddr.in6, ntohs(cp->vport),
-                               &cp->daddr.in6, ntohs(cp->dport),
+                               dbuf, ntohs(cp->dport),
                                 ip_vs_state_name(cp->protocol, cp->state),
                                 (cp->timer.expires-jiffies)/HZ, pe_data);
                 else
  #endif
                         seq_printf(seq,
                                 "%-3s %08X %04X %08X %04X"
-                               " %08X %04X %-11s %7lu%s\n",
+                               " %s %04X %-11s %7lu%s\n",
                                 ip_vs_proto_name(cp->protocol),
                                 ntohl(cp->caddr.ip), ntohs(cp->cport),
                                 ntohl(cp->vaddr.ip), ntohs(cp->vport),
-                               ntohl(cp->daddr.ip), ntohs(cp->dport),
+                               dbuf, ntohs(cp->dport),
                                 ip_vs_state_name(cp->protocol, cp->state),
                                 (cp->timer.expires-jiffies)/HZ, pe_data);
         }
@@ -1105,6 +1137,7 @@ static const char *ip_vs_origin_name(unsigned int flags)
  
  static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
  {
+       char dbuf[IP_VS_ADDRSTRLEN];
  
         if (v == SEQ_START_TOKEN)
                 seq_puts(seq,
@@ -1116,13 +1149,22 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
                 if (!ip_vs_conn_net_eq(cp, net))
                         return 0;
  
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->daf == AF_INET6)
+                       snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+               else
+#endif
+                       snprintf(dbuf, sizeof(dbuf), "%08X",
+                                ntohl(cp->daddr.ip));
+
  #ifdef CONFIG_IP_VS_IPV6
                 if (cp->af == AF_INET6)
-                       seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
+                       seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+                               "%s %04X %-11s %-6s %7lu\n",
                                 ip_vs_proto_name(cp->protocol),
                                 &cp->caddr.in6, ntohs(cp->cport),
                                 &cp->vaddr.in6, ntohs(cp->vport),
-                               &cp->daddr.in6, ntohs(cp->dport),
+                               dbuf, ntohs(cp->dport),
                                 ip_vs_state_name(cp->protocol, cp->state),
                                 ip_vs_origin_name(cp->flags),
                                 (cp->timer.expires-jiffies)/HZ);
@@ -1130,11 +1172,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
  #endif
                         seq_printf(seq,
                                 "%-3s %08X %04X %08X %04X "
-                               "%08X %04X %-11s %-6s %7lu\n",
+                               "%s %04X %-11s %-6s %7lu\n",
                                 ip_vs_proto_name(cp->protocol),
                                 ntohl(cp->caddr.ip), ntohs(cp->cport),
                                 ntohl(cp->vaddr.ip), ntohs(cp->vport),
-                               ntohl(cp->daddr.ip), ntohs(cp->dport),
+                               dbuf, ntohs(cp->dport),
                                 ip_vs_state_name(cp->protocol, cp->state),
                                 ip_vs_origin_name(cp->flags),
                                 (cp->timer.expires-jiffies)/HZ);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c

index 5c34e8d42e0190a14ec31c1238bde5929fdf0539..990decba1fe418e36e59a1f081fcf0e47188da29 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -328,7 +328,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                  * This adds param.pe_data to the template,
                  * and thus param.pe_data will be destroyed
                  * when the template expires */
-               ct = ip_vs_conn_new(&param, &dest->addr, dport,
+               ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
                                     IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
                 if (ct == NULL) {
                         kfree(param.pe_data);
@@ -357,7 +357,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
         ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
                               src_port, &iph->daddr, dst_port, &param);
  
-       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
+       cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
+                           skb->mark);
         if (cp == NULL) {
                 ip_vs_conn_put(ct);
                 *ignored = -1;
@@ -479,7 +480,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
                                       &iph->saddr, pptr[0], &iph->daddr,
                                       pptr[1], &p);
-               cp = ip_vs_conn_new(&p, &dest->addr,
+               cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
                                     dest->port ? dest->port : pptr[1],
                                     flags, dest, skb->mark);
                 if (!cp) {
@@ -491,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
         IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
                       "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
                       ip_vs_fwd_tag(cp),
-                     IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
-                     IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
-                     IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+                     IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+                     IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+                     IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
                       cp->flags, atomic_read(&cp->refcnt));
  
         ip_vs_conn_stats(cp, svc);
@@ -550,7 +551,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                         ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
                                               &iph->saddr, pptr[0],
                                               &iph->daddr, pptr[1], &p);
-                       cp = ip_vs_conn_new(&p, &daddr, 0,
+                       cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
                                             IP_VS_CONN_F_BYPASS | flags,
                                             NULL, skb->mark);
                         if (!cp)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c

index bd2b208ba56c2f619b50e99289387d6190ac7d59..ac7ba689efe76c3e3df89c44dbd0094b08b6cf89 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -574,8 +574,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
   * Called under RCU lock.
   */
  static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
-                 __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
+                 const union nf_inet_addr *daddr, __be16 dport)
  {
         struct ip_vs_dest *dest;
  
@@ -583,9 +583,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
          * Find the destination for the given service
          */
         list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
-               if ((dest->af == svc->af)
-                   && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
-                   && (dest->port == dport)) {
+               if ((dest->af == dest_af) &&
+                   ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+                   (dest->port == dport)) {
                         /* HIT */
                         return dest;
                 }
@@ -602,7 +602,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
   * on the backup.
   * Called under RCU lock, no refcnt is returned.
   */
-struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int svc_af, int dest_af,
                                    const union nf_inet_addr *daddr,
                                    __be16 dport,
                                    const union nf_inet_addr *vaddr,
@@ -613,14 +613,14 @@ struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
         struct ip_vs_service *svc;
         __be16 port = dport;
  
-       svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
+       svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
         if (!svc)
                 return NULL;
         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
                 port = 0;
-       dest = ip_vs_lookup_dest(svc, daddr, port);
+       dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
         if (!dest)
-               dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
+               dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
         return dest;
  }
  
@@ -657,8 +657,8 @@ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
   *  scheduling.
   */
  static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
-                    __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
+                    const union nf_inet_addr *daddr, __be16 dport)
  {
         struct ip_vs_dest *dest;
         struct netns_ipvs *ipvs = net_ipvs(svc->net);
@@ -671,11 +671,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
                               "dest->refcnt=%d\n",
                               dest->vfwmark,
-                             IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
                               ntohs(dest->port),
                               atomic_read(&dest->refcnt));
-               if (dest->af == svc->af &&
-                   ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+               if (dest->af == dest_af &&
+                   ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
                     dest->port == dport &&
                     dest->vfwmark == svc->fwmark &&
                     dest->protocol == svc->protocol &&
@@ -779,6 +779,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
         struct ip_vs_scheduler *sched;
         int conn_flags;
  
+       /* We cannot modify an address and change the address family */
+       BUG_ON(!add && udest->af != dest->af);
+
+       if (add && udest->af != svc->af)
+               ipvs->mixed_address_family_dests++;
+
         /* set the weight and the flags */
         atomic_set(&dest->weight, udest->weight);
         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
@@ -816,6 +822,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
         dest->u_threshold = udest->u_threshold;
         dest->l_threshold = udest->l_threshold;
  
+       dest->af = udest->af;
+
         spin_lock_bh(&dest->dst_lock);
         __ip_vs_dst_cache_reset(dest);
         spin_unlock_bh(&dest->dst_lock);
@@ -847,7 +855,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
         EnterFunction(2);
  
  #ifdef CONFIG_IP_VS_IPV6
-       if (svc->af == AF_INET6) {
+       if (udest->af == AF_INET6) {
                 atype = ipv6_addr_type(&udest->addr.in6);
                 if ((!(atype & IPV6_ADDR_UNICAST) ||
                         atype & IPV6_ADDR_LINKLOCAL) &&
@@ -875,12 +883,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                 u64_stats_init(&ip_vs_dest_stats->syncp);
         }
  
-       dest->af = svc->af;
+       dest->af = udest->af;
         dest->protocol = svc->protocol;
         dest->vaddr = svc->addr;
         dest->vport = svc->port;
         dest->vfwmark = svc->fwmark;
-       ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+       ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
         dest->port = udest->port;
  
         atomic_set(&dest->activeconns, 0);
@@ -928,11 +936,11 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
                 return -ERANGE;
         }
  
-       ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+       ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
  
         /* We use function that requires RCU lock */
         rcu_read_lock();
-       dest = ip_vs_lookup_dest(svc, &daddr, dport);
+       dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
         rcu_read_unlock();
  
         if (dest != NULL) {
@@ -944,12 +952,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
          * Check if the dest already exists in the trash and
          * is from the same service
          */
-       dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+       dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
  
         if (dest != NULL) {
                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
                               "dest->refcnt=%d, service %u/%s:%u\n",
-                             IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+                             IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
                               atomic_read(&dest->refcnt),
                               dest->vfwmark,
                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
@@ -992,11 +1000,11 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
                 return -ERANGE;
         }
  
-       ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+       ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
  
         /* We use function that requires RCU lock */
         rcu_read_lock();
-       dest = ip_vs_lookup_dest(svc, &daddr, dport);
+       dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
         rcu_read_unlock();
  
         if (dest == NULL) {
@@ -1055,6 +1063,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
         list_del_rcu(&dest->n_list);
         svc->num_dests--;
  
+       if (dest->af != svc->af)
+               net_ipvs(svc->net)->mixed_address_family_dests--;
+
         if (svcupd) {
                 struct ip_vs_scheduler *sched;
  
@@ -1078,7 +1089,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  
         /* We use function that requires RCU lock */
         rcu_read_lock();
-       dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+       dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
         rcu_read_unlock();
  
         if (dest == NULL) {
@@ -2244,6 +2255,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
         udest->weight           = udest_compat->weight;
         udest->u_threshold      = udest_compat->u_threshold;
         udest->l_threshold      = udest_compat->l_threshold;
+       udest->af               = AF_INET;
  }
  
  static int
@@ -2480,6 +2492,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                         if (count >= get->num_dests)
                                 break;
  
+                       /* Cannot expose heterogeneous members via sockopt
+                        * interface
+                        */
+                       if (dest->af != svc->af)
+                               continue;
+
                         entry.addr = dest->addr.ip;
                         entry.port = dest->port;
                         entry.conn_flags = atomic_read(&dest->conn_flags);
@@ -2777,6 +2795,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
+       [IPVS_DEST_ATTR_ADDR_FAMILY]    = { .type = NLA_U16 },
  };
  
  static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3032,7 +3051,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
                         atomic_read(&dest->inactconns)) ||
             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
-                       atomic_read(&dest->persistconns)))
+                       atomic_read(&dest->persistconns)) ||
+           nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
                 goto nla_put_failure;
         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
                 goto nla_put_failure;
@@ -3113,6 +3133,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
  {
         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
         struct nlattr *nla_addr, *nla_port;
+       struct nlattr *nla_addr_family;
  
         /* Parse mandatory identifying destination fields first */
         if (nla == NULL ||
@@ -3121,6 +3142,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
  
         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
+       nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
  
         if (!(nla_addr && nla_port))
                 return -EINVAL;
@@ -3130,6 +3152,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
         udest->port = nla_get_be16(nla_port);
  
+       if (nla_addr_family)
+               udest->af = nla_get_u16(nla_addr_family);
+       else
+               udest->af = 0;
+
         /* If a full entry was requested, check for the additional fields */
         if (full_entry) {
                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
@@ -3234,6 +3261,12 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
                 return -EINVAL;
  
+       /* The synchronization protocol is incompatible with mixed family
+        * services
+        */
+       if (net_ipvs(net)->mixed_address_family_dests > 0)
+               return -EINVAL;
+
         return start_sync_thread(net,
                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
@@ -3357,6 +3390,35 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
                                             need_full_dest);
                 if (ret)
                         goto out;
+
+               /* Old protocols did not allow the user to specify address
+                * family, so we set it to zero instead.  We also didn't
+                * allow heterogeneous pools in the old code, so it's safe
+                * to assume that this will have the same address family as
+                * the service.
+                */
+               if (udest.af == 0)
+                       udest.af = svc->af;
+
+               /* A delete request does not carry the forwarding method,
+                * so conn_flags is not filled in for it; skip the checks
+                * or a mixed-family destination could never be removed.
+                */
+               if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
+                       /* The synchronization protocol is incompatible
+                        * with mixed family services
+                        */
+                       if (net_ipvs(net)->sync_state) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+
+                       /* Only tunnel forwarding is supported here */
+                       if (udest.conn_flags != IP_VS_CONN_F_TUNNEL) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+               }
         }
  
         switch (cmd) {
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c

index c3b84546ea9e2b385878f07eeae926495890ad1f..6be5c538b71e6fda1085bf01849af5d9df169895 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -234,7 +234,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  
         IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
                       IP_VS_DBG_ADDR(svc->af, &iph->daddr),
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr),
                       ntohs(dest->port));
  
         return dest;
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c

new file mode 100644 (file)

index 0000000..e09874d
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -0,0 +1,101 @@
+/*
+ * IPVS:        Weighted Fail Over module
+ *
+ * Authors:     Kenny Mathis <kmathis@chokepoint.net>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Kenny Mathis            :     added initial functionality based on weight
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* Weighted Fail Over scheduling: choose the destination of @svc with the
+ * highest weight among those not flagged IP_VS_DEST_F_OVERLOAD.
+ *
+ * Destinations whose weight is not above zero are never chosen.  On equal
+ * weights the destination met first in svc->destinations wins.  @skb and
+ * @iph are not used.  The list is walked with list_for_each_entry_rcu().
+ *
+ * Returns the chosen destination, or NULL after reporting "no destination
+ * available" through ip_vs_scheduler_err().
+ */
+static struct ip_vs_dest *
+ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+                 struct ip_vs_iphdr *iph)
+{
+       struct ip_vs_dest *dest, *hweight = NULL;
+       int hw = 0; /* Track highest weight */
+
+       IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n");
+
+       /* Basic failover functionality
+        * Find real server with highest weight and send it traffic
+        */
+       list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+               int w;
+
+               if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+                       continue;
+
+               /* Read the weight once so the value compared is the
+                * value remembered, even if it is changed in between
+                * the two uses.
+                */
+               w = atomic_read(&dest->weight);
+               if (w > hw) {
+                       hweight = dest;
+                       hw = w;
+               }
+       }
+
+       if (hweight) {
+               IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n",
+                             IP_VS_DBG_ADDR(hweight->af, &hweight->addr),
+                             ntohs(hweight->port),
+                             atomic_read(&hweight->activeconns),
+                             hw);
+               return hweight;
+       }
+
+       ip_vs_scheduler_err(svc, "no destination available");
+       return NULL;
+}
+
+/* Scheduler descriptor registered under the name "fo" */
+static struct ip_vs_scheduler ip_vs_fo_scheduler = {
+       .name =                 "fo",
+       .refcnt =               ATOMIC_INIT(0),
+       .module =               THIS_MODULE,
+       .n_list =               LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list),
+       .schedule =             ip_vs_fo_schedule,
+};
+
+/* Module entry point: register the scheduler and return the result */
+static int __init ip_vs_fo_init(void)
+{
+       return register_ip_vs_scheduler(&ip_vs_fo_scheduler);
+}
+
+/* Module exit: unregister the scheduler, then wait for an RCU grace period */
+static void __exit ip_vs_fo_cleanup(void)
+{
+       unregister_ip_vs_scheduler(&ip_vs_fo_scheduler);
+       synchronize_rcu();
+}
+
+module_init(ip_vs_fo_init);
+module_exit(ip_vs_fo_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted failover scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c

index 77c173282f388ce81bbd54ea6e41761656d95045..a64fa15790e53f8ad0eda7c53b73be8bd849f2aa 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -233,7 +233,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                         ip_vs_conn_fill_param(ip_vs_conn_net(cp),
                                               AF_INET, IPPROTO_TCP, &cp->caddr,
                                               0, &cp->vaddr, port, &p);
-                       n_cp = ip_vs_conn_new(&p, &from, port,
+                       /* As above, this is ipv4 only */
+                       n_cp = ip_vs_conn_new(&p, AF_INET, &from, port,
                                               IP_VS_CONN_F_NO_CPORT |
                                               IP_VS_CONN_F_NFCT,
                                               cp->dest, skb->mark);
@@ -396,7 +397,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
                                       htons(ntohs(cp->vport)-1), &p);
                 n_cp = ip_vs_conn_in_get(&p);
                 if (!n_cp) {
-                       n_cp = ip_vs_conn_new(&p, &cp->daddr,
+                       /* This is ipv4 only */
+                       n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr,
                                               htons(ntohs(cp->dport)-1),
                                               IP_VS_CONN_F_NFCT, cp->dest,
                                               skb->mark);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c

index 547ff33c1efdb0cb92f3f8890640a77bba3a73b5..127f14046c519d9aa0d0cb7596b8bcf676b5a55b 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -199,11 +199,11 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
   */
  static inline struct ip_vs_lblc_entry *
  ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
-              struct ip_vs_dest *dest)
+              u16 af, struct ip_vs_dest *dest)
  {
         struct ip_vs_lblc_entry *en;
  
-       en = ip_vs_lblc_get(dest->af, tbl, daddr);
+       en = ip_vs_lblc_get(af, tbl, daddr);
         if (en) {
                 if (en->dest == dest)
                         return en;
@@ -213,8 +213,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
         if (!en)
                 return NULL;
  
-       en->af = dest->af;
-       ip_vs_addr_copy(dest->af, &en->addr, daddr);
+       en->af = af;
+       ip_vs_addr_copy(af, &en->addr, daddr);
         en->lastuse = jiffies;
  
         ip_vs_dest_hold(dest);
@@ -521,13 +521,13 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
         /* If we fail to create a cache entry, we'll just use the valid dest */
         spin_lock_bh(&svc->sched_lock);
         if (!tbl->dead)
-               ip_vs_lblc_new(tbl, &iph->daddr, dest);
+               ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest);
         spin_unlock_bh(&svc->sched_lock);
  
  out:
         IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
                       IP_VS_DBG_ADDR(svc->af, &iph->daddr),
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
  
         return dest;
  }
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c

index 3f21a2f47de1ffc6be71bcc93b6c35be98f8ae3a..2229d2d8bbe0afe97e4d8fa4c2bfc95a6a93ee72 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -362,18 +362,18 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
   */
  static inline struct ip_vs_lblcr_entry *
  ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
-               struct ip_vs_dest *dest)
+               u16 af, struct ip_vs_dest *dest)
  {
         struct ip_vs_lblcr_entry *en;
  
-       en = ip_vs_lblcr_get(dest->af, tbl, daddr);
+       en = ip_vs_lblcr_get(af, tbl, daddr);
         if (!en) {
                 en = kmalloc(sizeof(*en), GFP_ATOMIC);
                 if (!en)
                         return NULL;
  
-               en->af = dest->af;
-               ip_vs_addr_copy(dest->af, &en->addr, daddr);
+               en->af = af;
+               ip_vs_addr_copy(af, &en->addr, daddr);
                 en->lastuse = jiffies;
  
                 /* initialize its dest set */
@@ -706,13 +706,13 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
         /* If we fail to create a cache entry, we'll just use the valid dest */
         spin_lock_bh(&svc->sched_lock);
         if (!tbl->dead)
-               ip_vs_lblcr_new(tbl, &iph->daddr, dest);
+               ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
         spin_unlock_bh(&svc->sched_lock);
  
  out:
         IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
                       IP_VS_DBG_ADDR(svc->af, &iph->daddr),
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
  
         return dest;
  }
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c

index 2bdcb1cf21279db80ebb0af0b091f600e8b4921b..19a0769a989aa331de570dd6ee2514db454f83ae 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -59,7 +59,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
         else
                 IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
                               "inactconns %d\n",
-                             IP_VS_DBG_ADDR(svc->af, &least->addr),
+                             IP_VS_DBG_ADDR(least->af, &least->addr),
                               ntohs(least->port),
                               atomic_read(&least->activeconns),
                               atomic_read(&least->inactconns));
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c

index 961a6de9bb29035458945185f488a5fc1209ba00..a8b63401e7731e6c8fef37b62c43425e2f96b43c 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -107,7 +107,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
    out:
         IP_VS_DBG_BUF(6, "NQ: server %s:%u "
                       "activeconns %d refcnt %d weight %d overhead %d\n",
-                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     IP_VS_DBG_ADDR(least->af, &least->addr),
+                     ntohs(least->port),
                       atomic_read(&least->activeconns),
                       atomic_read(&least->refcnt),
                       atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c

index 2f7ea7564044ccd0d8600a2b6688cb6603645588..5b84c0b566424dce498c7ff85ee3b1060df4047f 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -432,7 +432,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                                 pd->pp->name,
                                 ((direction == IP_VS_DIR_OUTPUT) ?
                                  "output " : "input "),
-                               IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+                               IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
                                 ntohs(cp->dport),
                                 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
                                 ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c

index e3a697234a988cb9c2f03afbf122a56f99a454aa..8e92beb0cca9920238421af8a5b5206869c357e0 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -510,7 +510,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                               th->fin ? 'F' : '.',
                               th->ack ? 'A' : '.',
                               th->rst ? 'R' : '.',
-                             IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+                             IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
                               ntohs(cp->dport),
                               IP_VS_DBG_ADDR(cp->af, &cp->caddr),
                               ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c

index 176b87c35e34ea2b438739d21a3439f1ebade995..58bacfc461ee6a1d6df4e6e024032fb52a044e35 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -95,7 +95,7 @@ stop:
         spin_unlock_bh(&svc->sched_lock);
         IP_VS_DBG_BUF(6, "RR: server %s:%u "
                       "activeconns %d refcnt %d weight %d\n",
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
                       atomic_read(&dest->activeconns),
                       atomic_read(&dest->refcnt), atomic_read(&dest->weight));
  
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c

index e446b9fa7424c6382cb65433447f3febe2b17eeb..f8e2d00f528b945e774564854fc66f53dbc61970 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -108,7 +108,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  
         IP_VS_DBG_BUF(6, "SED: server %s:%u "
                       "activeconns %d refcnt %d weight %d overhead %d\n",
-                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     IP_VS_DBG_ADDR(least->af, &least->addr),
+                     ntohs(least->port),
                       atomic_read(&least->activeconns),
                       atomic_read(&least->refcnt),
                       atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c

index cc65b2f42cd40283fab4ed5a24a82767d8190a89..98a13433b68c226fee24e3421d5c46c5f0cc8a75 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -138,7 +138,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
                 return dest;
  
         IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
  
         /* if the original dest is unavailable, loop around the table
          * starting from ihash to find a new dest
@@ -153,7 +153,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
                         return dest;
                 IP_VS_DBG_BUF(6, "SH: selected unavailable "
                               "server %s:%d (offset %d), reselecting",
-                             IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
                               ntohs(dest->port), roffset);
         }
  
@@ -192,7 +192,7 @@ ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
                         RCU_INIT_POINTER(b->dest, dest);
  
                         IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
-                                     i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                                     i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
                                       atomic_read(&dest->weight));
  
                         /* Don't move to next dest until filling weight */
@@ -342,7 +342,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  
         IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
                       IP_VS_DBG_ADDR(svc->af, &iph->saddr),
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr),
                       ntohs(dest->port));
  
         return dest;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c

index eadffb29dec0cccf8333a6f446d3756d843ec4ee..7162c86fd50dca443c0e7c3aa008cdb93038b7e3 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -880,10 +880,17 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
                  * but still handled.
                  */
                 rcu_read_lock();
-               dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
-                                      param->vport, protocol, fwmark, flags);
+               /* This function is only invoked by the synchronization
+                * code. We do not currently support heterogeneous pools
+                * with synchronization, so we can make the assumption that
+                * the svc_af is the same as the dest_af
+                */
+               dest = ip_vs_find_dest(net, type, type, daddr, dport,
+                                      param->vaddr, param->vport, protocol,
+                                      fwmark, flags);
  
-               cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+               cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
+                                   fwmark);
                 rcu_read_unlock();
                 if (!cp) {
                         kfree(param->pe_data);
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c

index b5b4650d50a9180f211e6cce82e3393ece2fc11c..6b366fd905542ff086a36da4111bd11646d274d8 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -80,7 +80,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  
         IP_VS_DBG_BUF(6, "WLC: server %s:%u "
                       "activeconns %d refcnt %d weight %d overhead %d\n",
-                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     IP_VS_DBG_ADDR(least->af, &least->addr),
+                     ntohs(least->port),
                       atomic_read(&least->activeconns),
                       atomic_read(&least->refcnt),
                       atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c

index 0546cd572d6b84bfe3d9feaea40d78e88cd6d23c..17e6d4406ca7c32657eff5e103d0aa1b9317e813 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -216,7 +216,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  found:
         IP_VS_DBG_BUF(6, "WRR: server %s:%u "
                       "activeconns %d refcnt %d weight %d\n",
-                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+                     IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
                       atomic_read(&dest->activeconns),
                       atomic_read(&dest->refcnt),
                       atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c

index 56896a412bcec7ae01726734115d19bc04079274..91f17c1eb8a20c0226e01c68e255acf118a6a4d0 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -157,18 +157,113 @@ retry:
         return rt;
  }
  
+#ifdef CONFIG_IP_VS_IPV6
+/* An IPv6 route is treated as local when it has an output device and that
+ * device carries IFF_LOOPBACK.  Returns 1 if so, 0 otherwise.
+ */
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+       struct net_device *out_dev = rt->dst.dev;
+
+       if (!out_dev)
+               return 0;
+
+       return (out_dev->flags & IFF_LOOPBACK) != 0;
+}
+#endif
+
+/* Decide whether sending @skb over a newly looked-up route would cross the
+ * local/non-local boundary in a way that @rt_mode does not permit.
+ *
+ * @skb_af:          address family of the packet carried by @skb
+ * @skb:             packet being transmitted; its current dst is the old route
+ * @rt_mode:         IP_VS_RT_MODE_* flags the caller allows
+ * @new_rt_is_local: whether the new route is a local one
+ *
+ * The packet is refused (true is returned) when:
+ *  - the new route is local but IP_VS_RT_MODE_LOCAL is not set, or
+ *    IP_VS_RT_MODE_RDR is not set and the old route was not local;
+ *  - the new route is non-local but IP_VS_RT_MODE_NON_LOCAL is not set, or
+ *    the packet has a loopback source.
+ * Otherwise false is returned and the packet may proceed.
+ */
+static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+                                               int rt_mode,
+                                               bool new_rt_is_local)
+{
+       bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+       /* must test the NON_LOCAL bit, not LOCAL again */
+       bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_NON_LOCAL);
+       bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
+       bool source_is_loopback;
+       bool old_rt_is_local;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (skb_af == AF_INET6) {
+               int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+
+               source_is_loopback =
+                       (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+                       (addr_type & IPV6_ADDR_LOOPBACK);
+               old_rt_is_local = __ip_vs_is_local_route6(
+                       (struct rt6_info *)skb_dst(skb));
+       } else
+#endif
+       {
+               source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr);
+               old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+       }
+
+       if (unlikely(new_rt_is_local)) {
+               if (!rt_mode_allow_local)
+                       return true;
+               /* non-local -> local is only allowed for redirecting modes */
+               if (!rt_mode_allow_redirect && !old_rt_is_local)
+                       return true;
+       } else {
+               if (!rt_mode_allow_non_local)
+                       return true;
+               /* never let loopback-sourced traffic leave the host */
+               if (source_is_loopback)
+                       return true;
+       }
+       return false;
+}
+
+/* Report @mtu to the update_pmtu handler of the route @skb currently
+ * carries.  Nothing is done unless the skb has no device, has a socket
+ * attached, and that socket is not in TCP_TIME_WAIT state.
+ */
+static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
+{
+       struct sock *owner = skb->sk;
+       struct rtable *old_rt;
+
+       if (skb->dev || !owner || owner->sk_state == TCP_TIME_WAIT)
+               return;
+
+       old_rt = skb_rtable(skb);
+       old_rt->dst.ops->update_pmtu(&old_rt->dst, owner, NULL, mtu);
+}
+
+/* Check that @skb fits into @mtu on the chosen route.
+ *
+ * Returns true when the packet may be sent.  Returns false when it is too
+ * big; in that case the matching ICMP error (ICMPV6_PKT_TOOBIG for IPv6,
+ * ICMP_FRAG_NEEDED for IPv4) has already been sent towards the source,
+ * except that for IPv6 it is only sent when @ipvsh->fragoffs is zero.
+ */
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+                                         struct ip_vs_iphdr *ipvsh,
+                                         struct sk_buff *skb, int mtu)
+{
+       struct netns_ipvs *ipvs;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (skb_af == AF_INET6) {
+               struct net *net = dev_net(skb_dst(skb)->dev);
+
+               if (likely(!__mtu_check_toobig_v6(skb, mtu)))
+                       return true;
+
+               /* skb has no device yet: fall back to loopback */
+               if (!skb->dev)
+                       skb->dev = net->loopback_dev;
+               /* only send ICMP too big on first fragment */
+               if (!ipvsh->fragoffs)
+                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+               IP_VS_DBG(1, "frag needed for %pI6c\n",
+                         &ipv6_hdr(skb)->saddr);
+               return false;
+       }
+#endif
+       ipvs = net_ipvs(skb_net(skb));
+
+       /* If we're going to tunnel the packet and pmtu discovery
+        * is disabled, we'll just fragment it anyway
+        */
+       if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+               return true;
+
+       /* Fine unless DF is set, the packet exceeds mtu and it is not GSO */
+       if (likely(!(ip_hdr(skb)->frag_off & htons(IP_DF)) ||
+                  skb->len <= mtu || skb_is_gso(skb)))
+               return true;
+
+       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+       IP_VS_DBG(1, "frag needed for %pI4\n", &ip_hdr(skb)->saddr);
+       return false;
+}
+
  /* Get route to destination or remote server */
  static int
-__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
-                  __be32 daddr, int rt_mode, __be32 *ret_saddr)
+__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+                  __be32 daddr, int rt_mode, __be32 *ret_saddr,
+                  struct ip_vs_iphdr *ipvsh)
  {
         struct net *net = dev_net(skb_dst(skb)->dev);
-       struct netns_ipvs *ipvs = net_ipvs(net);
         struct ip_vs_dest_dst *dest_dst;
         struct rtable *rt;                      /* Route to the other host */
-       struct rtable *ort;                     /* Original route */
-       struct iphdr *iph;
-       __be16 df;
         int mtu;
         int local, noref = 1;
  
@@ -218,30 +313,14 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
         }
  
         local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
-       if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
-             rt_mode)) {
-               IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
-                            (rt->rt_flags & RTCF_LOCAL) ?
-                            "local":"non-local", &daddr);
+       if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+                                                 local))) {
+               /* dest can be NULL (bypass path): print the daddr argument */
+               IP_VS_DBG_RL("We are crossing local and non-local addresses"
+                            " daddr=%pI4\n", &daddr);
                 goto err_put;
         }
-       iph = ip_hdr(skb);
-       if (likely(!local)) {
-               if (unlikely(ipv4_is_loopback(iph->saddr))) {
-                       IP_VS_DBG_RL("Stopping traffic from loopback address "
-                                    "%pI4 to non-local address, dest: %pI4\n",
-                                    &iph->saddr, &daddr);
-                       goto err_put;
-               }
-       } else {
-               ort = skb_rtable(skb);
-               if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
-                   !(ort->rt_flags & RTCF_LOCAL)) {
-                       IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
-                                    "local requires NAT method, dest: %pI4\n",
-                                    &iph->daddr, &daddr);
-                       goto err_put;
-               }
+
+       if (unlikely(local)) {
                 /* skb to local stack, preserve old route */
                 if (!noref)
                         ip_rt_put(rt);
@@ -250,28 +329,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
  
         if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
                 mtu = dst_mtu(&rt->dst);
-               df = iph->frag_off & htons(IP_DF);
         } else {
-               struct sock *sk = skb->sk;
-
                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
                 if (mtu < 68) {
                         IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
                         goto err_put;
                 }
-               ort = skb_rtable(skb);
-               if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
-                       ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
-               /* MTU check allowed? */
-               df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
+               maybe_update_pmtu(skb_af, skb, mtu);
         }
  
-       /* MTU checking */
-       if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
-               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-               IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+       if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
                 goto err_put;
-       }
  
         skb_dst_drop(skb);
         if (noref) {
@@ -295,12 +363,6 @@ err_unreach:
  }
  
  #ifdef CONFIG_IP_VS_IPV6
-
-static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
-{
-       return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
-}
-
  static struct dst_entry *
  __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
                         struct in6_addr *ret_saddr, int do_xfrm)
@@ -339,14 +401,13 @@ out_err:
   * Get route to destination or remote server
   */
  static int
-__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
                       struct in6_addr *daddr, struct in6_addr *ret_saddr,
                       struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
  {
         struct net *net = dev_net(skb_dst(skb)->dev);
         struct ip_vs_dest_dst *dest_dst;
         struct rt6_info *rt;                    /* Route to the other host */
-       struct rt6_info *ort;                   /* Original route */
         struct dst_entry *dst;
         int mtu;
         int local, noref = 1;
@@ -393,32 +454,15 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
         }
  
         local = __ip_vs_is_local_route6(rt);
-       if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
-             rt_mode)) {
-               IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
-                            local ? "local":"non-local", daddr);
+
+       if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+                                                 local))) {
+               /* dest can be NULL (bypass path): print the daddr argument */
+               IP_VS_DBG_RL("We are crossing local and non-local addresses"
+                            " daddr=%pI6\n", daddr);
                 goto err_put;
         }
-       if (likely(!local)) {
-               if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-                            ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
-                                           IPV6_ADDR_LOOPBACK)) {
-                       IP_VS_DBG_RL("Stopping traffic from loopback address "
-                                    "%pI6c to non-local address, "
-                                    "dest: %pI6c\n",
-                                    &ipv6_hdr(skb)->saddr, daddr);
-                       goto err_put;
-               }
-       } else {
-               ort = (struct rt6_info *) skb_dst(skb);
-               if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
-                   !__ip_vs_is_local_route6(ort)) {
-                       IP_VS_DBG_RL("Redirect from non-local address %pI6c "
-                                    "to local requires NAT method, "
-                                    "dest: %pI6c\n",
-                                    &ipv6_hdr(skb)->daddr, daddr);
-                       goto err_put;
-               }
+
+       if (unlikely(local)) {
                 /* skb to local stack, preserve old route */
                 if (!noref)
                         dst_release(&rt->dst);
@@ -429,28 +473,17 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
         if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
                 mtu = dst_mtu(&rt->dst);
         else {
-               struct sock *sk = skb->sk;
-
                 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
                 if (mtu < IPV6_MIN_MTU) {
                         IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
                                      IPV6_MIN_MTU);
                         goto err_put;
                 }
-               ort = (struct rt6_info *) skb_dst(skb);
-               if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
-                       ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+               maybe_update_pmtu(skb_af, skb, mtu);
         }
  
-       if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
-               if (!skb->dev)
-                       skb->dev = net->loopback_dev;
-               /* only send ICMP too big on first fragment */
-               if (!ipvsh->fragoffs)
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-               IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+       if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
                 goto err_put;
-       }
  
         skb_dst_drop(skb);
         if (noref) {
@@ -556,8 +589,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         EnterFunction(10);
  
         rcu_read_lock();
-       if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
-                              NULL) < 0)
+       if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+                              IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
                 goto tx_error;
  
         ip_send_check(iph);
@@ -586,7 +619,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
         EnterFunction(10);
  
         rcu_read_lock();
-       if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+       if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
                                   ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
                 goto tx_error;
  
@@ -633,10 +666,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         }
  
         was_input = rt_is_input_route(skb_rtable(skb));
-       local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+       local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
                                    IP_VS_RT_MODE_LOCAL |
                                    IP_VS_RT_MODE_NON_LOCAL |
-                                  IP_VS_RT_MODE_RDR, NULL);
+                                  IP_VS_RT_MODE_RDR, NULL, ipvsh);
         if (local < 0)
                 goto tx_error;
         rt = skb_rtable(skb);
@@ -721,8 +754,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
         }
  
-       local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
-                                     ipvsh, 0,
+       local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+                                     NULL, ipvsh, 0,
                                       IP_VS_RT_MODE_LOCAL |
                                       IP_VS_RT_MODE_NON_LOCAL |
                                       IP_VS_RT_MODE_RDR);
@@ -791,6 +824,81 @@ tx_error:
  }
  #endif
  
+/* Make @skb ready for encapsulation and collect what the outer header
+ * needs from the inner one, so the old IP header does not have to be
+ * consulted again afterwards.
+ *
+ * If @skb is cloned or has less than @max_headroom bytes of headroom it is
+ * replaced by a reallocated copy and the original is consumed.  The
+ * following are then read from the inner header selected by @skb_af:
+ *   @next_protocol - IPPROTO_IPV6 or IPPROTO_IPIP
+ *   @payload_len   - full inner packet length (optional, may be NULL)
+ *   @dsfield, @ttl - DS field and hop limit / TTL
+ *   @df            - inner DF bit for IPv4, 0 for IPv6 (optional, may be NULL)
+ * For IPv4 the inner header checksum is recomputed as well.
+ *
+ * Returns the skb to continue with.  On allocation failure the skb has
+ * already been freed and ERR_PTR(-ENOMEM) is returned.
+ */
+static struct sk_buff *
+ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+                          unsigned int max_headroom, __u8 *next_protocol,
+                          __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
+                          __be16 *df)
+{
+       struct iphdr *inner4;
+#ifdef CONFIG_IP_VS_IPV6
+       struct ipv6hdr *inner6;
+#endif
+
+       if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
+               struct sk_buff *roomy;
+
+               roomy = skb_realloc_headroom(skb, max_headroom);
+               if (!roomy) {
+                       kfree_skb(skb);
+                       return ERR_PTR(-ENOMEM);
+               }
+               consume_skb(skb);
+               skb = roomy;
+       }
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (skb_af == AF_INET6) {
+               inner6 = ipv6_hdr(skb);
+               *next_protocol = IPPROTO_IPV6;
+               *dsfield = ipv6_get_dsfield(inner6);
+               *ttl = inner6->hop_limit;
+               if (payload_len)
+                       *payload_len = ntohs(inner6->payload_len) +
+                                      sizeof(*inner6);
+               if (df)
+                       *df = 0;
+               return skb;
+       }
+#endif
+
+       inner4 = ip_hdr(skb);
+       *next_protocol = IPPROTO_IPIP;
+       /* Copy DF, reset fragment offset and MF */
+       if (df)
+               *df = (inner4->frag_off & htons(IP_DF));
+
+       /* fix old IP header checksum */
+       ip_send_check(inner4);
+       *dsfield = ipv4_get_dsfield(inner4);
+       *ttl = inner4->ttl;
+       if (payload_len)
+               *payload_len = ntohs(inner4->tot_len);
+
+       return skb;
+}
+
+/* Pick the SKB_GSO_* tunnel type for a packet of family @orig_af that is
+ * encapsulated in an outer header of family @encaps_af.  Only an IPv4
+ * outer header yields a type; anything else returns 0.
+ */
+static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
+{
+       /* GSO: we need to provide proper SKB_GSO_ value for IPv6:
+        * SKB_GSO_SIT/IPV6
+        */
+       if (encaps_af != AF_INET)
+               return 0;
+
+       return orig_af == AF_INET ? SKB_GSO_IPIP : SKB_GSO_SIT;
+}
  
  /*
   *   IP Tunneling transmitter
@@ -819,9 +927,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         struct rtable *rt;                      /* Route to the other host */
         __be32 saddr;                           /* Source for tunnel */
         struct net_device *tdev;                /* Device to other host */
-       struct iphdr  *old_iph = ip_hdr(skb);
-       u8     tos = old_iph->tos;
-       __be16 df;
+       __u8 next_protocol = 0;
+       __u8 dsfield = 0;
+       __u8 ttl = 0;
+       __be16 df = 0;
+       __be16 *dfp = NULL;
         struct iphdr  *iph;                     /* Our new IP header */
         unsigned int max_headroom;              /* The extra header space needed */
         int ret, local;
@@ -829,11 +939,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         EnterFunction(10);
  
         rcu_read_lock();
-       local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+       local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
                                    IP_VS_RT_MODE_LOCAL |
                                    IP_VS_RT_MODE_NON_LOCAL |
                                    IP_VS_RT_MODE_CONNECT |
-                                  IP_VS_RT_MODE_TUNNEL, &saddr);
+                                  IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
         if (local < 0)
                 goto tx_error;
         if (local) {
@@ -844,29 +954,21 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         rt = skb_rtable(skb);
         tdev = rt->dst.dev;
  
-       /* Copy DF, reset fragment offset and MF */
-       df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
-
         /*
          * Okay, now see if we can stuff it in the buffer as-is.
          */
         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
  
-       if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
-               struct sk_buff *new_skb =
-                       skb_realloc_headroom(skb, max_headroom);
-
-               if (!new_skb)
-                       goto tx_error;
-               consume_skb(skb);
-               skb = new_skb;
-               old_iph = ip_hdr(skb);
-       }
-
-       /* fix old IP header checksum */
-       ip_send_check(old_iph);
+       /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
+       dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
+       skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+                                        &next_protocol, NULL, &dsfield,
+                                        &ttl, dfp);
+       if (IS_ERR(skb))
+               goto tx_error;
  
-       skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+       skb = iptunnel_handle_offloads(
+               skb, false, __tun_gso_type_mask(AF_INET, cp->af));
         if (IS_ERR(skb))
                 goto tx_error;
  
@@ -883,11 +985,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         iph->version            =       4;
         iph->ihl                =       sizeof(struct iphdr)>>2;
         iph->frag_off           =       df;
-       iph->protocol           =       IPPROTO_IPIP;
-       iph->tos                =       tos;
+       iph->protocol           =       next_protocol;
+       iph->tos                =       dsfield;
         iph->daddr              =       cp->daddr.ip;
         iph->saddr              =       saddr;
-       iph->ttl                =       old_iph->ttl;
+       iph->ttl                =       ttl;
         ip_select_ident(skb, NULL);
  
         /* Another hack: avoid icmp_send in ip_fragment */
@@ -920,7 +1022,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
         struct rt6_info *rt;            /* Route to the other host */
         struct in6_addr saddr;          /* Source for tunnel */
         struct net_device *tdev;        /* Device to other host */
-       struct ipv6hdr  *old_iph = ipv6_hdr(skb);
+       __u8 next_protocol = 0;
+       __u32 payload_len = 0;
+       __u8 dsfield = 0;
+       __u8 ttl = 0;
         struct ipv6hdr  *iph;           /* Our new IP header */
         unsigned int max_headroom;      /* The extra header space needed */
         int ret, local;
@@ -928,7 +1033,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
         EnterFunction(10);
  
         rcu_read_lock();
-       local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+       local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
                                       &saddr, ipvsh, 1,
                                       IP_VS_RT_MODE_LOCAL |
                                       IP_VS_RT_MODE_NON_LOCAL |
@@ -948,19 +1053,14 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
          */
         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
  
-       if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
-               struct sk_buff *new_skb =
-                       skb_realloc_headroom(skb, max_headroom);
-
-               if (!new_skb)
-                       goto tx_error;
-               consume_skb(skb);
-               skb = new_skb;
-               old_iph = ipv6_hdr(skb);
-       }
+       skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+                                        &next_protocol, &payload_len,
+                                        &dsfield, &ttl, NULL);
+       if (IS_ERR(skb))
+               goto tx_error;
  
-       /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */
-       skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */
+       skb = iptunnel_handle_offloads(
+               skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
         if (IS_ERR(skb))
                 goto tx_error;
  
@@ -975,14 +1075,13 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
          */
         iph                     =       ipv6_hdr(skb);
         iph->version            =       6;
-       iph->nexthdr            =       IPPROTO_IPV6;
-       iph->payload_len        =       old_iph->payload_len;
-       be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
+       iph->nexthdr            =       next_protocol;
+       iph->payload_len        =       htons(payload_len);
         memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
-       ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
+       ipv6_change_dsfield(iph, 0, dsfield);
         iph->daddr = cp->daddr.in6;
         iph->saddr = saddr;
-       iph->hop_limit          =       old_iph->hop_limit;
+       iph->hop_limit          =       ttl;
  
         /* Another hack: avoid icmp_send in ip_fragment */
         skb->ignore_df = 1;
@@ -1021,10 +1120,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
         EnterFunction(10);
  
         rcu_read_lock();
-       local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+       local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
                                    IP_VS_RT_MODE_LOCAL |
                                    IP_VS_RT_MODE_NON_LOCAL |
-                                  IP_VS_RT_MODE_KNOWN_NH, NULL);
+                                  IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
         if (local < 0)
                 goto tx_error;
         if (local) {
@@ -1060,8 +1159,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
         EnterFunction(10);
  
         rcu_read_lock();
-       local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
-                                     ipvsh, 0,
+       local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+                                     NULL, ipvsh, 0,
                                       IP_VS_RT_MODE_LOCAL |
                                       IP_VS_RT_MODE_NON_LOCAL);
         if (local < 0)
@@ -1128,7 +1227,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                   IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
                   IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
         rcu_read_lock();
-       local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+       local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+                                  NULL, iph);
         if (local < 0)
                 goto tx_error;
         rt = skb_rtable(skb);
@@ -1219,8 +1319,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                   IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
                   IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
         rcu_read_lock();
-       local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
-                                     ipvsh, 0, rt_mode);
+       local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+                                     NULL, ipvsh, 0, rt_mode);
         if (local < 0)
                 goto tx_error;
         rt = (struct rt6_info *) skb_dst(skb);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c

index d25f293776482f5e88c831c290058f2ca2e6af2b..957c1db6665254645f43d0c04456015a1d4df65c 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -14,6 +14,30 @@
  
  static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
  
+/* Tell whether the generic tracker should handle protocol number @proto.
+ *
+ * Returns false for SCTP, DCCP, GRE and UDPLITE when the matching
+ * CONFIG_NF_CT_PROTO_*_MODULE symbol is defined, and true for every
+ * other protocol number.
+ */
+static bool nf_generic_should_process(u8 proto)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
+       if (proto == IPPROTO_SCTP)
+               return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
+       if (proto == IPPROTO_DCCP)
+               return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
+       if (proto == IPPROTO_GRE)
+               return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
+       if (proto == IPPROTO_UDPLITE)
+               return false;
+#endif
+       return true;
+}
+
  static inline struct nf_generic_net *generic_pernet(struct net *net)
  {
         return &net->ct.nf_ct_proto.generic;
@@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct,
  static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
                         unsigned int dataoff, unsigned int *timeouts)
  {
-       return true;
+       return nf_generic_should_process(nf_ct_protonum(ct));
  }
  
  #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index 82374601577e538750ae822c793ae51af628a0da..19e79f0d9ad25f121c8860f07e012a96ba0af528 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -405,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
         [NFTA_TABLE_FLAGS]      = { .type = NLA_U32 },
  };
  
-static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
-                                    int event, u32 flags, int family,
-                                    const struct nft_table *table)
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+                                    u32 portid, u32 seq, int event, u32 flags,
+                                    int family, const struct nft_table *table)
  {
         struct nlmsghdr *nlh;
         struct nfgenmsg *nfmsg;
@@ -420,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family     = family;
         nfmsg->version          = NFNETLINK_V0;
-       nfmsg->res_id           = 0;
+       nfmsg->res_id           = htons(net->nft.base_seq & 0xffff);
  
         if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
             nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
@@ -448,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
         if (skb == NULL)
                 goto err;
  
-       err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
-                                       ctx->afi->family, ctx->table);
+       err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+                                       event, 0, ctx->afi->family, ctx->table);
         if (err < 0) {
                 kfree_skb(skb);
                 goto err;
@@ -488,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
                         if (idx > s_idx)
                                 memset(&cb->args[1], 0,
                                        sizeof(cb->args) - sizeof(cb->args[0]));
-                       if (nf_tables_fill_table_info(skb,
+                       if (nf_tables_fill_table_info(skb, net,
                                                       NETLINK_CB(cb->skb).portid,
                                                       cb->nlh->nlmsg_seq,
                                                       NFT_MSG_NEWTABLE,
@@ -540,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
         if (!skb2)
                 return -ENOMEM;
  
-       err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+       err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
                                         nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
                                         family, table);
         if (err < 0)
@@ -914,9 +914,9 @@ nla_put_failure:
         return -ENOSPC;
  }
  
-static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
-                                    int event, u32 flags, int family,
-                                    const struct nft_table *table,
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+                                    u32 portid, u32 seq, int event, u32 flags,
+                                    int family, const struct nft_table *table,
                                      const struct nft_chain *chain)
  {
         struct nlmsghdr *nlh;
@@ -930,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family     = family;
         nfmsg->version          = NFNETLINK_V0;
-       nfmsg->res_id           = 0;
+       nfmsg->res_id           = htons(net->nft.base_seq & 0xffff);
  
         if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
                 goto nla_put_failure;
@@ -988,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
         if (skb == NULL)
                 goto err;
  
-       err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
-                                       ctx->afi->family, ctx->table,
+       err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+                                       event, 0, ctx->afi->family, ctx->table,
                                         ctx->chain);
         if (err < 0) {
                 kfree_skb(skb);
@@ -1031,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
                                 if (idx > s_idx)
                                         memset(&cb->args[1], 0,
                                                sizeof(cb->args) - sizeof(cb->args[0]));
-                               if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+                               if (nf_tables_fill_chain_info(skb, net,
+                                                             NETLINK_CB(cb->skb).portid,
                                                               cb->nlh->nlmsg_seq,
                                                               NFT_MSG_NEWCHAIN,
                                                               NLM_F_MULTI,
@@ -1090,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
         if (!skb2)
                 return -ENOMEM;
  
-       err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+       err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
                                         nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
                                         family, table, chain);
         if (err < 0)
@@ -1647,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
                                     .len = NFT_USERDATA_MAXLEN },
  };
  
-static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
-                                   int event, u32 flags, int family,
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+                                   u32 portid, u32 seq, int event,
+                                   u32 flags, int family,
                                     const struct nft_table *table,
                                     const struct nft_chain *chain,
                                     const struct nft_rule *rule)
@@ -1668,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family     = family;
         nfmsg->version          = NFNETLINK_V0;
-       nfmsg->res_id           = 0;
+       nfmsg->res_id           = htons(net->nft.base_seq & 0xffff);
  
         if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
                 goto nla_put_failure;
@@ -1724,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
         if (skb == NULL)
                 goto err;
  
-       err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
-                                      ctx->afi->family, ctx->table,
+       err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+                                      event, 0, ctx->afi->family, ctx->table,
                                        ctx->chain, rule);
         if (err < 0) {
                 kfree_skb(skb);
@@ -1771,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
                                         if (idx > s_idx)
                                                 memset(&cb->args[1], 0,
                                                        sizeof(cb->args) - sizeof(cb->args[0]));
-                                       if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+                                       if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
                                                                       cb->nlh->nlmsg_seq,
                                                                       NFT_MSG_NEWRULE,
                                                                       NLM_F_MULTI | NLM_F_APPEND,
@@ -1837,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
         if (!skb2)
                 return -ENOMEM;
  
-       err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+       err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
                                        nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
                                        family, table, chain, rule);
         if (err < 0)
@@ -2321,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family     = ctx->afi->family;
         nfmsg->version          = NFNETLINK_V0;
-       nfmsg->res_id           = 0;
+       nfmsg->res_id           = htons(ctx->net->nft.base_seq & 0xffff);
  
         if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
                 goto nla_put_failure;
@@ -2342,6 +2344,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
                         goto nla_put_failure;
         }
  
+       if (set->policy != NFT_SET_POL_PERFORMANCE) {
+               if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
+                       goto nla_put_failure;
+       }
+
         desc = nla_nest_start(skb, NFTA_SET_DESC);
         if (desc == NULL)
                 goto nla_put_failure;
@@ -2667,6 +2674,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
         set->dlen  = desc.dlen;
         set->flags = flags;
         set->size  = desc.size;
+       set->policy = policy;
  
         err = ops->init(set, &desc, nla);
         if (err < 0)
@@ -2925,7 +2933,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family = ctx.afi->family;
         nfmsg->version      = NFNETLINK_V0;
-       nfmsg->res_id       = 0;
+       nfmsg->res_id       = htons(ctx.net->nft.base_seq & 0xffff);
  
         if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
                 goto nla_put_failure;
@@ -3006,7 +3014,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
         nfmsg = nlmsg_data(nlh);
         nfmsg->nfgen_family     = ctx->afi->family;
         nfmsg->version          = NFNETLINK_V0;
-       nfmsg->res_id           = 0;
+       nfmsg->res_id           = htons(ctx->net->nft.base_seq & 0xffff);
  
         if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
                 goto nla_put_failure;
@@ -3293,6 +3301,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
         return err;
  }
  
+/* Fill @skb with an NFT_MSG_NEWGEN message whose NFTA_GEN_ID attribute
+ * carries net->nft.base_seq.
+ *
+ * @portid and @seq are passed to nlmsg_put() for the netlink header.
+ *
+ * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
+ * header or the attribute cannot be added.
+ */
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+                                  u32 portid, u32 seq)
+{
+       int type = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+       struct nfgenmsg *hdr;
+       struct nlmsghdr *nlh;
+
+       nlh = nlmsg_put(skb, portid, seq, type, sizeof(*hdr), 0);
+       if (!nlh)
+               goto trim;
+
+       hdr = nlmsg_data(nlh);
+       hdr->nfgen_family = AF_UNSPEC;
+       hdr->version = NFNETLINK_V0;
+       /* Only the low 16 bits of base_seq are stored in res_id */
+       hdr->res_id = htons(net->nft.base_seq & 0xffff);
+
+       if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) != 0)
+               goto trim;
+
+       return nlmsg_end(skb, nlh);
+
+trim:
+       nlmsg_trim(skb, nlh);
+       return -EMSGSIZE;
+}
+
+/* Send an NFT_MSG_NEWGEN notification for the request carried by @skb.
+ *
+ * The message is built by nf_tables_fill_gen_info() and handed to
+ * nfnetlink_send() for the NFNLGRP_NFTABLES group together with the
+ * sender's nlmsg_report() flag. @event is currently unused: the fill
+ * helper always emits NFT_MSG_NEWGEN.
+ *
+ * Returns 0 when there is nothing to send, otherwise the result of
+ * nfnetlink_send(), or a negative errno when the message could not be
+ * built (the error is also propagated through nfnetlink_set_err()).
+ */
+static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+{
+       struct nlmsghdr *nlh = nlmsg_hdr(skb);
+       struct sk_buff *skb2;
+       int err;
+
+       /* Skip the work only when the sender did not ask for a report
+        * AND nobody listens on the group. A requested report must
+        * still be produced when the group has no listeners.
+        */
+       if (!nlmsg_report(nlh) &&
+           !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+               return 0;
+
+       err = -ENOBUFS;
+       skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (skb2 == NULL)
+               goto err;
+
+       err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+                                     nlh->nlmsg_seq);
+       if (err < 0) {
+               kfree_skb(skb2);
+               goto err;
+       }
+
+       err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
+                            NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+err:
+       if (err < 0) {
+               nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+                                 err);
+       }
+       return err;
+}
+
+/* NFT_MSG_GETGEN handler: unicast an NFT_MSG_NEWGEN message built by
+ * nf_tables_fill_gen_info() back to the requesting port.
+ *
+ * @nla is not used.
+ *
+ * Returns -ENOMEM when the reply skb cannot be allocated, the negative
+ * result of nf_tables_fill_gen_info() when filling fails, or otherwise
+ * whatever nlmsg_unicast() returns.
+ */
+static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
+                           const struct nlmsghdr *nlh,
+                           const struct nlattr * const nla[])
+{
+       struct net *net = sock_net(skb->sk);
+       struct sk_buff *reply;
+       int ret;
+
+       reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       ret = nf_tables_fill_gen_info(reply, net, NETLINK_CB(skb).portid,
+                                     nlh->nlmsg_seq);
+       if (ret < 0) {
+               /* The reply was never sent, so it is still ours to free */
+               kfree_skb(reply);
+               return ret;
+       }
+
+       return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+
  static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
         [NFT_MSG_NEWTABLE] = {
                 .call_batch     = nf_tables_newtable,
@@ -3369,6 +3458,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
                 .attr_count     = NFTA_SET_ELEM_LIST_MAX,
                 .policy         = nft_set_elem_list_policy,
         },
+       [NFT_MSG_GETGEN] = {
+               .call           = nf_tables_getgen,
+       },
  };
  
  static void nft_chain_commit_update(struct nft_trans *trans)
@@ -3526,6 +3618,8 @@ static int nf_tables_commit(struct sk_buff *skb)
                 call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
         }
  
+       nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
         return 0;
  }
  
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c

index c138b8fbe280af6886693421a7fe8d9a288156cf..f77d3f7f22b581af8274cdbf2a9f63d3899f246e 100644 (file)
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -333,7 +333,7 @@ replay:
                          * original skb.
                          */
                         if (err == -EAGAIN) {
-                               ss->abort(skb);
+                               ss->abort(oskb);
                                 nfnl_unlock(subsys_id);
                                 kfree_skb(nskb);
                                 goto replay;
@@ -357,9 +357,9 @@ ack:
         }
  done:
         if (success && done)
-               ss->commit(skb);
+               ss->commit(oskb);
         else
-               ss->abort(skb);
+               ss->abort(oskb);
  
         nfnl_unlock(subsys_id);
         kfree_skb(nskb);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c

index 272ae4d6fdf4f1dcb27eb2bc6e68414a25b63363..133eb4772f12586d31b4b95aa45202318753d848 100644 (file)
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1101,22 +1101,11 @@ static const struct seq_operations xt_match_seq_ops = {
  
  static int xt_match_open(struct inode *inode, struct file *file)
  {
-       struct seq_file *seq;
         struct nf_mttg_trav *trav;
-       int ret;
-
-       trav = kmalloc(sizeof(*trav), GFP_KERNEL);
-       if (trav == NULL)
+       trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
+       if (!trav)
                 return -ENOMEM;
  
-       ret = seq_open(file, &xt_match_seq_ops);
-       if (ret < 0) {
-               kfree(trav);
-               return ret;
-       }
-
-       seq = file->private_data;
-       seq->private = trav;
         trav->nfproto = (unsigned long)PDE_DATA(inode);
         return 0;
  }
@@ -1165,22 +1154,11 @@ static const struct seq_operations xt_target_seq_ops = {
  
  static int xt_target_open(struct inode *inode, struct file *file)
  {
-       struct seq_file *seq;
         struct nf_mttg_trav *trav;
-       int ret;
-
-       trav = kmalloc(sizeof(*trav), GFP_KERNEL);
-       if (trav == NULL)
+       trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
+       if (!trav)
                 return -ENOMEM;
  
-       ret = seq_open(file, &xt_target_seq_ops);
-       if (ret < 0) {
-               kfree(trav);
-               return ret;
-       }
-
-       seq = file->private_data;
-       seq->private = trav;
         trav->nfproto = (unsigned long)PDE_DATA(inode);
         return 0;
  }
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c

index cb70f6ec5695f8216b303c733a9985ae9c0778e0..5732cd64acc0d579dd423b0ef23d9b65d77ba5f4 100644 (file)
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -366,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
  #define set_target_v2_checkentry       set_target_v1_checkentry
  #define set_target_v2_destroy          set_target_v1_destroy
  
+/* Revision 3 target */
+
+/* Revision 3 of the SET target: optionally add the packet to add_set,
+ * delete it from del_set and, when match_set() reports a hit on
+ * map_set, copy the mark, priority and/or tx queue selected through
+ * info->flags from map_opt.ext to the skb.
+ *
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct xt_set_info_target_v3 *info = par->targinfo;
+       ADT_OPT(add_opt, par->family, info->add_set.dim,
+               info->add_set.flags, info->flags, info->timeout);
+       ADT_OPT(del_opt, par->family, info->del_set.dim,
+               info->del_set.flags, 0, UINT_MAX);
+       ADT_OPT(map_opt, par->family, info->map_set.dim,
+               info->map_set.flags, 0, UINT_MAX);
+
+       /* Normalize to fit into jiffies */
+       if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+           add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+               add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+
+       if (info->add_set.index != IPSET_INVALID_ID)
+               ip_set_add(info->add_set.index, skb, par, &add_opt);
+       if (info->del_set.index != IPSET_INVALID_ID)
+               ip_set_del(info->del_set.index, skb, par, &del_opt);
+
+       /* No map set configured: nothing to copy to the skb */
+       if (info->map_set.index == IPSET_INVALID_ID)
+               return XT_CONTINUE;
+
+       map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+                                          IPSET_FLAG_MAP_SKBPRIO |
+                                          IPSET_FLAG_MAP_SKBQUEUE);
+       if (!match_set(info->map_set.index, skb, par, &map_opt,
+                      info->map_set.flags & IPSET_INV_MATCH))
+               return XT_CONTINUE;
+
+       /* Clear the bits covered by skbmarkmask, then XOR in skbmark */
+       if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+               skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
+                           ^ (map_opt.ext.skbmark);
+       if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+               skb->priority = map_opt.ext.skbprio;
+       /* Remap only when the device has a tx queue with that number */
+       if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+           skb->dev &&
+           map_opt.ext.skbqueue < skb->dev->real_num_tx_queues)
+               skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+
+       return XT_CONTINUE;
+}
+
+
+/* Validate a revision 3 SET target and take a reference on every set
+ * it names (add_set, del_set, map_set).
+ *
+ * Returns 0 on success. On failure every reference taken so far is
+ * dropped again and a negative errno is returned:
+ *   -ENOENT  a set index cannot be resolved,
+ *   -EINVAL  map_set is used outside the mangle table, or prio/queue
+ *            mapping is requested while none of FORWARD, LOCAL_OUT and
+ *            POST_ROUTING is present in the hook mask,
+ *   -ERANGE  a set dimension exceeds IPSET_DIM_MAX.
+ */
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+       const struct xt_set_info_target_v3 *info = par->targinfo;
+       ip_set_id_t index;
+       int ret;
+
+       if (info->add_set.index != IPSET_INVALID_ID) {
+               index = ip_set_nfnl_get_byindex(par->net,
+                                               info->add_set.index);
+               if (index == IPSET_INVALID_ID) {
+                       pr_warn("Cannot find add_set index %u as target\n",
+                               info->add_set.index);
+                       return -ENOENT;
+               }
+       }
+
+       if (info->del_set.index != IPSET_INVALID_ID) {
+               index = ip_set_nfnl_get_byindex(par->net,
+                                               info->del_set.index);
+               if (index == IPSET_INVALID_ID) {
+                       pr_warn("Cannot find del_set index %u as target\n",
+                               info->del_set.index);
+                       ret = -ENOENT;
+                       goto cleanup_add;
+               }
+       }
+
+       if (info->map_set.index != IPSET_INVALID_ID) {
+               /* add_set/del_set references are already held here, so
+                * every failure below must unwind through the labels.
+                */
+               if (strncmp(par->table, "mangle", 7)) {
+                       pr_warn("--map-set only usable from mangle table\n");
+                       ret = -EINVAL;
+                       goto cleanup_del;
+               }
+               /* NOTE(review): this only requires that at least one of
+                * the listed hooks is in hook_mask; confirm whether masks
+                * that also contain other hooks should be rejected.
+                */
+               if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+                    (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+                    !(par->hook_mask & (1 << NF_INET_FORWARD |
+                                        1 << NF_INET_LOCAL_OUT |
+                                        1 << NF_INET_POST_ROUTING))) {
+                       pr_warn("mapping of prio or/and queue is allowed only "
+                               "from OUTPUT/FORWARD/POSTROUTING chains\n");
+                       ret = -EINVAL;
+                       goto cleanup_del;
+               }
+               index = ip_set_nfnl_get_byindex(par->net,
+                                               info->map_set.index);
+               if (index == IPSET_INVALID_ID) {
+                       pr_warn("Cannot find map_set index %u as target\n",
+                               info->map_set.index);
+                       ret = -ENOENT;
+                       goto cleanup_del;
+               }
+       }
+
+       if (info->add_set.dim > IPSET_DIM_MAX ||
+           info->del_set.dim > IPSET_DIM_MAX ||
+           info->map_set.dim > IPSET_DIM_MAX) {
+               pr_warn("Protocol error: SET target dimension "
+                       "is over the limit!\n");
+               ret = -ERANGE;
+               goto cleanup_map;
+       }
+
+       return 0;
+
+cleanup_map:
+       if (info->map_set.index != IPSET_INVALID_ID)
+               ip_set_nfnl_put(par->net, info->map_set.index);
+cleanup_del:
+       if (info->del_set.index != IPSET_INVALID_ID)
+               ip_set_nfnl_put(par->net, info->del_set.index);
+cleanup_add:
+       if (info->add_set.index != IPSET_INVALID_ID)
+               ip_set_nfnl_put(par->net, info->add_set.index);
+       return ret;
+}
+
+/* Drop the set references of a revision 3 SET target: one
+ * ip_set_nfnl_put() for each of add_set, del_set and map_set whose
+ * index is not IPSET_INVALID_ID.
+ */
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+       const struct xt_set_info_target_v3 *info = par->targinfo;
+       struct net *net = par->net;
+
+       if (info->add_set.index != IPSET_INVALID_ID)
+               ip_set_nfnl_put(net, info->add_set.index);
+
+       if (info->del_set.index != IPSET_INVALID_ID)
+               ip_set_nfnl_put(net, info->del_set.index);
+
+       if (info->map_set.index != IPSET_INVALID_ID)
+               ip_set_nfnl_put(net, info->map_set.index);
+}
+
+
  static struct xt_match set_matches[] __read_mostly = {
         {
                 .name           = "set",
@@ -493,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = {
                 .destroy        = set_target_v2_destroy,
                 .me             = THIS_MODULE
         },
+       /* --map-set support */
+       {
+               .name           = "SET",
+               .revision       = 3,
+               .family         = NFPROTO_IPV4,
+               .target         = set_target_v3,
+               .targetsize     = sizeof(struct xt_set_info_target_v3),
+               .checkentry     = set_target_v3_checkentry,
+               .destroy        = set_target_v3_destroy,
+               .me             = THIS_MODULE
+       },
+       {
+               .name           = "SET",
+               .revision       = 3,
+               .family         = NFPROTO_IPV6,
+               .target         = set_target_v3,
+               .targetsize     = sizeof(struct xt_set_info_target_v3),
+               .checkentry     = set_target_v3_checkentry,
+               .destroy        = set_target_v3_destroy,
+               .me             = THIS_MODULE
+       },
  };
  
  static int __init xt_set_init(void)
author	David S. Miller <davem@davemloft.net>
	Mon, 29 Sep 2014 18:46:53 +0000 (14:46 -0400)
committer	David S. Miller <davem@davemloft.net>
	Mon, 29 Sep 2014 18:46:53 +0000 (14:46 -0400)
include/linux/netfilter/ipset/ip_set.h		patch \| blob \| history
include/linux/netfilter_bridge.h		patch \| blob \| history
include/linux/skbuff.h		patch \| blob \| history
include/net/ip_vs.h		patch \| blob \| history
include/net/neighbour.h		patch \| blob \| history
include/net/netfilter/ipv4/nf_reject.h		patch \| blob \| history
include/net/netfilter/ipv6/nf_reject.h		patch \| blob \| history
include/net/netfilter/nf_nat.h		patch \| blob \| history
include/net/netfilter/nf_tables.h		patch \| blob \| history
include/uapi/linux/ip_vs.h		patch \| blob \| history
include/uapi/linux/netfilter/ipset/ip_set.h		patch \| blob \| history
include/uapi/linux/netfilter/nf_tables.h		patch \| blob \| history
include/uapi/linux/netfilter/xt_set.h		patch \| blob \| history
net/Kconfig		patch \| blob \| history
net/bridge/Makefile		patch \| blob \| history
net/bridge/br.c		patch \| blob \| history
net/bridge/br_device.c		patch \| blob \| history
net/bridge/br_forward.c		patch \| blob \| history
net/bridge/br_input.c		patch \| blob \| history
net/bridge/br_netfilter.c		patch \| blob \| history
net/bridge/br_netlink.c		patch \| blob \| history
net/bridge/br_nf_core.c	[new file with mode: 0644]	patch \| blob
net/bridge/br_private.h		patch \| blob \| history
net/bridge/br_sysfs_br.c		patch \| blob \| history
net/ipv4/netfilter/Kconfig		patch \| blob \| history
net/ipv6/netfilter/Kconfig		patch \| blob \| history
net/netfilter/ipset/Kconfig		patch \| blob \| history
net/netfilter/ipset/Makefile		patch \| blob \| history
net/netfilter/ipset/ip_set_bitmap_gen.h		patch \| blob \| history
net/netfilter/ipset/ip_set_bitmap_ip.c		patch \| blob \| history
net/netfilter/ipset/ip_set_bitmap_ipmac.c		patch \| blob \| history
net/netfilter/ipset/ip_set_bitmap_port.c		patch \| blob \| history
net/netfilter/ipset/ip_set_core.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_gen.h		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_ip.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_ipmark.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_ipport.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_ipportip.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_ipportnet.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_mac.c	[new file with mode: 0644]	patch \| blob
net/netfilter/ipset/ip_set_hash_net.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_netiface.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_netnet.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_netport.c		patch \| blob \| history
net/netfilter/ipset/ip_set_hash_netportnet.c		patch \| blob \| history
net/netfilter/ipset/ip_set_list_set.c		patch \| blob \| history
net/netfilter/ipvs/Kconfig		patch \| blob \| history
net/netfilter/ipvs/Makefile		patch \| blob \| history
net/netfilter/ipvs/ip_vs_conn.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_core.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_ctl.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_dh.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_fo.c	[new file with mode: 0644]	patch \| blob
net/netfilter/ipvs/ip_vs_ftp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_lblc.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_lblcr.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_lc.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_nq.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto_sctp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto_tcp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_rr.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_sed.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_sh.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_sync.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_wlc.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_wrr.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_xmit.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_generic.c		patch \| blob \| history
net/netfilter/nf_tables_api.c		patch \| blob \| history
net/netfilter/nfnetlink.c		patch \| blob \| history
net/netfilter/x_tables.c		patch \| blob \| history
net/netfilter/xt_set.c		patch \| blob \| history