IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
IPSET_EXT_BIT_COMMENT = 2,
IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
+ IPSET_EXT_BIT_SKBINFO = 3,
+ IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
/* Mark set with an extension which needs to call destroy */
IPSET_EXT_BIT_DESTROY = 7,
IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
#define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_SKBINFO(s) ((s)->extensions & IPSET_EXT_SKBINFO)
#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
/* Extension id, in size order */
enum ip_set_ext_id {
IPSET_EXT_ID_COUNTER = 0,
IPSET_EXT_ID_TIMEOUT,
+ IPSET_EXT_ID_SKBINFO,
IPSET_EXT_ID_COMMENT,
IPSET_EXT_ID_MAX,
};
u64 packets;
u64 bytes;
u32 timeout;
+ u32 skbmark;
+ u32 skbmarkmask;
+ u32 skbprio;
+ u16 skbqueue;
char *comment;
};
char *str;
};
+struct ip_set_skbinfo {
+ u32 skbmark;
+ u32 skbmarkmask;
+ u32 skbprio;
+ u16 skbqueue;
+};
+
struct ip_set;
#define ext_timeout(e, s) \
(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
#define ext_comment(e, s) \
(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
-
+#define ext_skbinfo(e, s) \
+(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
const struct ip_set_ext *ext,
cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_SKBINFO(set))
+ cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
if (SET_WITH_FORCEADD(set))
cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
}
}
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ mext->skbmark = skbinfo->skbmark;
+ mext->skbmarkmask = skbinfo->skbmarkmask;
+ mext->skbprio = skbinfo->skbprio;
+ mext->skbqueue = skbinfo->skbqueue;
+}
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+{
+ /* Send nonzero parameters only */
+ return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+ nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+ cpu_to_be64((u64)skbinfo->skbmark << 32 |
+ skbinfo->skbmarkmask))) ||
+ (skbinfo->skbprio &&
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ cpu_to_be32(skbinfo->skbprio))) ||
+ (skbinfo->skbqueue &&
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ cpu_to_be16(skbinfo->skbqueue)));
+
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+ const struct ip_set_ext *ext)
+{
+ skbinfo->skbmark = ext->skbmark;
+ skbinfo->skbmarkmask = ext->skbmarkmask;
+ skbinfo->skbprio = ext->skbprio;
+ skbinfo->skbqueue = ext->skbqueue;
+}
+
static inline bool
ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
{
if (SET_WITH_COMMENT(set) &&
ip_set_put_comment(skb, ext_comment(e, set)))
return -EMSGSIZE;
+ if (SET_WITH_SKBINFO(set) &&
+ ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+ return -EMSGSIZE;
return 0;
}
NF_BR_PRI_LAST = INT_MAX,
};
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#define BRNF_PKT_TYPE 0x01
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_8021Q 0x10
#define BRNF_PPPoE 0x20
-/* Only used in br_forward.c */
-int nf_bridge_copy_header(struct sk_buff *skb);
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
- return nf_bridge_copy_header(skb);
- return 0;
-}
-
static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
switch (skb->protocol) {
}
}
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+ if (skb->nf_bridge->mask & BRNF_8021Q)
+ skb->protocol = htons(ETH_P_8021Q);
+ else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
+ *
+ * Only used in br_forward.c
+ */
+static inline int nf_bridge_copy_header(struct sk_buff *skb)
+{
+ int err;
+ unsigned int header_size;
+
+ nf_bridge_update_protocol(skb);
+ header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+ err = skb_cow_head(skb, header_size);
+ if (err)
+ return err;
+
+ skb_copy_to_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
+ __skb_push(skb, nf_bridge_encap_header_len(skb));
+ return 0;
+}
+
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+{
+ if (skb->nf_bridge &&
+ skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
+ return nf_bridge_copy_header(skb);
+ return 0;
+}
+
static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
};
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
atomic_t use;
unsigned int mask;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info *nf_bridge;
#endif
unsigned int len,
/* one bit hole */
kmemcheck_bitfield_end(flags1);
-
-
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
atomic_inc(&nfct->use);
}
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
#endif
if (copy)
dst->nfctinfo = src->nfctinfo;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dst->nf_bridge = src->nf_bridge;
nf_bridge_get(src->nf_bridge);
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(dst->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(dst->nf_bridge);
#endif
__nf_copy(dst, src, true);
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
+ __u16 daf; /* Address family of the dest */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif
/* thresholds for active connections */
u32 u_threshold; /* upper threshold */
u32 l_threshold; /* lower threshold */
+
+ /* Address family of addr */
+ u16 af;
};
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */
struct net *net; /* Needed by timer routines */
+ /* Number of heterogeneous destinations, needed because
+ * heterogeneous are not supported when synchronization is
+ * enabled */
+ unsigned int mixed_address_family_dests;
};
#define DEFAULT_SYNC_THRESHOLD 3
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
-struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
+struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark);
int ip_vs_control_init(void);
void ip_vs_control_cleanup(void);
struct ip_vs_dest *
-ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
- __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport,
+ const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark, __u32 flags);
void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
return 0;
}
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
{
unsigned int seq, hh_alen;
nf_ct_attach(nskb, oldskb);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
nf_ct_attach(nskb, oldskb);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip6_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
struct hlist_node bysource;
struct nf_conn *ct;
union nf_conntrack_nat_help help;
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \
- defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
int masq_index;
#endif
};
struct nf_conn_nat *nat,
const struct net_device *out)
{
-#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
nat->masq_index != out->ifindex;
* @dtype: data type (verdict or numeric type defined by userspace)
* @size: maximum set size
* @nelems: number of elements
+ * @policy: set parameterization (see enum nft_set_policies)
* @ops: set ops
* @flags: set flags
* @klen: key length
u32 dtype;
u32 size;
u32 nelems;
+ u16 policy;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags;
IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */
IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */
+
+ IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */
+
__IPVS_DEST_ATTR_MAX,
};
IPSET_ATTR_BYTES,
IPSET_ATTR_PACKETS,
IPSET_ATTR_COMMENT,
+ IPSET_ATTR_SKBMARK,
+ IPSET_ATTR_SKBPRIO,
+ IPSET_ATTR_SKBQUEUE,
__IPSET_ATTR_ADT_MAX,
};
#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1)
IPSET_ERR_COUNTER,
IPSET_ERR_COMMENT,
IPSET_ERR_INVALID_MARKMASK,
+ IPSET_ERR_SKBINFO,
/* Type specific error codes */
IPSET_ERR_TYPE_SPECIFIC = 4352,
IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+ IPSET_FLAG_BIT_MAP_SKBMARK = 8,
+ IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK),
+ IPSET_FLAG_BIT_MAP_SKBPRIO = 9,
+ IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
+ IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
+ IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
IPSET_FLAG_CMD_MAX = 15,
};
IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
IPSET_FLAG_BIT_WITH_FORCEADD = 5,
IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
+ IPSET_FLAG_BIT_WITH_SKBINFO = 6,
+ IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO),
IPSET_FLAG_CADT_MAX = 15,
};
* @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
* @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
* @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
+ * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
*/
enum nf_tables_msg_types {
NFT_MSG_NEWTABLE,
NFT_MSG_NEWSETELEM,
NFT_MSG_GETSETELEM,
NFT_MSG_DELSETELEM,
+ NFT_MSG_NEWGEN,
+ NFT_MSG_GETGEN,
NFT_MSG_MAX,
};
* @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
*/
enum nft_masq_attributes {
+ NFTA_MASQ_UNSPEC,
NFTA_MASQ_FLAGS,
__NFTA_MASQ_MAX
};
#define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1)
+/**
+ * enum nft_gen_attributes - nf_tables ruleset generation attributes
+ *
+ * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
+ */
+enum nft_gen_attributes {
+ NFTA_GEN_UNSPEC,
+ NFTA_GEN_ID,
+ __NFTA_GEN_MAX
+};
+#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
+
#endif /* _LINUX_NF_TABLES_H */
__u32 flags;
};
+/* Revision 3 target */
+
+struct xt_set_info_target_v3 {
+ struct xt_set_info add_set;
+ struct xt_set_info del_set;
+ struct xt_set_info map_set;
+ __u32 flags;
+ __u32 timeout;
+};
+
#endif /*_XT_SET_H*/
If unsure, say Y.
config BRIDGE_NETFILTER
- bool "Bridged IP/ARP packets filtering"
- depends on BRIDGE && NETFILTER && INET
+ tristate "Bridged IP/ARP packets filtering"
+ depends on (BRIDGE || BRIDGE=n)
+ depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
- default y
+ default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
br_ioctl.o br_stp.o br_stp_bpdu.o \
- br_stp_if.o br_stp_timer.o br_netlink.o
+ br_stp_if.o br_stp_timer.o br_netlink.o \
+ br_nf_core.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
-bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
+obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
if (err)
goto err_out1;
- err = br_netfilter_init();
+ err = br_nf_core_init();
if (err)
goto err_out2;
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
+ pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
+ "deprecated. Update your scripts to load br_netfilter if you "
+ "need this.\n");
+
return 0;
+
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
- br_netfilter_fini();
+ br_nf_core_fini();
err_out2:
unregister_pernet_subsys(&br_net_ops);
err_out1:
static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
-
br_netlink_fini();
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
-
unregister_pernet_subsys(&br_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
- br_netfilter_fini();
+ br_nf_core_fini();
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = NULL;
#endif
-
br_fdb_fini();
}
u16 vid = 0;
rcu_read_lock();
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
br_nf_pre_routing_finish_bridge_slow(skb);
rcu_read_unlock();
dev->mtu = new_mtu;
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
#endif
return 0;
}
+EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct sk_buff *skb)
{
br_dev_queue_push_xmit);
}
+EXPORT_SYMBOL_GPL(br_forward_finish);
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
kfree_skb(skb);
goto out;
}
+EXPORT_SYMBOL_GPL(br_handle_frame_finish);
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct sk_buff *skb)
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
-static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
-{
-}
-
-static void fake_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb)
-{
-}
-
-static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
- return NULL;
-}
-
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
-{
- return NULL;
-}
-
-static unsigned int fake_mtu(const struct dst_entry *dst)
-{
- return dst->dev->mtu;
-}
-
-static struct dst_ops fake_dst_ops = {
- .family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
- .update_pmtu = fake_update_pmtu,
- .redirect = fake_redirect,
- .cow_metrics = fake_cow_metrics,
- .neigh_lookup = fake_neigh_lookup,
- .mtu = fake_mtu,
-};
-
-/*
- * Initialize bogus route table used to keep netfilter happy.
- * Currently, we fill in the PMTU entry because netfilter
- * refragmentation needs it, and the rt_flags entry because
- * ipt_REJECT needs it. Future netfilter modules might
- * require us to fill additional fields.
- */
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
- [RTAX_MTU - 1] = 1500,
-};
-
-void br_netfilter_rtable_init(struct net_bridge *br)
-{
- struct rtable *rt = &br->fake_rtable;
-
- atomic_set(&rt->dst.__refcnt, 1);
- rt->dst.dev = br->dev;
- rt->dst.path = &rt->dst;
- dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
- rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
- rt->dst.ops = &fake_dst_ops;
-}
-
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
skb->nf_bridge->data, header_size);
}
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
- if (skb->nf_bridge->mask & BRNF_8021Q)
- skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
- skb->protocol = htons(ETH_P_PPP_SES);
-}
-
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
return -1;
}
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- */
-int nf_bridge_copy_header(struct sk_buff *skb)
-{
- int err;
- unsigned int header_size;
-
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return err;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return 0;
-}
-
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
};
#endif
-int __init br_netfilter_init(void)
+static int __init br_netfilter_init(void)
{
int ret;
- ret = dst_entries_init(&fake_dst_ops);
+ ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
if (ret < 0)
return ret;
- ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- if (ret < 0) {
- dst_entries_destroy(&fake_dst_ops);
- return ret;
- }
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
- nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- dst_entries_destroy(&fake_dst_ops);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err1;
}
#endif
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
+err1:
+ nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ return ret;
}
-void br_netfilter_fini(void)
+static void __exit br_netfilter_fini(void)
{
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(brnf_sysctl_header);
#endif
- dst_entries_destroy(&fake_dst_ops);
}
+
+module_init(br_netfilter_init);
+module_exit(br_netfilter_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
return err;
}
-void __exit br_netlink_fini(void)
+void br_netlink_fini(void)
{
br_mdb_uninit();
rtnl_af_unregister(&br_af_ops);
--- /dev/null
+/*
+ * Handle firewalling core
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+}
+
+static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+ return NULL;
+}
+
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
+{
+ return NULL;
+}
+
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+ return dst->dev->mtu;
+}
+
+static struct dst_ops fake_dst_ops = {
+ .family = AF_INET,
+ .protocol = cpu_to_be16(ETH_P_IP),
+ .update_pmtu = fake_update_pmtu,
+ .redirect = fake_redirect,
+ .cow_metrics = fake_cow_metrics,
+ .neigh_lookup = fake_neigh_lookup,
+ .mtu = fake_mtu,
+};
+
+/*
+ * Initialize bogus route table used to keep netfilter happy.
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static const u32 br_dst_default_metrics[RTAX_MAX] = {
+ [RTAX_MTU - 1] = 1500,
+};
+
+void br_netfilter_rtable_init(struct net_bridge *br)
+{
+ struct rtable *rt = &br->fake_rtable;
+
+ atomic_set(&rt->dst.__refcnt, 1);
+ rt->dst.dev = br->dev;
+ rt->dst.path = &rt->dst;
+ dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+ rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
+ rt->dst.ops = &fake_dst_ops;
+}
+
+int __init br_nf_core_init(void)
+{
+ return dst_entries_init(&fake_dst_ops);
+}
+
+void br_nf_core_fini(void)
+{
+ dst_entries_destroy(&fake_dst_ops);
+}
struct pcpu_sw_netstats __percpu *stats;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct rtable fake_rtable;
bool nf_call_iptables;
bool nf_call_ip6tables;
#endif
/* br_netfilter.c */
-#ifdef CONFIG_BRIDGE_NETFILTER
-int br_netfilter_init(void);
-void br_netfilter_fini(void);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_core_init(void);
+void br_nf_core_fini(void);
void br_netfilter_rtable_init(struct net_bridge *);
#else
-#define br_netfilter_init() (0)
-#define br_netfilter_fini() do { } while (0)
+static inline int br_nf_core_init(void) { return 0; }
+static inline void br_nf_core_fini(void) {}
#define br_netfilter_rtable_init(x)
#endif
}
static DEVICE_ATTR_RW(multicast_startup_query_interval);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static ssize_t nf_call_iptables_show(
struct device *d, struct device_attribute *attr, char *buf)
{
&dev_attr_multicast_query_response_interval.attr,
&dev_attr_multicast_startup_query_interval.attr,
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
&dev_attr_nf_call_iptables.attr,
&dev_attr_nf_call_ip6tables.attr,
&dev_attr_nf_call_arptables.attr,
fields such as the source, destination, type of service and
the packet mark.
-config NFT_CHAIN_NAT_IPV4
- depends on NF_TABLES_IPV4
- depends on NF_NAT_IPV4 && NFT_NAT
- tristate "IPv4 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv4 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
config NFT_REJECT_IPV4
depends on NF_TABLES_IPV4
default NFT_REJECT
if NF_NAT_IPV4
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv4 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV4
+ tristate "IPv4 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection).
+
+config NFT_MASQ_IPV4
+ tristate "IPv4 masquerading support for nf_tables"
+ depends on NF_TABLES_IPV4
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV4
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support"
depends on NF_CONNTRACK_SNMP
if IP_NF_NAT
-config NF_NAT_MASQUERADE_IPV4
- tristate "IPv4 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection).
-
-config NFT_MASQ_IPV4
- tristate "IPv4 masquerading support for nf_tables"
- depends on NF_TABLES_IPV4
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV4
-
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NF_NAT_MASQUERADE_IPV4
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
-config NFT_CHAIN_NAT_IPV6
- depends on NF_TABLES_IPV6
- depends on NF_NAT_IPV6 && NFT_NAT
- tristate "IPv6 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv6 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
config NFT_REJECT_IPV6
depends on NF_TABLES_IPV6
default NFT_REJECT
forms of full Network Address Port Translation. This can be
controlled by iptables or nft.
+if NF_NAT_IPV6
+
+config NFT_CHAIN_NAT_IPV6
+ depends on NF_TABLES_IPV6
+ tristate "IPv6 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv6 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV6
+ tristate "IPv6 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection) for IPv6.
+
+config NFT_MASQ_IPV6
+ tristate "IPv6 masquerade support for nf_tables"
+ depends on NF_TABLES_IPV6
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV6
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
+endif # NF_NAT_IPV6
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
if IP6_NF_NAT
-config NF_NAT_MASQUERADE_IPV6
- tristate "IPv6 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection) for IPv6.
-
-config NFT_MASQ_IPV6
- tristate "IPv6 masquerade support for nf_tables"
- depends on NF_TABLES_IPV6
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV6
-
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NF_NAT_MASQUERADE_IPV6
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_MAC
+ tristate "hash:mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:mac set type support, by which
+ one can store MAC (ethernet address) elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_NETPORTNET
tristate "hash:net,port,net set support"
depends on IP_SET
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o
obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
return 0;
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
return 1;
}
ip_set_init_counter(ext_counter(x, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(x, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
return 0;
}
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
.len = sizeof(unsigned long),
.align = __alignof__(unsigned long),
},
+ [IPSET_EXT_ID_SKBINFO] = {
+ .type = IPSET_EXT_SKBINFO,
+ .flag = IPSET_FLAG_WITH_SKBINFO,
+ .len = sizeof(struct ip_set_skbinfo),
+ .align = __alignof__(struct ip_set_skbinfo),
+ },
[IPSET_EXT_ID_COMMENT] = {
.type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
.flag = IPSET_FLAG_WITH_COMMENT,
ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
{
+ u64 fullmark;
if (tb[IPSET_ATTR_TIMEOUT]) {
if (!(set->extensions & IPSET_EXT_TIMEOUT))
return -IPSET_ERR_TIMEOUT;
return -IPSET_ERR_COMMENT;
ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
}
-
+ if (tb[IPSET_ATTR_SKBMARK]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+ ext->skbmark = fullmark >> 32;
+ ext->skbmarkmask = fullmark & 0xffffffff;
+ }
+ if (tb[IPSET_ATTR_SKBPRIO]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbprio = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBPRIO]));
+ }
+ if (tb[IPSET_ATTR_SKBQUEUE]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbqueue = be16_to_cpu(nla_get_be16(
+ tb[IPSET_ATTR_SKBQUEUE]));
+ }
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
struct sk_buff *skb2;
struct nlmsgerr *errmsg;
- size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
+ size_t payload = min(SIZE_MAX,
+ sizeof(*errmsg) + nlmsg_len(nlh));
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
struct nlattr *cmdattr;
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(data, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
out:
rcu_read_unlock_bh();
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(data, set),
ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
return mtype_do_data_match(data);
}
struct HTYPE *h;
struct htable *t;
+#ifndef IP_SET_PROTO_UNDEF
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
markmask = 0xffffffff;
rcu_assign_pointer(h->table, t);
set->data = h;
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
+#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
+#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
+#endif
IPSET_TOKEN(HTYPE, 4_gc_init)(set,
IPSET_TOKEN(HTYPE, 4_gc));
+#ifndef IP_SET_PROTO_UNDEF
else
IPSET_TOKEN(HTYPE, 6_gc_init)(set,
IPSET_TOKEN(HTYPE, 6_gc));
+#endif
}
-
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
set->name, jhash_size(t->htable_bits),
t->htable_bits, h->maxelem, set->data, t);
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
/* 2 Comments support */
-#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
+/* 3 Forceadd support */
+#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+/* 1 Forceadd support */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
!ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
--- /dev/null
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:mac");
+
+/* Type specific function prefix */
+#define HTYPE hash_mac
+
+/* Member elements */
+struct hash_mac4_elem {
+ /* Zero valued IP addresses cannot be stored */
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_mac4_data_equal(const struct hash_mac4_elem *e1,
+ const struct hash_mac4_elem *e2,
+ u32 *multi)
+{
+ return ether_addr_equal(e1->ether, e2->ether);
+}
+
+static inline bool
+hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
+{
+ return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+}
+
+static inline void
+hash_mac4_data_next(struct hash_mac4_elem *next,
+ const struct hash_mac4_elem *e)
+{
+}
+
+#define MTYPE hash_mac4
+#define PF 4
+#define HOST_MASK 32
+#define IP_SET_EMIT_CREATE
+#define IP_SET_PROTO_UNDEF
+#include "ip_set_hash_gen.h"
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+static int
+hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ /* MAC can be src only */
+ if (!(opt->flags & IPSET_DIM_ONE_SRC))
+ return 0;
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -EINVAL;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_mac_type __read_mostly = {
+ .name = "hash:mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_mac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_mac_init(void)
+{
+ return ip_set_type_register(&hash_mac_type);
+}
+
+static void __exit
+hash_mac_fini(void)
+{
+ ip_set_type_unregister(&hash_mac_type);
+}
+
+module_init(hash_mac_init);
+module_exit(hash_mac_fini);
/* 2 nomatch flag support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
/* 2 /0 support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
#define IPSET_TYPE_REV_MIN 0
/* 0 Comments support added */
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support added */
+/* 2 Comments support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
ip_set_update_counter(ext_counter(e, set),
ext, &opt->ext,
cmdflags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(e, set),
+ ext, &opt->ext,
+ cmdflags);
return ret;
}
}
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
return 0;
}
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Set is already added to the list */
ip_set_put_byindex(map->net, d->id);
return 0;
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_FO
+ tristate "weighted failover scheduling"
+ ---help---
+ The weighted failover scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
#include <linux/interrupt.h>
#include <linux/in.h>
+#include <linux/inet.h>
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
+/* We need an addrstrlen that works with or without v6 */
+#ifdef CONFIG_IP_VS_IPV6
+#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
+#else
+#define IP_VS_ADDRSTRLEN (8+1)
+#endif
+
struct ip_vs_aligned_lock
{
spinlock_t l;
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+#endif
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
struct ip_vs_dest *dest;
rcu_read_lock();
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+
+ /* This function is only invoked by the synchronization code. We do
+ * not currently support heterogeneous pools with synchronization,
+ * so we can make the assumption that the svc_af is the same as the
+ * dest_af
+ */
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
- IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
ntohs(ct->dport));
/*
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(const struct ip_vs_conn_param *p,
+ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark)
{
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
+ cp->daf = dest_af;
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
&cp->vaddr, p->vaddr);
cp->vport = p->vport;
- ip_vs_addr_set(p->af, &cp->daddr, daddr);
+ ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+ char dbuf[IP_VS_ADDRSTRLEN];
if (!ip_vs_conn_net_eq(cp, net))
return 0;
}
pe_data[len] = '\0';
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%pI6 %04X %-11s %7lu%s\n",
+ "%s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
- " %08X %04X %-11s %7lu%s\n",
+ " %s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
{
+ char dbuf[IP_VS_ADDRSTRLEN];
if (v == SEQ_START_TOKEN)
seq_puts(seq,
if (!ip_vs_conn_net_eq(cp, net))
return 0;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
- seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
- "%08X %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
* This adds param.pe_data to the template,
* and thus param.pe_data will be destroyed
* when the template expires */
- ct = ip_vs_conn_new(¶m, &dest->addr, dport,
+ ct = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
src_port, &iph->daddr, dst_port, ¶m);
- cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
+ cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest,
+ skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
*ignored = -1;
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0], &iph->daddr,
pptr[1], &p);
- cp = ip_vs_conn_new(&p, &dest->addr,
+ cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
if (!cp) {
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
- IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
- IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
- cp = ip_vs_conn_new(&p, &daddr, 0,
+ cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL, skb->mark);
if (!cp)
* Called under RCU lock.
*/
static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
* Find the destination for the given service
*/
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
- if ((dest->af == svc->af)
- && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
- && (dest->port == dport)) {
+ if ((dest->af == dest_af) &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+ (dest->port == dport)) {
/* HIT */
return dest;
}
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
port = 0;
- dest = ip_vs_lookup_dest(svc, daddr, port);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
if (!dest)
- dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
return dest;
}
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- if (dest->af == svc->af &&
- ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+ if (dest->af == dest_af &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
dest->vfwmark == svc->fwmark &&
dest->protocol == svc->protocol &&
struct ip_vs_scheduler *sched;
int conn_flags;
+ /* We cannot modify an address and change the address family */
+ BUG_ON(!add && udest->af != dest->af);
+
+ if (add && udest->af != svc->af)
+ ipvs->mixed_address_family_dests++;
+
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold;
+ dest->af = udest->af;
+
spin_lock_bh(&dest->dst_lock);
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
EnterFunction(2);
#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6) {
+ if (udest->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
u64_stats_init(&ip_vs_dest_stats->syncp);
}
- dest->af = svc->af;
+ dest->af = udest->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
- ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
dest->port = udest->port;
atomic_set(&dest->activeconns, 0);
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest != NULL) {
* Check if the dest already exists in the trash and
* is from the same service
*/
- dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+ dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
- IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+ IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest == NULL) {
list_del_rcu(&dest->n_list);
svc->num_dests--;
+ if (dest->af != svc->af)
+ net_ipvs(svc->net)->mixed_address_family_dests--;
+
if (svcupd) {
struct ip_vs_scheduler *sched;
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
rcu_read_unlock();
if (dest == NULL) {
udest->weight = udest_compat->weight;
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
+ udest->af = AF_INET;
}
static int
if (count >= get->num_dests)
break;
+ /* Cannot expose heterogeneous members via sockopt
+ * interface
+ */
+ if (dest->af != svc->af)
+ continue;
+
entry.addr = dest->addr.ip;
entry.port = dest->port;
entry.conn_flags = atomic_read(&dest->conn_flags);
[IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+ [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
atomic_read(&dest->inactconns)) ||
nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
- atomic_read(&dest->persistconns)))
+ atomic_read(&dest->persistconns)) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
goto nla_put_failure;
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
struct nlattr *nla_addr, *nla_port;
+ struct nlattr *nla_addr_family;
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
nla_port = attrs[IPVS_DEST_ATTR_PORT];
+ nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
if (!(nla_addr && nla_port))
return -EINVAL;
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
+ if (nla_addr_family)
+ udest->af = nla_get_u16(nla_addr_family);
+ else
+ udest->af = 0;
+
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
+ /* The synchronization protocol is incompatible with mixed family
+ * services
+ */
+ if (net_ipvs(net)->mixed_address_family_dests > 0)
+ return -EINVAL;
+
return start_sync_thread(net,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
need_full_dest);
if (ret)
goto out;
+
+ /* Old protocols did not allow the user to specify address
+ * family, so we set it to zero instead. We also didn't
+ * allow heterogeneous pools in the old code, so it's safe
+ * to assume that this will have the same address family as
+ * the service.
+ */
+ if (udest.af == 0)
+ udest.af = svc->af;
+
+ if (udest.af != svc->af) {
+ /* The synchronization protocol is incompatible
+ * with mixed family services
+ */
+ if (net_ipvs(net)->sync_state) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Which connection types do we support? */
+ switch (udest.conn_flags) {
+ case IP_VS_CONN_F_TUNNEL:
+ /* We are able to forward this */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
}
switch (cmd) {
IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
--- /dev/null
+/*
+ * IPVS: Weighted Fail Over module
+ *
+ * Authors: Kenny Mathis <kmathis@chokepoint.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Kenny Mathis : added initial functionality based on weight
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* Weighted Fail Over Module */
+static struct ip_vs_dest *
+ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *hweight = NULL;
+ int hw = 0; /* Track highest weight */
+
+ IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n");
+
+ /* Basic failover functionality
+ * Find virtual server with highest weight and send it traffic
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > hw) {
+ hweight = dest;
+ hw = atomic_read(&dest->weight);
+ }
+ }
+
+ if (hweight) {
+ IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n",
+ IP_VS_DBG_ADDR(hweight->af, &hweight->addr),
+ ntohs(hweight->port),
+ atomic_read(&hweight->activeconns),
+ atomic_read(&hweight->weight));
+ return hweight;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_fo_scheduler = {
+ .name = "fo",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list),
+ .schedule = ip_vs_fo_schedule,
+};
+
+static int __init ip_vs_fo_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_fo_scheduler);
+}
+
+static void __exit ip_vs_fo_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_fo_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_fo_init);
+module_exit(ip_vs_fo_cleanup);
+MODULE_LICENSE("GPL");
ip_vs_conn_fill_param(ip_vs_conn_net(cp),
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
- n_cp = ip_vs_conn_new(&p, &from, port,
+ /* As above, this is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
cp->dest, skb->mark);
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
- n_cp = ip_vs_conn_new(&p, &cp->daddr,
+ /* This is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr,
htons(ntohs(cp->dport)-1),
IP_VS_CONN_F_NFCT, cp->dest,
skb->mark);
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
- en = ip_vs_lblc_get(dest->af, tbl, daddr);
+ en = ip_vs_lblc_get(af, tbl, daddr);
if (en) {
if (en->dest == dest)
return en;
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
ip_vs_dest_hold(dest);
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblc_new(tbl, &iph->daddr, dest);
+ ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblcr_entry *en;
- en = ip_vs_lblcr_get(dest->af, tbl, daddr);
+ en = ip_vs_lblcr_get(af, tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
/* initialize its dest set */
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblcr_new(tbl, &iph->daddr, dest);
+ ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
else
IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
"inactconns %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->inactconns));
out:
IP_VS_DBG_BUF(6, "NQ: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt), atomic_read(&dest->weight));
IP_VS_DBG_BUF(6, "SED: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
/* if the original dest is unavailable, loop around the table
* starting from ihash to find a new dest
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable "
"server %s:%d (offset %d), reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port), roffset);
}
RCU_INIT_POINTER(b->dest, dest);
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
- i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
atomic_read(&dest->weight));
/* Don't move to next dest until filling weight */
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->saddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
* but still handled.
*/
rcu_read_lock();
- dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
- param->vport, protocol, fwmark, flags);
+ /* This function is only invoked by the synchronization
+ * code. We do not currently support heterogeneous pools
+ * with synchronization, so we can make the assumption that
+ * the svc_af is the same as the dest_af
+ */
+ dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ param->vaddr, param->vport, protocol,
+ fwmark, flags);
- cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
+ fwmark);
rcu_read_unlock();
if (!cp) {
kfree(param->pe_data);
IP_VS_DBG_BUF(6, "WLC: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
found:
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
return rt;
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+ return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
+}
+#endif
+
+static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+ int rt_mode,
+ bool new_rt_is_local)
+{
+ bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
+ bool source_is_loopback;
+ bool old_rt_is_local;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+
+ source_is_loopback =
+ (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ (addr_type & IPV6_ADDR_LOOPBACK);
+ old_rt_is_local = __ip_vs_is_local_route6(
+ (struct rt6_info *)skb_dst(skb));
+ } else
+#endif
+ {
+ source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr);
+ old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+ }
+
+ if (unlikely(new_rt_is_local)) {
+ if (!rt_mode_allow_local)
+ return true;
+ if (!rt_mode_allow_redirect && !old_rt_is_local)
+ return true;
+ } else {
+ if (!rt_mode_allow_non_local)
+ return true;
+ if (source_is_loopback)
+ return true;
+ }
+ return false;
+}
+
+static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
+{
+ struct sock *sk = skb->sk;
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+}
+
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+ struct ip_vs_iphdr *ipvsh,
+ struct sk_buff *skb, int mtu)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
+ return false;
+ }
+ } else
+#endif
+ {
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+ /* If we're going to tunnel the packet and pmtu discovery
+ * is disabled, we'll just fragment it anyway
+ */
+ if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+ return true;
+
+ if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
+ skb->len > mtu && !skb_is_gso(skb))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n",
+ &ip_hdr(skb)->saddr);
+ return false;
+ }
+ }
+
+ return true;
+}
+
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, int rt_mode, __be32 *ret_saddr)
+__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+ __be32 daddr, int rt_mode, __be32 *ret_saddr,
+ struct ip_vs_iphdr *ipvsh)
{
struct net *net = dev_net(skb_dst(skb)->dev);
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
- struct rtable *ort; /* Original route */
- struct iphdr *iph;
- __be16 df;
int mtu;
int local, noref = 1;
}
local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
- (rt->rt_flags & RTCF_LOCAL) ?
- "local":"non-local", &daddr);
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI4\n", &dest->addr.ip);
goto err_put;
}
- iph = ip_hdr(skb);
- if (likely(!local)) {
- if (unlikely(ipv4_is_loopback(iph->saddr))) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI4 to non-local address, dest: %pI4\n",
- &iph->saddr, &daddr);
- goto err_put;
- }
- } else {
- ort = skb_rtable(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !(ort->rt_flags & RTCF_LOCAL)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
- "local requires NAT method, dest: %pI4\n",
- &iph->daddr, &daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
ip_rt_put(rt);
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
- df = iph->frag_off & htons(IP_DF);
} else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
}
- ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
- /* MTU check allowed? */
- df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- /* MTU checking */
- if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
}
#ifdef CONFIG_IP_VS_IPV6
-
-static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
-{
- return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
-}
-
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
struct in6_addr *ret_saddr, int do_xfrm)
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
- struct rt6_info *ort; /* Original route */
struct dst_entry *dst;
int mtu;
int local, noref = 1;
}
local = __ip_vs_is_local_route6(rt);
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
- local ? "local":"non-local", daddr);
+
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI6\n", &dest->addr.in6);
goto err_put;
}
- if (likely(!local)) {
- if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
- IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI6c to non-local address, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->saddr, daddr);
- goto err_put;
- }
- } else {
- ort = (struct rt6_info *) skb_dst(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !__ip_vs_is_local_route6(ort)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6c "
- "to local requires NAT method, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->daddr, daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
dst_release(&rt->dst);
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
mtu = dst_mtu(&rt->dst);
else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
goto err_put;
}
- ort = (struct rt6_info *) skb_dst(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
- if (!skb->dev)
- skb->dev = net->loopback_dev;
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
- NULL) < 0)
+ if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
ip_send_check(iph);
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL);
+ IP_VS_RT_MODE_RDR, NULL, ipvsh);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR);
}
#endif
+/* When forwarding a packet, we must ensure that we've got enough headroom
+ * for the encapsulation packet in the skb. This also gives us an
+ * opportunity to figure out what the payload_len, dsfield, ttl, and df
+ * values should be, so that we won't need to look at the old ip header
+ * again
+ */
+static struct sk_buff *
+ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+ unsigned int max_headroom, __u8 *next_protocol,
+ __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
+ __be16 *df)
+{
+ struct sk_buff *new_skb = NULL;
+ struct iphdr *old_iph = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+ struct ipv6hdr *old_ipv6h = NULL;
+#endif
+
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb)
+ goto error;
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ old_ipv6h = ipv6_hdr(skb);
+ *next_protocol = IPPROTO_IPV6;
+ if (payload_len)
+ *payload_len =
+ ntohs(old_ipv6h->payload_len) +
+ sizeof(*old_ipv6h);
+ *dsfield = ipv6_get_dsfield(old_ipv6h);
+ *ttl = old_ipv6h->hop_limit;
+ if (df)
+ *df = 0;
+ } else
+#endif
+ {
+ old_iph = ip_hdr(skb);
+ /* Copy DF, reset fragment offset and MF */
+ if (df)
+ *df = (old_iph->frag_off & htons(IP_DF));
+ *next_protocol = IPPROTO_IPIP;
+
+ /* fix old IP header checksum */
+ ip_send_check(old_iph);
+ *dsfield = ipv4_get_dsfield(old_iph);
+ *ttl = old_iph->ttl;
+ if (payload_len)
+ *payload_len = ntohs(old_iph->tot_len);
+ }
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return ERR_PTR(-ENOMEM);
+}
+
+static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
+{
+ if (encaps_af == AF_INET) {
+ if (orig_af == AF_INET)
+ return SKB_GSO_IPIP;
+
+ return SKB_GSO_SIT;
+ }
+
+ /* GSO: we need to provide proper SKB_GSO_ value for IPv6:
+ * SKB_GSO_SIT/IPV6
+ */
+ return 0;
+}
/*
* IP Tunneling transmitter
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- u8 tos = old_iph->tos;
- __be16 df;
+ __u8 next_protocol = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
+ __be16 df = 0;
+ __be16 *dfp = NULL;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
- IP_VS_RT_MODE_TUNNEL, &saddr);
+ IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
rt = skb_rtable(skb);
tdev = rt->dst.dev;
- /* Copy DF, reset fragment offset and MF */
- df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
-
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ip_hdr(skb);
- }
-
- /* fix old IP header checksum */
- ip_send_check(old_iph);
+ /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
+ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, NULL, &dsfield,
+ &ttl, dfp);
+ if (IS_ERR(skb))
+ goto tx_error;
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET, cp->af));
if (IS_ERR(skb))
goto tx_error;
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
- iph->protocol = IPPROTO_IPIP;
- iph->tos = tos;
+ iph->protocol = next_protocol;
+ iph->tos = dsfield;
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
- iph->ttl = old_iph->ttl;
+ iph->ttl = ttl;
ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct ipv6hdr *old_iph = ipv6_hdr(skb);
+ __u8 next_protocol = 0;
+ __u32 payload_len = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
-
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ipv6_hdr(skb);
- }
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, &payload_len,
+ &dsfield, &ttl, NULL);
+ if (IS_ERR(skb))
+ goto tx_error;
- /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */
- skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
if (IS_ERR(skb))
goto tx_error;
*/
iph = ipv6_hdr(skb);
iph->version = 6;
- iph->nexthdr = IPPROTO_IPV6;
- iph->payload_len = old_iph->payload_len;
- be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
+ iph->nexthdr = next_protocol;
+ iph->payload_len = htons(payload_len);
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
- ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
+ ipv6_change_dsfield(iph, 0, dsfield);
iph->daddr = cp->daddr.in6;
iph->saddr = saddr;
- iph->hop_limit = old_iph->hop_limit;
+ iph->hop_limit = ttl;
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL);
+ IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL);
if (local < 0)
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ NULL, iph);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static bool nf_generic_should_process(u8 proto)
+{
+ switch (proto) {
+#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
+ case IPPROTO_SCTP:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
+ case IPPROTO_DCCP:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
+ case IPPROTO_GRE:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
+ case IPPROTO_UDPLITE:
+ return false;
+#endif
+ default:
+ return true;
+ }
+}
+
static inline struct nf_generic_net *generic_pernet(struct net *net)
{
return &net->ct.nf_ct_proto.generic;
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
- return true;
+ return nf_generic_should_process(nf_ct_protonum(ct));
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
-static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table)
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table);
+ err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_table_info(skb,
+ if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWTABLE,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
family, table);
if (err < 0)
return -ENOSPC;
}
-static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table,
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWCHAIN,
NLM_F_MULTI,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
family, table, chain);
if (err < 0)
.len = NFT_USERDATA_MAXLEN },
};
-static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
const struct nft_rule *rule)
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule);
if (err < 0)
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
goto nla_put_failure;
}
+ if (set->policy != NFT_SET_POL_PERFORMANCE) {
+ if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
+ goto nla_put_failure;
+ }
+
desc = nla_nest_start(skb, NFTA_SET_DESC);
if (desc == NULL)
goto nla_put_failure;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
+ set->policy = policy;
err = ops->init(set, &desc, nla);
if (err < 0)
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
return err;
}
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
+
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct sk_buff *skb2;
+ int err;
+
+ if (nlmsg_report(nlh) &&
+ !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ goto err;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0) {
+ kfree_skb(skb2);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
+ NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+err:
+ if (err < 0) {
+ nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ err);
+ }
+ return err;
+}
+
+static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ struct net *net = sock_net(skb->sk);
+ struct sk_buff *skb2;
+ int err;
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
.call_batch = nf_tables_newtable,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_GETGEN] = {
+ .call = nf_tables_getgen,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
}
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
return 0;
}
* original skb.
*/
if (err == -EAGAIN) {
- ss->abort(skb);
+ ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
goto replay;
}
done:
if (success && done)
- ss->commit(skb);
+ ss->commit(oskb);
else
- ss->abort(skb);
+ ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
static int xt_match_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_match_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
static int xt_target_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_target_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
#define set_target_v2_checkentry set_target_v1_checkentry
#define set_target_v2_destroy set_target_v1_destroy
+/* Revision 3 target */
+
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ADT_OPT(add_opt, par->family, info->add_set.dim,
+ info->add_set.flags, info->flags, info->timeout);
+ ADT_OPT(del_opt, par->family, info->del_set.dim,
+ info->del_set.flags, 0, UINT_MAX);
+ ADT_OPT(map_opt, par->family, info->map_set.dim,
+ info->map_set.flags, 0, UINT_MAX);
+
+ int ret;
+
+ /* Normalize to fit into jiffies */
+ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+ add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+ IPSET_FLAG_MAP_SKBPRIO |
+ IPSET_FLAG_MAP_SKBQUEUE);
+ ret = match_set(info->map_set.index, skb, par, &map_opt,
+ info->map_set.flags & IPSET_INV_MATCH);
+ if (!ret)
+ return XT_CONTINUE;
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+ skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
+ ^ (map_opt.ext.skbmark);
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+ skb->priority = map_opt.ext.skbprio;
+ if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+ skb->dev &&
+ skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
+ skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+ }
+ return XT_CONTINUE;
+}
+
+
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ip_set_id_t index;
+
+ if (info->add_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->add_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->del_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->del_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ if (strncmp(par->table, "mangle", 7)) {
+ pr_warn("--map-set only usable from mangle table\n");
+ return -EINVAL;
+ }
+ if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+ (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+ !(par->hook_mask & (1 << NF_INET_FORWARD |
+ 1 << NF_INET_LOCAL_OUT |
+ 1 << NF_INET_POST_ROUTING))) {
+ pr_warn("mapping of prio or/and queue is allowed only"
+ "from OUTPUT/FORWARD/POSTROUTING chains\n");
+ return -EINVAL;
+ }
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->map_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find map_set index %u as target\n",
+ info->map_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->del_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->add_set.dim > IPSET_DIM_MAX ||
+ info->del_set.dim > IPSET_DIM_MAX ||
+ info->map_set.dim > IPSET_DIM_MAX) {
+ pr_warn("Protocol error: SET target dimension "
+ "is over the limit!\n");
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+}
+
+
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
.destroy = set_target_v2_destroy,
.me = THIS_MODULE
},
+ /* --map-set support */
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
};
static int __init xt_set_init(void)