net: move inet_dport/inet_num in sock_common
author Eric Dumazet <edumazet@google.com>
Fri, 30 Nov 2012 09:49:27 +0000 (09:49 +0000)
committer David S. Miller <davem@davemloft.net>
Fri, 30 Nov 2012 20:02:56 +0000 (15:02 -0500)
Commit 68835aba4d9b (net: optimize INET input path further)
moved some fields used for TCP/UDP socket lookups into the first cache
line of struct sock_common.

This patch moves inet_dport/inet_num as well, filling a 32-bit hole
on 64-bit arches and reducing the number of cache line misses in lookups.

Also change INET_MATCH()/INET_TW_MATCH() to perform the port match
before the address match, as the port check is more discriminating.

Remove the hash check from the MATCH() macros (callers now test sk_hash
themselves before invoking them) because we don't need to revalidate
the hash value after taking a refcount on the socket, and use
likely/unlikely compiler hints, as the sk_hash/hash check makes the
following conditional tests 100% predicted by the CPU.

Introduce skc_addrpair/skc_portpair pair values to better
document the alignment requirements of the port/addr pairs
used in the various MATCH() macros, and remove some casts.

The namespace check can also be done last.

This slightly improves TCP/UDP lookup times.

IP/TCP early demux needs inet->rx_dst_ifindex and
TCP needs inet->min_ttl; let's group them together in the same cache line.

With help from Ben Hutchings & Joe Perches.

The idea for this patch came from Ling Ma's proposal to move skc_hash
to the beginning of struct sock_common, and it should allow him
to submit a final version of his patch. My tests show an improvement
from doing so.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Joe Perches <joe@perches.com>
Cc: Ling Ma <ling.ma.program@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/ipv6.h
include/net/inet_hashtables.h
include/net/inet_sock.h
include/net/inet_timewait_sock.h
include/net/sock.h
net/ipv4/inet_hashtables.c
net/ipv6/inet6_hashtables.c

index 5e11905a4f0194dc9c0f19aa5b02be26748378b8..12729e966dc9c3adde7d058641433b9331c38b61 100644 (file)
@@ -364,20 +364,22 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define inet_v6_ipv6only(__sk)         0
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-#define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\
-       (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)   && \
-        ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \
-        ((__sk)->sk_family             == AF_INET6)            && \
-        ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))     && \
-        ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-
-#define INET6_TW_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif) \
-       (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)   && \
-        (*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports))   && \
-        ((__sk)->sk_family            == PF_INET6)                     && \
-        (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)))   && \
-        (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr))) && \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)     \
+       ((inet_sk(__sk)->inet_portpair == (__ports))            &&      \
+        ((__sk)->sk_family == AF_INET6)                        &&      \
+        ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))     &&      \
+        ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) &&      \
+        (!(__sk)->sk_bound_dev_if      ||                              \
+          ((__sk)->sk_bound_dev_if == (__dif)))                &&      \
+        net_eq(sock_net(__sk), (__net)))
+
+#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)     \
+       ((inet_twsk(__sk)->tw_portpair == (__ports))                    && \
+        ((__sk)->sk_family == AF_INET6)                                && \
+        ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))     && \
+        ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
+        (!(__sk)->sk_bound_dev_if      ||                                 \
+         ((__sk)->sk_bound_dev_if == (__dif)))                         && \
+        net_eq(sock_net(__sk), (__net)))
 
 #endif /* _IPV6_H */
index 54be0287eb982cb55f7d26973ae7aa281de8a72c..d1de4fbd45c2d1fb76d4bbd665a631f9d7c2629d 100644 (file)
@@ -299,30 +299,34 @@ typedef __u64 __bitwise __addrpair;
                                   (((__force __u64)(__be32)(__daddr)) << 32) | \
                                   ((__force __u64)(__be32)(__saddr)));
 #endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-       (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&    \
-        ((*((__addrpair *)&(inet_sk(__sk)->inet_daddr))) == (__cookie))  &&    \
-        ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports))   &&    \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-       (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&    \
-        ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&     \
-        ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&      \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)    \
+       ((inet_sk(__sk)->inet_portpair == (__ports))            &&      \
+        (inet_sk(__sk)->inet_addrpair == (__cookie))           &&      \
+        (!(__sk)->sk_bound_dev_if      ||                              \
+          ((__sk)->sk_bound_dev_if == (__dif)))                &&      \
+        net_eq(sock_net(__sk), (__net)))
+#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
+       ((inet_twsk(__sk)->tw_portpair == (__ports))    &&              \
+        (inet_twsk(__sk)->tw_addrpair == (__cookie))   &&              \
+        (!(__sk)->sk_bound_dev_if      ||                              \
+          ((__sk)->sk_bound_dev_if == (__dif)))        &&              \
+        net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
-#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)    \
-       (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))       &&      \
-        (inet_sk(__sk)->inet_daddr     == (__saddr))           &&      \
-        (inet_sk(__sk)->inet_rcv_saddr == (__daddr))           &&      \
-        ((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) &&      \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif)  \
-       (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))       &&      \
-        (inet_twsk(__sk)->tw_daddr     == (__saddr))           &&      \
-        (inet_twsk(__sk)->tw_rcv_saddr == (__daddr))           &&      \
-        ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&      \
-        (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+       ((inet_sk(__sk)->inet_portpair == (__ports))    &&              \
+        (inet_sk(__sk)->inet_daddr     == (__saddr))   &&              \
+        (inet_sk(__sk)->inet_rcv_saddr == (__daddr))   &&              \
+        (!(__sk)->sk_bound_dev_if      ||                              \
+          ((__sk)->sk_bound_dev_if == (__dif)))        &&              \
+        net_eq(sock_net(__sk), (__net)))
+#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+       ((inet_twsk(__sk)->tw_portpair == (__ports))    &&              \
+        (inet_twsk(__sk)->tw_daddr     == (__saddr))   &&              \
+        (inet_twsk(__sk)->tw_rcv_saddr == (__daddr))   &&              \
+        (!(__sk)->sk_bound_dev_if      ||                              \
+          ((__sk)->sk_bound_dev_if == (__dif)))        &&              \
+        net_eq(sock_net(__sk), (__net)))
 #endif /* 64-bit arch */
 
 /*
index 256c1ed2d69afc06cfbc8b51550c7b374a166afe..a4196cbc84eca774d45a2d275f4a38eb5be775f3 100644 (file)
@@ -144,9 +144,11 @@ struct inet_sock {
        /* Socket demultiplex comparisons on incoming packets. */
 #define inet_daddr             sk.__sk_common.skc_daddr
 #define inet_rcv_saddr         sk.__sk_common.skc_rcv_saddr
+#define inet_addrpair          sk.__sk_common.skc_addrpair
+#define inet_dport             sk.__sk_common.skc_dport
+#define inet_num               sk.__sk_common.skc_num
+#define inet_portpair          sk.__sk_common.skc_portpair
 
-       __be16                  inet_dport;
-       __u16                   inet_num;
        __be32                  inet_saddr;
        __s16                   uc_ttl;
        __u16                   cmsg_flags;
@@ -154,6 +156,7 @@ struct inet_sock {
        __u16                   inet_id;
 
        struct ip_options_rcu __rcu     *inet_opt;
+       int                     rx_dst_ifindex;
        __u8                    tos;
        __u8                    min_ttl;
        __u8                    mc_ttl;
@@ -170,7 +173,6 @@ struct inet_sock {
        int                     uc_index;
        int                     mc_index;
        __be32                  mc_addr;
-       int                     rx_dst_ifindex;
        struct ip_mc_socklist __rcu     *mc_list;
        struct inet_cork_full   cork;
 };
index ba52c830a7a54569795dcd30a32813e1dc580a75..7d658d5773681347bb3e799990dd9947c73e8e32 100644 (file)
@@ -112,6 +112,11 @@ struct inet_timewait_sock {
 #define tw_net                 __tw_common.skc_net
 #define tw_daddr               __tw_common.skc_daddr
 #define tw_rcv_saddr           __tw_common.skc_rcv_saddr
+#define tw_addrpair            __tw_common.skc_addrpair
+#define tw_dport               __tw_common.skc_dport
+#define tw_num                 __tw_common.skc_num
+#define tw_portpair            __tw_common.skc_portpair
+
        int                     tw_timeout;
        volatile unsigned char  tw_substate;
        unsigned char           tw_rcv_wscale;
@@ -119,8 +124,6 @@ struct inet_timewait_sock {
        /* Socket demultiplex comparisons on incoming packets. */
        /* these three are in inet_sock */
        __be16                  tw_sport;
-       __be16                  tw_dport;
-       __u16                   tw_num;
        kmemcheck_bitfield_begin(flags);
        /* And these are ours. */
        unsigned int            tw_ipv6only     : 1,
index c945fba4f54351475ff2efb989f77b23237f60d4..c4132c1b63a8cbecca64b7f8bd43c62b62fe93f7 100644 (file)
@@ -132,6 +132,8 @@ struct net;
  *     @skc_rcv_saddr: Bound local IPv4 addr
  *     @skc_hash: hash value used with various protocol lookup tables
  *     @skc_u16hashes: two u16 hash values used by UDP lookup tables
+ *     @skc_dport: placeholder for inet_dport/tw_dport
+ *     @skc_num: placeholder for inet_num/tw_num
  *     @skc_family: network address family
  *     @skc_state: Connection state
  *     @skc_reuse: %SO_REUSEADDR setting
@@ -149,16 +151,29 @@ struct net;
  *     for struct sock and struct inet_timewait_sock.
  */
 struct sock_common {
-       /* skc_daddr and skc_rcv_saddr must be grouped :
-        * cf INET_MATCH() and INET_TW_MATCH()
+       /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
+        * address on 64bit arches : cf INET_MATCH() and INET_TW_MATCH()
         */
-       __be32                  skc_daddr;
-       __be32                  skc_rcv_saddr;
-
+       union {
+               unsigned long   skc_addrpair;
+               struct {
+                       __be32  skc_daddr;
+                       __be32  skc_rcv_saddr;
+               };
+       };
        union  {
                unsigned int    skc_hash;
                __u16           skc_u16hashes[2];
        };
+       /* skc_dport && skc_num must be grouped as well */
+       union {
+               u32             skc_portpair;
+               struct {
+                       __be16  skc_dport;
+                       __u16   skc_num;
+               };
+       };
+
        unsigned short          skc_family;
        volatile unsigned char  skc_state;
        unsigned char           skc_reuse;
index 7880af97020885a3ae1b1fdbfbddbeaf9c4d2bbc..fa3ae814871082e22121855a4033ddc4ec21b1c7 100644 (file)
@@ -237,12 +237,14 @@ struct sock *__inet_lookup_established(struct net *net,
        rcu_read_lock();
 begin:
        sk_nulls_for_each_rcu(sk, node, &head->chain) {
-               if (INET_MATCH(sk, net, hash, acookie,
-                                       saddr, daddr, ports, dif)) {
+               if (sk->sk_hash != hash)
+                       continue;
+               if (likely(INET_MATCH(sk, net, acookie,
+                                     saddr, daddr, ports, dif))) {
                        if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
                                goto begintw;
-                       if (unlikely(!INET_MATCH(sk, net, hash, acookie,
-                               saddr, daddr, ports, dif))) {
+                       if (unlikely(!INET_MATCH(sk, net, acookie,
+                                                saddr, daddr, ports, dif))) {
                                sock_put(sk);
                                goto begin;
                        }
@@ -260,14 +262,18 @@ begin:
 begintw:
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_nulls_for_each_rcu(sk, node, &head->twchain) {
-               if (INET_TW_MATCH(sk, net, hash, acookie,
-                                       saddr, daddr, ports, dif)) {
+               if (sk->sk_hash != hash)
+                       continue;
+               if (likely(INET_TW_MATCH(sk, net, acookie,
+                                        saddr, daddr, ports,
+                                        dif))) {
                        if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
                                sk = NULL;
                                goto out;
                        }
-                       if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie,
-                                saddr, daddr, ports, dif))) {
+                       if (unlikely(!INET_TW_MATCH(sk, net, acookie,
+                                                   saddr, daddr, ports,
+                                                   dif))) {
                                sock_put(sk);
                                goto begintw;
                        }
@@ -314,10 +320,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 
        /* Check TIME-WAIT sockets first. */
        sk_nulls_for_each(sk2, node, &head->twchain) {
-               tw = inet_twsk(sk2);
+               if (sk2->sk_hash != hash)
+                       continue;
 
-               if (INET_TW_MATCH(sk2, net, hash, acookie,
-                                       saddr, daddr, ports, dif)) {
+               if (likely(INET_TW_MATCH(sk2, net, acookie,
+                                        saddr, daddr, ports, dif))) {
+                       tw = inet_twsk(sk2);
                        if (twsk_unique(sk, sk2, twp))
                                goto unique;
                        else
@@ -328,8 +336,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 
        /* And established part... */
        sk_nulls_for_each(sk2, node, &head->chain) {
-               if (INET_MATCH(sk2, net, hash, acookie,
-                                       saddr, daddr, ports, dif))
+               if (sk2->sk_hash != hash)
+                       continue;
+               if (likely(INET_MATCH(sk2, net, acookie,
+                                     saddr, daddr, ports, dif)))
                        goto not_unique;
        }
 
index 73f1a00a96afcd194e5567c36e2d1956a62018ca..dea17fd28e5037295d56cf517671b479a1bca825 100644 (file)
@@ -87,11 +87,13 @@ struct sock *__inet6_lookup_established(struct net *net,
        rcu_read_lock();
 begin:
        sk_nulls_for_each_rcu(sk, node, &head->chain) {
-               /* For IPV6 do the cheaper port and family tests first. */
-               if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+               if (sk->sk_hash != hash)
+                       continue;
+               if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
                        if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
                                goto begintw;
-                       if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+                       if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
+                                                 ports, dif))) {
                                sock_put(sk);
                                goto begin;
                        }
@@ -104,12 +106,16 @@ begin:
 begintw:
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_nulls_for_each_rcu(sk, node, &head->twchain) {
-               if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+               if (sk->sk_hash != hash)
+                       continue;
+               if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
+                                         ports, dif))) {
                        if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
                                sk = NULL;
                                goto out;
                        }
-                       if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+                       if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
+                                                    ports, dif))) {
                                sock_put(sk);
                                goto begintw;
                        }
@@ -236,9 +242,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 
        /* Check TIME-WAIT sockets first. */
        sk_nulls_for_each(sk2, node, &head->twchain) {
-               tw = inet_twsk(sk2);
+               if (sk2->sk_hash != hash)
+                       continue;
 
-               if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
+               if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
+                                         ports, dif))) {
+                       tw = inet_twsk(sk2);
                        if (twsk_unique(sk, sk2, twp))
                                goto unique;
                        else
@@ -249,7 +258,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 
        /* And established part... */
        sk_nulls_for_each(sk2, node, &head->chain) {
-               if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
+               if (sk2->sk_hash != hash)
+                       continue;
+               if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
                        goto not_unique;
        }