From: Eric Dumazet Date: Tue, 15 Sep 2015 22:24:20 +0000 (-0700) Subject: tcp: provide skb->hash to synack packets X-Git-Tag: firefly_0821_release~176^2~818^2~328 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=58d607d3e52f2b15902f58a1161da9fb3b0f6d47;p=firefly-linux-kernel-4.4.55.git tcp: provide skb->hash to synack packets In commit b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit"), Tom provided an L4 hash to most outgoing TCP packets. We'd like to provide one as well for SYNACK packets, so that all packets of a given flow share the same txhash, to later enable the bonding driver to also use skb->hash to perform slave selection. Note that a SYNACK retransmit shuffles the tx hash, as Tom did in commit 265f94ff54d62 ("net: Recompute sk_txhash on negative routing advice") for established sockets. This has the nice effect of making TCP flows resilient to some kinds of black holes, even during the connection establishment phase. Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Acked-by: Tom Herbert Signed-off-by: David S. 
Miller --- diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 48c3696e8645..937b97893d5f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -113,6 +113,7 @@ struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; bool tfo_listener; + u32 txhash; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ diff --git a/include/net/sock.h b/include/net/sock.h index 7aa78440559a..94dff7f566f5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1654,12 +1654,16 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); -static inline void sk_set_txhash(struct sock *sk) +static inline u32 net_tx_rndhash(void) { - sk->sk_txhash = prandom_u32(); + u32 v = prandom_u32(); + + return v ?: 1; +} - if (unlikely(!sk->sk_txhash)) - sk->sk_txhash = 1; +static inline void sk_set_txhash(struct sock *sk) +{ + sk->sk_txhash = net_tx_rndhash(); } static inline void sk_rethink_txhash(struct sock *sk) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8f515bb19c4..a62e9c76d485 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6228,6 +6228,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); fastopen = !want_cookie && tcp_try_fastopen(sk, skb, req, &foc, dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 93898e093d4e..d671d742a239 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1276,8 +1276,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; + newsk->sk_txhash = tcp_rsk(req)->txhash; inet_csk(newsk)->icsk_ext_hdr_len = 0; - sk_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = 
inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f9a8a12b62ee..d0ad3554c333 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2987,6 +2987,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, rcu_read_lock(); md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif + skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); @@ -3505,6 +3506,7 @@ int tcp_rtx_synack(struct sock *sk, struct request_sock *req) struct flowi fl; int res; + tcp_rsk(req)->txhash = net_tx_rndhash(); res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 97d9314ea361..f9c0e2640671 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; - sk_set_txhash(newsk); + newsk->sk_txhash = tcp_rsk(req)->txhash; /* Now IPv6 options...