tcp: add tcp_tsopt_ecr_before helper

[firefly-linux-kernel-4.4.55.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 728f5b3d3c64197bb526240a078744d5a950c8ea..1e97e73e5ecf1a3068355403a3c1a5ea52de98e0 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -95,6 +95,7 @@ int sysctl_tcp_stdurg __read_mostly;
  int sysctl_tcp_rfc1337 __read_mostly;
  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
  int sysctl_tcp_frto __read_mostly = 2;
+int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
  
  int sysctl_tcp_thin_dupack __read_mostly;
  
@@ -109,6 +110,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
  #define FLAG_SYN_ACKED         0x10 /* This ACK acknowledged SYN.              */
  #define FLAG_DATA_SACKED       0x20 /* New SACK.                               */
  #define FLAG_ECE               0x40 /* ECE in this ACK                         */
+#define FLAG_LOST_RETRANS      0x80 /* This ACK marks some retransmission lost */
  #define FLAG_SLOWPATH          0x100 /* Do not skip RFC checks for window update.*/
  #define FLAG_ORIG_SACK_ACKED   0x200 /* Never retransmitted data are (s)acked  */
  #define FLAG_SND_UNA_ADVANCED  0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -196,11 +198,13 @@ static void tcp_enter_quickack_mode(struct sock *sk)
   * and the session is not interactive.
   */
  
-static inline bool tcp_in_quickack_mode(const struct sock *sk)
+static bool tcp_in_quickack_mode(struct sock *sk)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct dst_entry *dst = __sk_dst_get(sk);
  
-       return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
+       return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
+               (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
  }
  
  static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -750,13 +754,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
   * TCP pacing, to smooth the burst on large writes when packets
   * in flight is significantly lower than cwnd (or rwin)
   */
+int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
+int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
+
  static void tcp_update_pacing_rate(struct sock *sk)
  {
         const struct tcp_sock *tp = tcp_sk(sk);
         u64 rate;
  
         /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
-       rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
+       rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
+
+       /* current rate is (cwnd * mss) / srtt
+        * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
+        * In Congestion Avoidance phase, set it to 120 % the current rate.
+        *
+        * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+        *       If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+        *       end of slow start and should slow down.
+        */
+       if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+               rate *= sysctl_tcp_pacing_ss_ratio;
+       else
+               rate *= sysctl_tcp_pacing_ca_ratio;
  
         rate *= max(tp->snd_cwnd, tp->packets_out);
  
@@ -1028,70 +1048,6 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
         return !before(start_seq, end_seq - tp->max_window);
  }
  
-/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "B". Later note: FACK people cheated me again 8), we have to account
- * for reordering! Ugly, but should help.
- *
- * Search retransmitted skbs from write_queue that were sent when snd_nxt was
- * less than what is now known to be received by the other end (derived from
- * highest SACK block). Also calculate the lowest snd_nxt among the remaining
- * retransmitted skbs to avoid some costly processing per ACKs.
- */
-static void tcp_mark_lost_retrans(struct sock *sk)
-{
-       const struct inet_connection_sock *icsk = inet_csk(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb;
-       int cnt = 0;
-       u32 new_low_seq = tp->snd_nxt;
-       u32 received_upto = tcp_highest_sack_seq(tp);
-
-       if (!tcp_is_fack(tp) || !tp->retrans_out ||
-           !after(received_upto, tp->lost_retrans_low) ||
-           icsk->icsk_ca_state != TCP_CA_Recovery)
-               return;
-
-       tcp_for_write_queue(skb, sk) {
-               u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
-
-               if (skb == tcp_send_head(sk))
-                       break;
-               if (cnt == tp->retrans_out)
-                       break;
-               if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-                       continue;
-
-               if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
-                       continue;
-
-               /* TODO: We would like to get rid of tcp_is_fack(tp) only
-                * constraint here (see above) but figuring out that at
-                * least tp->reordering SACK blocks reside between ack_seq
-                * and received_upto is not easy task to do cheaply with
-                * the available datastructures.
-                *
-                * Whether FACK should check here for tp->reordering segs
-                * in-between one could argue for either way (it would be
-                * rather simple to implement as we could count fack_count
-                * during the walk and do tp->fackets_out - fack_count).
-                */
-               if (after(received_upto, ack_seq)) {
-                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-                       tp->retrans_out -= tcp_skb_pcount(skb);
-
-                       tcp_skb_mark_lost_uncond_verify(tp, skb);
-                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
-               } else {
-                       if (before(ack_seq, new_low_seq))
-                               new_low_seq = ack_seq;
-                       cnt += tcp_skb_pcount(skb);
-               }
-       }
-
-       if (tp->retrans_out)
-               tp->lost_retrans_low = new_low_seq;
-}
-
  static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
                             struct tcp_sack_block_wire *sp, int num_sacks,
                             u32 prior_snd_una)
@@ -1818,7 +1774,6 @@ advance_sp:
             ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
                 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
  
-       tcp_mark_lost_retrans(sk);
         tcp_verify_left_out(tp);
  out:
  
@@ -2295,14 +2250,19 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
         tp->snd_cwnd_stamp = tcp_time_stamp;
  }
  
+static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
+{
+       return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+              before(tp->rx_opt.rcv_tsecr, when);
+}
+
  /* Nothing was retransmitted or returned timestamp is less
   * than timestamp of the first retransmission.
   */
  static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
  {
         return !tp->retrans_stamp ||
-               (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-                before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
+              tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
  }
  
  /* Undo procedures. */
@@ -2474,15 +2434,14 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
         return false;
  }
  
-/* The cwnd reduction in CWR and Recovery use the PRR algorithm
- * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
   * It computes the number of packets to send (sndcnt) based on packets newly
   * delivered:
   *   1) If the packets in flight is larger than ssthresh, PRR spreads the
   *     cwnd reductions across a full RTT.
- *   2) If packets in flight is lower than ssthresh (such as due to excess
- *     losses and/or application stalls), do not perform any further cwnd
- *     reductions, but instead slow start up to ssthresh.
+ *   2) Otherwise PRR uses packet conservation to send as much as delivered.
+ *      But when the retransmits are acked without further losses, PRR
+ *      slow starts cwnd up to ssthresh to speed up the recovery.
   */
  static void tcp_init_cwnd_reduction(struct sock *sk)
  {
@@ -2499,7 +2458,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
  }
  
  static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
-                              int fast_rexmit)
+                              int fast_rexmit, int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         int sndcnt = 0;
@@ -2508,16 +2467,18 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
                                  (tp->packets_out - tp->sacked_out);
  
         tp->prr_delivered += newly_acked_sacked;
-       if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+       if (delta < 0) {
                 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
                                tp->prior_cwnd - 1;
                 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-       } else {
+       } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
+                  !(flag & FLAG_LOST_RETRANS)) {
                 sndcnt = min_t(int, delta,
                                max_t(int, tp->prr_delivered - tp->prr_out,
                                      newly_acked_sacked) + 1);
+       } else {
+               sndcnt = min(delta, newly_acked_sacked);
         }
-
         sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
         tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
  }
@@ -2578,7 +2539,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
         if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                 tcp_try_keep_open(sk);
         } else {
-               tcp_cwnd_reduction(sk, prior_unsacked, 0);
+               tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
         }
  }
  
@@ -2588,6 +2549,7 @@ static void tcp_mtup_probe_failed(struct sock *sk)
  
         icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
         icsk->icsk_mtup.probe_size = 0;
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
  }
  
  static void tcp_mtup_probe_success(struct sock *sk)
@@ -2607,6 +2569,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
         icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
         icsk->icsk_mtup.probe_size = 0;
         tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
  }
  
  /* Do a simple retransmit without using the backoff mechanisms in
@@ -2675,7 +2638,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
         tp->prior_ssthresh = 0;
         tcp_init_undo(tp);
  
-       if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+       if (!tcp_in_cwnd_reduction(sk)) {
                 if (!ece_ack)
                         tp->prior_ssthresh = tcp_current_ssthresh(sk);
                 tcp_init_cwnd_reduction(sk);
@@ -2735,7 +2698,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
  
  /* Undo during fast recovery after partial ACK. */
  static bool tcp_try_undo_partial(struct sock *sk, const int acked,
-                                const int prior_unsacked)
+                                const int prior_unsacked, int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -2751,7 +2714,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
                  * mark more packets lost or retransmit more.
                  */
                 if (tp->retrans_out) {
-                       tcp_cwnd_reduction(sk, prior_unsacked, 0);
+                       tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
                         return true;
                 }
  
@@ -2838,7 +2801,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                         if (tcp_is_reno(tp) && is_dupack)
                                 tcp_add_reno_sack(sk);
                 } else {
-                       if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+                       if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
                                 return;
                         /* Partial ACK arrived. Force fast retransmit. */
                         do_lost = tcp_is_reno(tp) ||
@@ -2851,9 +2814,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                 break;
         case TCP_CA_Loss:
                 tcp_process_loss(sk, flag, is_dupack);
-               if (icsk->icsk_ca_state != TCP_CA_Open)
+               if (icsk->icsk_ca_state != TCP_CA_Open &&
+                   !(flag & FLAG_LOST_RETRANS))
                         return;
-               /* Fall through to processing in Open state. */
+               /* Change state if cwnd is undone or retransmits are lost */
         default:
                 if (tcp_is_reno(tp)) {
                         if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2888,12 +2852,73 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
  
         if (do_lost)
                 tcp_update_scoreboard(sk, fast_rexmit);
-       tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
+       tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
         tcp_xmit_retransmit_queue(sk);
  }
  
+/* Kathleen Nichols' algorithm for tracking the minimum value of
+ * a data stream over some fixed time interval. (E.g., the minimum
+ * RTT over the past five minutes.) It uses constant space and constant
+ * time per update yet almost always delivers the same minimum as an
+ * implementation that has to keep all the data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of the
+ * n'th best >= n-1'th best. It also makes sure that the three values
+ * are widely separated in the time window since that bounds the worst
+ * case error when the data is monotonically increasing over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because it
+ * has no value - the new min is <= everything else in the window by
+ * definition and it's the most recent. So we restart fresh on every new min
+ * and overwrite 2nd & 3rd choices. The same property holds for 2nd & 3rd
+ * best.
+ */
+static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
+{
+       const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
+       struct rtt_meas *m = tcp_sk(sk)->rtt_min;
+       struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+       u32 elapsed;
+
+       /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
+       if (unlikely(rttm.rtt <= m[0].rtt))
+               m[0] = m[1] = m[2] = rttm;
+       else if (rttm.rtt <= m[1].rtt)
+               m[1] = m[2] = rttm;
+       else if (rttm.rtt <= m[2].rtt)
+               m[2] = rttm;
+
+       elapsed = now - m[0].ts;
+       if (unlikely(elapsed > wlen)) {
+               /* Passed entire window without a new min so make 2nd choice
+                * the new min & 3rd choice the new 2nd, and so on.
+                */
+               m[0] = m[1];
+               m[1] = m[2];
+               m[2] = rttm;
+               if (now - m[0].ts > wlen) {
+                       m[0] = m[1];
+                       m[1] = rttm;
+                       if (now - m[0].ts > wlen)
+                               m[0] = rttm;
+               }
+       } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
+               /* Passed a quarter of the window without a new min so
+                * take 2nd choice from the 2nd quarter of the window.
+                */
+               m[2] = m[1] = rttm;
+       } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
+               /* Passed half the window without a new min so take the 3rd
+                * choice from the last half of the window.
+                */
+               m[2] = rttm;
+       }
+}
+
  static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-                                     long seq_rtt_us, long sack_rtt_us)
+                                     long seq_rtt_us, long sack_rtt_us,
+                                     long ca_rtt_us)
  {
         const struct tcp_sock *tp = tcp_sk(sk);
  
@@ -2902,9 +2927,6 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
          * Karn's algorithm forbids taking RTT if some retransmitted data
          * is acked (RFC6298).
          */
-       if (flag & FLAG_RETRANS_DATA_ACKED)
-               seq_rtt_us = -1L;
-
         if (seq_rtt_us < 0)
                 seq_rtt_us = sack_rtt_us;
  
@@ -2916,11 +2938,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
          */
         if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
             flag & FLAG_ACKED)
-               seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-
+               seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
+                                                         tp->rx_opt.rcv_tsecr);
         if (seq_rtt_us < 0)
                 return false;
  
+       /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
+        * always taken together with ACK, SACK, or TS-opts. Any negative
+        * values will be skipped with the seq_rtt_us < 0 check above.
+        */
+       tcp_update_rtt_min(sk, ca_rtt_us);
         tcp_rtt_estimator(sk, seq_rtt_us);
         tcp_set_rto(sk);
  
@@ -2930,21 +2957,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
  }
  
  /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
+void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       long seq_rtt_us = -1L;
+       long rtt_us = -1L;
  
-       if (synack_stamp && !tp->total_retrans)
-               seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
+       if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
+               struct skb_mstamp now;
  
-       /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
-        * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
-        */
-       if (!tp->srtt_us)
-               tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
+               skb_mstamp_get(&now);
+               rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
+       }
+
+       tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
  }
  
+
  static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3146,7 +3173,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 flag |= FLAG_SACK_RENEGING;
  
         skb_mstamp_get(&now);
-       if (likely(first_ackt.v64)) {
+       if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
                 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
                 ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
         }
@@ -3155,7 +3182,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
         }
  
-       rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
+       rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
+                                       ca_rtt_us);
  
         if (flag & FLAG_ACKED) {
                 tcp_rearm_rto(sk);
@@ -3325,6 +3353,9 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
                         tp->pred_flags = 0;
                         tcp_fast_path_check(sk);
  
+                       if (tcp_send_head(sk))
+                               tcp_slow_start_after_idle_check(sk);
+
                         if (nwin > tp->max_window) {
                                 tp->max_window = nwin;
                                 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
@@ -3562,10 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                                     &sack_state);
         acked -= tp->packets_out;
  
-       /* Advance cwnd if state allows */
-       if (tcp_may_raise_cwnd(sk, flag))
-               tcp_cong_avoid(sk, ack, acked);
-
         if (tcp_ack_is_dubious(sk, flag)) {
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
                 tcp_fastretrans_alert(sk, acked, prior_unsacked,
@@ -3574,6 +3601,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
  
+       /* Advance cwnd if state allows */
+       if (tcp_may_raise_cwnd(sk, flag))
+               tcp_cong_avoid(sk, ack, acked);
+
         if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
                 struct dst_entry *dst = __sk_dst_get(sk);
                 if (dst)
@@ -3947,7 +3978,6 @@ void tcp_reset(struct sock *sk)
  static void tcp_fin(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       const struct dst_entry *dst;
  
         inet_csk_schedule_ack(sk);
  
@@ -3959,9 +3989,7 @@ static void tcp_fin(struct sock *sk)
         case TCP_ESTABLISHED:
                 /* Move to CLOSE_WAIT */
                 tcp_set_state(sk, TCP_CLOSE_WAIT);
-               dst = __sk_dst_get(sk);
-               if (!dst || !dst_metric(dst, RTAX_QUICKACK))
-                       inet_csk(sk)->icsk_ack.pingpong = 1;
+               inet_csk(sk)->icsk_ack.pingpong = 1;
                 break;
  
         case TCP_CLOSE_WAIT:
@@ -5449,7 +5477,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
  }
  
  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-                                        const struct tcphdr *th, unsigned int len)
+                                        const struct tcphdr *th)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
@@ -5675,15 +5703,14 @@ reset_and_undo:
   *     address independent.
   */
  
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-                         const struct tcphdr *th, unsigned int len)
+int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct tcphdr *th = tcp_hdr(skb);
         struct request_sock *req;
         int queued = 0;
         bool acceptable;
-       u32 synack_stamp;
  
         tp->rx_opt.saw_tstamp = 0;
  
@@ -5727,7 +5754,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 goto discard;
  
         case TCP_SYN_SENT:
-               queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
+               queued = tcp_rcv_synsent_state_process(sk, skb, th);
                 if (queued >= 0)
                         return queued;
  
@@ -5762,15 +5789,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 if (!acceptable)
                         return 1;
  
+               if (!tp->srtt_us)
+                       tcp_synack_rtt_meas(sk, req);
+
                 /* Once we leave TCP_SYN_RECV, we no longer need req
                  * so release it.
                  */
                 if (req) {
-                       synack_stamp = tcp_rsk(req)->snt_synack;
                         tp->total_retrans = req->num_retrans;
                         reqsk_fastopen_remove(sk, req, false);
                 } else {
-                       synack_stamp = tp->lsndtime;
                         /* Make sure socket is routed, for correct metrics. */
                         icsk->icsk_af_ops->rebuild_header(sk);
                         tcp_init_congestion_control(sk);
@@ -5793,7 +5821,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
                 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-               tcp_synack_rtt_meas(sk, synack_stamp);
  
                 if (tp->rx_opt.tstamp_ok)
                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -5980,14 +6007,17 @@ static void tcp_ecn_create_request(struct request_sock *req,
         const struct net *net = sock_net(listen_sk);
         bool th_ecn = th->ece && th->cwr;
         bool ect, ecn_ok;
+       u32 ecn_ok_dst;
  
         if (!th_ecn)
                 return;
  
         ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
-       ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
+       ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
+       ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
  
-       if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
+       if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
+           (ecn_ok_dst & DST_FEATURE_ECN_CA))
                 inet_rsk(req)->ecn_ok = 1;
  }
  
@@ -5997,11 +6027,11 @@ static void tcp_openreq_init(struct request_sock *req,
  {
         struct inet_request_sock *ireq = inet_rsk(req);
  
-       req->rcv_wnd = 0;               /* So that tcp_send_synack() knows! */
+       req->rsk_rcv_wnd = 0;           /* So that tcp_send_synack() knows! */
         req->cookie_ts = 0;
         tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
         tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-       tcp_rsk(req)->snt_synack = tcp_time_stamp;
+       skb_mstamp_get(&tcp_rsk(req)->snt_synack);
         tcp_rsk(req)->last_oow_ack_time = 0;
         req->mss = rx_opt->mss_clamp;
         req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
@@ -6017,9 +6047,11 @@ static void tcp_openreq_init(struct request_sock *req,
  }
  
  struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
-                                     struct sock *sk_listener)
+                                     struct sock *sk_listener,
+                                     bool attach_listener)
  {
-       struct request_sock *req = reqsk_alloc(ops, sk_listener);
+       struct request_sock *req = reqsk_alloc(ops, sk_listener,
+                                              attach_listener);
  
         if (req) {
                 struct inet_request_sock *ireq = inet_rsk(req);
@@ -6039,13 +6071,13 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
  /*
   * Return true if a syncookie should be sent
   */
-static bool tcp_syn_flood_action(struct sock *sk,
+static bool tcp_syn_flood_action(const struct sock *sk,
                                  const struct sk_buff *skb,
                                  const char *proto)
  {
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
         const char *msg = "Dropping request";
         bool want_cookie = false;
-       struct listen_sock *lopt;
  
  #ifdef CONFIG_SYN_COOKIES
         if (sysctl_tcp_syncookies) {
@@ -6056,12 +6088,12 @@ static bool tcp_syn_flood_action(struct sock *sk,
  #endif
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
  
-       lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-       if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
-               lopt->synflood_warned = 1;
+       if (!queue->synflood_warned &&
+           sysctl_tcp_syncookies != 2 &&
+           xchg(&queue->synflood_warned, 1) == 0)
                 pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
                         proto, ntohs(tcp_hdr(skb)->dest), msg);
-       }
+
         return want_cookie;
  }
  
@@ -6086,16 +6118,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                      const struct tcp_request_sock_ops *af_ops,
                      struct sock *sk, struct sk_buff *skb)
  {
+       struct tcp_fastopen_cookie foc = { .len = -1 };
+       __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
         struct tcp_options_received tmp_opt;
-       struct request_sock *req;
         struct tcp_sock *tp = tcp_sk(sk);
+       struct sock *fastopen_sk = NULL;
         struct dst_entry *dst = NULL;
-       __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
-       bool want_cookie = false, fastopen;
+       struct request_sock *req;
+       bool want_cookie = false;
         struct flowi fl;
-       struct tcp_fastopen_cookie foc = { .len = -1 };
-       int err;
-
  
         /* TW buckets are converted to open requests without
          * limitations, they conserve resources and peer is
@@ -6119,7 +6150,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                 goto drop;
         }
  
-       req = inet_reqsk_alloc(rsk_ops, sk);
+       req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
         if (!req)
                 goto drop;
  
@@ -6202,20 +6233,30 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
         }
  
         tcp_rsk(req)->snt_isn = isn;
+       tcp_rsk(req)->txhash = net_tx_rndhash();
         tcp_openreq_init_rwin(req, sk, dst);
-       fastopen = !want_cookie &&
-                  tcp_try_fastopen(sk, skb, req, &foc, dst);
-       err = af_ops->send_synack(sk, dst, &fl, req,
-                                 skb_get_queue_mapping(skb), &foc);
-       if (!fastopen) {
-               if (err || want_cookie)
-                       goto drop_and_free;
-
+       if (!want_cookie) {
+               tcp_reqsk_record_syn(sk, req, skb);
+               fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
+       }
+       if (fastopen_sk) {
+               af_ops->send_synack(fastopen_sk, dst, &fl, req,
+                                   &foc, false);
+               /* Add the child socket directly into the accept queue */
+               inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+               sk->sk_data_ready(sk);
+               bh_unlock_sock(fastopen_sk);
+               sock_put(fastopen_sk);
+       } else {
                 tcp_rsk(req)->tfo_listener = false;
-               af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+               if (!want_cookie)
+                       inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+               af_ops->send_synack(sk, dst, &fl, req,
+                                   &foc, !want_cookie);
+               if (want_cookie)
+                       goto drop_and_free;
         }
-       tcp_reqsk_record_syn(sk, req, skb);
-
+       reqsk_put(req);
         return 0;
  
  drop_and_release: