tcp: fix three tcp sysctls tuning
[firefly-linux-kernel-4.4.55.git] net/ipv4/tcp.c
index 64d0af675823c7fe18d15f53c8d48224105f7c1b..0df6fc0e8a53cae9f257f82f6b195c9e6a8dd972 100644
@@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk)
 
 EXPORT_SYMBOL(tcp_enter_memory_pressure);
 
+/* Convert seconds to retransmits based on initial and max timeout */
+static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
+{
+       u8 res = 0;
+
+       if (seconds > 0) {
+               int period = timeout;
+
+               res = 1;
+               while (seconds > period && res < 255) {
+                       res++;
+                       timeout <<= 1;
+                       if (timeout > rto_max)
+                               timeout = rto_max;
+                       period += timeout;
+               }
+       }
+       return res;
+}
+
+/* Convert retransmits to seconds based on initial and max timeout */
+static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+{
+       int period = 0;
+
+       if (retrans > 0) {
+               period = timeout;
+               while (--retrans) {
+                       timeout <<= 1;
+                       if (timeout > rto_max)
+                               timeout = rto_max;
+                       period += timeout;
+               }
+       }
+       return period;
+}
+
 /*
  *     Wait for a TCP event.
  *
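
The TCP_DEFER_ACCEPT paths further down call these helpers with TCP_TIMEOUT_INIT / HZ and TCP_RTO_MAX / HZ; on kernels of this vintage those work out to 3 and 120 seconds (an assumption worth checking against include/net/tcp.h). A requested 30 seconds then maps to 4 retransmits, and converting back gives 45 seconds, the end of the period the 4th retransmit covers. A minimal userspace sketch of the forward conversion, built from a copy of the helper above:

    #include <stdio.h>

    /* Userspace copy of secs_to_retrans() above, for experimentation only. */
    static unsigned char secs_to_retrans(int seconds, int timeout, int rto_max)
    {
            unsigned char res = 0;

            if (seconds > 0) {
                    int period = timeout;

                    res = 1;
                    while (seconds > period && res < 255) {
                            res++;
                            timeout <<= 1;
                            if (timeout > rto_max)
                                    timeout = rto_max;
                            period += timeout;
                    }
            }
            return res;
    }

    int main(void)
    {
            /* 3 and 120 stand in for TCP_TIMEOUT_INIT / HZ and TCP_RTO_MAX / HZ */
            printf("%u\n", secs_to_retrans(30, 3, 120)); /* prints 4: 3+6+12+24 = 45 >= 30 */
            return 0;
    }
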
@@ -414,7 +451,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
-               }
+               } else
+                       mask |= POLLOUT | POLLWRNORM;
 
                if (tp->urg_data & TCP_URG_VALID)
                        mask |= POLLPRI;
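
Judging from the context above, the new else arm runs once SEND_SHUTDOWN has been set on the socket: a connection whose write side was shut down now always reports POLLOUT | POLLWRNORM instead of leaving a poller waiting for send space that will never be used. A hypothetical userspace check of the visible behaviour (fd is assumed to be a connected TCP socket):

    #include <poll.h>
    #include <sys/socket.h>

    /* Returns nonzero if fd still polls writable after shutdown(SHUT_WR). */
    static int polls_writable_after_shutdown(int fd)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLOUT };

            shutdown(fd, SHUT_WR);  /* sets SEND_SHUTDOWN on the kernel side */
            return poll(&pfd, 1, 0) == 1 && (pfd.revents & POLLOUT);
    }
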
@@ -1146,7 +1184,9 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 #if TCP_DEBUG
        struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
-       WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+       WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
+            KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
+            tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
 #endif
 
        if (inet_csk_ack_scheduled(sk)) {
@@ -1393,11 +1433,13 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                        /* Now that we have two receive queues this
                         * shouldn't happen.
                         */
-                       if (before(*seq, TCP_SKB_CB(skb)->seq)) {
-                               printk(KERN_INFO "recvmsg bug: copied %X "
-                                      "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
+                       if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
+                            KERN_INFO "recvmsg bug: copied %X "
+                                      "seq %X rcvnxt %X fl %X\n", *seq,
+                                      TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+                                      flags))
                                break;
-                       }
+
                        offset = *seq - TCP_SKB_CB(skb)->seq;
                        if (tcp_hdr(skb)->syn)
                                offset--;
@@ -1405,7 +1447,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                                goto found_ok_skb;
                        if (tcp_hdr(skb)->fin)
                                goto found_fin_ok;
-                       WARN_ON(!(flags & MSG_PEEK));
+                       WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: "
+                                       "copied %X seq %X rcvnxt %X fl %X\n",
+                                       *seq, TCP_SKB_CB(skb)->seq,
+                                       tp->rcv_nxt, flags);
                }
 
                /* Well, if we have backlog, try to process it now yet. */
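
Both WARN_ON-to-WARN conversions above rely on WARN() evaluating to its condition, which is what lets the first one drive the if () directly while still printing the sequence numbers. Roughly, simplified from include/asm-generic/bug.h of this era:

    #define WARN(condition, format...) ({           \
            int __ret_warn_on = !!(condition);      \
            if (unlikely(__ret_warn_on))            \
                    __WARN_printf(format);          \
            unlikely(__ret_warn_on);                \
    })
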
@@ -1931,11 +1976,8 @@ adjudge_to_death:
                }
        }
        if (sk->sk_state != TCP_CLOSE) {
-               int orphan_count = percpu_counter_read_positive(
-                                               sk->sk_prot->orphan_count);
-
                sk_mem_reclaim(sk);
-               if (tcp_too_many_orphans(sk, orphan_count)) {
+               if (tcp_too_many_orphans(sk, 0)) {
                        if (net_ratelimit())
                                printk(KERN_INFO "TCP: too many of orphaned "
                                       "sockets\n");
@@ -2163,16 +2205,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                break;
 
        case TCP_DEFER_ACCEPT:
-               icsk->icsk_accept_queue.rskq_defer_accept = 0;
-               if (val > 0) {
-                       /* Translate value in seconds to number of
-                        * retransmits */
-                       while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
-                              val > ((TCP_TIMEOUT_INIT / HZ) <<
-                                      icsk->icsk_accept_queue.rskq_defer_accept))
-                               icsk->icsk_accept_queue.rskq_defer_accept++;
-                       icsk->icsk_accept_queue.rskq_defer_accept++;
-               }
+               /* Translate value in seconds to number of retransmits */
+               icsk->icsk_accept_queue.rskq_defer_accept =
+                       secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+                                       TCP_RTO_MAX / HZ);
                break;
 
        case TCP_WINDOW_CLAMP:
@@ -2353,8 +2389,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                        val = (val ? : sysctl_tcp_fin_timeout) / HZ;
                break;
        case TCP_DEFER_ACCEPT:
-               val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
-                       ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+               val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+                                     TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
                break;
        case TCP_WINDOW_CLAMP:
                val = tp->window_clamp;
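
Seen from userspace, TCP_DEFER_ACCEPT is now rounded up to the end of a whole retransmission period, so reading the option back can return more than was written; with the assumed 3-second initial timeout, 30 comes back as 45. A hypothetical probe (error handling omitted):

    #include <stdio.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            int val = 30;
            socklen_t len = sizeof(val);

            setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val));
            getsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, &len);
            printf("TCP_DEFER_ACCEPT reads back as %d\n", val); /* 45, not 30 */
            return 0;
    }
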
@@ -2842,7 +2878,7 @@ void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
        unsigned long nr_pages, limit;
-       int order, i, max_share;
+       int i, max_share, cnt;
 
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -2891,22 +2927,12 @@ void __init tcp_init(void)
                INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
        }
 
-       /* Try to be a bit smarter and adjust defaults depending
-        * on available memory.
-        */
-       for (order = 0; ((1 << order) << PAGE_SHIFT) <
-                       (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
-                       order++)
-               ;
-       if (order >= 4) {
-               tcp_death_row.sysctl_max_tw_buckets = 180000;
-               sysctl_tcp_max_orphans = 4096 << (order - 4);
-               sysctl_max_syn_backlog = 1024;
-       } else if (order < 3) {
-               tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
-               sysctl_tcp_max_orphans >>= (3 - order);
-               sysctl_max_syn_backlog = 128;
-       }
+
+       cnt = tcp_hashinfo.ehash_size;
+
+       tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
+       sysctl_tcp_max_orphans = cnt / 2;
+       sysctl_max_syn_backlog = max(128, cnt / 256);
 
        /* Set the pressure threshold to be a fraction of global memory that
         * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
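
The three sysctls now scale with the established-connection hash size instead of the old bind-hash page order, so bigger machines get proportionally higher limits without the coarse order-based steps. For a hypothetical ehash_size of 65536 (the real value depends on available memory), the defaults come out as:

    #include <stdio.h>

    int main(void)
    {
            int cnt = 65536;        /* hypothetical tcp_hashinfo.ehash_size */

            printf("max_tw_buckets  = %d\n", cnt / 2);      /* 32768 */
            printf("max_orphans     = %d\n", cnt / 2);      /* 32768 */
            printf("max_syn_backlog = %d\n",
                   cnt / 256 > 128 ? cnt / 256 : 128);      /* max(128, 256) = 256 */
            return 0;
    }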