include/net/tcp.h

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Definitions for the TCP module.
   7  *
   8  * Version:     @(#)tcp.h       1.0.5   05/23/93
   9  *
  10  * Authors:     Ross Biro
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *
  13  *              This program is free software; you can redistribute it and/or
  14  *              modify it under the terms of the GNU General Public License
  15  *              as published by the Free Software Foundation; either version
  16  *              2 of the License, or (at your option) any later version.
  17  */
  18 #ifndef _TCP_H
  19 #define _TCP_H
  20
  21 #define TCP_DEBUG 1
  22 #define FASTRETRANS_DEBUG 1
  23
  24 /* Cancel timers, when they are not required. */
  25 #undef TCP_CLEAR_TIMERS
  26
  27 #include <linux/config.h>
  28 #include <linux/list.h>
  29 #include <linux/tcp.h>
  30 #include <linux/slab.h>
  31 #include <linux/cache.h>
  32 #include <linux/percpu.h>
  33 #include <net/inet_hashtables.h>
  34 #include <net/checksum.h>
  35 #include <net/request_sock.h>
  36 #include <net/sock.h>
  37 #include <net/snmp.h>
  38 #include <net/ip.h>
  39 #include <net/tcp_states.h>
  40
  41 #include <linux/seq_file.h>
  42
  43 extern struct inet_hashinfo tcp_hashinfo;
  44
  45 extern atomic_t tcp_orphan_count;
  46 extern int tcp_tw_count;
  47 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
  48 extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
  49
  50 #define MAX_TCP_HEADER  (128 + MAX_HEADER)
  51
  52 /*
  53  * Never offer a window over 32767 without using window scaling. Some
  54  * poor stacks do signed 16bit maths!
  55  */
  56 #define MAX_TCP_WINDOW          32767U
  57
  58 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
  59 #define TCP_MIN_MSS             88U
  60
  61 /* Minimal RCV_MSS. */
  62 #define TCP_MIN_RCVMSS          536U
  63
  64 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
  65 #define TCP_FASTRETRANS_THRESH 3
  66
  67 /* Maximal reordering. */
  68 #define TCP_MAX_REORDERING      127
  69
  70 /* Maximal number of ACKs sent quickly to accelerate slow-start. */
  71 #define TCP_MAX_QUICKACKS       16U
  72
  73 /* urg_data states */
  74 #define TCP_URG_VALID   0x0100
  75 #define TCP_URG_NOTYET  0x0200
  76 #define TCP_URG_READ    0x0400
  77
  78 #define TCP_RETR1       3       /*
  79                                  * This is how many retries it does before it
  80                                  * tries to figure out if the gateway is
  81                                  * down. Minimal RFC value is 3; it corresponds
  82                                  * to ~3sec-8min depending on RTO.
  83                                  */
  84
  85 #define TCP_RETR2       15      /*
  86                                  * This should take at least
  87                                  * 90 minutes to time out.
  88                                  * RFC1122 says that the limit is 100 sec.
  89                                  * 15 is ~13-30min depending on RTO.
  90                                  */
  91
  92 #define TCP_SYN_RETRIES  5      /* number of times to retry active opening a
  93                                  * connection: ~180sec is RFC minumum   */
  94
  95 #define TCP_SYNACK_RETRIES 5    /* number of times to retry passive opening a
  96                                  * connection: ~180sec is RFC minumum   */
  97
  98
  99 #define TCP_ORPHAN_RETRIES 7    /* number of times to retry on an orphaned
 100                                  * socket. 7 is ~50sec-16min.
 101                                  */
 102
 103
 104 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
 105                                   * state, about 60 seconds     */
 106 #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
 107                                  /* BSD style FIN_WAIT2 deadlock breaker.
 108                                   * It used to be 3min, new value is 60sec,
 109                                   * to combine FIN-WAIT-2 timeout with
 110                                   * TIME-WAIT timer.
 111                                   */
 112
 113 #define TCP_DELACK_MAX  ((unsigned)(HZ/5))      /* maximal time to delay before sending an ACK */
 114 #if HZ >= 100
 115 #define TCP_DELACK_MIN  ((unsigned)(HZ/25))     /* minimal time to delay before sending an ACK */
 116 #define TCP_ATO_MIN     ((unsigned)(HZ/25))
 117 #else
 118 #define TCP_DELACK_MIN  4U
 119 #define TCP_ATO_MIN     4U
 120 #endif
 121 #define TCP_RTO_MAX     ((unsigned)(120*HZ))
 122 #define TCP_RTO_MIN     ((unsigned)(HZ/5))
 123 #define TCP_TIMEOUT_INIT ((unsigned)(3*HZ))     /* RFC 1122 initial RTO value   */
 124
 125 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
 126                                                          * for local resources.
 127                                                          */
 128
 129 #define TCP_KEEPALIVE_TIME      (120*60*HZ)     /* two hours */
 130 #define TCP_KEEPALIVE_PROBES    9               /* Max of 9 keepalive probes    */
 131 #define TCP_KEEPALIVE_INTVL     (75*HZ)
 132
 133 #define MAX_TCP_KEEPIDLE        32767
 134 #define MAX_TCP_KEEPINTVL       32767
 135 #define MAX_TCP_KEEPCNT         127
 136 #define MAX_TCP_SYNCNT          127
 137
 138 #define TCP_SYNQ_INTERVAL       (HZ/5)  /* Period of SYNACK timer */
 139 #define TCP_SYNQ_HSIZE          512     /* Size of SYNACK hash table */
 140
 141 #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
 142 #define TCP_PAWS_MSL    60              /* Per-host timestamps are invalidated
 143                                          * after this time. It should be equal
 144                                          * (or greater than) TCP_TIMEWAIT_LEN
 145                                          * to provide reliability equal to one
 146                                          * provided by timewait state.
 147                                          */
 148 #define TCP_PAWS_WINDOW 1               /* Replay window for per-host
 149                                          * timestamps. It must be less than
 150                                          * minimal timewait lifetime.
 151                                          */
 152
 153 #define TCP_TW_RECYCLE_SLOTS_LOG        5
 154 #define TCP_TW_RECYCLE_SLOTS            (1<<TCP_TW_RECYCLE_SLOTS_LOG)
 155
 156 /* If time > 4sec, it is "slow" path, no recycling is required,
 157    so that we select tick to get range about 4 seconds.
 158  */
 159
 160 #if HZ <= 16 || HZ > 4096
 161 # error Unsupported: HZ <= 16 or HZ > 4096
 162 #elif HZ <= 32
 163 # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
 164 #elif HZ <= 64
 165 # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
 166 #elif HZ <= 128
 167 # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
 168 #elif HZ <= 256
 169 # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
 170 #elif HZ <= 512
 171 # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
 172 #elif HZ <= 1024
 173 # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
 174 #elif HZ <= 2048
 175 # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
 176 #else
 177 # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
 178 #endif
 179 /*
 180  *      TCP option
 181  */
 182
 183 #define TCPOPT_NOP              1       /* Padding */
 184 #define TCPOPT_EOL              0       /* End of options */
 185 #define TCPOPT_MSS              2       /* Segment size negotiating */
 186 #define TCPOPT_WINDOW           3       /* Window scaling */
 187 #define TCPOPT_SACK_PERM        4       /* SACK Permitted */
 188 #define TCPOPT_SACK             5       /* SACK Block */
 189 #define TCPOPT_TIMESTAMP        8       /* Better RTT estimations/PAWS */
 190
 191 /*
 192  *     TCP option lengths
 193  */
 194
 195 #define TCPOLEN_MSS            4
 196 #define TCPOLEN_WINDOW         3
 197 #define TCPOLEN_SACK_PERM      2
 198 #define TCPOLEN_TIMESTAMP      10
 199
 200 /* But this is what stacks really send out. */
 201 #define TCPOLEN_TSTAMP_ALIGNED          12
 202 #define TCPOLEN_WSCALE_ALIGNED          4
 203 #define TCPOLEN_SACKPERM_ALIGNED        4
 204 #define TCPOLEN_SACK_BASE               2
 205 #define TCPOLEN_SACK_BASE_ALIGNED       4
 206 #define TCPOLEN_SACK_PERBLOCK           8
 207
 208 #define TCP_TIME_RETRANS        1       /* Retransmit timer */
 209 #define TCP_TIME_DACK           2       /* Delayed ack timer */
 210 #define TCP_TIME_PROBE0         3       /* Zero window probe timer */
 211 #define TCP_TIME_KEEPOPEN       4       /* Keepalive timer */
 212
 213 /* Flags in tp->nonagle */
 214 #define TCP_NAGLE_OFF           1       /* Nagle's algo is disabled */
 215 #define TCP_NAGLE_CORK          2       /* Socket is corked         */
 216 #define TCP_NAGLE_PUSH          4       /* Cork is overriden for already queued data */
 217
 218 /* sysctl variables for tcp */
 219 extern int sysctl_tcp_timestamps;
 220 extern int sysctl_tcp_window_scaling;
 221 extern int sysctl_tcp_sack;
 222 extern int sysctl_tcp_fin_timeout;
 223 extern int sysctl_tcp_tw_recycle;
 224 extern int sysctl_tcp_keepalive_time;
 225 extern int sysctl_tcp_keepalive_probes;
 226 extern int sysctl_tcp_keepalive_intvl;
 227 extern int sysctl_tcp_syn_retries;
 228 extern int sysctl_tcp_synack_retries;
 229 extern int sysctl_tcp_retries1;
 230 extern int sysctl_tcp_retries2;
 231 extern int sysctl_tcp_orphan_retries;
 232 extern int sysctl_tcp_syncookies;
 233 extern int sysctl_tcp_retrans_collapse;
 234 extern int sysctl_tcp_stdurg;
 235 extern int sysctl_tcp_rfc1337;
 236 extern int sysctl_tcp_abort_on_overflow;
 237 extern int sysctl_tcp_max_orphans;
 238 extern int sysctl_tcp_max_tw_buckets;
 239 extern int sysctl_tcp_fack;
 240 extern int sysctl_tcp_reordering;
 241 extern int sysctl_tcp_ecn;
 242 extern int sysctl_tcp_dsack;
 243 extern int sysctl_tcp_mem[3];
 244 extern int sysctl_tcp_wmem[3];
 245 extern int sysctl_tcp_rmem[3];
 246 extern int sysctl_tcp_app_win;
 247 extern int sysctl_tcp_adv_win_scale;
 248 extern int sysctl_tcp_tw_reuse;
 249 extern int sysctl_tcp_frto;
 250 extern int sysctl_tcp_low_latency;
 251 extern int sysctl_tcp_nometrics_save;
 252 extern int sysctl_tcp_moderate_rcvbuf;
 253 extern int sysctl_tcp_tso_win_divisor;
 254
 255 extern atomic_t tcp_memory_allocated;
 256 extern atomic_t tcp_sockets_allocated;
 257 extern int tcp_memory_pressure;
 258
 259 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 260 #define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
 261 #else
 262 #define TCP_INET_FAMILY(fam) 1
 263 #endif
 264
 265 /*
 266  *      Pointers to address related TCP functions
 267  *      (i.e. things that depend on the address family)
 268  */
 269
 270 struct tcp_func {
 271         int                     (*queue_xmit)           (struct sk_buff *skb,
 272                                                          int ipfragok);
 273
 274         void                    (*send_check)           (struct sock *sk,
 275                                                          struct tcphdr *th,
 276                                                          int len,
 277                                                          struct sk_buff *skb);
 278
 279         int                     (*rebuild_header)       (struct sock *sk);
 280
 281         int                     (*conn_request)         (struct sock *sk,
 282                                                          struct sk_buff *skb);
 283
 284         struct sock *           (*syn_recv_sock)        (struct sock *sk,
 285                                                          struct sk_buff *skb,
 286                                                          struct request_sock *req,
 287                                                          struct dst_entry *dst);
 288
 289         int                     (*remember_stamp)       (struct sock *sk);
 290
 291         __u16                   net_header_len;
 292
 293         int                     (*setsockopt)           (struct sock *sk,
 294                                                          int level,
 295                                                          int optname,
 296                                                          char __user *optval,
 297                                                          int optlen);
 298
 299         int                     (*getsockopt)           (struct sock *sk,
 300                                                          int level,
 301                                                          int optname,
 302                                                          char __user *optval,
 303                                                          int __user *optlen);
 304
 305
 306         void                    (*addr2sockaddr)        (struct sock *sk,
 307                                                          struct sockaddr *);
 308
 309         int sockaddr_len;
 310 };
 311
 312 /*
 313  * The next routines deal with comparing 32 bit unsigned ints
 314  * and worry about wraparound (automatic with unsigned arithmetic).
 315  */
 316
 317 static inline int before(__u32 seq1, __u32 seq2)
 318 {
 319         return (__s32)(seq1-seq2) < 0;
 320 }
 321
 322 static inline int after(__u32 seq1, __u32 seq2)
 323 {
 324         return (__s32)(seq2-seq1) < 0;
 325 }
 326
 327
 328 /* is s2<=s1<=s3 ? */
 329 static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
 330 {
 331         return seq3 - seq2 >= seq1 - seq2;
 332 }
 333
 334
 335 extern struct proto tcp_prot;
 336
 337 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 338 #define TCP_INC_STATS(field)            SNMP_INC_STATS(tcp_statistics, field)
 339 #define TCP_INC_STATS_BH(field)         SNMP_INC_STATS_BH(tcp_statistics, field)
 340 #define TCP_INC_STATS_USER(field)       SNMP_INC_STATS_USER(tcp_statistics, field)
 341 #define TCP_DEC_STATS(field)            SNMP_DEC_STATS(tcp_statistics, field)
 342 #define TCP_ADD_STATS_BH(field, val)    SNMP_ADD_STATS_BH(tcp_statistics, field, val)
 343 #define TCP_ADD_STATS_USER(field, val)  SNMP_ADD_STATS_USER(tcp_statistics, field, val)
 344
 345 extern void                     tcp_v4_err(struct sk_buff *skb, u32);
 346
 347 extern void                     tcp_shutdown (struct sock *sk, int how);
 348
 349 extern int                      tcp_v4_rcv(struct sk_buff *skb);
 350
 351 extern int                      tcp_v4_remember_stamp(struct sock *sk);
 352
 353 extern int                      tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 354
 355 extern int                      tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
 356                                             struct msghdr *msg, size_t size);
 357 extern ssize_t                  tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
 358
 359 extern int                      tcp_ioctl(struct sock *sk,
 360                                           int cmd,
 361                                           unsigned long arg);
 362
 363 extern int                      tcp_rcv_state_process(struct sock *sk,
 364                                                       struct sk_buff *skb,
 365                                                       struct tcphdr *th,
 366                                                       unsigned len);
 367
 368 extern int                      tcp_rcv_established(struct sock *sk,
 369                                                     struct sk_buff *skb,
 370                                                     struct tcphdr *th,
 371                                                     unsigned len);
 372
 373 extern void                     tcp_rcv_space_adjust(struct sock *sk);
 374
 375 enum tcp_ack_state_t
 376 {
 377         TCP_ACK_SCHED = 1,
 378         TCP_ACK_TIMER = 2,
 379         TCP_ACK_PUSHED= 4
 380 };
 381
 382 static inline void tcp_schedule_ack(struct tcp_sock *tp)
 383 {
 384         tp->ack.pending |= TCP_ACK_SCHED;
 385 }
 386
 387 static inline int tcp_ack_scheduled(struct tcp_sock *tp)
 388 {
 389         return tp->ack.pending&TCP_ACK_SCHED;
 390 }
 391
 392 static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
 393 {
 394         if (tp->ack.quick) {
 395                 if (pkts >= tp->ack.quick) {
 396                         tp->ack.quick = 0;
 397
 398                         /* Leaving quickack mode we deflate ATO. */
 399                         tp->ack.ato = TCP_ATO_MIN;
 400                 } else
 401                         tp->ack.quick -= pkts;
 402         }
 403 }
 404
 405 extern void tcp_enter_quickack_mode(struct tcp_sock *tp);
 406
 407 static __inline__ void tcp_delack_init(struct tcp_sock *tp)
 408 {
 409         memset(&tp->ack, 0, sizeof(tp->ack));
 410 }
 411
 412 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 413 {
 414         rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
 415 }
 416
 417 enum tcp_tw_status
 418 {
 419         TCP_TW_SUCCESS = 0,
 420         TCP_TW_RST = 1,
 421         TCP_TW_ACK = 2,
 422         TCP_TW_SYN = 3
 423 };
 424
 425
 426 extern enum tcp_tw_status       tcp_timewait_state_process(struct inet_timewait_sock *tw,
 427                                                            struct sk_buff *skb,
 428                                                            const struct tcphdr *th);
 429
 430 extern struct sock *            tcp_check_req(struct sock *sk,struct sk_buff *skb,
 431                                               struct request_sock *req,
 432                                               struct request_sock **prev);
 433 extern int                      tcp_child_process(struct sock *parent,
 434                                                   struct sock *child,
 435                                                   struct sk_buff *skb);
 436 extern void                     tcp_enter_frto(struct sock *sk);
 437 extern void                     tcp_enter_loss(struct sock *sk, int how);
 438 extern void                     tcp_clear_retrans(struct tcp_sock *tp);
 439 extern void                     tcp_update_metrics(struct sock *sk);
 440
 441 extern void                     tcp_close(struct sock *sk,
 442                                           long timeout);
 443 extern struct sock *            tcp_accept(struct sock *sk, int flags, int *err);
 444 extern unsigned int             tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
 445
 446 extern int                      tcp_getsockopt(struct sock *sk, int level,
 447                                                int optname,
 448                                                char __user *optval,
 449                                                int __user *optlen);
 450 extern int                      tcp_setsockopt(struct sock *sk, int level,
 451                                                int optname, char __user *optval,
 452                                                int optlen);
 453 extern void                     tcp_set_keepalive(struct sock *sk, int val);
 454 extern int                      tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 455                                             struct msghdr *msg,
 456                                             size_t len, int nonblock,
 457                                             int flags, int *addr_len);
 458
 459 extern int                      tcp_listen_start(struct sock *sk);
 460
 461 extern void                     tcp_parse_options(struct sk_buff *skb,
 462                                                   struct tcp_options_received *opt_rx,
 463                                                   int estab);
 464
 465 /*
 466  *      TCP v4 functions exported for the inet6 API
 467  */
 468
 469 extern void                     tcp_v4_send_check(struct sock *sk,
 470                                                   struct tcphdr *th, int len,
 471                                                   struct sk_buff *skb);
 472
 473 extern int                      tcp_v4_conn_request(struct sock *sk,
 474                                                     struct sk_buff *skb);
 475
 476 extern struct sock *            tcp_create_openreq_child(struct sock *sk,
 477                                                          struct request_sock *req,
 478                                                          struct sk_buff *skb);
 479
 480 extern struct sock *            tcp_v4_syn_recv_sock(struct sock *sk,
 481                                                      struct sk_buff *skb,
 482                                                      struct request_sock *req,
 483                                                         struct dst_entry *dst);
 484
 485 extern int                      tcp_v4_do_rcv(struct sock *sk,
 486                                               struct sk_buff *skb);
 487
 488 extern int                      tcp_v4_connect(struct sock *sk,
 489                                                struct sockaddr *uaddr,
 490                                                int addr_len);
 491
 492 extern int                      tcp_connect(struct sock *sk);
 493
 494 extern struct sk_buff *         tcp_make_synack(struct sock *sk,
 495                                                 struct dst_entry *dst,
 496                                                 struct request_sock *req);
 497
 498 extern int                      tcp_disconnect(struct sock *sk, int flags);
 499
 500 extern void                     tcp_unhash(struct sock *sk);
 501
 502 extern int                      tcp_v4_hash_connecting(struct sock *sk);
 503
 504
 505 /* From syncookies.c */
 506 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 507                                     struct ip_options *opt);
 508 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 509                                      __u16 *mss);
 510
 511 /* tcp_output.c */
 512
 513 extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 514                                       unsigned int cur_mss, int nonagle);
 515 extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
 516 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 517 extern void tcp_xmit_retransmit_queue(struct sock *);
 518 extern void tcp_simple_retransmit(struct sock *);
 519 extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
 520
 521 extern void tcp_send_probe0(struct sock *);
 522 extern void tcp_send_partial(struct sock *);
 523 extern int  tcp_write_wakeup(struct sock *);
 524 extern void tcp_send_fin(struct sock *sk);
 525 extern void tcp_send_active_reset(struct sock *sk,
 526                                   unsigned int __nocast priority);
 527 extern int  tcp_send_synack(struct sock *);
 528 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 529 extern void tcp_send_ack(struct sock *sk);
 530 extern void tcp_send_delayed_ack(struct sock *sk);
 531
 532 /* tcp_input.c */
 533 extern void tcp_cwnd_application_limited(struct sock *sk);
 534
 535 /* tcp_timer.c */
 536 extern void tcp_init_xmit_timers(struct sock *);
 537 extern void tcp_clear_xmit_timers(struct sock *);
 538
 539 extern void tcp_delete_keepalive_timer(struct sock *);
 540 extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
 541 extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
 542 extern unsigned int tcp_current_mss(struct sock *sk, int large);
 543
 544 #ifdef TCP_DEBUG
 545 extern const char tcp_timer_bug_msg[];
 546 #endif
 547
 548 /* tcp_diag.c */
 549 extern void tcp_get_info(struct sock *, struct tcp_info *);
 550
 551 /* Read 'sendfile()'-style from a TCP socket */
 552 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
 553                                 unsigned int, size_t);
 554 extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 555                          sk_read_actor_t recv_actor);
 556
 557 static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
 558 {
 559         struct tcp_sock *tp = tcp_sk(sk);
 560
 561         switch (what) {
 562         case TCP_TIME_RETRANS:
 563         case TCP_TIME_PROBE0:
 564                 tp->pending = 0;
 565
 566 #ifdef TCP_CLEAR_TIMERS
 567                 sk_stop_timer(sk, &tp->retransmit_timer);
 568 #endif
 569                 break;
 570         case TCP_TIME_DACK:
 571                 tp->ack.blocked = 0;
 572                 tp->ack.pending = 0;
 573
 574 #ifdef TCP_CLEAR_TIMERS
 575                 sk_stop_timer(sk, &tp->delack_timer);
 576 #endif
 577                 break;
 578         default:
 579 #ifdef TCP_DEBUG
 580                 printk(tcp_timer_bug_msg);
 581 #endif
 582                 return;
 583         };
 584
 585 }
 586
 587 /*
 588  *      Reset the retransmission timer
 589  */
 590 static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
 591 {
 592         struct tcp_sock *tp = tcp_sk(sk);
 593
 594         if (when > TCP_RTO_MAX) {
 595 #ifdef TCP_DEBUG
 596                 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
 597 #endif
 598                 when = TCP_RTO_MAX;
 599         }
 600
 601         switch (what) {
 602         case TCP_TIME_RETRANS:
 603         case TCP_TIME_PROBE0:
 604                 tp->pending = what;
 605                 tp->timeout = jiffies+when;
 606                 sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
 607                 break;
 608
 609         case TCP_TIME_DACK:
 610                 tp->ack.pending |= TCP_ACK_TIMER;
 611                 tp->ack.timeout = jiffies+when;
 612                 sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
 613                 break;
 614
 615         default:
 616 #ifdef TCP_DEBUG
 617                 printk(tcp_timer_bug_msg);
 618 #endif
 619                 return;
 620         };
 621 }
 622
 623 /* Initialize RCV_MSS value.
 624  * RCV_MSS is an our guess about MSS used by the peer.
 625  * We haven't any direct information about the MSS.
 626  * It's better to underestimate the RCV_MSS rather than overestimate.
 627  * Overestimations make us ACKing less frequently than needed.
 628  * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 629  */
 630
 631 static inline void tcp_initialize_rcv_mss(struct sock *sk)
 632 {
 633         struct tcp_sock *tp = tcp_sk(sk);
 634         unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 635
 636         hint = min(hint, tp->rcv_wnd/2);
 637         hint = min(hint, TCP_MIN_RCVMSS);
 638         hint = max(hint, TCP_MIN_MSS);
 639
 640         tp->ack.rcv_mss = hint;
 641 }
 642
 643 static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 644 {
 645         tp->pred_flags = htonl((tp->tcp_header_len << 26) |
 646                                ntohl(TCP_FLAG_ACK) |
 647                                snd_wnd);
 648 }
 649
 650 static __inline__ void tcp_fast_path_on(struct tcp_sock *tp)
 651 {
 652         __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 653 }
 654
 655 static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
 656 {
 657         if (skb_queue_empty(&tp->out_of_order_queue) &&
 658             tp->rcv_wnd &&
 659             atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
 660             !tp->urg_data)
 661                 tcp_fast_path_on(tp);
 662 }
 663
 664 /* Compute the actual receive window we are currently advertising.
 665  * Rcv_nxt can be after the window if our peer push more data
 666  * than the offered window.
 667  */
 668 static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp)
 669 {
 670         s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
 671
 672         if (win < 0)
 673                 win = 0;
 674         return (u32) win;
 675 }
 676
 677 /* Choose a new window, without checks for shrinking, and without
 678  * scaling applied to the result.  The caller does these things
 679  * if necessary.  This is a "raw" window selection.
 680  */
 681 extern u32      __tcp_select_window(struct sock *sk);
 682
 683 /* TCP timestamps are only 32-bits, this causes a slight
 684  * complication on 64-bit systems since we store a snapshot
 685  * of jiffies in the buffer control blocks below.  We decidely
 686  * only use of the low 32-bits of jiffies and hide the ugly
 687  * casts with the following macro.
 688  */
 689 #define tcp_time_stamp          ((__u32)(jiffies))
 690
 691 /* This is what the send packet queueing engine uses to pass
 692  * TCP per-packet control information to the transmission
 693  * code.  We also store the host-order sequence numbers in
 694  * here too.  This is 36 bytes on 32-bit architectures,
 695  * 40 bytes on 64-bit machines, if this grows please adjust
 696  * skbuff.h:skbuff->cb[xxx] size appropriately.
 697  */
 698 struct tcp_skb_cb {
 699         union {
 700                 struct inet_skb_parm    h4;
 701 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 702                 struct inet6_skb_parm   h6;
 703 #endif
 704         } header;       /* For incoming frames          */
 705         __u32           seq;            /* Starting sequence number     */
 706         __u32           end_seq;        /* SEQ + FIN + SYN + datalen    */
 707         __u32           when;           /* used to compute rtt's        */
 708         __u8            flags;          /* TCP header flags.            */
 709
 710         /* NOTE: These must match up to the flags byte in a
 711          *       real TCP header.
 712          */
 713 #define TCPCB_FLAG_FIN          0x01
 714 #define TCPCB_FLAG_SYN          0x02
 715 #define TCPCB_FLAG_RST          0x04
 716 #define TCPCB_FLAG_PSH          0x08
 717 #define TCPCB_FLAG_ACK          0x10
 718 #define TCPCB_FLAG_URG          0x20
 719 #define TCPCB_FLAG_ECE          0x40
 720 #define TCPCB_FLAG_CWR          0x80
 721
 722         __u8            sacked;         /* State flags for SACK/FACK.   */
 723 #define TCPCB_SACKED_ACKED      0x01    /* SKB ACK'd by a SACK block    */
 724 #define TCPCB_SACKED_RETRANS    0x02    /* SKB retransmitted            */
 725 #define TCPCB_LOST              0x04    /* SKB is lost                  */
 726 #define TCPCB_TAGBITS           0x07    /* All tag bits                 */
 727
 728 #define TCPCB_EVER_RETRANS      0x80    /* Ever retransmitted frame     */
 729 #define TCPCB_RETRANS           (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
 730
 731 #define TCPCB_URG               0x20    /* Urgent pointer advenced here */
 732
 733 #define TCPCB_AT_TAIL           (TCPCB_URG)
 734
 735         __u16           urg_ptr;        /* Valid w/URG flags is set.    */
 736         __u32           ack_seq;        /* Sequence number ACK'd        */
 737 };
 738
 739 #define TCP_SKB_CB(__skb)       ((struct tcp_skb_cb *)&((__skb)->cb[0]))
 740
 741 #include <net/tcp_ecn.h>
 742
 743 /* Due to TSO, an SKB can be composed of multiple actual
 744  * packets.  To keep these tracked properly, we use this.
 745  */
 746 static inline int tcp_skb_pcount(const struct sk_buff *skb)
 747 {
 748         return skb_shinfo(skb)->tso_segs;
 749 }
 750
 751 /* This is valid iff tcp_skb_pcount() > 1. */
 752 static inline int tcp_skb_mss(const struct sk_buff *skb)
 753 {
 754         return skb_shinfo(skb)->tso_size;
 755 }
 756
 757 static inline void tcp_dec_pcount_approx(__u32 *count,
 758                                          const struct sk_buff *skb)
 759 {
 760         if (*count) {
 761                 *count -= tcp_skb_pcount(skb);
 762                 if ((int)*count < 0)
 763                         *count = 0;
 764         }
 765 }
 766
 767 static inline void tcp_packets_out_inc(struct sock *sk,
 768                                        struct tcp_sock *tp,
 769                                        const struct sk_buff *skb)
 770 {
 771         int orig = tp->packets_out;
 772
 773         tp->packets_out += tcp_skb_pcount(skb);
 774         if (!orig)
 775                 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
 776 }
 777
 778 static inline void tcp_packets_out_dec(struct tcp_sock *tp,
 779                                        const struct sk_buff *skb)
 780 {
 781         tp->packets_out -= tcp_skb_pcount(skb);
 782 }
 783
 784 /* Events passed to congestion control interface */
 785 enum tcp_ca_event {
 786         CA_EVENT_TX_START,      /* first transmit when no packets in flight */
 787         CA_EVENT_CWND_RESTART,  /* congestion window restart */
 788         CA_EVENT_COMPLETE_CWR,  /* end of congestion recovery */
 789         CA_EVENT_FRTO,          /* fast recovery timeout */
 790         CA_EVENT_LOSS,          /* loss timeout */
 791         CA_EVENT_FAST_ACK,      /* in sequence ack */
 792         CA_EVENT_SLOW_ACK,      /* other ack */
 793 };
 794
/*
 * Interface for adding new TCP congestion control handlers
 *
 * A handler fills in one struct tcp_congestion_ops.  Hooks marked
 * "(required)" are called without a NULL check by the inline helpers
 * in this header (e.g. ssthresh in __tcp_enter_cwr()); set_state and
 * cwnd_event are NULL-checked before use by tcp_set_ca_state() and
 * tcp_ca_event() respectively.
 */
#define TCP_CA_NAME_MAX 16      /* size of the name[] buffer below */
struct tcp_congestion_ops {
        struct list_head        list;

        /* initialize private data (optional) */
        void (*init)(struct tcp_sock *tp);
        /* cleanup private data  (optional) */
        void (*release)(struct tcp_sock *tp);

        /* return slow start threshold (required) */
        u32 (*ssthresh)(struct tcp_sock *tp);
        /* lower bound for congestion window (optional) */
        u32 (*min_cwnd)(struct tcp_sock *tp);
        /* do new cwnd calculation (required) */
        void (*cong_avoid)(struct tcp_sock *tp, u32 ack,
                           u32 rtt, u32 in_flight, int good_ack);
        /* round trip time sample per acked packet (optional) */
        void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt);
        /* call before changing ca_state (optional) */
        void (*set_state)(struct tcp_sock *tp, u8 new_state);
        /* call when cwnd event occurs (optional) */
        void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev);
        /* new value of cwnd after loss (optional) */
        u32  (*undo_cwnd)(struct tcp_sock *tp);
        /* hook for packet ack accounting (optional) */
        void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked);
        /* get info for tcp_diag (optional) */
        void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb);

        /* handler name, at most TCP_CA_NAME_MAX bytes of storage */
        char            name[TCP_CA_NAME_MAX];
        /* module providing this handler */
        struct module   *owner;
};
 830
/* Registration of congestion control handlers (defined elsewhere). */
extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

/* Per-socket and default handler selection (defined elsewhere). */
extern void tcp_init_congestion_control(struct tcp_sock *tp);
extern void tcp_cleanup_congestion_control(struct tcp_sock *tp);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name);

/* Handler tables and Reno helpers.  The tcp_reno_* prototypes have the
 * same signatures as the ssthresh, cong_avoid and min_cwnd hooks of
 * struct tcp_congestion_ops.
 */
extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct tcp_sock *tp);
extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack,
                                u32 rtt, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp);
extern struct tcp_congestion_ops tcp_reno;
 846
 847 static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state)
 848 {
 849         if (tp->ca_ops->set_state)
 850                 tp->ca_ops->set_state(tp, ca_state);
 851         tp->ca_state = ca_state;
 852 }
 853
 854 static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event)
 855 {
 856         if (tp->ca_ops->cwnd_event)
 857                 tp->ca_ops->cwnd_event(tp, event);
 858 }
 859
 860 /* This determines how many packets are "in the network" to the best
 861  * of our knowledge.  In many cases it is conservative, but where
 862  * detailed information is available from the receiver (via SACK
 863  * blocks etc.) we can make more aggressive calculations.
 864  *
 865  * Use this for decisions involving congestion control, use just
 866  * tp->packets_out to determine if the send queue is empty or not.
 867  *
 868  * Read this equation as:
 869  *
 870  *      "Packets sent once on transmission queue" MINUS
 871  *      "Packets left network, but not honestly ACKed yet" PLUS
 872  *      "Packets fast retransmitted"
 873  */
 874 static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 875 {
 876         return (tp->packets_out - tp->left_out + tp->retrans_out);
 877 }
 878
 879 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 880  * The exception is rate halving phase, when cwnd is decreasing towards
 881  * ssthresh.
 882  */
 883 static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
 884 {
 885         if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
 886                 return tp->snd_ssthresh;
 887         else
 888                 return max(tp->snd_ssthresh,
 889                            ((tp->snd_cwnd >> 1) +
 890                             (tp->snd_cwnd >> 2)));
 891 }
 892
 893 static inline void tcp_sync_left_out(struct tcp_sock *tp)
 894 {
 895         if (tp->rx_opt.sack_ok &&
 896             (tp->sacked_out >= tp->packets_out - tp->lost_out))
 897                 tp->sacked_out = tp->packets_out - tp->lost_out;
 898         tp->left_out = tp->sacked_out + tp->lost_out;
 899 }
 900
 901 /* Set slow start threshold and cwnd not falling to slow start */
 902 static inline void __tcp_enter_cwr(struct tcp_sock *tp)
 903 {
 904         tp->undo_marker = 0;
 905         tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
 906         tp->snd_cwnd = min(tp->snd_cwnd,
 907                            tcp_packets_in_flight(tp) + 1U);
 908         tp->snd_cwnd_cnt = 0;
 909         tp->high_seq = tp->snd_nxt;
 910         tp->snd_cwnd_stamp = tcp_time_stamp;
 911         TCP_ECN_queue_cwr(tp);
 912 }
 913
 914 static inline void tcp_enter_cwr(struct tcp_sock *tp)
 915 {
 916         tp->prior_ssthresh = 0;
 917         if (tp->ca_state < TCP_CA_CWR) {
 918                 __tcp_enter_cwr(tp);
 919                 tcp_set_ca_state(tp, TCP_CA_CWR);
 920         }
 921 }
 922
 923 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 924
 925 /* Slow start with delack produces 3 packets of burst, so that
 926  * it is safe "de facto".
 927  */
 928 static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
 929 {
 930         return 3;
 931 }
 932
 933 static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
 934                                            const struct sk_buff *skb)
 935 {
 936         if (skb->len < mss)
 937                 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
 938 }
 939
 940 static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
 941 {
 942         if (!tp->packets_out && !tp->pending)
 943                 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
 944 }
 945
 946 static __inline__ void tcp_push_pending_frames(struct sock *sk,
 947                                                struct tcp_sock *tp)
 948 {
 949         __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
 950 }
 951
 952 static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 953 {
 954         tp->snd_wl1 = seq;
 955 }
 956
 957 static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 958 {
 959         tp->snd_wl1 = seq;
 960 }
 961
 962 extern void tcp_destroy_sock(struct sock *sk);
 963
 964
 965 /*
 966  * Calculate(/check) TCP checksum
 967  */
 968 static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
 969                                    unsigned long saddr, unsigned long daddr,
 970                                    unsigned long base)
 971 {
 972         return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
 973 }
 974
 975 static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
 976 {
 977         return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
 978 }
 979
 980 static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
 981 {
 982         return skb->ip_summed != CHECKSUM_UNNECESSARY &&
 983                 __tcp_checksum_complete(skb);
 984 }
 985
 986 /* Prequeue for VJ style copy to user, combined with checksumming. */
 987
 988 static __inline__ void tcp_prequeue_init(struct tcp_sock *tp)
 989 {
 990         tp->ucopy.task = NULL;
 991         tp->ucopy.len = 0;
 992         tp->ucopy.memory = 0;
 993         skb_queue_head_init(&tp->ucopy.prequeue);
 994 }
 995
 996 /* Packet is added to VJ-style prequeue for processing in process
 997  * context, if a reader task is waiting. Apparently, this exciting
 998  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 999  * failed somewhere. Latency? Burstiness? Well, at least now we will
1000  * see, why it failed. 8)8)                               --ANK
1001  *
1002  * NOTE: is this not too big to inline?
1003  */
1004 static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1005 {
1006         struct tcp_sock *tp = tcp_sk(sk);
1007
1008         if (!sysctl_tcp_low_latency && tp->ucopy.task) {
1009                 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1010                 tp->ucopy.memory += skb->truesize;
1011                 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1012                         struct sk_buff *skb1;
1013
1014                         BUG_ON(sock_owned_by_user(sk));
1015
1016                         while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1017                                 sk->sk_backlog_rcv(sk, skb1);
1018                                 NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
1019                         }
1020
1021                         tp->ucopy.memory = 0;
1022                 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1023                         wake_up_interruptible(sk->sk_sleep);
1024                         if (!tcp_ack_scheduled(tp))
1025                                 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
1026                 }
1027                 return 1;
1028         }
1029         return 0;
1030 }
1031
1032
/* State tracing is compiled out: STATE_TRACE is explicitly undefined
 * here, so the table below and the debug print in tcp_set_state() are
 * only built if this #undef is replaced by a #define.
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
/* Printable state names, indexed by the numeric socket state. */
static const char *statename[]={
        "Unused","Established","Syn Sent","Syn Recv",
        "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
        "Close Wait","Last ACK","Listen","Closing"
};
#endif
1042
1043 static __inline__ void tcp_set_state(struct sock *sk, int state)
1044 {
1045         int oldstate = sk->sk_state;
1046
1047         switch (state) {
1048         case TCP_ESTABLISHED:
1049                 if (oldstate != TCP_ESTABLISHED)
1050                         TCP_INC_STATS(TCP_MIB_CURRESTAB);
1051                 break;
1052
1053         case TCP_CLOSE:
1054                 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1055                         TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1056
1057                 sk->sk_prot->unhash(sk);
1058                 if (inet_sk(sk)->bind_hash &&
1059                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1060                         inet_put_port(&tcp_hashinfo, sk);
1061                 /* fall through */
1062         default:
1063                 if (oldstate==TCP_ESTABLISHED)
1064                         TCP_DEC_STATS(TCP_MIB_CURRESTAB);
1065         }
1066
1067         /* Change state AFTER socket is unhashed to avoid closed
1068          * socket sitting in hash tables.
1069          */
1070         sk->sk_state = state;
1071
1072 #ifdef STATE_TRACE
1073         SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
1074 #endif
1075 }
1076
1077 static __inline__ void tcp_done(struct sock *sk)
1078 {
1079         tcp_set_state(sk, TCP_CLOSE);
1080         tcp_clear_xmit_timers(sk);
1081
1082         sk->sk_shutdown = SHUTDOWN_MASK;
1083
1084         if (!sock_flag(sk, SOCK_DEAD))
1085                 sk->sk_state_change(sk);
1086         else
1087                 tcp_destroy_sock(sk);
1088 }
1089
1090 static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
1091 {
1092         rx_opt->dsack = 0;
1093         rx_opt->eff_sacks = 0;
1094         rx_opt->num_sacks = 0;
1095 }
1096
1097 static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
1098 {
1099         if (tp->rx_opt.tstamp_ok) {
1100                 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1101                                           (TCPOPT_NOP << 16) |
1102                                           (TCPOPT_TIMESTAMP << 8) |
1103                                           TCPOLEN_TIMESTAMP);
1104                 *ptr++ = htonl(tstamp);
1105                 *ptr++ = htonl(tp->rx_opt.ts_recent);
1106         }
1107         if (tp->rx_opt.eff_sacks) {
1108                 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
1109                 int this_sack;
1110
1111                 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1112                                           (TCPOPT_NOP << 16) |
1113                                           (TCPOPT_SACK << 8) |
1114                                           (TCPOLEN_SACK_BASE +
1115                                            (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
1116                 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
1117                         *ptr++ = htonl(sp[this_sack].start_seq);
1118                         *ptr++ = htonl(sp[this_sack].end_seq);
1119                 }
1120                 if (tp->rx_opt.dsack) {
1121                         tp->rx_opt.dsack = 0;
1122                         tp->rx_opt.eff_sacks--;
1123                 }
1124         }
1125 }
1126
1127 /* Construct a tcp options header for a SYN or SYN_ACK packet.
1128  * If this is every changed make sure to change the definition of
1129  * MAX_SYN_SIZE to match the new maximum number of options that you
1130  * can generate.
1131  */
1132 static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
1133                                              int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
1134 {
1135         /* We always get an MSS option.
1136          * The option bytes which will be seen in normal data
1137          * packets should timestamps be used, must be in the MSS
1138          * advertised.  But we subtract them from tp->mss_cache so
1139          * that calculations in tcp_sendmsg are simpler etc.
1140          * So account for this fact here if necessary.  If we
1141          * don't do this correctly, as a receiver we won't
1142          * recognize data packets as being full sized when we
1143          * should, and thus we won't abide by the delayed ACK
1144          * rules correctly.
1145          * SACKs don't matter, we never delay an ACK when we
1146          * have any of those going out.
1147          */
1148         *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
1149         if (ts) {
1150                 if(sack)
1151                         *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
1152                                                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1153                 else
1154                         *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1155                                                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1156                 *ptr++ = htonl(tstamp);         /* TSVAL */
1157                 *ptr++ = htonl(ts_recent);      /* TSECR */
1158         } else if(sack)
1159                 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1160                                           (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
1161         if (offer_wscale)
1162                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
1163 }
1164
1165 /* Determine a window scaling and initial window to offer. */
1166 extern void tcp_select_initial_window(int __space, __u32 mss,
1167                                       __u32 *rcv_wnd, __u32 *window_clamp,
1168                                       int wscale_ok, __u8 *rcv_wscale);
1169
1170 static inline int tcp_win_from_space(int space)
1171 {
1172         return sysctl_tcp_adv_win_scale<=0 ?
1173                 (space>>(-sysctl_tcp_adv_win_scale)) :
1174                 space - (space>>sysctl_tcp_adv_win_scale);
1175 }
1176
1177 /* Note: caller must be prepared to deal with negative returns */
1178 static inline int tcp_space(const struct sock *sk)
1179 {
1180         return tcp_win_from_space(sk->sk_rcvbuf -
1181                                   atomic_read(&sk->sk_rmem_alloc));
1182 }
1183
1184 static inline int tcp_full_space(const struct sock *sk)
1185 {
1186         return tcp_win_from_space(sk->sk_rcvbuf);
1187 }
1188
1189 static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
1190                                          struct sock *child)
1191 {
1192         reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
1193 }
1194
1195 static inline void
1196 tcp_synq_removed(struct sock *sk, struct request_sock *req)
1197 {
1198         if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
1199                 tcp_delete_keepalive_timer(sk);
1200 }
1201
1202 static inline void tcp_synq_added(struct sock *sk)
1203 {
1204         if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
1205                 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1206 }
1207
1208 static inline int tcp_synq_len(struct sock *sk)
1209 {
1210         return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
1211 }
1212
1213 static inline int tcp_synq_young(struct sock *sk)
1214 {
1215         return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
1216 }
1217
1218 static inline int tcp_synq_is_full(struct sock *sk)
1219 {
1220         return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
1221 }
1222
1223 static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
1224                                    struct request_sock **prev)
1225 {
1226         reqsk_queue_unlink(&tp->accept_queue, req, prev);
1227 }
1228
/* Drop a pending request: unlink it from the listener's queue, update
 * the queue accounting, then free the request.
 */
static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
                                 struct request_sock **prev)
{
        struct tcp_sock *tp = tcp_sk(sk);

        tcp_synq_unlink(tp, req, prev);
        tcp_synq_removed(sk, req);
        reqsk_free(req);
}
1236
1237 static __inline__ void tcp_openreq_init(struct request_sock *req,
1238                                         struct tcp_options_received *rx_opt,
1239                                         struct sk_buff *skb)
1240 {
1241         struct inet_request_sock *ireq = inet_rsk(req);
1242
1243         req->rcv_wnd = 0;               /* So that tcp_send_synack() knows! */
1244         tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
1245         req->mss = rx_opt->mss_clamp;
1246         req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
1247         ireq->tstamp_ok = rx_opt->tstamp_ok;
1248         ireq->sack_ok = rx_opt->sack_ok;
1249         ireq->snd_wscale = rx_opt->snd_wscale;
1250         ireq->wscale_ok = rx_opt->wscale_ok;
1251         ireq->acked = 0;
1252         ireq->ecn_ok = 0;
1253         ireq->rmt_port = skb->h.th->source;
1254 }
1255
1256 extern void tcp_enter_memory_pressure(void);
1257
1258 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1259 {
1260         return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
1261 }
1262
1263 static inline int keepalive_time_when(const struct tcp_sock *tp)
1264 {
1265         return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1266 }
1267
1268 static inline int tcp_fin_time(const struct tcp_sock *tp)
1269 {
1270         int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
1271
1272         if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
1273                 fin_timeout = (tp->rto<<2) - (tp->rto>>1);
1274
1275         return fin_timeout;
1276 }
1277
1278 static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
1279 {
1280         if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
1281                 return 0;
1282         if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
1283                 return 0;
1284
1285         /* RST segments are not recommended to carry timestamp,
1286            and, if they do, it is recommended to ignore PAWS because
1287            "their cleanup function should take precedence over timestamps."
1288            Certainly, it is mistake. It is necessary to understand the reasons
1289            of this constraint to relax it: if peer reboots, clock may go
1290            out-of-sync and half-open connections will not be reset.
1291            Actually, the problem would be not existing if all
1292            the implementations followed draft about maintaining clock
1293            via reboots. Linux-2.2 DOES NOT!
1294
1295            However, we can relax time bounds for RST segments to MSL.
1296          */
1297         if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
1298                 return 0;
1299         return 1;
1300 }
1301
/* Timer sanity check hook; expands to an empty statement, so the sk
 * argument is never evaluated.
 */
#define TCP_CHECK_TIMER(sk) do { } while (0)
1303
1304 static inline int tcp_use_frto(const struct sock *sk)
1305 {
1306         const struct tcp_sock *tp = tcp_sk(sk);
1307
1308         /* F-RTO must be activated in sysctl and there must be some
1309          * unsent new data, and the advertised window should allow
1310          * sending it.
1311          */
1312         return (sysctl_tcp_frto && sk->sk_send_head &&
1313                 !after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
1314                        tp->snd_una + tp->snd_wnd));
1315 }
1316
1317 static inline void tcp_mib_init(void)
1318 {
1319         /* See RFC 2012 */
1320         TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
1321         TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
1322         TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
1323         TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
1324 }
1325
1326 /* /proc */
/* Stages recorded in struct tcp_iter_state while the /proc iterator
 * runs; values are consecutive, starting at zero.
 */
enum tcp_seq_states {
        TCP_SEQ_STATE_LISTENING = 0,
        TCP_SEQ_STATE_OPENREQ,
        TCP_SEQ_STATE_ESTABLISHED,
        TCP_SEQ_STATE_TIME_WAIT,
};
1333
/* Description handed to tcp_proc_register()/tcp_proc_unregister().
 * NOTE(review): field meanings below are inferred from names and
 * types only - confirm against the implementation.
 */
struct tcp_seq_afinfo {
        struct module           *owner;         /* owning module */
        char                    *name;          /* presumably the /proc entry name */
        sa_family_t             family;         /* address family */
        int                     (*seq_show) (struct seq_file *m, void *v);      /* seq_file show callback */
        struct file_operations  *seq_fops;      /* file operations for the entry */
};
1341
/* Iteration state for the /proc TCP listing.
 * NOTE(review): field meanings below are inferred from names and
 * types only - confirm against the implementation.
 */
struct tcp_iter_state {
        sa_family_t             family;         /* address family being listed */
        enum tcp_seq_states     state;          /* current iteration stage */
        struct sock             *syn_wait_sk;   /* presumably the listener whose requests are being walked */
        int                     bucket, sbucket, num, uid;
        struct seq_operations   seq_ops;        /* embedded seq_file operations */
};
1349
1350 extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
1351 extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
1352
1353 #endif  /* _TCP_H */