net/ipv6/tcp_ipv6.c

   1 /*
   2  *      TCP over IPv6
   3  *      Linux INET6 implementation
   4  *
   5  *      Authors:
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
   7  *
   8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
   9  *
  10  *      Based on:
  11  *      linux/net/ipv4/tcp.c
  12  *      linux/net/ipv4/tcp_input.c
  13  *      linux/net/ipv4/tcp_output.c
  14  *
  15  *      Fixes:
  16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
  17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  19  *                                      a single port at the same time.
  20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  */
  27
  28 #include <linux/module.h>
  29 #include <linux/config.h>
  30 #include <linux/errno.h>
  31 #include <linux/types.h>
  32 #include <linux/socket.h>
  33 #include <linux/sockios.h>
  34 #include <linux/net.h>
  35 #include <linux/jiffies.h>
  36 #include <linux/in.h>
  37 #include <linux/in6.h>
  38 #include <linux/netdevice.h>
  39 #include <linux/init.h>
  40 #include <linux/jhash.h>
  41 #include <linux/ipsec.h>
  42 #include <linux/times.h>
  43
  44 #include <linux/ipv6.h>
  45 #include <linux/icmpv6.h>
  46 #include <linux/random.h>
  47
  48 #include <net/tcp.h>
  49 #include <net/ndisc.h>
  50 #include <net/ipv6.h>
  51 #include <net/transp_v6.h>
  52 #include <net/addrconf.h>
  53 #include <net/ip6_route.h>
  54 #include <net/ip6_checksum.h>
  55 #include <net/inet_ecn.h>
  56 #include <net/protocol.h>
  57 #include <net/xfrm.h>
  58 #include <net/addrconf.h>
  59 #include <net/snmp.h>
  60 #include <net/dsfield.h>
  61
  62 #include <asm/uaccess.h>
  63
  64 #include <linux/proc_fs.h>
  65 #include <linux/seq_file.h>
  66
  67 static void     tcp_v6_send_reset(struct sk_buff *skb);
  68 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
  69 static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
  70                                   struct sk_buff *skb);
  71
  72 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
  73 static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
  74
  75 static struct tcp_func ipv6_mapped;
  76 static struct tcp_func ipv6_specific;
  77
  78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
  79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
  80                                     struct in6_addr *faddr, u16 fport)
  81 {
  82         int hashent = (lport ^ fport);
  83
  84         hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
  85         hashent ^= hashent>>16;
  86         hashent ^= hashent>>8;
  87         return (hashent & (tcp_hashinfo.ehash_size - 1));
  88 }
  89
  90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
  91 {
  92         struct inet_sock *inet = inet_sk(sk);
  93         struct ipv6_pinfo *np = inet6_sk(sk);
  94         struct in6_addr *laddr = &np->rcv_saddr;
  95         struct in6_addr *faddr = &np->daddr;
  96         __u16 lport = inet->num;
  97         __u16 fport = inet->dport;
  98         return tcp_v6_hashfn(laddr, lport, faddr, fport);
  99 }
 100
 101 static inline int tcp_v6_bind_conflict(const struct sock *sk,
 102                                        const struct inet_bind_bucket *tb)
 103 {
 104         const struct sock *sk2;
 105         const struct hlist_node *node;
 106
 107         /* We must walk the whole port owner list in this case. -DaveM */
 108         sk_for_each_bound(sk2, node, &tb->owners) {
 109                 if (sk != sk2 &&
 110                     (!sk->sk_bound_dev_if ||
 111                      !sk2->sk_bound_dev_if ||
 112                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
 113                     (!sk->sk_reuse || !sk2->sk_reuse ||
 114                      sk2->sk_state == TCP_LISTEN) &&
 115                      ipv6_rcv_saddr_equal(sk, sk2))
 116                         break;
 117         }
 118
 119         return node != NULL;
 120 }
 121
 122 /* Grrr, addr_type already calculated by caller, but I don't want
 123  * to add some silly "cookie" argument to this method just for that.
 124  * But it doesn't matter, the recalculation is in the rarest path
 125  * this function ever takes.
 126  */
 127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 128 {
 129         struct inet_bind_hashbucket *head;
 130         struct inet_bind_bucket *tb;
 131         struct hlist_node *node;
 132         int ret;
 133
 134         local_bh_disable();
 135         if (snum == 0) {
 136                 int low = sysctl_local_port_range[0];
 137                 int high = sysctl_local_port_range[1];
 138                 int remaining = (high - low) + 1;
 139                 int rover;
 140
 141                 spin_lock(&tcp_hashinfo.portalloc_lock);
 142                 if (tcp_hashinfo.port_rover < low)
 143                         rover = low;
 144                 else
 145                         rover = tcp_hashinfo.port_rover;
 146                 do {    rover++;
 147                         if (rover > high)
 148                                 rover = low;
 149                         head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
 150                         spin_lock(&head->lock);
 151                         inet_bind_bucket_for_each(tb, node, &head->chain)
 152                                 if (tb->port == rover)
 153                                         goto next;
 154                         break;
 155                 next:
 156                         spin_unlock(&head->lock);
 157                 } while (--remaining > 0);
 158                 tcp_hashinfo.port_rover = rover;
 159                 spin_unlock(&tcp_hashinfo.portalloc_lock);
 160
 161                 /* Exhausted local port range during search?  It is not
 162                  * possible for us to be holding one of the bind hash
 163                  * locks if this test triggers, because if 'remaining'
 164                  * drops to zero, we broke out of the do/while loop at
 165                  * the top level, not from the 'break;' statement.
 166                  */
 167                 ret = 1;
 168                 if (unlikely(remaining <= 0))
 169                         goto fail;
 170
 171                 /* OK, here is the one we will use. */
 172                 snum = rover;
 173         } else {
 174                 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
 175                 spin_lock(&head->lock);
 176                 inet_bind_bucket_for_each(tb, node, &head->chain)
 177                         if (tb->port == snum)
 178                                 goto tb_found;
 179         }
 180         tb = NULL;
 181         goto tb_not_found;
 182 tb_found:
 183         if (tb && !hlist_empty(&tb->owners)) {
 184                 if (tb->fastreuse > 0 && sk->sk_reuse &&
 185                     sk->sk_state != TCP_LISTEN) {
 186                         goto success;
 187                 } else {
 188                         ret = 1;
 189                         if (tcp_v6_bind_conflict(sk, tb))
 190                                 goto fail_unlock;
 191                 }
 192         }
 193 tb_not_found:
 194         ret = 1;
 195         if (tb == NULL) {
 196                 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
 197                 if (tb == NULL)
 198                         goto fail_unlock;
 199         }
 200         if (hlist_empty(&tb->owners)) {
 201                 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
 202                         tb->fastreuse = 1;
 203                 else
 204                         tb->fastreuse = 0;
 205         } else if (tb->fastreuse &&
 206                    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
 207                 tb->fastreuse = 0;
 208
 209 success:
 210         if (!inet_sk(sk)->bind_hash)
 211                 inet_bind_hash(sk, tb, snum);
 212         BUG_TRAP(inet_sk(sk)->bind_hash == tb);
 213         ret = 0;
 214
 215 fail_unlock:
 216         spin_unlock(&head->lock);
 217 fail:
 218         local_bh_enable();
 219         return ret;
 220 }
 221
 222 static __inline__ void __tcp_v6_hash(struct sock *sk)
 223 {
 224         struct hlist_head *list;
 225         rwlock_t *lock;
 226
 227         BUG_TRAP(sk_unhashed(sk));
 228
 229         if (sk->sk_state == TCP_LISTEN) {
 230                 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
 231                 lock = &tcp_hashinfo.lhash_lock;
 232                 inet_listen_wlock(&tcp_hashinfo);
 233         } else {
 234                 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
 235                 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
 236                 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
 237                 write_lock(lock);
 238         }
 239
 240         __sk_add_node(sk, list);
 241         sock_prot_inc_use(sk->sk_prot);
 242         write_unlock(lock);
 243 }
 244
 245
 246 static void tcp_v6_hash(struct sock *sk)
 247 {
 248         if (sk->sk_state != TCP_CLOSE) {
 249                 struct tcp_sock *tp = tcp_sk(sk);
 250
 251                 if (tp->af_specific == &ipv6_mapped) {
 252                         tcp_prot.hash(sk);
 253                         return;
 254                 }
 255                 local_bh_disable();
 256                 __tcp_v6_hash(sk);
 257                 local_bh_enable();
 258         }
 259 }
 260
 261 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
 262 {
 263         struct sock *sk;
 264         struct hlist_node *node;
 265         struct sock *result = NULL;
 266         int score, hiscore;
 267
 268         hiscore=0;
 269         read_lock(&tcp_hashinfo.lhash_lock);
 270         sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
 271                 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
 272                         struct ipv6_pinfo *np = inet6_sk(sk);
 273
 274                         score = 1;
 275                         if (!ipv6_addr_any(&np->rcv_saddr)) {
 276                                 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
 277                                         continue;
 278                                 score++;
 279                         }
 280                         if (sk->sk_bound_dev_if) {
 281                                 if (sk->sk_bound_dev_if != dif)
 282                                         continue;
 283                                 score++;
 284                         }
 285                         if (score == 3) {
 286                                 result = sk;
 287                                 break;
 288                         }
 289                         if (score > hiscore) {
 290                                 hiscore = score;
 291                                 result = sk;
 292                         }
 293                 }
 294         }
 295         if (result)
 296                 sock_hold(result);
 297         read_unlock(&tcp_hashinfo.lhash_lock);
 298         return result;
 299 }
 300
 301 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 302  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 303  *
 304  * The sockhash lock must be held as a reader here.
 305  */
 306
 307 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
 308                                                        struct in6_addr *daddr, u16 hnum,
 309                                                        int dif)
 310 {
 311         struct sock *sk;
 312         const struct hlist_node *node;
 313         const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
 314         /* Optimize here for direct hit, only listening connections can
 315          * have wildcards anyways.
 316          */
 317         const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
 318         struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
 319
 320         read_lock(&head->lock);
 321         sk_for_each(sk, node, &head->chain) {
 322                 /* For IPV6 do the cheaper port and family tests first. */
 323                 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
 324                         goto hit; /* You sunk my battleship! */
 325         }
 326         /* Must check for a TIME_WAIT'er before going to listener hash. */
 327         sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
 328                 const struct inet_timewait_sock *tw = inet_twsk(sk);
 329
 330                 if(*((__u32 *)&(tw->tw_dport))  == ports        &&
 331                    sk->sk_family                == PF_INET6) {
 332                         const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
 333
 334                         if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)        &&
 335                             ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)    &&
 336                             (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
 337                                 goto hit;
 338                 }
 339         }
 340         read_unlock(&head->lock);
 341         return NULL;
 342
 343 hit:
 344         sock_hold(sk);
 345         read_unlock(&head->lock);
 346         return sk;
 347 }
 348
 349
 350 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
 351                                            struct in6_addr *daddr, u16 hnum,
 352                                            int dif)
 353 {
 354         struct sock *sk;
 355
 356         sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
 357
 358         if (sk)
 359                 return sk;
 360
 361         return tcp_v6_lookup_listener(daddr, hnum, dif);
 362 }
 363
 364 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
 365                                   struct in6_addr *daddr, u16 dport,
 366                                   int dif)
 367 {
 368         struct sock *sk;
 369
 370         local_bh_disable();
 371         sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
 372         local_bh_enable();
 373
 374         return sk;
 375 }
 376
 377 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
 378
 379
 380 /*
 381  * Open request hash tables.
 382  */
 383
 384 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
 385 {
 386         u32 a, b, c;
 387
 388         a = raddr->s6_addr32[0];
 389         b = raddr->s6_addr32[1];
 390         c = raddr->s6_addr32[2];
 391
 392         a += JHASH_GOLDEN_RATIO;
 393         b += JHASH_GOLDEN_RATIO;
 394         c += rnd;
 395         __jhash_mix(a, b, c);
 396
 397         a += raddr->s6_addr32[3];
 398         b += (u32) rport;
 399         __jhash_mix(a, b, c);
 400
 401         return c & (TCP_SYNQ_HSIZE - 1);
 402 }
 403
 404 static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
 405                                               struct request_sock ***prevp,
 406                                               __u16 rport,
 407                                               struct in6_addr *raddr,
 408                                               struct in6_addr *laddr,
 409                                               int iif)
 410 {
 411         struct listen_sock *lopt = tp->accept_queue.listen_opt;
 412         struct request_sock *req, **prev;
 413
 414         for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
 415              (req = *prev) != NULL;
 416              prev = &req->dl_next) {
 417                 const struct tcp6_request_sock *treq = tcp6_rsk(req);
 418
 419                 if (inet_rsk(req)->rmt_port == rport &&
 420                     req->rsk_ops->family == AF_INET6 &&
 421                     ipv6_addr_equal(&treq->rmt_addr, raddr) &&
 422                     ipv6_addr_equal(&treq->loc_addr, laddr) &&
 423                     (!treq->iif || treq->iif == iif)) {
 424                         BUG_TRAP(req->sk == NULL);
 425                         *prevp = prev;
 426                         return req;
 427                 }
 428         }
 429
 430         return NULL;
 431 }
 432
 433 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
 434                                    struct in6_addr *saddr,
 435                                    struct in6_addr *daddr,
 436                                    unsigned long base)
 437 {
 438         return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
 439 }
 440
 441 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
 442 {
 443         if (skb->protocol == htons(ETH_P_IPV6)) {
 444                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
 445                                                     skb->nh.ipv6h->saddr.s6_addr32,
 446                                                     skb->h.th->dest,
 447                                                     skb->h.th->source);
 448         } else {
 449                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
 450                                                   skb->nh.iph->saddr,
 451                                                   skb->h.th->dest,
 452                                                   skb->h.th->source);
 453         }
 454 }
 455
 456 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
 457                                       struct inet_timewait_sock **twp)
 458 {
 459         struct inet_sock *inet = inet_sk(sk);
 460         struct ipv6_pinfo *np = inet6_sk(sk);
 461         struct in6_addr *daddr = &np->rcv_saddr;
 462         struct in6_addr *saddr = &np->daddr;
 463         int dif = sk->sk_bound_dev_if;
 464         const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
 465         const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
 466         struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
 467         struct sock *sk2;
 468         const struct hlist_node *node;
 469         struct inet_timewait_sock *tw;
 470
 471         write_lock(&head->lock);
 472
 473         /* Check TIME-WAIT sockets first. */
 474         sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
 475                 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
 476
 477                 tw = inet_twsk(sk2);
 478
 479                 if(*((__u32 *)&(tw->tw_dport))  == ports        &&
 480                    sk2->sk_family               == PF_INET6     &&
 481                    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
 482                    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)     &&
 483                    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
 484                         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
 485                         struct tcp_sock *tp = tcp_sk(sk);
 486
 487                         if (tcptw->tw_ts_recent_stamp &&
 488                             (!twp ||
 489                              (sysctl_tcp_tw_reuse &&
 490                               xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
 491                                 /* See comment in tcp_ipv4.c */
 492                                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 493                                 if (!tp->write_seq)
 494                                         tp->write_seq = 1;
 495                                 tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 496                                 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 497                                 sock_hold(sk2);
 498                                 goto unique;
 499                         } else
 500                                 goto not_unique;
 501                 }
 502         }
 503         tw = NULL;
 504
 505         /* And established part... */
 506         sk_for_each(sk2, node, &head->chain) {
 507                 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
 508                         goto not_unique;
 509         }
 510
 511 unique:
 512         BUG_TRAP(sk_unhashed(sk));
 513         __sk_add_node(sk, &head->chain);
 514         sk->sk_hashent = hash;
 515         sock_prot_inc_use(sk->sk_prot);
 516         write_unlock(&head->lock);
 517
 518         if (twp) {
 519                 *twp = tw;
 520                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
 521         } else if (tw) {
 522                 /* Silly. Should hash-dance instead... */
 523                 tcp_tw_deschedule(tw);
 524                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
 525
 526                 inet_twsk_put(tw);
 527         }
 528         return 0;
 529
 530 not_unique:
 531         write_unlock(&head->lock);
 532         return -EADDRNOTAVAIL;
 533 }
 534
 535 static inline u32 tcpv6_port_offset(const struct sock *sk)
 536 {
 537         const struct inet_sock *inet = inet_sk(sk);
 538         const struct ipv6_pinfo *np = inet6_sk(sk);
 539
 540         return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
 541                                            np->daddr.s6_addr32,
 542                                            inet->dport);
 543 }
 544
 545 static int tcp_v6_hash_connect(struct sock *sk)
 546 {
 547         unsigned short snum = inet_sk(sk)->num;
 548         struct inet_bind_hashbucket *head;
 549         struct inet_bind_bucket *tb;
 550         int ret;
 551
 552         if (!snum) {
 553                 int low = sysctl_local_port_range[0];
 554                 int high = sysctl_local_port_range[1];
 555                 int range = high - low;
 556                 int i;
 557                 int port;
 558                 static u32 hint;
 559                 u32 offset = hint + tcpv6_port_offset(sk);
 560                 struct hlist_node *node;
 561                 struct inet_timewait_sock *tw = NULL;
 562
 563                 local_bh_disable();
 564                 for (i = 1; i <= range; i++) {
 565                         port = low + (i + offset) % range;
 566                         head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
 567                         spin_lock(&head->lock);
 568
 569                         /* Does not bother with rcv_saddr checks,
 570                          * because the established check is already
 571                          * unique enough.
 572                          */
 573                         inet_bind_bucket_for_each(tb, node, &head->chain) {
 574                                 if (tb->port == port) {
 575                                         BUG_TRAP(!hlist_empty(&tb->owners));
 576                                         if (tb->fastreuse >= 0)
 577                                                 goto next_port;
 578                                         if (!__tcp_v6_check_established(sk,
 579                                                                         port,
 580                                                                         &tw))
 581                                                 goto ok;
 582                                         goto next_port;
 583                                 }
 584                         }
 585
 586                         tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
 587                         if (!tb) {
 588                                 spin_unlock(&head->lock);
 589                                 break;
 590                         }
 591                         tb->fastreuse = -1;
 592                         goto ok;
 593
 594                 next_port:
 595                         spin_unlock(&head->lock);
 596                 }
 597                 local_bh_enable();
 598
 599                 return -EADDRNOTAVAIL;
 600
 601 ok:
 602                 hint += i;
 603
 604                 /* Head lock still held and bh's disabled */
 605                 inet_bind_hash(sk, tb, port);
 606                 if (sk_unhashed(sk)) {
 607                         inet_sk(sk)->sport = htons(port);
 608                         __tcp_v6_hash(sk);
 609                 }
 610                 spin_unlock(&head->lock);
 611
 612                 if (tw) {
 613                         tcp_tw_deschedule(tw);
 614                         inet_twsk_put(tw);
 615                 }
 616
 617                 ret = 0;
 618                 goto out;
 619         }
 620
 621         head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
 622         tb   = inet_sk(sk)->bind_hash;
 623         spin_lock_bh(&head->lock);
 624
 625         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
 626                 __tcp_v6_hash(sk);
 627                 spin_unlock_bh(&head->lock);
 628                 return 0;
 629         } else {
 630                 spin_unlock(&head->lock);
 631                 /* No definite answer... Walk to established hash table */
 632                 ret = __tcp_v6_check_established(sk, snum, NULL);
 633 out:
 634                 local_bh_enable();
 635                 return ret;
 636         }
 637 }
 638
 639 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
 640 {
 641         return IP6CB(skb)->iif;
 642 }
 643
 644 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 645                           int addr_len)
 646 {
 647         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
 648         struct inet_sock *inet = inet_sk(sk);
 649         struct ipv6_pinfo *np = inet6_sk(sk);
 650         struct tcp_sock *tp = tcp_sk(sk);
 651         struct in6_addr *saddr = NULL, *final_p = NULL, final;
 652         struct flowi fl;
 653         struct dst_entry *dst;
 654         int addr_type;
 655         int err;
 656
 657         if (addr_len < SIN6_LEN_RFC2133)
 658                 return -EINVAL;
 659
 660         if (usin->sin6_family != AF_INET6)
 661                 return(-EAFNOSUPPORT);
 662
 663         memset(&fl, 0, sizeof(fl));
 664
 665         if (np->sndflow) {
 666                 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 667                 IP6_ECN_flow_init(fl.fl6_flowlabel);
 668                 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
 669                         struct ip6_flowlabel *flowlabel;
 670                         flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
 671                         if (flowlabel == NULL)
 672                                 return -EINVAL;
 673                         ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
 674                         fl6_sock_release(flowlabel);
 675                 }
 676         }
 677
 678         /*
 679          *      connect() to INADDR_ANY means loopback (BSD'ism).
 680          */
 681
 682         if(ipv6_addr_any(&usin->sin6_addr))
 683                 usin->sin6_addr.s6_addr[15] = 0x1;
 684
 685         addr_type = ipv6_addr_type(&usin->sin6_addr);
 686
 687         if(addr_type & IPV6_ADDR_MULTICAST)
 688                 return -ENETUNREACH;
 689
 690         if (addr_type&IPV6_ADDR_LINKLOCAL) {
 691                 if (addr_len >= sizeof(struct sockaddr_in6) &&
 692                     usin->sin6_scope_id) {
 693                         /* If interface is set while binding, indices
 694                          * must coincide.
 695                          */
 696                         if (sk->sk_bound_dev_if &&
 697                             sk->sk_bound_dev_if != usin->sin6_scope_id)
 698                                 return -EINVAL;
 699
 700                         sk->sk_bound_dev_if = usin->sin6_scope_id;
 701                 }
 702
 703                 /* Connect to link-local address requires an interface */
 704                 if (!sk->sk_bound_dev_if)
 705                         return -EINVAL;
 706         }
 707
 708         if (tp->rx_opt.ts_recent_stamp &&
 709             !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
 710                 tp->rx_opt.ts_recent = 0;
 711                 tp->rx_opt.ts_recent_stamp = 0;
 712                 tp->write_seq = 0;
 713         }
 714
 715         ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
 716         np->flow_label = fl.fl6_flowlabel;
 717
 718         /*
 719          *      TCP over IPv4
 720          */
 721
 722         if (addr_type == IPV6_ADDR_MAPPED) {
 723                 u32 exthdrlen = tp->ext_header_len;
 724                 struct sockaddr_in sin;
 725
 726                 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
 727
 728                 if (__ipv6_only_sock(sk))
 729                         return -ENETUNREACH;
 730
 731                 sin.sin_family = AF_INET;
 732                 sin.sin_port = usin->sin6_port;
 733                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 734
 735                 tp->af_specific = &ipv6_mapped;
 736                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
 737
 738                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 739
 740                 if (err) {
 741                         tp->ext_header_len = exthdrlen;
 742                         tp->af_specific = &ipv6_specific;
 743                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
 744                         goto failure;
 745                 } else {
 746                         ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
 747                                       inet->saddr);
 748                         ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
 749                                       inet->rcv_saddr);
 750                 }
 751
 752                 return err;
 753         }
 754
 755         if (!ipv6_addr_any(&np->rcv_saddr))
 756                 saddr = &np->rcv_saddr;
 757
 758         fl.proto = IPPROTO_TCP;
 759         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 760         ipv6_addr_copy(&fl.fl6_src,
 761                        (saddr ? saddr : &np->saddr));
 762         fl.oif = sk->sk_bound_dev_if;
 763         fl.fl_ip_dport = usin->sin6_port;
 764         fl.fl_ip_sport = inet->sport;
 765
 766         if (np->opt && np->opt->srcrt) {
 767                 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
 768                 ipv6_addr_copy(&final, &fl.fl6_dst);
 769                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 770                 final_p = &final;
 771         }
 772
 773         err = ip6_dst_lookup(sk, &dst, &fl);
 774         if (err)
 775                 goto failure;
 776         if (final_p)
 777                 ipv6_addr_copy(&fl.fl6_dst, final_p);
 778
 779         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
 780                 dst_release(dst);
 781                 goto failure;
 782         }
 783
 784         if (saddr == NULL) {
 785                 saddr = &fl.fl6_src;
 786                 ipv6_addr_copy(&np->rcv_saddr, saddr);
 787         }
 788
 789         /* set the source address */
 790         ipv6_addr_copy(&np->saddr, saddr);
 791         inet->rcv_saddr = LOOPBACK4_IPV6;
 792
 793         ip6_dst_store(sk, dst, NULL);
 794         sk->sk_route_caps = dst->dev->features &
 795                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
 796
 797         tp->ext_header_len = 0;
 798         if (np->opt)
 799                 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
 800
 801         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 802
 803         inet->dport = usin->sin6_port;
 804
 805         tcp_set_state(sk, TCP_SYN_SENT);
 806         err = tcp_v6_hash_connect(sk);
 807         if (err)
 808                 goto late_failure;
 809
 810         if (!tp->write_seq)
 811                 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
 812                                                              np->daddr.s6_addr32,
 813                                                              inet->sport,
 814                                                              inet->dport);
 815
 816         err = tcp_connect(sk);
 817         if (err)
 818                 goto late_failure;
 819
 820         return 0;
 821
 822 late_failure:
 823         tcp_set_state(sk, TCP_CLOSE);
 824         __sk_dst_reset(sk);
 825 failure:
 826         inet->dport = 0;
 827         sk->sk_route_caps = 0;
 828         return err;
 829 }
 830
     /*
      * ICMPv6 error handler for TCP.
      *
      * An IPv6 header is read at skb->data and a TCP header at
      * skb->data + offset; their addresses and ports select the socket the
      * error is about.  ICMPV6_PKT_TOOBIG resynchronises the MSS with the
      * route MTU.  Every other type/code pair is translated by
      * icmpv6_err_convert() and then handled according to the socket state.
      *
      * "opt" and "info" are not referenced in this function.
      */
 831 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 832                 int type, int code, int offset, __u32 info)
 833 {
 834         struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
 835         struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
 836         struct ipv6_pinfo *np;
 837         struct sock *sk;
 838         int err;
 839         struct tcp_sock *tp;
 840         __u32 seq;
 841
 842         sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
 843
 844         if (sk == NULL) {
 845                 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 846                 return;
 847         }
 848
             /* A TIME_WAIT hit is released with inet_twsk_put(), not sock_put(). */
 849         if (sk->sk_state == TCP_TIME_WAIT) {
 850                 inet_twsk_put((struct inet_timewait_sock *)sk);
 851                 return;
 852         }
 853
 854         bh_lock_sock(sk);
             /*
              * Only a statistic is bumped here; the branches below test
              * sock_owned_by_user() again before acting.
              */
 855         if (sock_owned_by_user(sk))
 856                 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
 857
 858         if (sk->sk_state == TCP_CLOSE)
 859                 goto out;
 860
 861         tp = tcp_sk(sk);
 862         seq = ntohl(th->seq);
             /* Outside LISTEN the quoted sequence number must pass between(). */
 863         if (sk->sk_state != TCP_LISTEN &&
 864             !between(seq, tp->snd_una, tp->snd_nxt)) {
 865                 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 866                 goto out;
 867         }
 868
 869         np = inet6_sk(sk);
 870
 871         if (type == ICMPV6_PKT_TOOBIG) {
 872                 struct dst_entry *dst = NULL;
 873
 874                 if (sock_owned_by_user(sk))
 875                         goto out;
 876                 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
 877                         goto out;
 878
 879                 /* icmp should have updated the destination cache entry */
 880                 dst = __sk_dst_check(sk, np->dst_cookie);
 881
 882                 if (dst == NULL) {
 883                         struct inet_sock *inet = inet_sk(sk);
 884                         struct flowi fl;
 885
 886                         /* BUGGG_FUTURE: Again, it is not clear how
 887                            to handle rthdr case. Ignore this complexity
 888                            for now.
 889                          */
 890                         memset(&fl, 0, sizeof(fl));
 891                         fl.proto = IPPROTO_TCP;
 892                         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 893                         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 894                         fl.oif = sk->sk_bound_dev_if;
 895                         fl.fl_ip_dport = inet->dport;
 896                         fl.fl_ip_sport = inet->sport;
 897
 898                         if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
 899                                 sk->sk_err_soft = -err;
 900                                 goto out;
 901                         }
 902
                             /*
                              * NOTE(review): dst is not released on this
                              * failure path, whereas tcp_v6_connect() calls
                              * dst_release() after a failed xfrm_lookup() --
                              * confirm which side owns the reference.
                              */
 903                         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
 904                                 sk->sk_err_soft = -err;
 905                                 goto out;
 906                         }
 907
 908                 } else
 909                         dst_hold(dst);
 910
                     /* Act only when the route MTU is below tp->pmtu_cookie. */
 911                 if (tp->pmtu_cookie > dst_mtu(dst)) {
 912                         tcp_sync_mss(sk, dst_mtu(dst));
 913                         tcp_simple_retransmit(sk);
 914                 } /* else let the usual retransmit timer handle it */
 915                 dst_release(dst);
 916                 goto out;
 917         }
 918
             /* Translate the ICMPv6 type/code pair into err. */
 919         icmpv6_err_convert(type, code, &err);
 920
 921         /* Might be for a request_sock */
 922         switch (sk->sk_state) {
 923                 struct request_sock *req, **prev;
 924         case TCP_LISTEN:
 925                 if (sock_owned_by_user(sk))
 926                         goto out;
 927
 928                 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
 929                                         &hdr->saddr, tcp_v6_iif(skb));
 930                 if (!req)
 931                         goto out;
 932
 933                 /* ICMPs are not backlogged, hence we cannot get
 934                  * an established socket here.
 935                  */
 936                 BUG_TRAP(req->sk == NULL);
 937
                     /* The error must quote exactly the ISN sent for this request. */
 938                 if (seq != tcp_rsk(req)->snt_isn) {
 939                         NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 940                         goto out;
 941                 }
 942
 943                 tcp_synq_drop(sk, req, prev);
 944                 goto out;
 945
 946         case TCP_SYN_SENT:
 947         case TCP_SYN_RECV:  /* Cannot happen.
 948                                It can, if SYNs are crossed. --ANK */
 949                 if (!sock_owned_by_user(sk)) {
 950                         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 951                         sk->sk_err = err;
 952                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
 953
 954                         tcp_done(sk);
 955                 } else
 956                         sk->sk_err_soft = err;
 957                 goto out;
 958         }
 959
             /*
              * Any other state: a hard error is reported only when the socket
              * is not user-locked and np->recverr is set; otherwise the error
              * is kept as a soft error.
              */
 960         if (!sock_owned_by_user(sk) && np->recverr) {
 961                 sk->sk_err = err;
 962                 sk->sk_error_report(sk);
 963         } else
 964                 sk->sk_err_soft = err;
 965
 966 out:
             /* Undo bh_lock_sock() and drop a reference on sk. */
 967         bh_unlock_sock(sk);
 968         sock_put(sk);
 969 }
 970
 971
     /*
      * Build and transmit a SYN-ACK for the connection request "req".
      *
      * When the caller passes dst == NULL a route is looked up here from the
      * addresses and ports stored in the request.  In every case dst is
      * handed to dst_release() before returning.
      *
      * Returns the ip6_xmit() result (NET_XMIT_CN is mapped to 0), or the
      * error from ip6_dst_lookup()/xfrm_lookup().  When tcp_make_synack()
      * yields no skb, err is returned as last set: -1 if dst was supplied
      * by the caller.
      * NOTE(review): after a successful local lookup that value is the
      * non-negative xfrm_lookup() result, so a failed tcp_make_synack() is
      * not reported as an error on that path -- confirm this is intended.
      */
 972 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 973                               struct dst_entry *dst)
 974 {
 975         struct tcp6_request_sock *treq = tcp6_rsk(req);
 976         struct ipv6_pinfo *np = inet6_sk(sk);
 977         struct sk_buff * skb;
 978         struct ipv6_txoptions *opt = NULL;
 979         struct in6_addr * final_p = NULL, final;
 980         struct flowi fl;
 981         int err = -1;
 982
             /* Describe the reply flow: remote address/port of the request first. */
 983         memset(&fl, 0, sizeof(fl));
 984         fl.proto = IPPROTO_TCP;
 985         ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 986         ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 987         fl.fl6_flowlabel = 0;
 988         fl.oif = treq->iif;
 989         fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 990         fl.fl_ip_sport = inet_sk(sk)->sport;
 991
 992         if (dst == NULL) {
                     /*
                      * Use the listener's own tx options if it has any.
                      * Otherwise, when np->rxopt.bits.srcrt == 2 and the SYN
                      * was kept in treq->pktopts, build options by inverting
                      * the routing header recorded for that SYN.
                      */
 993                 opt = np->opt;
 994                 if (opt == NULL &&
 995                     np->rxopt.bits.srcrt == 2 &&
 996                     treq->pktopts) {
 997                         struct sk_buff *pktopts = treq->pktopts;
 998                         struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 999                         if (rxopt->srcrt)
1000                                 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
1001                 }
1002
                     /*
                      * With a routing header the route lookup targets
                      * rt0->addr; the real destination is saved in "final"
                      * and restored after ip6_dst_lookup().
                      */
1003                 if (opt && opt->srcrt) {
1004                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1005                         ipv6_addr_copy(&final, &fl.fl6_dst);
1006                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1007                         final_p = &final;
1008                 }
1009
1010                 err = ip6_dst_lookup(sk, &dst, &fl);
1011                 if (err)
1012                         goto done;
1013                 if (final_p)
1014                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1015                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1016                         goto done;
1017         }
1018
1019         skb = tcp_make_synack(sk, dst, req);
1020         if (skb) {
1021                 struct tcphdr *th = skb->h.th;
1022
                     /* Checksum over the request's local/remote addresses. */
1023                 th->check = tcp_v6_check(th, skb->len,
1024                                          &treq->loc_addr, &treq->rmt_addr,
1025                                          csum_partial((char *)th, skb->len, skb->csum));
1026
1027                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1028                 err = ip6_xmit(sk, skb, &fl, opt, 0);
                     /* NET_XMIT_CN is not treated as a failure. */
1029                 if (err == NET_XMIT_CN)
1030                         err = 0;
1031         }
1032
1033 done:
1034         dst_release(dst);
             /* Free opt only when it is not the listener's own np->opt. */
1035         if (opt && opt != np->opt)
1036                 sock_kfree_s(sk, opt, opt->tot_len);
1037         return err;
1038 }
1039
     /*
      * request_sock destructor: release the skb kept in the request's
      * pktopts slot, if one is present.
      */
1040 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1041 {
1042         struct sk_buff *saved = tcp6_rsk(req)->pktopts;

1043         if (saved != NULL)
                     kfree_skb(saved);
1044 }
1045
     /*
      * request_sock operations for TCP over IPv6: object size plus the
      * SYN-ACK (re)transmit, ACK, reset and destructor callbacks.
      */
1046 static struct request_sock_ops tcp6_request_sock_ops = {
1047         .family      = AF_INET6,
1048         .obj_size    = sizeof(struct tcp6_request_sock),
1049         .rtx_syn_ack = tcp_v6_send_synack,
1050         .send_ack    = tcp_v6_reqsk_send_ack,
1051         .send_reset  = tcp_v6_send_reset,
1052         .destructor  = tcp_v6_reqsk_destructor,
1053 };
1054
     /*
      * Return 1 when "skb" carries something the socket has asked to
      * receive (hop-by-hop options, flow information, a routing header or
      * destination options, each gated by its np->rxopt bit), else 0.
      */
1055 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1056 {
1057         struct inet6_skb_parm *rx = IP6CB(skb);
1058         struct ipv6_pinfo *np = inet6_sk(sk);
1059
1060         if (!np->rxopt.all)
1061                 return 0;
1062         if (rx->hop && np->rxopt.bits.hopopts)
1063                 return 1;
1064         if ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) && np->rxopt.bits.rxflow)
1065                 return 1;
1066         if (rx->srcrt && np->rxopt.bits.srcrt)
1067                 return 1;
1068         return (rx->dst1 || rx->dst0) && np->rxopt.bits.dstopts;
1069 }
1070
1071
     /*
      * Fill in th->check for an outgoing segment of "len" bytes.
      *
      * With CHECKSUM_HW only the complemented pseudo-header sum is stored
      * and skb->csum is set to the offset of the check field.  Otherwise the
      * checksum is computed here over the TCP header (th->doff << 2 bytes),
      * seeded with skb->csum.
      */
1072 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1073                               struct sk_buff *skb)
1074 {
1075         struct ipv6_pinfo *pinfo = inet6_sk(sk);
1076
1077         if (skb->ip_summed != CHECKSUM_HW) {
1078                 th->check = csum_ipv6_magic(&pinfo->saddr, &pinfo->daddr, len,
1079                                             IPPROTO_TCP,
1080                                             csum_partial((char *)th, th->doff << 2, skb->csum));
1081                 return;
1082         }
1083         th->check = ~csum_ipv6_magic(&pinfo->saddr, &pinfo->daddr, len, IPPROTO_TCP, 0);
1084         skb->csum = offsetof(struct tcphdr, check);
1085 }
1086
1087
     /*
      * Send a RST in reply to "skb".
      *
      * Nothing is sent when the incoming segment itself has RST set or when
      * ipv6_unicast_destination() rejects it.  The reply swaps the ports and
      * addresses of the incoming segment.  If that segment carried an ACK,
      * its ack_seq becomes our seq; otherwise the RST acknowledges the
      * segment's sequence space (data length plus SYN/FIN).
      *
      * The reply skb is freed on every path that does not hand it to
      * ip6_xmit().
      */
1088 static void tcp_v6_send_reset(struct sk_buff *skb)
1089 {
1090         struct tcphdr *th = skb->h.th, *t1;
1091         struct sk_buff *buff;
1092         struct flowi fl;
1093
1094         if (th->rst)
1095                 return;
1096
1097         if (!ipv6_unicast_destination(skb))
1098                 return;
1099
1100         /*
1101          * We need to grab some memory, and put together an RST,
1102          * and then put it into the queue to be sent.
1103          */
1104
1105         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1106                          GFP_ATOMIC);
1107         if (buff == NULL)
1108                 return;
1109
1110         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1111
1112         t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1113
1114         /* Swap the send and the receive. */
1115         memset(t1, 0, sizeof(*t1));
1116         t1->dest = th->source;
1117         t1->source = th->dest;
1118         t1->doff = sizeof(*t1)/4;
1119         t1->rst = 1;
1120
1121         if(th->ack) {
1122                 t1->seq = th->ack_seq;
1123         } else {
1124                 t1->ack = 1;
1125                 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1126                                     + skb->len - (th->doff<<2));
1127         }
1128
1129         buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1130
1131         memset(&fl, 0, sizeof(fl));
1132         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1133         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1134
1135         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1136                                     sizeof(*t1), IPPROTO_TCP,
1137                                     buff->csum);
1138
1139         fl.proto = IPPROTO_TCP;
1140         fl.oif = tcp_v6_iif(skb);
1141         fl.fl_ip_dport = t1->dest;
1142         fl.fl_ip_sport = t1->source;
1143
1144         /* sk = NULL, but it is safe for now. RST socket required. */
1145         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1146
1147                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1148                         dst_release(buff->dst);
                             /*
                              * The RST is not going out: free it rather
                              * than leak it.  buff->dst was just released,
                              * so clear the pointer first so that freeing
                              * the skb cannot release it a second time.
                              */
                             buff->dst = NULL;
                             kfree_skb(buff);
1149                         return;
1150                 }
1151
1152                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1153                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1154                 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1155                 return;
1156         }
1157
             /* Route lookup failed: nothing was sent, drop the reply. */
1158         kfree_skb(buff);
1159 }
1160
     /*
      * Send a bare ACK in reply to "skb" with the given sequence number,
      * acknowledgment number and window.  When "ts" is non-zero a timestamp
      * option (two NOPs plus TCPOPT_TIMESTAMP, 12 bytes) is appended, with
      * tcp_time_stamp as the value and "ts" as the echo.
      *
      * The reply skb is freed on every path that does not hand it to
      * ip6_xmit().
      */
1161 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1162 {
1163         struct tcphdr *th = skb->h.th, *t1;
1164         struct sk_buff *buff;
1165         struct flowi fl;
1166         int tot_len = sizeof(struct tcphdr);
1167
1168         if (ts)
1169                 tot_len += 3*4;
1170
1171         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1172                          GFP_ATOMIC);
1173         if (buff == NULL)
1174                 return;
1175
1176         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1177
1178         t1 = (struct tcphdr *) skb_push(buff,tot_len);
1179
1180         /* Swap the send and the receive. */
1181         memset(t1, 0, sizeof(*t1));
1182         t1->dest = th->source;
1183         t1->source = th->dest;
1184         t1->doff = tot_len/4;
1185         t1->seq = htonl(seq);
1186         t1->ack_seq = htonl(ack);
1187         t1->ack = 1;
1188         t1->window = htons(win);
1189
1190         if (ts) {
1191                 u32 *ptr = (u32*)(t1 + 1);
1192                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1193                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1194                 *ptr++ = htonl(tcp_time_stamp);
1195                 *ptr = htonl(ts);
1196         }
1197
1198         buff->csum = csum_partial((char *)t1, tot_len, 0);
1199
1200         memset(&fl, 0, sizeof(fl));
1201         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1202         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1203
1204         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1205                                     tot_len, IPPROTO_TCP,
1206                                     buff->csum);
1207
1208         fl.proto = IPPROTO_TCP;
1209         fl.oif = tcp_v6_iif(skb);
1210         fl.fl_ip_dport = t1->dest;
1211         fl.fl_ip_sport = t1->source;
1212
1213         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1214                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1215                         dst_release(buff->dst);
                             /*
                              * The ACK is not going out: free it rather
                              * than leak it.  buff->dst was just released,
                              * so clear the pointer first so that freeing
                              * the skb cannot release it a second time.
                              */
                             buff->dst = NULL;
                             kfree_skb(buff);
1216                         return;
1217                 }
1218                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1219                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1220                 return;
1221         }
1222
             /* Route lookup failed: nothing was sent, drop the reply. */
1223         kfree_skb(buff);
1224 }
1225
     /*
      * Answer "skb" with an ACK built from the time-wait socket's saved
      * snd_nxt, rcv_nxt, scaled receive window and recent timestamp, then
      * drop a reference on the time-wait socket.
      */
1226 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1227 {
1228         const struct tcp_timewait_sock *tcp_tw = tcp_twsk(sk);
1229         struct inet_timewait_sock *inet_tw = inet_twsk(sk);
1230         u32 scaled_wnd = tcp_tw->tw_rcv_wnd >> inet_tw->tw_rcv_wscale;
1231
1232         tcp_v6_send_ack(skb, tcp_tw->tw_snd_nxt, tcp_tw->tw_rcv_nxt,
1233                         scaled_wnd, tcp_tw->tw_ts_recent);
1234
1235         inet_twsk_put(inet_tw);
1236 }
1237
     /*
      * Answer "skb" with an ACK on behalf of a pending request: both
      * sequence numbers are the request's initial ones plus one, with the
      * request's receive window and recent timestamp.
      */
1238 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1239 {
             const struct tcp_request_sock *treq = tcp_rsk(req);

1240         tcp_v6_send_ack(skb, treq->snt_isn + 1, treq->rcv_isn + 1,
                             req->rcv_wnd, req->ts_recent);
1241 }
1242
1243
1244 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1245 {
1246         struct request_sock *req, **prev;
1247         struct tcphdr *th = skb->h.th;
1248         struct tcp_sock *tp = tcp_sk(sk);
1249         struct sock *nsk;
1250
1251         /* Find possible connection requests. */
1252         req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1253                                 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1254         if (req)
1255                 return tcp_check_req(sk, skb, req, prev);
1256
1257         nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1258                                           th->source,
1259                                           &skb->nh.ipv6h->daddr,
1260                                           ntohs(th->dest),
1261                                           tcp_v6_iif(skb));
1262
1263         if (nsk) {
1264                 if (nsk->sk_state != TCP_TIME_WAIT) {
1265                         bh_lock_sock(nsk);
1266                         return nsk;
1267                 }
1268                 inet_twsk_put((struct inet_timewait_sock *)nsk);
1269                 return NULL;
1270         }
1271
1272 #if 0 /*def CONFIG_SYN_COOKIES*/
1273         if (!th->rst && !th->syn && th->ack)
1274                 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1275 #endif
1276         return sk;
1277 }
1278
     /*
      * Queue "req" on the listener's accept queue, hashed by the request's
      * remote address and port with the listener's hash_rnd, using
      * TCP_TIMEOUT_INIT as the timeout, and account for it with
      * tcp_synq_added().
      */
1279 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1280 {
1281         struct tcp_sock *tp = tcp_sk(sk);
1282         u32 seed = tp->accept_queue.listen_opt->hash_rnd;
1283         u32 bucket = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, seed);
1284
1285         reqsk_queue_hash_req(&tp->accept_queue, bucket, req, TCP_TIMEOUT_INIT);
1286         tcp_synq_added(sk);
1287 }
1288
1289
1290 /* FIXME: this is substantially similar to the ipv4 code.
1291  * Can some kind of merge be done? -- erics
1292  */
     /*
      * Handle a SYN received on listening socket "sk".
      *
      * IPv4 packets are passed to tcp_v4_conn_request().  For IPv6 a
      * request_sock is allocated and filled in from the SYN, a SYN-ACK is
      * sent with tcp_v6_send_synack() and the request is queued with
      * tcp_v6_synq_add().
      *
      * On the IPv6 path the return value is always 0, including when the
      * request is dropped; drops are counted in TCP_MIB_ATTEMPTFAILS.
      */
1293 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1294 {
1295         struct tcp6_request_sock *treq;
1296         struct ipv6_pinfo *np = inet6_sk(sk);
1297         struct tcp_options_received tmp_opt;
1298         struct tcp_sock *tp = tcp_sk(sk);
1299         struct request_sock *req = NULL;
             /*
              * A non-zero value here is used as the initial sequence number
              * instead of generating one, and bypasses the full-queue drop
              * below.
              */
1300         __u32 isn = TCP_SKB_CB(skb)->when;
1301
1302         if (skb->protocol == htons(ETH_P_IP))
1303                 return tcp_v4_conn_request(sk, skb);
1304
1305         if (!ipv6_unicast_destination(skb))
1306                 goto drop;
1307
1308         /*
1309          *      There are no SYN attacks on IPv6, yet...
1310          */
1311         if (tcp_synq_is_full(sk) && !isn) {
1312                 if (net_ratelimit())
1313                         printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1314                 goto drop;
1315         }
1316
1317         if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1318                 goto drop;
1319
1320         req = reqsk_alloc(&tcp6_request_sock_ops);
1321         if (req == NULL)
1322                 goto drop;
1323
             /* Parse the SYN's TCP options into a scratch structure. */
1324         tcp_clear_options(&tmp_opt);
1325         tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1326         tmp_opt.user_mss = tp->rx_opt.user_mss;
1327
1328         tcp_parse_options(skb, &tmp_opt, 0);
1329
1330         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1331         tcp_openreq_init(req, &tmp_opt, skb);
1332
1333         treq = tcp6_rsk(req);
1334         ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1335         ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1336         TCP_ECN_create_request(req, skb->h.th);
1337         treq->pktopts = NULL;
             /*
              * Keep the SYN itself (with an extra reference on skb->users)
              * when the socket wants any of the received options, rxinfo
              * or rxhlim.
              */
1338         if (ipv6_opt_accepted(sk, skb) ||
1339             np->rxopt.bits.rxinfo ||
1340             np->rxopt.bits.rxhlim) {
1341                 atomic_inc(&skb->users);
1342                 treq->pktopts = skb;
1343         }
1344         treq->iif = sk->sk_bound_dev_if;
1345
1346         /* So that link locals have meaning */
1347         if (!sk->sk_bound_dev_if &&
1348             ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1349                 treq->iif = tcp_v6_iif(skb);
1350
1351         if (isn == 0)
1352                 isn = tcp_v6_init_sequence(sk,skb);
1353
1354         tcp_rsk(req)->snt_isn = isn;
1355
             /* The request is queued only if the SYN-ACK call returned 0. */
1356         if (tcp_v6_send_synack(sk, req, NULL))
1357                 goto drop;
1358
1359         tcp_v6_synq_add(sk, req);
1360
1361         return 0;
1362
1363 drop:
1364         if (req)
1365                 reqsk_free(req);
1366
1367         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1368         return 0; /* don't send reset */
1369 }
1370
     /*
      * Create the child socket for connection request "req" on listener
      * "sk".
      *
      * When skb->protocol is ETH_P_IP the work is done by
      * tcp_v4_syn_recv_sock() and the resulting socket is then given an
      * ipv6_pinfo copied from the listener, with v4-mapped addresses.
      * Otherwise the child is made with tcp_create_openreq_child(), routed
      * (a route is looked up here when dst == NULL) and hashed.
      *
      * Returns the new socket, or NULL on failure.  On the native IPv6
      * failure paths dst is handed to dst_release().
      */
1371 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1372                                           struct request_sock *req,
1373                                           struct dst_entry *dst)
1374 {
1375         struct tcp6_request_sock *treq = tcp6_rsk(req);
1376         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1377         struct tcp6_sock *newtcp6sk;
1378         struct inet_sock *newinet;
1379         struct tcp_sock *newtp;
1380         struct sock *newsk;
1381         struct ipv6_txoptions *opt;
1382
1383         if (skb->protocol == htons(ETH_P_IP)) {
1384                 /*
1385                  *      v6 mapped
1386                  */
1387
1388                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1389
1390                 if (newsk == NULL)
1391                         return NULL;
1392
                     /*
                      * Point pinet6 at the ipv6_pinfo embedded in the
                      * tcp6_sock; this is done before inet6_sk(newsk) is
                      * called below.
                      */
1393                 newtcp6sk = (struct tcp6_sock *)newsk;
1394                 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1395
1396                 newinet = inet_sk(newsk);
1397                 newnp = inet6_sk(newsk);
1398                 newtp = tcp_sk(newsk);
1399
1400                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1401
                     /* Express the IPv4 endpoints as ::ffff:a.b.c.d addresses. */
1402                 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1403                               newinet->daddr);
1404
1405                 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1406                               newinet->saddr);
1407
1408                 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1409
1410                 newtp->af_specific = &ipv6_mapped;
1411                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1412                 newnp->pktoptions  = NULL;
1413                 newnp->opt         = NULL;
                     /*
                      * NOTE(review): skb->protocol is ETH_P_IP in this
                      * branch, yet tcp_v6_iif() and skb->nh.ipv6h are used
                      * on it -- confirm these values are meaningful for an
                      * IPv4 packet.
                      */
1414                 newnp->mcast_oif   = tcp_v6_iif(skb);
1415                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1416
1417                 /*
1418                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1419                  * here, tcp_create_openreq_child now does this for us, see the comment in
1420                  * that function for the gory details. -acme
1421                  */
1422
1423                 /* It is tricky place. Until this moment IPv4 tcp
1424                    worked with IPv6 af_tcp.af_specific.
1425                    Sync it now.
1426                  */
1427                 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1428
1429                 return newsk;
1430         }
1431
1432         opt = np->opt;
1433
1434         if (sk_acceptq_is_full(sk))
1435                 goto out_overflow;
1436
             /*
              * No tx options on the listener: when np->rxopt.bits.srcrt == 2
              * and the SYN was kept in treq->pktopts, build options by
              * inverting the routing header recorded for that SYN.
              */
1437         if (np->rxopt.bits.srcrt == 2 &&
1438             opt == NULL && treq->pktopts) {
1439                 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1440                 if (rxopt->srcrt)
1441                         opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1442         }
1443
1444         if (dst == NULL) {
1445                 struct in6_addr *final_p = NULL, final;
1446                 struct flowi fl;
1447
1448                 memset(&fl, 0, sizeof(fl));
1449                 fl.proto = IPPROTO_TCP;
1450                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                     /*
                      * With a routing header the lookup targets rt0->addr;
                      * the real destination is restored afterwards.
                      */
1451                 if (opt && opt->srcrt) {
1452                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1453                         ipv6_addr_copy(&final, &fl.fl6_dst);
1454                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1455                         final_p = &final;
1456                 }
1457                 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1458                 fl.oif = sk->sk_bound_dev_if;
1459                 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1460                 fl.fl_ip_sport = inet_sk(sk)->sport;
1461
1462                 if (ip6_dst_lookup(sk, &dst, &fl))
1463                         goto out;
1464
1465                 if (final_p)
1466                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1467
1468                 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1469                         goto out;
1470         }
1471
1472         newsk = tcp_create_openreq_child(sk, req, skb);
1473         if (newsk == NULL)
1474                 goto out;
1475
1476         /*
1477          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1478          * count here, tcp_create_openreq_child now does this for us, see the
1479          * comment in that function for the gory details. -acme
1480          */
1481
1482         ip6_dst_store(newsk, dst, NULL);
1483         newsk->sk_route_caps = dst->dev->features &
1484                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1485
1486         newtcp6sk = (struct tcp6_sock *)newsk;
1487         inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1488
1489         newtp = tcp_sk(newsk);
1490         newinet = inet_sk(newsk);
1491         newnp = inet6_sk(newsk);
1492
             /* Start from a copy of the listener's IPv6 state, then override. */
1493         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1494
1495         ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1496         ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1497         ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1498         newsk->sk_bound_dev_if = treq->iif;
1499
1500         /* Now IPv6 options...
1501
1502            First: no IPv4 options.
1503          */
1504         newinet->opt = NULL;
1505
1506         /* Clone RX bits */
1507         newnp->rxopt.all = np->rxopt.all;
1508
1509         /* Clone pktoptions received with SYN */
1510         newnp->pktoptions = NULL;
1511         if (treq->pktopts != NULL) {
                     /*
                      * The request's reference is dropped whether or not
                      * the clone succeeded.
                      */
1512                 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1513                 kfree_skb(treq->pktopts);
1514                 treq->pktopts = NULL;
1515                 if (newnp->pktoptions)
1516                         skb_set_owner_r(newnp->pktoptions, newsk);
1517         }
1518         newnp->opt        = NULL;
1519         newnp->mcast_oif  = tcp_v6_iif(skb);
1520         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1521
1522         /* Clone native IPv6 options from listening socket (if any)
1523
1524            Yes, keeping reference count would be much more clever,
1525            but we make one more one thing there: reattach optmem
1526            to newsk.
1527          */
1528         if (opt) {
1529                 newnp->opt = ipv6_dup_options(newsk, opt);
                     /* Free opt only when it is not the listener's np->opt. */
1530                 if (opt != np->opt)
1531                         sock_kfree_s(sk, opt, opt->tot_len);
1532         }
1533
1534         newtp->ext_header_len = 0;
1535         if (newnp->opt)
1536                 newtp->ext_header_len = newnp->opt->opt_nflen +
1537                                         newnp->opt->opt_flen;
1538
1539         tcp_sync_mss(newsk, dst_mtu(dst));
1540         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1541         tcp_initialize_rcv_mss(newsk);
1542
             /* The IPv4 address fields all get the LOOPBACK4_IPV6 value. */
1543         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1544
1545         __tcp_v6_hash(newsk);
1546         inet_inherit_port(&tcp_hashinfo, sk, newsk);
1547
1548         return newsk;
1549
1550 out_overflow:
1551         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1552 out:
1553         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
             /* Free opt only when it is not the listener's np->opt. */
1554         if (opt && opt != np->opt)
1555                 sock_kfree_s(sk, opt, opt->tot_len);
1556         dst_release(dst);
1557         return NULL;
1558 }
1559
     /*
      * Prepare checksum state for a received TCP segment.
      *
      * A hardware-supplied sum (CHECKSUM_HW) that verifies marks the skb
      * CHECKSUM_UNNECESSARY.  If it does not verify, the skb is put back to
      * CHECKSUM_NONE so that the software checks still apply to it.
      * Segments of at most 76 bytes are then verified in full right here.
      * Longer ones only get skb->csum seeded with the complemented
      * pseudo-header sum; the full check is presumably completed later via
      * tcp_checksum_complete().
      *
      * Returns 0 when the checksum verified or verification is deferred,
      * -1 when the software check of a short segment fails.
      */
1560 static int tcp_v6_checksum_init(struct sk_buff *skb)
1561 {
1562         if (skb->ip_summed == CHECKSUM_HW) {
1563                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1564                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1565                                   &skb->nh.ipv6h->daddr,skb->csum))
1566                         return 0;
1567                 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
                     /*
                      * The hardware sum did not verify: undo the
                      * CHECKSUM_UNNECESSARY mark set above, otherwise a
                      * segment longer than 76 bytes would be accepted
                      * without any software verification.
                      */
                     skb->ip_summed = CHECKSUM_NONE;
1568         }
1569         if (skb->len <= 76) {
1570                 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1571                                  &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1572                         return -1;
1573                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1574         } else {
1575                 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1576                                           &skb->nh.ipv6h->daddr,0);
1577         }
1578         return 0;
1579 }
1580
1581 /* The socket must have its spinlock held when we get
1582  * here.
1583  *
1584  * We have a potential double-lock case here, so even when
1585  * doing backlog processing we use the BH locking scheme.
1586  * This is because we cannot sleep with the original spinlock
1587  * held.
1588  */
1589 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1590 {
1591         struct ipv6_pinfo *np = inet6_sk(sk);
1592         struct tcp_sock *tp;
1593         struct sk_buff *opt_skb = NULL;
1594
1595         /* Imagine: socket is IPv6. IPv4 packet arrives,
1596            goes to IPv4 receive handler and backlogged.
1597            From backlog it always goes here. Kerboom...
1598            Fortunately, tcp_rcv_established and rcv_established
1599            handle them correctly, but it is not case with
1600            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1601          */
1602
1603         if (skb->protocol == htons(ETH_P_IP))
1604                 return tcp_v4_do_rcv(sk, skb);
1605
1606         if (sk_filter(sk, skb, 0))
1607                 goto discard;
1608
1609         /*
1610          *      socket locking is here for SMP purposes as backlog rcv
1611          *      is currently called with bh processing disabled.
1612          */
1613
1614         /* Do Stevens' IPV6_PKTOPTIONS.
1615
1616            Yes, guys, it is the only place in our code, where we
1617            may make it not affecting IPv4.
1618            The rest of code is protocol independent,
1619            and I do not like idea to uglify IPv4.
1620
1621            Actually, all the idea behind IPV6_PKTOPTIONS
1622            looks not very well thought. For now we latch
1623            options, received in the last packet, enqueued
1624            by tcp. Feel free to propose better solution.
1625                                                --ANK (980728)
1626          */
1627         if (np->rxopt.all)
1628                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1629
1630         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1631                 TCP_CHECK_TIMER(sk);
1632                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1633                         goto reset;
1634                 TCP_CHECK_TIMER(sk);
1635                 if (opt_skb)
1636                         goto ipv6_pktoptions;
1637                 return 0;
1638         }
1639
1640         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1641                 goto csum_err;
1642
1643         if (sk->sk_state == TCP_LISTEN) {
1644                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1645                 if (!nsk)
1646                         goto discard;
1647
1648                 /*
1649                  * Queue it on the new socket if the new socket is active,
1650                  * otherwise we just shortcircuit this and continue with
1651                  * the new socket..
1652                  */
1653                 if(nsk != sk) {
1654                         if (tcp_child_process(sk, nsk, skb))
1655                                 goto reset;
1656                         if (opt_skb)
1657                                 __kfree_skb(opt_skb);
1658                         return 0;
1659                 }
1660         }
1661
1662         TCP_CHECK_TIMER(sk);
1663         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1664                 goto reset;
1665         TCP_CHECK_TIMER(sk);
1666         if (opt_skb)
1667                 goto ipv6_pktoptions;
1668         return 0;
1669
1670 reset:
1671         tcp_v6_send_reset(skb);
1672 discard:
1673         if (opt_skb)
1674                 __kfree_skb(opt_skb);
1675         kfree_skb(skb);
1676         return 0;
1677 csum_err:
1678         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1679         goto discard;
1680
1681
1682 ipv6_pktoptions:
1683         /* Do you ask, what is it?
1684
1685            1. skb was enqueued by tcp.
1686            2. skb is added to tail of read queue, rather than out of order.
1687            3. socket is not in passive state.
1688            4. Finally, it really contains options, which user wants to receive.
1689          */
1690         tp = tcp_sk(sk);
1691         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1692             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1693                 if (np->rxopt.bits.rxinfo)
1694                         np->mcast_oif = tcp_v6_iif(opt_skb);
1695                 if (np->rxopt.bits.rxhlim)
1696                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1697                 if (ipv6_opt_accepted(sk, opt_skb)) {
1698                         skb_set_owner_r(opt_skb, sk);
1699                         opt_skb = xchg(&np->pktoptions, opt_skb);
1700                 } else {
1701                         __kfree_skb(opt_skb);
1702                         opt_skb = xchg(&np->pktoptions, NULL);
1703                 }
1704         }
1705
1706         if (opt_skb)
1707                 kfree_skb(opt_skb);
1708         return 0;
1709 }
1710
1711 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1712 {
1713         struct sk_buff *skb = *pskb;
1714         struct tcphdr *th;
1715         struct sock *sk;
1716         int ret;
1717
1718         if (skb->pkt_type != PACKET_HOST)
1719                 goto discard_it;
1720
1721         /*
1722          *      Count it even if it's bad.
1723          */
1724         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1725
1726         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1727                 goto discard_it;
1728
1729         th = skb->h.th;
1730
1731         if (th->doff < sizeof(struct tcphdr)/4)
1732                 goto bad_packet;
1733         if (!pskb_may_pull(skb, th->doff*4))
1734                 goto discard_it;
1735
1736         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1737              tcp_v6_checksum_init(skb) < 0))
1738                 goto bad_packet;
1739
1740         th = skb->h.th;
1741         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1742         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1743                                     skb->len - th->doff*4);
1744         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1745         TCP_SKB_CB(skb)->when = 0;
1746         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1747         TCP_SKB_CB(skb)->sacked = 0;
1748
1749         sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1750                              &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1751
1752         if (!sk)
1753                 goto no_tcp_socket;
1754
1755 process:
1756         if (sk->sk_state == TCP_TIME_WAIT)
1757                 goto do_time_wait;
1758
1759         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1760                 goto discard_and_relse;
1761
1762         if (sk_filter(sk, skb, 0))
1763                 goto discard_and_relse;
1764
1765         skb->dev = NULL;
1766
1767         bh_lock_sock(sk);
1768         ret = 0;
1769         if (!sock_owned_by_user(sk)) {
1770                 if (!tcp_prequeue(sk, skb))
1771                         ret = tcp_v6_do_rcv(sk, skb);
1772         } else
1773                 sk_add_backlog(sk, skb);
1774         bh_unlock_sock(sk);
1775
1776         sock_put(sk);
1777         return ret ? -1 : 0;
1778
1779 no_tcp_socket:
1780         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1781                 goto discard_it;
1782
1783         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1784 bad_packet:
1785                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1786         } else {
1787                 tcp_v6_send_reset(skb);
1788         }
1789
1790 discard_it:
1791
1792         /*
1793          *      Discard frame
1794          */
1795
1796         kfree_skb(skb);
1797         return 0;
1798
1799 discard_and_relse:
1800         sock_put(sk);
1801         goto discard_it;
1802
1803 do_time_wait:
1804         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1805                 inet_twsk_put((struct inet_timewait_sock *)sk);
1806                 goto discard_it;
1807         }
1808
1809         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1810                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1811                 inet_twsk_put((struct inet_timewait_sock *)sk);
1812                 goto discard_it;
1813         }
1814
1815         switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1816                                            skb, th)) {
1817         case TCP_TW_SYN:
1818         {
1819                 struct sock *sk2;
1820
1821                 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1822                 if (sk2 != NULL) {
1823                         tcp_tw_deschedule((struct inet_timewait_sock *)sk);
1824                         inet_twsk_put((struct inet_timewait_sock *)sk);
1825                         sk = sk2;
1826                         goto process;
1827                 }
1828                 /* Fall through to ACK */
1829         }
1830         case TCP_TW_ACK:
1831                 tcp_v6_timewait_ack(sk, skb);
1832                 break;
1833         case TCP_TW_RST:
1834                 goto no_tcp_socket;
1835         case TCP_TW_SUCCESS:;
1836         }
1837         goto discard_it;
1838 }
1839
1840 static int tcp_v6_rebuild_header(struct sock *sk)
1841 {
1842         int err;
1843         struct dst_entry *dst;
1844         struct ipv6_pinfo *np = inet6_sk(sk);
1845
1846         dst = __sk_dst_check(sk, np->dst_cookie);
1847
1848         if (dst == NULL) {
1849                 struct inet_sock *inet = inet_sk(sk);
1850                 struct in6_addr *final_p = NULL, final;
1851                 struct flowi fl;
1852
1853                 memset(&fl, 0, sizeof(fl));
1854                 fl.proto = IPPROTO_TCP;
1855                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1856                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1857                 fl.fl6_flowlabel = np->flow_label;
1858                 fl.oif = sk->sk_bound_dev_if;
1859                 fl.fl_ip_dport = inet->dport;
1860                 fl.fl_ip_sport = inet->sport;
1861
1862                 if (np->opt && np->opt->srcrt) {
1863                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1864                         ipv6_addr_copy(&final, &fl.fl6_dst);
1865                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1866                         final_p = &final;
1867                 }
1868
1869                 err = ip6_dst_lookup(sk, &dst, &fl);
1870                 if (err) {
1871                         sk->sk_route_caps = 0;
1872                         return err;
1873                 }
1874                 if (final_p)
1875                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1876
1877                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1878                         sk->sk_err_soft = -err;
1879                         dst_release(dst);
1880                         return err;
1881                 }
1882
1883                 ip6_dst_store(sk, dst, NULL);
1884                 sk->sk_route_caps = dst->dev->features &
1885                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1886         }
1887
1888         return 0;
1889 }
1890
1891 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1892 {
1893         struct sock *sk = skb->sk;
1894         struct inet_sock *inet = inet_sk(sk);
1895         struct ipv6_pinfo *np = inet6_sk(sk);
1896         struct flowi fl;
1897         struct dst_entry *dst;
1898         struct in6_addr *final_p = NULL, final;
1899
1900         memset(&fl, 0, sizeof(fl));
1901         fl.proto = IPPROTO_TCP;
1902         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1903         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1904         fl.fl6_flowlabel = np->flow_label;
1905         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1906         fl.oif = sk->sk_bound_dev_if;
1907         fl.fl_ip_sport = inet->sport;
1908         fl.fl_ip_dport = inet->dport;
1909
1910         if (np->opt && np->opt->srcrt) {
1911                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1912                 ipv6_addr_copy(&final, &fl.fl6_dst);
1913                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1914                 final_p = &final;
1915         }
1916
1917         dst = __sk_dst_check(sk, np->dst_cookie);
1918
1919         if (dst == NULL) {
1920                 int err = ip6_dst_lookup(sk, &dst, &fl);
1921
1922                 if (err) {
1923                         sk->sk_err_soft = -err;
1924                         return err;
1925                 }
1926
1927                 if (final_p)
1928                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1929
1930                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1931                         sk->sk_route_caps = 0;
1932                         dst_release(dst);
1933                         return err;
1934                 }
1935
1936                 ip6_dst_store(sk, dst, NULL);
1937                 sk->sk_route_caps = dst->dev->features &
1938                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1939         }
1940
1941         skb->dst = dst_clone(dst);
1942
1943         /* Restore final destination back after routing done */
1944         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1945
1946         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1947 }
1948
1949 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1950 {
1951         struct ipv6_pinfo *np = inet6_sk(sk);
1952         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1953
1954         sin6->sin6_family = AF_INET6;
1955         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1956         sin6->sin6_port = inet_sk(sk)->dport;
1957         /* We do not store received flowlabel for TCP */
1958         sin6->sin6_flowinfo = 0;
1959         sin6->sin6_scope_id = 0;
1960         if (sk->sk_bound_dev_if &&
1961             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1962                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1963 }
1964
/* Not implemented for IPv6 yet: @sk is ignored and 0 is always returned. */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	(void)sk;

	/* Alas, not yet... */
	return 0;
}
1970
1971 static struct tcp_func ipv6_specific = {
1972         .queue_xmit     =       tcp_v6_xmit,
1973         .send_check     =       tcp_v6_send_check,
1974         .rebuild_header =       tcp_v6_rebuild_header,
1975         .conn_request   =       tcp_v6_conn_request,
1976         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1977         .remember_stamp =       tcp_v6_remember_stamp,
1978         .net_header_len =       sizeof(struct ipv6hdr),
1979
1980         .setsockopt     =       ipv6_setsockopt,
1981         .getsockopt     =       ipv6_getsockopt,
1982         .addr2sockaddr  =       v6_addr2sockaddr,
1983         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1984 };
1985
1986 /*
1987  *      TCP over IPv4 via INET6 API
1988  */
1989
1990 static struct tcp_func ipv6_mapped = {
1991         .queue_xmit     =       ip_queue_xmit,
1992         .send_check     =       tcp_v4_send_check,
1993         .rebuild_header =       inet_sk_rebuild_header,
1994         .conn_request   =       tcp_v6_conn_request,
1995         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1996         .remember_stamp =       tcp_v4_remember_stamp,
1997         .net_header_len =       sizeof(struct iphdr),
1998
1999         .setsockopt     =       ipv6_setsockopt,
2000         .getsockopt     =       ipv6_getsockopt,
2001         .addr2sockaddr  =       v6_addr2sockaddr,
2002         .sockaddr_len   =       sizeof(struct sockaddr_in6)
2003 };
2004
2005
2006
2007 /* NOTE: A lot of things set to zero explicitly by call to
2008  *       sk_alloc() so need not be done here.
2009  */
2010 static int tcp_v6_init_sock(struct sock *sk)
2011 {
2012         struct tcp_sock *tp = tcp_sk(sk);
2013
2014         skb_queue_head_init(&tp->out_of_order_queue);
2015         tcp_init_xmit_timers(sk);
2016         tcp_prequeue_init(tp);
2017
2018         tp->rto  = TCP_TIMEOUT_INIT;
2019         tp->mdev = TCP_TIMEOUT_INIT;
2020
2021         /* So many TCP implementations out there (incorrectly) count the
2022          * initial SYN frame in their delayed-ACK and congestion control
2023          * algorithms that we must have the following bandaid to talk
2024          * efficiently to them.  -DaveM
2025          */
2026         tp->snd_cwnd = 2;
2027
2028         /* See draft-stevens-tcpca-spec-01 for discussion of the
2029          * initialization of these values.
2030          */
2031         tp->snd_ssthresh = 0x7fffffff;
2032         tp->snd_cwnd_clamp = ~0;
2033         tp->mss_cache = 536;
2034
2035         tp->reordering = sysctl_tcp_reordering;
2036
2037         sk->sk_state = TCP_CLOSE;
2038
2039         tp->af_specific = &ipv6_specific;
2040         tp->ca_ops = &tcp_init_congestion_ops;
2041         sk->sk_write_space = sk_stream_write_space;
2042         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2043
2044         sk->sk_sndbuf = sysctl_tcp_wmem[1];
2045         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2046
2047         atomic_inc(&tcp_sockets_allocated);
2048
2049         return 0;
2050 }
2051
/* Tear down a TCPv6 socket: run the IPv4 TCP destructor first, then the
 * IPv6 one, whose return value is passed back to the caller.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);
	int rc;

	tcp_v4_destroy_sock(sk);
	rc = inet6_destroy_sock(sk);

	return rc;
}
2059
2060 /* Proc filesystem TCPv6 sock list dumping. */
2061 static void get_openreq6(struct seq_file *seq,
2062                          struct sock *sk, struct request_sock *req, int i, int uid)
2063 {
2064         struct in6_addr *dest, *src;
2065         int ttd = req->expires - jiffies;
2066
2067         if (ttd < 0)
2068                 ttd = 0;
2069
2070         src = &tcp6_rsk(req)->loc_addr;
2071         dest = &tcp6_rsk(req)->rmt_addr;
2072         seq_printf(seq,
2073                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2074                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2075                    i,
2076                    src->s6_addr32[0], src->s6_addr32[1],
2077                    src->s6_addr32[2], src->s6_addr32[3],
2078                    ntohs(inet_sk(sk)->sport),
2079                    dest->s6_addr32[0], dest->s6_addr32[1],
2080                    dest->s6_addr32[2], dest->s6_addr32[3],
2081                    ntohs(inet_rsk(req)->rmt_port),
2082                    TCP_SYN_RECV,
2083                    0,0, /* could print option size, but that is af dependent. */
2084                    1,   /* timers active (only the expire timer) */
2085                    jiffies_to_clock_t(ttd),
2086                    req->retrans,
2087                    uid,
2088                    0,  /* non standard timer */
2089                    0, /* open_requests have no inode */
2090                    0, req);
2091 }
2092
2093 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2094 {
2095         struct in6_addr *dest, *src;
2096         __u16 destp, srcp;
2097         int timer_active;
2098         unsigned long timer_expires;
2099         struct inet_sock *inet = inet_sk(sp);
2100         struct tcp_sock *tp = tcp_sk(sp);
2101         struct ipv6_pinfo *np = inet6_sk(sp);
2102
2103         dest  = &np->daddr;
2104         src   = &np->rcv_saddr;
2105         destp = ntohs(inet->dport);
2106         srcp  = ntohs(inet->sport);
2107         if (tp->pending == TCP_TIME_RETRANS) {
2108                 timer_active    = 1;
2109                 timer_expires   = tp->timeout;
2110         } else if (tp->pending == TCP_TIME_PROBE0) {
2111                 timer_active    = 4;
2112                 timer_expires   = tp->timeout;
2113         } else if (timer_pending(&sp->sk_timer)) {
2114                 timer_active    = 2;
2115                 timer_expires   = sp->sk_timer.expires;
2116         } else {
2117                 timer_active    = 0;
2118                 timer_expires = jiffies;
2119         }
2120
2121         seq_printf(seq,
2122                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2123                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2124                    i,
2125                    src->s6_addr32[0], src->s6_addr32[1],
2126                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2127                    dest->s6_addr32[0], dest->s6_addr32[1],
2128                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2129                    sp->sk_state,
2130                    tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2131                    timer_active,
2132                    jiffies_to_clock_t(timer_expires - jiffies),
2133                    tp->retransmits,
2134                    sock_i_uid(sp),
2135                    tp->probes_out,
2136                    sock_i_ino(sp),
2137                    atomic_read(&sp->sk_refcnt), sp,
2138                    tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2139                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2140                    );
2141 }
2142
2143 static void get_timewait6_sock(struct seq_file *seq,
2144                                struct inet_timewait_sock *tw, int i)
2145 {
2146         struct in6_addr *dest, *src;
2147         __u16 destp, srcp;
2148         struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2149         int ttd = tw->tw_ttd - jiffies;
2150
2151         if (ttd < 0)
2152                 ttd = 0;
2153
2154         dest = &tcp6tw->tw_v6_daddr;
2155         src  = &tcp6tw->tw_v6_rcv_saddr;
2156         destp = ntohs(tw->tw_dport);
2157         srcp  = ntohs(tw->tw_sport);
2158
2159         seq_printf(seq,
2160                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2161                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2162                    i,
2163                    src->s6_addr32[0], src->s6_addr32[1],
2164                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2165                    dest->s6_addr32[0], dest->s6_addr32[1],
2166                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2167                    tw->tw_substate, 0, 0,
2168                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2169                    atomic_read(&tw->tw_refcnt), tw);
2170 }
2171
2172 #ifdef CONFIG_PROC_FS
2173 static int tcp6_seq_show(struct seq_file *seq, void *v)
2174 {
2175         struct tcp_iter_state *st;
2176
2177         if (v == SEQ_START_TOKEN) {
2178                 seq_puts(seq,
2179                          "  sl  "
2180                          "local_address                         "
2181                          "remote_address                        "
2182                          "st tx_queue rx_queue tr tm->when retrnsmt"
2183                          "   uid  timeout inode\n");
2184                 goto out;
2185         }
2186         st = seq->private;
2187
2188         switch (st->state) {
2189         case TCP_SEQ_STATE_LISTENING:
2190         case TCP_SEQ_STATE_ESTABLISHED:
2191                 get_tcp6_sock(seq, v, st->num);
2192                 break;
2193         case TCP_SEQ_STATE_OPENREQ:
2194                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2195                 break;
2196         case TCP_SEQ_STATE_TIME_WAIT:
2197                 get_timewait6_sock(seq, v, st->num);
2198                 break;
2199         }
2200 out:
2201         return 0;
2202 }
2203
2204 static struct file_operations tcp6_seq_fops;
2205 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2206         .owner          = THIS_MODULE,
2207         .name           = "tcp6",
2208         .family         = AF_INET6,
2209         .seq_show       = tcp6_seq_show,
2210         .seq_fops       = &tcp6_seq_fops,
2211 };
2212
2213 int __init tcp6_proc_init(void)
2214 {
2215         return tcp_proc_register(&tcp6_seq_afinfo);
2216 }
2217
2218 void tcp6_proc_exit(void)
2219 {
2220         tcp_proc_unregister(&tcp6_seq_afinfo);
2221 }
2222 #endif
2223
2224 struct proto tcpv6_prot = {
2225         .name                   = "TCPv6",
2226         .owner                  = THIS_MODULE,
2227         .close                  = tcp_close,
2228         .connect                = tcp_v6_connect,
2229         .disconnect             = tcp_disconnect,
2230         .accept                 = tcp_accept,
2231         .ioctl                  = tcp_ioctl,
2232         .init                   = tcp_v6_init_sock,
2233         .destroy                = tcp_v6_destroy_sock,
2234         .shutdown               = tcp_shutdown,
2235         .setsockopt             = tcp_setsockopt,
2236         .getsockopt             = tcp_getsockopt,
2237         .sendmsg                = tcp_sendmsg,
2238         .recvmsg                = tcp_recvmsg,
2239         .backlog_rcv            = tcp_v6_do_rcv,
2240         .hash                   = tcp_v6_hash,
2241         .unhash                 = tcp_unhash,
2242         .get_port               = tcp_v6_get_port,
2243         .enter_memory_pressure  = tcp_enter_memory_pressure,
2244         .sockets_allocated      = &tcp_sockets_allocated,
2245         .memory_allocated       = &tcp_memory_allocated,
2246         .memory_pressure        = &tcp_memory_pressure,
2247         .sysctl_mem             = sysctl_tcp_mem,
2248         .sysctl_wmem            = sysctl_tcp_wmem,
2249         .sysctl_rmem            = sysctl_tcp_rmem,
2250         .max_header             = MAX_TCP_HEADER,
2251         .obj_size               = sizeof(struct tcp6_sock),
2252         .twsk_obj_size          = sizeof(struct tcp6_timewait_sock),
2253         .rsk_prot               = &tcp6_request_sock_ops,
2254 };
2255
2256 static struct inet6_protocol tcpv6_protocol = {
2257         .handler        =       tcp_v6_rcv,
2258         .err_handler    =       tcp_v6_err,
2259         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2260 };
2261
2262 extern struct proto_ops inet6_stream_ops;
2263
2264 static struct inet_protosw tcpv6_protosw = {
2265         .type           =       SOCK_STREAM,
2266         .protocol       =       IPPROTO_TCP,
2267         .prot           =       &tcpv6_prot,
2268         .ops            =       &inet6_stream_ops,
2269         .capability     =       -1,
2270         .no_check       =       0,
2271         .flags          =       INET_PROTOSW_PERMANENT,
2272 };
2273
2274 void __init tcpv6_init(void)
2275 {
2276         /* register inet6 protocol */
2277         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2278                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2279         inet6_register_protosw(&tcpv6_protosw);
2280 }