3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
/*
 * Forward declarations: handlers that are referenced before their
 * definitions later in the file, and the two struct tcp_func instances
 * that get assigned to tp->af_specific.
 * NOTE(review): the tcp_v6_send_check() prototype below is cut off in
 * this listing (its continuation line is missing).
 */
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
/*
 * Hash a (local address, local port, foreign address, foreign port)
 * tuple into an index for tcp_hashinfo.ehash.
 *
 * Only the last 32-bit word of each address is mixed in, together with
 * lport ^ fport; the value is then folded by 16 and by 8 bits and masked
 * with (ehash_size - 1).
 * NOTE(review): the mask presumably relies on ehash_size being a power
 * of two - confirm.
 */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_hashinfo.ehash_size - 1));
/*
 * Compute the ehash index for a socket from its own state: the
 * laddr/lport pair comes from np->rcv_saddr and inet->num, the
 * faddr/fport pair from np->daddr and inet->dport.  The actual hashing
 * is delegated to tcp_v6_hashfn().
 */
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/*
 * Walk every socket that owns bind bucket tb, looking for one that
 * conflicts with sk.
 *
 * From the visible part of the condition, an owner sk2 counts when:
 *  - the two sockets can share a device (either one is not bound to a
 *    device, or both are bound to the same one), and
 *  - address reuse does not apply (either socket lacks sk_reuse, or sk2
 *    is in TCP_LISTEN), and
 *  - ipv6_rcv_saddr_equal(sk, sk2) holds.
 * NOTE(review): the first line of the condition and the return statement
 * are not present in this listing.
 */
101 static inline int tcp_v6_bind_conflict(const struct sock *sk,
102 const struct inet_bind_bucket *tb)
104 const struct sock *sk2;
105 const struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
/*
 * tcp_v6_get_port - bind sk to local port snum.
 *
 * The visible body has two halves:
 *
 *  1. A search over sysctl_local_port_range, driven by
 *     tcp_hashinfo.port_rover and serialised by portalloc_lock.  Each
 *     candidate rover value is compared against the ports in its bind
 *     hash chain (under that chain's lock); the rover is written back
 *     when the loop ends and 'remaining <= 0' signals an exhausted range.
 *     NOTE(review): this looks like the path taken when the caller did
 *     not supply a port, but the guarding test is not in this listing.
 *
 *  2. A lookup of the bucket for snum.  A bucket that already has owners
 *     is subject to the tb->fastreuse / sk->sk_reuse test and to
 *     tcp_v6_bind_conflict(); a missing bucket is created with
 *     inet_bind_bucket_create().  tb->fastreuse is then re-evaluated
 *     (the assignments themselves are not visible), and the socket is
 *     attached with inet_bind_hash() if it has no bind_hash yet.
 *
 * The chain lock taken in the second half is released before returning.
 */
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct inet_bind_hashbucket *head;
130 struct inet_bind_bucket *tb;
131 struct hlist_node *node;
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_hashinfo.portalloc_lock);
142 if (tcp_hashinfo.port_rover < low)
145 rover = tcp_hashinfo.port_rover;
149 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
150 spin_lock(&head->lock);
151 inet_bind_bucket_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_hashinfo.port_rover = rover;
159 spin_unlock(&tcp_hashinfo.portalloc_lock);
161 /* Exhausted local port range during search? It is not
162 * possible for us to be holding one of the bind hash
163 * locks if this test triggers, because if 'remaining'
164 * drops to zero, we broke out of the do/while loop at
165 * the top level, not from the 'break;' statement.
168 if (unlikely(remaining <= 0))
171 /* OK, here is the one we will use. */
174 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
175 spin_lock(&head->lock);
176 inet_bind_bucket_for_each(tb, node, &head->chain)
177 if (tb->port == snum)
183 if (tb && !hlist_empty(&tb->owners)) {
184 if (tb->fastreuse > 0 && sk->sk_reuse &&
185 sk->sk_state != TCP_LISTEN) {
189 if (tcp_v6_bind_conflict(sk, tb))
196 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
200 if (hlist_empty(&tb->owners)) {
201 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
205 } else if (tb->fastreuse &&
206 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
210 if (!inet_sk(sk)->bind_hash)
211 inet_bind_hash(sk, tb, snum);
212 BUG_TRAP(inet_sk(sk)->bind_hash == tb);
216 spin_unlock(&head->lock);
/*
 * Insert sk into the TCP lookup tables.
 *
 * A socket in TCP_LISTEN goes on tcp_hashinfo.listening_hash, with
 * lhash_lock selected and inet_listen_wlock() called; any other socket
 * goes on the ehash chain selected by tcp_v6_sk_hashfn(), and that index
 * is recorded in sk->sk_hashent.  The socket must not already be hashed
 * (BUG_TRAP).  sock_prot_inc_use() is called after the insertion.
 * NOTE(review): the lock/unlock calls for the non-listening case are not
 * present in this listing.
 */
222 static __inline__ void __tcp_v6_hash(struct sock *sk)
224 struct hlist_head *list;
227 BUG_TRAP(sk_unhashed(sk));
229 if (sk->sk_state == TCP_LISTEN) {
230 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
231 lock = &tcp_hashinfo.lhash_lock;
232 inet_listen_wlock(&tcp_hashinfo);
234 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
235 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
236 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
240 __sk_add_node(sk, list);
241 sock_prot_inc_use(sk->sk_prot);
/*
 * Hash entry point for a socket.  All visible work is guarded by
 * sk_state != TCP_CLOSE, and sockets whose af_specific is &ipv6_mapped
 * are singled out for separate treatment.
 * NOTE(review): the body of that branch and the rest of the function
 * are not present in this listing.
 */
246 static void tcp_v6_hash(struct sock *sk)
248 if (sk->sk_state != TCP_CLOSE) {
249 struct tcp_sock *tp = tcp_sk(sk);
251 if (tp->af_specific == &ipv6_mapped) {
/*
 * Find the best listening PF_INET6 socket for local address daddr and
 * port hnum on interface dif.
 *
 * Candidates come from the listening_hash chain for hnum and are read
 * under lhash_lock.  A socket bound to a specific address is tested
 * against daddr and a socket bound to a device is tested against dif;
 * candidates are ranked by a score and a higher score replaces the
 * current best.
 * NOTE(review): the score computation and the return are not visible
 * in this listing.
 */
261 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
264 struct hlist_node *node;
265 struct sock *result = NULL;
269 read_lock(&tcp_hashinfo.lhash_lock);
270 sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
271 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
272 struct ipv6_pinfo *np = inet6_sk(sk);
275 if (!ipv6_addr_any(&np->rcv_saddr)) {
276 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
280 if (sk->sk_bound_dev_if) {
281 if (sk->sk_bound_dev_if != dif)
289 if (score > hiscore) {
297 read_unlock(&tcp_hashinfo.lhash_lock);
/*
 * __tcp_v6_lookup_established - find a non-listening socket for the
 * tuple (saddr, sport) -> (daddr, hnum) on interface dif.
 *
 * The bucket is chosen with tcp_v6_hashfn(daddr, hnum, saddr, sport).
 * Its chain is searched first using INET6_MATCH(); after that the chain
 * at (head + ehash_size) is searched for a PF_INET6 timewait socket
 * whose combined ports, addresses and bound device match.  The bucket's
 * read lock is taken for the search and released on both exit paths.
 * NOTE(review): the hit handling and return statements are not present
 * in this listing.
 */
301 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
302 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
304 * The sockhash lock must be held as a reader here.
307 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
308 struct in6_addr *daddr, u16 hnum,
312 const struct hlist_node *node;
313 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
314 /* Optimize here for direct hit, only listening connections can
315 * have wildcards anyways.
317 const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
318 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
320 read_lock(&head->lock);
321 sk_for_each(sk, node, &head->chain) {
322 /* For IPV6 do the cheaper port and family tests first. */
323 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
324 goto hit; /* You sunk my battleship! */
326 /* Must check for a TIME_WAIT'er before going to listener hash. */
327 sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
328 const struct inet_timewait_sock *tw = inet_twsk(sk);
330 if(*((__u32 *)&(tw->tw_dport)) == ports &&
331 sk->sk_family == PF_INET6) {
332 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
334 if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
335 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
336 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
340 read_unlock(&head->lock);
345 read_unlock(&head->lock);
/*
 * Combined lookup: try __tcp_v6_lookup_established() first and fall
 * back to tcp_v6_lookup_listener() for the local address and port.
 * NOTE(review): the test on the first result is not in this listing.
 */
350 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 hnum,
356 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
361 return tcp_v6_lookup_listener(daddr, hnum, dif);
/*
 * Exported (GPL-only) wrapper around __tcp_v6_lookup().  dport is
 * converted with ntohs() before the call, so it is expected in network
 * byte order; sport is passed through unchanged.
 */
364 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
365 struct in6_addr *daddr, u16 dport,
371 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
377 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
381 * Open request hash tables.
/*
 * Hash a remote address/port pair into a SYN table slot.  The four
 * 32-bit address words are run through two rounds of __jhash_mix() and
 * the result is masked with (TCP_SYNQ_HSIZE - 1).
 * NOTE(review): the lines that would fold in rport and rnd are not
 * visible in this listing.
 */
384 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
388 a = raddr->s6_addr32[0];
389 b = raddr->s6_addr32[1];
390 c = raddr->s6_addr32[2];
392 a += JHASH_GOLDEN_RATIO;
393 b += JHASH_GOLDEN_RATIO;
395 __jhash_mix(a, b, c);
397 a += raddr->s6_addr32[3];
399 __jhash_mix(a, b, c);
401 return c & (TCP_SYNQ_HSIZE - 1);
/*
 * Search the listener's SYN table for a pending request.
 *
 * The chain is selected with tcp_v6_synq_hash(raddr, rport, hash_rnd).
 * An entry matches when its remote port and address, its local address
 * and its family (AF_INET6) agree, and its iif is either zero or equal
 * to the iif argument.  A matching request is expected to have no child
 * socket yet (BUG_TRAP on req->sk).
 *
 * prev follows the link that points at the current entry while the
 * chain is walked; presumably it is handed back through *prevp on a hit,
 * but that assignment and the returns are not in this listing.
 */
404 static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
405 struct request_sock ***prevp,
407 struct in6_addr *raddr,
408 struct in6_addr *laddr,
411 struct listen_sock *lopt = tp->accept_queue.listen_opt;
412 struct request_sock *req, **prev;
414 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
415 (req = *prev) != NULL;
416 prev = &req->dl_next) {
417 const struct tcp6_request_sock *treq = tcp6_rsk(req);
419 if (inet_rsk(req)->rmt_port == rport &&
420 req->rsk_ops->family == AF_INET6 &&
421 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
422 ipv6_addr_equal(&treq->loc_addr, laddr) &&
423 (!treq->iif || treq->iif == iif)) {
424 BUG_TRAP(req->sk == NULL);
/*
 * Thin wrapper: returns
 * csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base).
 * th is not used by the visible code.
 */
433 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
434 struct in6_addr *saddr,
435 struct in6_addr *daddr,
438 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Pick an initial sequence number based on the addressing of skb.
 * An ETH_P_IPV6 packet uses secure_tcpv6_sequence_number() on the
 * packet's IPv6 destination and source addresses; any other packet uses
 * secure_tcp_sequence_number() on the IPv4 header fields.
 * sk is not referenced in the visible code.
 */
441 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
443 if (skb->protocol == htons(ETH_P_IPV6)) {
444 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
445 skb->nh.ipv6h->saddr.s6_addr32,
449 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/*
 * __tcp_v6_check_established - check that the connection identity of sk
 * (np->rcv_saddr, lport, np->daddr, inet->dport) is not already in use,
 * and hash sk into the established table when it is free.
 *
 * Everything up to the insertion runs under the bucket's write lock:
 *
 *  - The timewait chain at (head + ehash_size) is checked first.  A
 *    matching timewait socket with a recorded tw_ts_recent_stamp can be
 *    taken over (the visible part of the test involves
 *    sysctl_tcp_tw_reuse and a stamp more than one second old); in that
 *    case write_seq is placed beyond tw_snd_nxt and the timestamp state
 *    is inherited from the timewait socket.
 *  - The established chain is then checked with INET6_MATCH().
 *  - On the success path sk is added to head->chain and sk_hashent is
 *    set.  A recycled timewait socket is either handed back through
 *    *twp or descheduled here; LINUX_MIB_TIMEWAITRECYCLED is counted in
 *    both of those branches.
 *
 * The failure path visible at the end returns -EADDRNOTAVAIL.
 *
 * NOTE(review): the bucket is selected with a hash over inet->num, while
 * the port comparison uses lport.  If a caller passes an lport that has
 * not yet been stored in inet->num, sk could be inserted on a chain other
 * than the one lookups for lport will search - confirm against callers.
 */
456 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
457 struct inet_timewait_sock **twp)
459 struct inet_sock *inet = inet_sk(sk);
460 struct ipv6_pinfo *np = inet6_sk(sk);
461 struct in6_addr *daddr = &np->rcv_saddr;
462 struct in6_addr *saddr = &np->daddr;
463 int dif = sk->sk_bound_dev_if;
464 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
465 const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
466 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
468 const struct hlist_node *node;
469 struct inet_timewait_sock *tw;
471 write_lock(&head->lock);
473 /* Check TIME-WAIT sockets first. */
474 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
475 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
479 if(*((__u32 *)&(tw->tw_dport)) == ports &&
480 sk2->sk_family == PF_INET6 &&
481 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
482 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
483 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
484 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
485 struct tcp_sock *tp = tcp_sk(sk);
487 if (tcptw->tw_ts_recent_stamp &&
489 (sysctl_tcp_tw_reuse &&
490 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
491 /* See comment in tcp_ipv4.c */
492 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
495 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
496 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
505 /* And established part... */
506 sk_for_each(sk2, node, &head->chain) {
507 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
512 BUG_TRAP(sk_unhashed(sk));
513 __sk_add_node(sk, &head->chain);
514 sk->sk_hashent = hash;
515 sock_prot_inc_use(sk->sk_prot);
516 write_unlock(&head->lock);
520 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
522 /* Silly. Should hash-dance instead... */
523 tcp_tw_deschedule(tw);
524 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
531 write_unlock(&head->lock);
532 return -EADDRNOTAVAIL;
/*
 * Per-socket offset that tcp_v6_hash_connect() adds to its port search
 * starting point: the result of secure_tcpv6_port_ephemeral().
 * NOTE(review): only the first argument (np->rcv_saddr) is visible in
 * this listing.
 */
535 static inline u32 tcpv6_port_offset(const struct sock *sk)
537 const struct inet_sock *inet = inet_sk(sk);
538 const struct ipv6_pinfo *np = inet6_sk(sk);
540 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
/*
 * tcp_v6_hash_connect - give a connecting socket a local port and put it
 * in the hash tables.
 *
 * The visible body has two parts:
 *
 *  1. A port search.  Candidate ports are low + (i + offset) % range,
 *     where offset combines a hint with tcpv6_port_offset(sk).  For each
 *     candidate the bind hash chain is scanned under its lock.  An
 *     existing bucket is expected to have owners (BUG_TRAP), is tested
 *     for fastreuse >= 0, and is checked for a unique identity with
 *     __tcp_v6_check_established(); when no bucket exists one is created
 *     with inet_bind_bucket_create().  Once a port is chosen the socket
 *     is bound with inet_bind_hash(); if it is still unhashed inet->sport
 *     is set.  A timewait socket is descheduled afterwards.  Running out
 *     of candidates returns -EADDRNOTAVAIL.
 *     NOTE(review): this looks like the path for sockets with no port
 *     yet (snum == 0), but the guarding test is not in this listing.
 *
 *  2. A path that uses snum, the port the socket already had on entry.
 *     If sk is the only owner of its bind bucket the lock is dropped
 *     with spin_unlock_bh(); otherwise identity uniqueness is decided by
 *     __tcp_v6_check_established(sk, snum, NULL).
 *     NOTE(review): the second branch releases the lock with plain
 *     spin_unlock() after spin_lock_bh(); presumably bottom halves are
 *     re-enabled by a line that is not visible here - confirm.
 */
545 static int tcp_v6_hash_connect(struct sock *sk)
547 unsigned short snum = inet_sk(sk)->num;
548 struct inet_bind_hashbucket *head;
549 struct inet_bind_bucket *tb;
553 int low = sysctl_local_port_range[0];
554 int high = sysctl_local_port_range[1];
555 int range = high - low;
559 u32 offset = hint + tcpv6_port_offset(sk);
560 struct hlist_node *node;
561 struct inet_timewait_sock *tw = NULL;
564 for (i = 1; i <= range; i++) {
565 port = low + (i + offset) % range;
566 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
567 spin_lock(&head->lock);
569 /* Does not bother with rcv_saddr checks,
570 * because the established check is already
573 inet_bind_bucket_for_each(tb, node, &head->chain) {
574 if (tb->port == port) {
575 BUG_TRAP(!hlist_empty(&tb->owners));
576 if (tb->fastreuse >= 0)
578 if (!__tcp_v6_check_established(sk,
586 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
588 spin_unlock(&head->lock);
595 spin_unlock(&head->lock);
599 return -EADDRNOTAVAIL;
604 /* Head lock still held and bh's disabled */
605 inet_bind_hash(sk, tb, port);
606 if (sk_unhashed(sk)) {
607 inet_sk(sk)->sport = htons(port);
610 spin_unlock(&head->lock);
613 tcp_tw_deschedule(tw);
621 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
622 tb = inet_sk(sk)->bind_hash;
623 spin_lock_bh(&head->lock);
625 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
627 spin_unlock_bh(&head->lock);
630 spin_unlock(&head->lock);
631 /* No definite answer... Walk to established hash table */
632 ret = __tcp_v6_check_established(sk, snum, NULL);
/*
 * Return the iif field stored in the skb's IPv6 control block
 * (IP6CB(skb)->iif).
 */
639 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
641 return IP6CB(skb)->iif;
/*
 * tcp_v6_connect - start an active open to the address in uaddr.
 *
 * Steps visible in this listing, in order:
 *
 *  - Argument checks: addr_len is compared with SIN6_LEN_RFC2133 and a
 *    family other than AF_INET6 yields -EAFNOSUPPORT.
 *  - Flow label: the label is taken from sin6_flowinfo; when label bits
 *    are set it is looked up with fl6_sock_lookup() and the destination
 *    address in usin is overwritten with flowlabel->dst.
 *  - An all-zero destination is rewritten to ::1 by setting the last
 *    address byte to 1.
 *  - The address type is tested for multicast (outcome not visible).
 *  - Link-local destinations: a non-zero sin6_scope_id must agree with an
 *    existing sk_bound_dev_if, otherwise it becomes the bound device; a
 *    bound device is tested for afterwards.
 *  - Cached timestamp state (ts_recent, ts_recent_stamp) is cleared when
 *    the destination differs from np->daddr.
 *  - IPv4-mapped destinations: refused-path test via __ipv6_only_sock(),
 *    then the socket is switched to &ipv6_mapped / tcp_v4_do_rcv and the
 *    connect is delegated to tcp_v4_connect().  The lines restoring
 *    ext_header_len, &ipv6_specific and tcp_v6_do_rcv follow;
 *    NOTE(review): presumably that is the error path, the test is not
 *    visible.  np->saddr and np->rcv_saddr are set to mapped addresses.
 *  - Native IPv6: a flow is built (using the first routing-header hop as
 *    destination when np->opt has srcrt), routed with ip6_dst_lookup()
 *    and xfrm_lookup(), and stored with ip6_dst_store().  Route
 *    capabilities exclude NETIF_F_IP_CSUM and NETIF_F_TSO.
 *    ext_header_len and mss_clamp are set, inet->dport is recorded, the
 *    state moves to TCP_SYN_SENT, tcp_v6_hash_connect() binds and hashes
 *    the socket, write_seq is initialised and tcp_connect() is called.
 *  - Failure cleanup visible at the end: state back to TCP_CLOSE and
 *    sk_route_caps cleared.
 */
644 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
647 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
648 struct inet_sock *inet = inet_sk(sk);
649 struct ipv6_pinfo *np = inet6_sk(sk);
650 struct tcp_sock *tp = tcp_sk(sk);
651 struct in6_addr *saddr = NULL, *final_p = NULL, final;
653 struct dst_entry *dst;
657 if (addr_len < SIN6_LEN_RFC2133)
660 if (usin->sin6_family != AF_INET6)
661 return(-EAFNOSUPPORT);
663 memset(&fl, 0, sizeof(fl));
666 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
667 IP6_ECN_flow_init(fl.fl6_flowlabel);
668 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
669 struct ip6_flowlabel *flowlabel;
670 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
671 if (flowlabel == NULL)
673 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
674 fl6_sock_release(flowlabel);
679 * connect() to INADDR_ANY means loopback (BSD'ism).
682 if(ipv6_addr_any(&usin->sin6_addr))
683 usin->sin6_addr.s6_addr[15] = 0x1;
685 addr_type = ipv6_addr_type(&usin->sin6_addr);
687 if(addr_type & IPV6_ADDR_MULTICAST)
690 if (addr_type&IPV6_ADDR_LINKLOCAL) {
691 if (addr_len >= sizeof(struct sockaddr_in6) &&
692 usin->sin6_scope_id) {
693 /* If interface is set while binding, indices
696 if (sk->sk_bound_dev_if &&
697 sk->sk_bound_dev_if != usin->sin6_scope_id)
700 sk->sk_bound_dev_if = usin->sin6_scope_id;
703 /* Connect to link-local address requires an interface */
704 if (!sk->sk_bound_dev_if)
708 if (tp->rx_opt.ts_recent_stamp &&
709 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
710 tp->rx_opt.ts_recent = 0;
711 tp->rx_opt.ts_recent_stamp = 0;
715 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
716 np->flow_label = fl.fl6_flowlabel;
722 if (addr_type == IPV6_ADDR_MAPPED) {
723 u32 exthdrlen = tp->ext_header_len;
724 struct sockaddr_in sin;
726 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
728 if (__ipv6_only_sock(sk))
731 sin.sin_family = AF_INET;
732 sin.sin_port = usin->sin6_port;
733 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
735 tp->af_specific = &ipv6_mapped;
736 sk->sk_backlog_rcv = tcp_v4_do_rcv;
738 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
741 tp->ext_header_len = exthdrlen;
742 tp->af_specific = &ipv6_specific;
743 sk->sk_backlog_rcv = tcp_v6_do_rcv;
746 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
748 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
755 if (!ipv6_addr_any(&np->rcv_saddr))
756 saddr = &np->rcv_saddr;
758 fl.proto = IPPROTO_TCP;
759 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
760 ipv6_addr_copy(&fl.fl6_src,
761 (saddr ? saddr : &np->saddr));
762 fl.oif = sk->sk_bound_dev_if;
763 fl.fl_ip_dport = usin->sin6_port;
764 fl.fl_ip_sport = inet->sport;
766 if (np->opt && np->opt->srcrt) {
767 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
768 ipv6_addr_copy(&final, &fl.fl6_dst);
769 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
773 err = ip6_dst_lookup(sk, &dst, &fl);
777 ipv6_addr_copy(&fl.fl6_dst, final_p);
779 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
786 ipv6_addr_copy(&np->rcv_saddr, saddr);
789 /* set the source address */
790 ipv6_addr_copy(&np->saddr, saddr);
791 inet->rcv_saddr = LOOPBACK4_IPV6;
793 ip6_dst_store(sk, dst, NULL);
794 sk->sk_route_caps = dst->dev->features &
795 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
797 tp->ext_header_len = 0;
799 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
801 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
803 inet->dport = usin->sin6_port;
805 tcp_set_state(sk, TCP_SYN_SENT);
806 err = tcp_v6_hash_connect(sk);
811 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
816 err = tcp_connect(sk);
823 tcp_set_state(sk, TCP_CLOSE);
827 sk->sk_route_caps = 0;
/*
 * tcp_v6_err - handle an ICMPv6 error that quotes one of our TCP
 * segments.
 *
 * skb->data points at the quoted IPv6 header and skb->data + offset at
 * the quoted TCP header.  The owning socket is found with
 * tcp_v6_lookup() on the quoted addresses and ports and the receiving
 * device's ifindex.
 *
 * Visible handling:
 *  - no socket: ICMP6_MIB_INERRORS is counted;
 *  - timewait socket: the reference is dropped with inet_twsk_put();
 *  - socket owned by user context: LINUX_MIB_LOCKDROPPEDICMPS is counted;
 *  - TCP_CLOSE is tested for (outcome not visible);
 *  - for non-listening sockets a quoted sequence number outside
 *    [snd_una, snd_nxt] counts LINUX_MIB_OUTOFWINDOWICMPS;
 *  - ICMPV6_PKT_TOOBIG: skipped-path tests for a user-owned socket and
 *    for LISTEN/CLOSE states, then the cached route is re-checked with
 *    __sk_dst_check() and, per the visible code, a fresh route is looked
 *    up with ip6_dst_lookup()/xfrm_lookup() (failures are stored in
 *    sk_err_soft).  If pmtu_cookie exceeds the route MTU, the MSS is
 *    resynchronised and tcp_simple_retransmit() is called;
 *  - other types: the error is translated by icmpv6_err_convert() and
 *    dispatched on sk_state.  One branch looks for a pending request
 *    with tcp_v6_search_req(), compares the quoted sequence number with
 *    the request's snt_isn and drops the request with tcp_synq_drop().
 *    The TCP_SYN_RECV branch counts TCP_MIB_ATTEMPTFAILS and reports the
 *    error when the socket is not user-owned, otherwise records it in
 *    sk_err_soft.  The final visible branch reports the error only when
 *    the socket is not user-owned and np->recverr is set, else records
 *    it in sk_err_soft.
 *
 * NOTE(review): the case labels, gotos and unlock/put calls are missing
 * from this listing, so the exact branch structure should be confirmed
 * against the full source.
 */
831 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
832 int type, int code, int offset, __u32 info)
834 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
835 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
836 struct ipv6_pinfo *np;
842 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
845 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
849 if (sk->sk_state == TCP_TIME_WAIT) {
850 inet_twsk_put((struct inet_timewait_sock *)sk);
855 if (sock_owned_by_user(sk))
856 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
858 if (sk->sk_state == TCP_CLOSE)
862 seq = ntohl(th->seq);
863 if (sk->sk_state != TCP_LISTEN &&
864 !between(seq, tp->snd_una, tp->snd_nxt)) {
865 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
871 if (type == ICMPV6_PKT_TOOBIG) {
872 struct dst_entry *dst = NULL;
874 if (sock_owned_by_user(sk))
876 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
879 /* icmp should have updated the destination cache entry */
880 dst = __sk_dst_check(sk, np->dst_cookie);
883 struct inet_sock *inet = inet_sk(sk);
886 /* BUGGG_FUTURE: Again, it is not clear how
887 to handle rthdr case. Ignore this complexity
890 memset(&fl, 0, sizeof(fl));
891 fl.proto = IPPROTO_TCP;
892 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
893 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
894 fl.oif = sk->sk_bound_dev_if;
895 fl.fl_ip_dport = inet->dport;
896 fl.fl_ip_sport = inet->sport;
898 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
899 sk->sk_err_soft = -err;
903 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
904 sk->sk_err_soft = -err;
911 if (tp->pmtu_cookie > dst_mtu(dst)) {
912 tcp_sync_mss(sk, dst_mtu(dst));
913 tcp_simple_retransmit(sk);
914 } /* else let the usual retransmit timer handle it */
919 icmpv6_err_convert(type, code, &err);
921 /* Might be for an request_sock */
922 switch (sk->sk_state) {
923 struct request_sock *req, **prev;
925 if (sock_owned_by_user(sk))
928 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
929 &hdr->saddr, tcp_v6_iif(skb));
933 /* ICMPs are not backlogged, hence we cannot get
934 * an established socket here.
936 BUG_TRAP(req->sk == NULL);
938 if (seq != tcp_rsk(req)->snt_isn) {
939 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
943 tcp_synq_drop(sk, req, prev);
947 case TCP_SYN_RECV: /* Cannot happen.
948 It can, it SYNs are crossed. --ANK */
949 if (!sock_owned_by_user(sk)) {
950 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
952 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
956 sk->sk_err_soft = err;
960 if (!sock_owned_by_user(sk) && np->recverr) {
962 sk->sk_error_report(sk);
964 sk->sk_err_soft = err;
/*
 * tcp_v6_send_synack - build and send the SYN-ACK for a pending request.
 *
 * A flow is built from the request's remote and local addresses and
 * ports.  When np->rxopt.bits.srcrt == 2 and the request kept the SYN's
 * skb (treq->pktopts), the received routing header is inverted with
 * ipv6_invert_rthdr() and used as transmit options; with a routing
 * header the first hop becomes the flow destination and the real
 * destination is restored after routing.  The route is resolved with
 * ip6_dst_lookup() and xfrm_lookup().
 *
 * The segment comes from tcp_make_synack(); its checksum is filled in
 * with tcp_v6_check() over the request's addresses and it is sent with
 * ip6_xmit().  NET_XMIT_CN from ip6_xmit() is tested for specially
 * (the resulting assignment is not visible in this listing).
 *
 * Options that are not the socket's own np->opt are freed with
 * sock_kfree_s() before returning.
 * NOTE(review): the conditions guarding the route lookup and the return
 * statement are not present in this listing.
 */
972 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
973 struct dst_entry *dst)
975 struct tcp6_request_sock *treq = tcp6_rsk(req);
976 struct ipv6_pinfo *np = inet6_sk(sk);
977 struct sk_buff * skb;
978 struct ipv6_txoptions *opt = NULL;
979 struct in6_addr * final_p = NULL, final;
983 memset(&fl, 0, sizeof(fl));
984 fl.proto = IPPROTO_TCP;
985 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
986 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
987 fl.fl6_flowlabel = 0;
989 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
990 fl.fl_ip_sport = inet_sk(sk)->sport;
995 np->rxopt.bits.srcrt == 2 &&
997 struct sk_buff *pktopts = treq->pktopts;
998 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
1000 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
1003 if (opt && opt->srcrt) {
1004 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1005 ipv6_addr_copy(&final, &fl.fl6_dst);
1006 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1010 err = ip6_dst_lookup(sk, &dst, &fl);
1014 ipv6_addr_copy(&fl.fl6_dst, final_p);
1015 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1019 skb = tcp_make_synack(sk, dst, req);
1021 struct tcphdr *th = skb->h.th;
1023 th->check = tcp_v6_check(th, skb->len,
1024 &treq->loc_addr, &treq->rmt_addr,
1025 csum_partial((char *)th, skb->len, skb->csum));
1027 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1028 err = ip6_xmit(sk, skb, &fl, opt, 0);
1029 if (err == NET_XMIT_CN)
1035 if (opt && opt != np->opt)
1036 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Request-sock destructor: release the skb held in the request's
 * pktopts field, if there is one.
 */
1040 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1042 if (tcp6_rsk(req)->pktopts)
1043 kfree_skb(tcp6_rsk(req)->pktopts);
/*
 * Operations table for TCP/IPv6 request socks: object size of
 * struct tcp6_request_sock plus the SYN-ACK (re)transmit, ACK, destructor
 * and reset handlers defined in this file.
 * NOTE(review): the listing skips one initializer line before .obj_size
 * (and the closing brace), so the table shown here is incomplete.
 */
1046 static struct request_sock_ops tcp6_request_sock_ops = {
1048 .obj_size = sizeof(struct tcp6_request_sock),
1049 .rtx_syn_ack = tcp_v6_send_synack,
1050 .send_ack = tcp_v6_reqsk_send_ack,
1051 .destructor = tcp_v6_reqsk_destructor,
1052 .send_reset = tcp_v6_send_reset
/*
 * Test whether skb carries anything that the socket's receive options
 * (np->rxopt) ask for.  With any rxopt bit set, the visible condition is
 * true when one of these pairs holds:
 *  - a hop-by-hop header was seen and hopopts is enabled;
 *  - the first 32-bit word of the network header has flow-info bits set
 *    and rxflow is enabled;
 *  - a routing header was seen and srcrt is enabled;
 *  - a destination-options header was seen (dst0 or dst1) and dstopts
 *    is enabled.
 * NOTE(review): the return statements are not present in this listing.
 */
1055 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1057 struct ipv6_pinfo *np = inet6_sk(sk);
1058 struct inet6_skb_parm *opt = IP6CB(skb);
1060 if (np->rxopt.all) {
1061 if ((opt->hop && np->rxopt.bits.hopopts) ||
1062 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1063 np->rxopt.bits.rxflow) ||
1064 (opt->srcrt && np->rxopt.bits.srcrt) ||
1065 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/*
 * Fill in th->check for an outgoing segment of len bytes, using the
 * socket's np->saddr and np->daddr.
 *
 * For CHECKSUM_HW skbs the complemented csum_ipv6_magic() result
 * (computed with a zero partial sum) is stored, and skb->csum is set to
 * the offset of the check field within struct tcphdr.  Otherwise the
 * checksum is completed here, starting from a csum_partial() over the
 * TCP header (th->doff << 2 bytes).
 * NOTE(review): the last csum_partial() argument is not in this listing.
 */
1072 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1073 struct sk_buff *skb)
1075 struct ipv6_pinfo *np = inet6_sk(sk);
1077 if (skb->ip_summed == CHECKSUM_HW) {
1078 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1079 skb->csum = offsetof(struct tcphdr, check);
1081 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1082 csum_partial((char *)th, th->doff<<2,
/*
 * tcp_v6_send_reset - build and transmit a RST in answer to skb.
 *
 * The received packet's destination is first tested with
 * ipv6_unicast_destination().  A fresh skb is allocated with headroom for
 * MAX_HEADER plus the IPv6 and TCP headers, and a bare TCP header is
 * pushed into it with the ports of the received segment swapped.
 *
 * Sequence fields visible here: seq is copied from the received ack_seq,
 * and ack_seq is the received seq advanced by the SYN and FIN flags and
 * by the payload length (skb->len minus the TCP header length).
 * NOTE(review): the flag tests selecting between those two assignments
 * are not present in this listing.
 *
 * The reply flow uses the received addresses swapped and the receiving
 * interface as oif.  No socket is involved: routing and transmission are
 * done with a NULL sk.  If xfrm_lookup() fails the route is released;
 * after ip6_xmit() TCP_MIB_OUTSEGS and TCP_MIB_OUTRSTS are counted.
 */
1088 static void tcp_v6_send_reset(struct sk_buff *skb)
1090 struct tcphdr *th = skb->h.th, *t1;
1091 struct sk_buff *buff;
1097 if (!ipv6_unicast_destination(skb))
1101 * We need to grab some memory, and put together an RST,
1102 * and then put it into the queue to be sent.
1105 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1110 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1112 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1114 /* Swap the send and the receive. */
1115 memset(t1, 0, sizeof(*t1));
1116 t1->dest = th->source;
1117 t1->source = th->dest;
1118 t1->doff = sizeof(*t1)/4;
1122 t1->seq = th->ack_seq;
1125 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1126 + skb->len - (th->doff<<2));
1129 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1131 memset(&fl, 0, sizeof(fl));
1132 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1133 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1135 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1136 sizeof(*t1), IPPROTO_TCP,
1139 fl.proto = IPPROTO_TCP;
1140 fl.oif = tcp_v6_iif(skb);
1141 fl.fl_ip_dport = t1->dest;
1142 fl.fl_ip_sport = t1->source;
1144 /* sk = NULL, but it is safe for now. RST socket required. */
1145 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1147 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1148 dst_release(buff->dst);
1152 ip6_xmit(NULL, buff, &fl, NULL, 0);
1153 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1154 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/*
 * tcp_v6_send_ack - build and transmit a bare ACK in answer to skb,
 * without using a socket.
 *
 * seq, ack and win are given in host byte order and converted with
 * htonl()/htons().  The header length starts at sizeof(struct tcphdr);
 * the visible option code writes a NOP, NOP, TIMESTAMP option word
 * followed by tcp_time_stamp.
 * NOTE(review): the test on ts, the matching tot_len adjustment and the
 * remaining option word are not present in this listing.
 *
 * Ports and addresses are those of the received segment, swapped; oif is
 * the receiving interface.  Routing and transmission use a NULL sk.  If
 * xfrm_lookup() fails the route is released; after ip6_xmit()
 * TCP_MIB_OUTSEGS is counted.
 */
1161 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1163 struct tcphdr *th = skb->h.th, *t1;
1164 struct sk_buff *buff;
1166 int tot_len = sizeof(struct tcphdr);
1171 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1176 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1178 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1180 /* Swap the send and the receive. */
1181 memset(t1, 0, sizeof(*t1));
1182 t1->dest = th->source;
1183 t1->source = th->dest;
1184 t1->doff = tot_len/4;
1185 t1->seq = htonl(seq);
1186 t1->ack_seq = htonl(ack);
1188 t1->window = htons(win);
1191 u32 *ptr = (u32*)(t1 + 1);
1192 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1193 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1194 *ptr++ = htonl(tcp_time_stamp);
1198 buff->csum = csum_partial((char *)t1, tot_len, 0);
1200 memset(&fl, 0, sizeof(fl));
1201 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1202 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1204 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1205 tot_len, IPPROTO_TCP,
1208 fl.proto = IPPROTO_TCP;
1209 fl.oif = tcp_v6_iif(skb);
1210 fl.fl_ip_dport = t1->dest;
1211 fl.fl_ip_sport = t1->source;
1213 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1214 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1215 dst_release(buff->dst);
1218 ip6_xmit(NULL, buff, &fl, NULL, 0);
1219 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/*
 * Answer skb with an ACK built from timewait state: sequence
 * tw_snd_nxt, acknowledgment tw_rcv_nxt, window tw_rcv_wnd shifted right
 * by tw_rcv_wscale, and timestamp tw_ts_recent.
 */
1226 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1228 struct inet_timewait_sock *tw = inet_twsk(sk);
1229 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1231 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1232 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1233 tcptw->tw_ts_recent);
/*
 * Answer skb with an ACK on behalf of a pending request: sequence
 * snt_isn + 1, acknowledgment rcv_isn + 1, window req->rcv_wnd and
 * timestamp req->ts_recent.
 */
1238 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1240 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
/*
 * Work out which socket should process a segment that arrived for
 * listener sk.
 *
 * A pending request matching the segment's source port and addresses is
 * looked up first and, when used, handed to tcp_check_req().  Otherwise
 * the established/timewait tables are searched with
 * __tcp_v6_lookup_established(); a hit is distinguished by whether it is
 * in TCP_TIME_WAIT, and a timewait hit has its reference dropped with
 * inet_twsk_put().  The syncookie check at the end is compiled out
 * (#if 0).
 * NOTE(review): the tests on req and nsk and the return statements are
 * not present in this listing.
 */
1244 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1246 struct request_sock *req, **prev;
1247 struct tcphdr *th = skb->h.th;
1248 struct tcp_sock *tp = tcp_sk(sk);
1251 /* Find possible connection requests. */
1252 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1253 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1255 return tcp_check_req(sk, skb, req, prev);
1257 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1259 &skb->nh.ipv6h->daddr,
1264 if (nsk->sk_state != TCP_TIME_WAIT) {
1268 inet_twsk_put((struct inet_timewait_sock *)nsk);
1272 #if 0 /*def CONFIG_SYN_COOKIES*/
1273 if (!th->rst && !th->syn && th->ack)
1274 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/*
 * Queue a new request on the listener: the slot is computed with
 * tcp_v6_synq_hash() from the request's remote address and port and the
 * listener's hash_rnd, and the request is inserted with
 * reqsk_queue_hash_req() using TCP_TIMEOUT_INIT.
 */
1279 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1281 struct tcp_sock *tp = tcp_sk(sk);
1282 struct listen_sock *lopt = tp->accept_queue.listen_opt;
1283 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1285 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
/*
 * tcp_v6_conn_request - handle an incoming SYN on a listening socket.
 *
 * Steps visible in this listing:
 *  - ETH_P_IP packets are passed to tcp_v4_conn_request();
 *  - the destination is tested with ipv6_unicast_destination();
 *  - a full SYN queue with no preset isn logs a rate-limited
 *    "dropping request" message;
 *  - a full accept queue is tested together with tcp_synq_young() > 1;
 *  - a request is allocated, TCP options are parsed with mss_clamp
 *    derived from IPV6_MIN_MTU and the listener's user_mss, and the
 *    request is initialised with tcp_openreq_init();
 *  - the packet's source and destination become the request's remote
 *    and local addresses;
 *  - the SYN's skb is kept (extra reference via skb->users) when
 *    ipv6_opt_accepted() is true or rxinfo/rxhlim is enabled;
 *  - iif comes from the listener's bound device, or from the receiving
 *    interface for link-local peers on an unbound listener;
 *  - an initial sequence number is obtained from
 *    tcp_v6_init_sequence() and stored as snt_isn;
 *  - the SYN-ACK is sent with tcp_v6_send_synack() and the request is
 *    queued with tcp_v6_synq_add().
 *
 * The drop path visible at the end counts TCP_MIB_ATTEMPTFAILS and
 * returns 0 so that no reset is sent.
 * NOTE(review): gotos, labels and several guards (for example around the
 * isn assignment) are not present in this listing.
 */
1290 /* FIXME: this is substantially similar to the ipv4 code.
1291 * Can some kind of merge be done? -- erics
1293 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1295 struct tcp6_request_sock *treq;
1296 struct ipv6_pinfo *np = inet6_sk(sk);
1297 struct tcp_options_received tmp_opt;
1298 struct tcp_sock *tp = tcp_sk(sk);
1299 struct request_sock *req = NULL;
1300 __u32 isn = TCP_SKB_CB(skb)->when;
1302 if (skb->protocol == htons(ETH_P_IP))
1303 return tcp_v4_conn_request(sk, skb);
1305 if (!ipv6_unicast_destination(skb))
1309 * There are no SYN attacks on IPv6, yet...
1311 if (tcp_synq_is_full(sk) && !isn) {
1312 if (net_ratelimit())
1313 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1317 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1320 req = reqsk_alloc(&tcp6_request_sock_ops);
1324 tcp_clear_options(&tmp_opt);
1325 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1326 tmp_opt.user_mss = tp->rx_opt.user_mss;
1328 tcp_parse_options(skb, &tmp_opt, 0);
1330 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1331 tcp_openreq_init(req, &tmp_opt, skb);
1333 treq = tcp6_rsk(req);
1334 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1335 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1336 TCP_ECN_create_request(req, skb->h.th);
1337 treq->pktopts = NULL;
1338 if (ipv6_opt_accepted(sk, skb) ||
1339 np->rxopt.bits.rxinfo ||
1340 np->rxopt.bits.rxhlim) {
1341 atomic_inc(&skb->users);
1342 treq->pktopts = skb;
1344 treq->iif = sk->sk_bound_dev_if;
1346 /* So that link locals have meaning */
1347 if (!sk->sk_bound_dev_if &&
1348 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1349 treq->iif = tcp_v6_iif(skb);
1352 isn = tcp_v6_init_sequence(sk,skb);
1354 tcp_rsk(req)->snt_isn = isn;
1356 if (tcp_v6_send_synack(sk, req, NULL))
1359 tcp_v6_synq_add(sk, req);
1367 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1368 return 0; /* don't send reset */
1371 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1372 struct request_sock *req,
1373 struct dst_entry *dst)
1375 struct tcp6_request_sock *treq = tcp6_rsk(req);
1376 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1377 struct tcp6_sock *newtcp6sk;
1378 struct inet_sock *newinet;
1379 struct tcp_sock *newtp;
1381 struct ipv6_txoptions *opt;
1383 if (skb->protocol == htons(ETH_P_IP)) {
1388 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1393 newtcp6sk = (struct tcp6_sock *)newsk;
1394 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1396 newinet = inet_sk(newsk);
1397 newnp = inet6_sk(newsk);
1398 newtp = tcp_sk(newsk);
1400 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1402 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1405 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1408 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1410 newtp->af_specific = &ipv6_mapped;
1411 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1412 newnp->pktoptions = NULL;
1414 newnp->mcast_oif = tcp_v6_iif(skb);
1415 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1418 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1419 * here, tcp_create_openreq_child now does this for us, see the comment in
1420 * that function for the gory details. -acme
1423 /* It is tricky place. Until this moment IPv4 tcp
1424 worked with IPv6 af_tcp.af_specific.
1427 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1434 if (sk_acceptq_is_full(sk))
1437 if (np->rxopt.bits.srcrt == 2 &&
1438 opt == NULL && treq->pktopts) {
1439 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1441 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1445 struct in6_addr *final_p = NULL, final;
1448 memset(&fl, 0, sizeof(fl));
1449 fl.proto = IPPROTO_TCP;
1450 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1451 if (opt && opt->srcrt) {
1452 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1453 ipv6_addr_copy(&final, &fl.fl6_dst);
1454 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1457 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1458 fl.oif = sk->sk_bound_dev_if;
1459 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1460 fl.fl_ip_sport = inet_sk(sk)->sport;
1462 if (ip6_dst_lookup(sk, &dst, &fl))
1466 ipv6_addr_copy(&fl.fl6_dst, final_p);
1468 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1472 newsk = tcp_create_openreq_child(sk, req, skb);
1477 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1478 * count here, tcp_create_openreq_child now does this for us, see the
1479 * comment in that function for the gory details. -acme
1482 ip6_dst_store(newsk, dst, NULL);
1483 newsk->sk_route_caps = dst->dev->features &
1484 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1486 newtcp6sk = (struct tcp6_sock *)newsk;
1487 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1489 newtp = tcp_sk(newsk);
1490 newinet = inet_sk(newsk);
1491 newnp = inet6_sk(newsk);
1493 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1495 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1496 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1497 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1498 newsk->sk_bound_dev_if = treq->iif;
1500 /* Now IPv6 options...
1502 First: no IPv4 options.
1504 newinet->opt = NULL;
1507 newnp->rxopt.all = np->rxopt.all;
1509 /* Clone pktoptions received with SYN */
1510 newnp->pktoptions = NULL;
1511 if (treq->pktopts != NULL) {
1512 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1513 kfree_skb(treq->pktopts);
1514 treq->pktopts = NULL;
1515 if (newnp->pktoptions)
1516 skb_set_owner_r(newnp->pktoptions, newsk);
1519 newnp->mcast_oif = tcp_v6_iif(skb);
1520 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1522 /* Clone native IPv6 options from listening socket (if any)
1524 Yes, keeping reference count would be much more clever,
1525 but we do one more thing there: reattach optmem
1529 newnp->opt = ipv6_dup_options(newsk, opt);
1531 sock_kfree_s(sk, opt, opt->tot_len);
1534 newtp->ext_header_len = 0;
1536 newtp->ext_header_len = newnp->opt->opt_nflen +
1537 newnp->opt->opt_flen;
1539 tcp_sync_mss(newsk, dst_mtu(dst));
1540 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1541 tcp_initialize_rcv_mss(newsk);
1543 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1545 __tcp_v6_hash(newsk);
1546 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1551 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1553 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1554 if (opt && opt != np->opt)
1555 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Set up the checksum state of a received TCP/IPv6 segment.
 *
 * - If skb->ip_summed is CHECKSUM_HW, the value already in skb->csum is
 *   checked with tcp_v6_check() over the IPv6 source and destination
 *   addresses.  The "hw tcp v6 csum failed" debug message is emitted via
 *   LIMIT_NETDEBUG on the path where that check did not succeed.
 * - Segments of at most 76 bytes are checksummed in full immediately with
 *   skb_checksum(); the skb is then marked CHECKSUM_UNNECESSARY.
 * - Otherwise skb->csum is loaded with the complement of
 *   tcp_v6_check(..., 0), presumably so that the check can be completed
 *   later (callers use tcp_checksum_complete()) -- TODO confirm.
 *
 * tcp_v6_rcv() treats a negative return value as failure.
 * NOTE(review): the return statements and the else branch are not present
 * in this listing; verify them against the full source.
 */
1560 static int tcp_v6_checksum_init(struct sk_buff *skb)
1562 if (skb->ip_summed == CHECKSUM_HW) {
1563 skb->ip_summed = CHECKSUM_UNNECESSARY;
1564 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1565 &skb->nh.ipv6h->daddr,skb->csum))
1567 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
/* Short segment: compute the checksum over the whole packet right now. */
1569 if (skb->len <= 76) {
1570 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1571 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1573 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Presumably the else branch: only seed skb->csum here -- confirm. */
1575 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1576 &skb->nh.ipv6h->daddr,0);
1581 /* The socket must have its spinlock held when we get
1584 * We have a potential double-lock case here, so even when
1585 * doing backlog processing we use the BH locking scheme.
1586 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive routine for TCP over IPv6; it is installed as
 * .backlog_rcv in tcpv6_prot and is also called directly by tcp_v6_rcv().
 *
 * What the visible lines do:
 *  - an skb whose protocol is ETH_P_IP is handed to tcp_v4_do_rcv();
 *  - sk_filter() is applied to the skb;
 *  - the skb may be cloned into opt_skb (the condition for the clone is
 *    not shown in this listing) so that its IPv6 options can be latched;
 *  - TCP_ESTABLISHED sockets go through tcp_rcv_established();
 *  - the header length is validated and tcp_checksum_complete() is run;
 *  - TCP_LISTEN sockets call tcp_v6_hnd_req() and may pass the skb to a
 *    child socket via tcp_child_process();
 *  - every other state is processed by tcp_rcv_state_process();
 *  - tcp_v6_send_reset() and the TCP_MIB_INERRS counter are used on what
 *    are presumably the reset and checksum-error exits -- TODO confirm,
 *    the labels are not shown here;
 *  - finally, when opt_skb ends exactly at tp->rcv_nxt and the socket is
 *    neither in CLOSE nor LISTEN state, np->mcast_oif / np->mcast_hops are
 *    updated according to the rxinfo / rxhlim bits and, if
 *    ipv6_opt_accepted() says so, opt_skb replaces np->pktoptions.
 *
 * NOTE(review): braces, labels, gotos and return statements are missing
 * from this listing; the return-value contract cannot be read from here.
 */
1589 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1591 struct ipv6_pinfo *np = inet6_sk(sk);
1592 struct tcp_sock *tp;
1593 struct sk_buff *opt_skb = NULL;
1595 /* Imagine: socket is IPv6. IPv4 packet arrives,
1596 goes to IPv4 receive handler and backlogged.
1597 From backlog it always goes here. Kerboom...
1598 Fortunately, tcp_rcv_established and rcv_established
1599 handle them correctly, but it is not case with
1600 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1603 if (skb->protocol == htons(ETH_P_IP))
1604 return tcp_v4_do_rcv(sk, skb);
1606 if (sk_filter(sk, skb, 0))
1610 * socket locking is here for SMP purposes as backlog rcv
1611 * is currently called with bh processing disabled.
1614 /* Do Stevens' IPV6_PKTOPTIONS.
1616 Yes, guys, it is the only place in our code, where we
1617 may make it not affecting IPv4.
1618 The rest of code is protocol independent,
1619 and I do not like idea to uglify IPv4.
1621 Actually, all the idea behind IPV6_PKTOPTIONS
1622 looks not very well thought. For now we latch
1623 options, received in the last packet, enqueued
1624 by tcp. Feel free to propose better solution.
1628 opt_skb = skb_clone(skb, GFP_ATOMIC);
1630 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1631 TCP_CHECK_TIMER(sk);
1632 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1634 TCP_CHECK_TIMER(sk);
1636 goto ipv6_pktoptions;
1640 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1643 if (sk->sk_state == TCP_LISTEN) {
1644 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1649 * Queue it on the new socket if the new socket is active,
1650 * otherwise we just shortcircuit this and continue with
1654 if (tcp_child_process(sk, nsk, skb))
1657 __kfree_skb(opt_skb);
1662 TCP_CHECK_TIMER(sk);
1663 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1665 TCP_CHECK_TIMER(sk);
1667 goto ipv6_pktoptions;
1671 tcp_v6_send_reset(skb);
1674 __kfree_skb(opt_skb);
1678 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1683 /* Do you ask, what is it?
1685 1. skb was enqueued by tcp.
1686 2. skb is added to tail of read queue, rather than out of order.
1687 3. socket is not in passive state.
1688 4. Finally, it really contains options, which user wants to receive.
1691 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1692 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1693 if (np->rxopt.bits.rxinfo)
1694 np->mcast_oif = tcp_v6_iif(opt_skb);
1695 if (np->rxopt.bits.rxhlim)
1696 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1697 if (ipv6_opt_accepted(sk, opt_skb)) {
1698 skb_set_owner_r(opt_skb, sk);
1699 opt_skb = xchg(&np->pktoptions, opt_skb);
1701 __kfree_skb(opt_skb);
1702 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Entry point for incoming TCP segments carried over IPv6; registered as
 * the .handler of tcpv6_protocol.
 *
 * @pskb:   address of the received skb pointer
 * @nhoffp: not referenced in the lines visible here
 *
 * Visible processing steps: test skb->pkt_type against PACKET_HOST, count
 * the segment in TCP_MIB_INSEGS, make sure the TCP header (and then the
 * full th->doff*4 bytes) can be pulled, initialise the checksum state,
 * fill in TCP_SKB_CB(), look the socket up with __tcp_v6_lookup(), then
 * run the xfrm policy check and the socket filter.  If the socket is not
 * owned by a user context the skb is offered to tcp_prequeue() and, when
 * that returns 0, processed by tcp_v6_do_rcv(); sk_add_backlog() is
 * presumably the owned-by-user alternative -- TODO confirm.
 * The result of that stage is returned as (ret ? -1 : 0).
 *
 * NOTE(review): labels, gotos, braces and several returns are missing from
 * this listing, so the mapping of the later lines to the no-socket and
 * TIME_WAIT exits is inferred and should be checked.
 */
1711 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1713 struct sk_buff *skb = *pskb;
1718 if (skb->pkt_type != PACKET_HOST)
1722 * Count it even if it's bad.
1724 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1726 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* A data offset below the fixed header size (in 32-bit words) is checked. */
1731 if (th->doff < sizeof(struct tcphdr)/4)
1733 if (!pskb_may_pull(skb, th->doff*4))
1736 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1737 tcp_v6_checksum_init(skb) < 0))
/* Record sequence numbers and flags in the skb control block. */
1741 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1742 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1743 skb->len - th->doff*4);
1744 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1745 TCP_SKB_CB(skb)->when = 0;
1746 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1747 TCP_SKB_CB(skb)->sacked = 0;
1749 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1750 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1756 if (sk->sk_state == TCP_TIME_WAIT)
1759 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1760 goto discard_and_relse;
1762 if (sk_filter(sk, skb, 0))
1763 goto discard_and_relse;
1769 if (!sock_owned_by_user(sk)) {
1770 if (!tcp_prequeue(sk, skb))
1771 ret = tcp_v6_do_rcv(sk, skb);
1773 sk_add_backlog(sk, skb);
1777 return ret ? -1 : 0;
/* Presumably the path taken when no socket matched -- TODO confirm. */
1780 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1783 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1785 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1787 tcp_v6_send_reset(skb);
/* Presumably the TIME_WAIT path: sk is used as an inet_timewait_sock. */
1804 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1805 inet_twsk_put((struct inet_timewait_sock *)sk);
1809 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1810 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1811 inet_twsk_put((struct inet_timewait_sock *)sk);
1815 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1821 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1823 tcp_tw_deschedule((struct inet_timewait_sock *)sk);
1824 inet_twsk_put((struct inet_timewait_sock *)sk);
1828 /* Fall through to ACK */
1831 tcp_v6_timewait_ack(sk, skb);
1835 case TCP_TW_SUCCESS:;
/*
 * Revalidate, and if needed re-resolve, the route cached on an IPv6 TCP
 * socket; installed as .rebuild_header in ipv6_specific.
 *
 * The cached entry is fetched with __sk_dst_check() using np->dst_cookie.
 * The lookup code below builds a flow from the socket's addresses, flow
 * label, bound device and ports, resolves it with ip6_dst_lookup() and
 * xfrm_lookup(), stores the result with ip6_dst_store() and refreshes
 * sk->sk_route_caps.  It presumably runs only when no valid cached entry
 * exists -- TODO confirm, the guarding condition is not shown here.
 *
 * On an ip6_dst_lookup() failure sk_route_caps is cleared; on an
 * xfrm_lookup() failure the positive error is saved in sk->sk_err_soft.
 * NOTE(review): the return statements are not visible in this listing.
 */
1840 static int tcp_v6_rebuild_header(struct sock *sk)
1843 struct dst_entry *dst;
1844 struct ipv6_pinfo *np = inet6_sk(sk);
1846 dst = __sk_dst_check(sk, np->dst_cookie);
1849 struct inet_sock *inet = inet_sk(sk);
1850 struct in6_addr *final_p = NULL, final;
1853 memset(&fl, 0, sizeof(fl));
1854 fl.proto = IPPROTO_TCP;
1855 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1856 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1857 fl.fl6_flowlabel = np->flow_label;
1858 fl.oif = sk->sk_bound_dev_if;
1859 fl.fl_ip_dport = inet->dport;
1860 fl.fl_ip_sport = inet->sport;
/*
 * With a source-routing option, route towards the first address of the
 * routing header and keep the real destination in 'final'.
 */
1862 if (np->opt && np->opt->srcrt) {
1863 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1864 ipv6_addr_copy(&final, &fl.fl6_dst);
1865 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1869 err = ip6_dst_lookup(sk, &dst, &fl);
1871 sk->sk_route_caps = 0;
/* Put the saved final destination back into the flow. */
1875 ipv6_addr_copy(&fl.fl6_dst, final_p);
1877 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1878 sk->sk_err_soft = -err;
/* Cache the route; IP checksum offload and TSO are masked out of the caps. */
1883 ip6_dst_store(sk, dst, NULL);
1884 sk->sk_route_caps = dst->dev->features &
1885 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
/*
 * Transmit one TCP segment over IPv6; installed as .queue_xmit in
 * ipv6_specific.
 *
 * @skb:      segment to send; its owning socket is taken from skb->sk
 * @ipfragok: not referenced in the lines visible here
 *
 * A flow description is built from the socket (addresses, flow label with
 * ECN applied through IP6_ECN_flow_xmit(), bound device, ports).  The
 * cached route is fetched with __sk_dst_check(); the ip6_dst_lookup() /
 * xfrm_lookup() sequence below presumably runs only when that returns
 * nothing -- TODO confirm, the condition is not shown here.  The route is
 * attached to the skb with dst_clone(), the flow destination is reset to
 * np->daddr, and the packet is handed to ip6_xmit() together with the
 * socket's IPv6 options.
 *
 * Returns the ip6_xmit() result on the visible exit; the returns on the
 * error exits are not visible in this listing.
 */
1891 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1893 struct sock *sk = skb->sk;
1894 struct inet_sock *inet = inet_sk(sk);
1895 struct ipv6_pinfo *np = inet6_sk(sk);
1897 struct dst_entry *dst;
1898 struct in6_addr *final_p = NULL, final;
1900 memset(&fl, 0, sizeof(fl));
1901 fl.proto = IPPROTO_TCP;
1902 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1903 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1904 fl.fl6_flowlabel = np->flow_label;
1905 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1906 fl.oif = sk->sk_bound_dev_if;
1907 fl.fl_ip_sport = inet->sport;
1908 fl.fl_ip_dport = inet->dport;
/*
 * With a source-routing option, route towards the first address of the
 * routing header and keep the real destination in 'final'.
 */
1910 if (np->opt && np->opt->srcrt) {
1911 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1912 ipv6_addr_copy(&final, &fl.fl6_dst);
1913 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1917 dst = __sk_dst_check(sk, np->dst_cookie);
1920 int err = ip6_dst_lookup(sk, &dst, &fl);
1923 sk->sk_err_soft = -err;
1928 ipv6_addr_copy(&fl.fl6_dst, final_p);
1930 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1931 sk->sk_route_caps = 0;
/* Cache the route; IP checksum offload and TSO are masked out of the caps. */
1936 ip6_dst_store(sk, dst, NULL);
1937 sk->sk_route_caps = dst->dev->features &
1938 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1941 skb->dst = dst_clone(dst);
1943 /* Restore final destination back after routing done */
1944 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1946 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * Fill @uaddr, treated as a struct sockaddr_in6, with the peer address of
 * @sk: family AF_INET6, address np->daddr and port inet_sk(sk)->dport.
 * sin6_flowinfo is always 0.  sin6_scope_id is 0 unless the socket is bound
 * to a device and the address is link-local, in which case it is set to
 * sk->sk_bound_dev_if.
 *
 * Used as the .addr2sockaddr hook of both ipv6_specific and ipv6_mapped.
 */
1949 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1951 struct ipv6_pinfo *np = inet6_sk(sk);
1952 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1954 sin6->sin6_family = AF_INET6;
1955 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1956 sin6->sin6_port = inet_sk(sk)->dport;
1957 /* We do not store received flowlabel for TCP */
1958 sin6->sin6_flowinfo = 0;
1959 sin6->sin6_scope_id = 0;
1960 if (sk->sk_bound_dev_if &&
1961 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1962 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * .remember_stamp hook of ipv6_specific.  Per the comment below it is not
 * implemented for IPv6 yet.
 * NOTE(review): the return statement is not visible in this listing.
 */
1965 static int tcp_v6_remember_stamp(struct sock *sk)
1967 /* Alas, not yet... */
/*
 * Address-family operations for native TCP-over-IPv6 sockets.
 * tcp_v6_init_sock() installs this table in tp->af_specific.
 */
1971 static struct tcp_func ipv6_specific = {
1972 .queue_xmit = tcp_v6_xmit,
1973 .send_check = tcp_v6_send_check,
1974 .rebuild_header = tcp_v6_rebuild_header,
1975 .conn_request = tcp_v6_conn_request,
1976 .syn_recv_sock = tcp_v6_syn_recv_sock,
1977 .remember_stamp = tcp_v6_remember_stamp,
1978 .net_header_len = sizeof(struct ipv6hdr),
1980 .setsockopt = ipv6_setsockopt,
1981 .getsockopt = ipv6_getsockopt,
1982 .addr2sockaddr = v6_addr2sockaddr,
1983 .sockaddr_len = sizeof(struct sockaddr_in6)
1987 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for AF_INET6 sockets that carry IPv4 traffic.
 * Transmit, checksum, header rebuild and timestamp hooks are the IPv4
 * implementations and net_header_len is the IPv4 header size, while
 * connection-request handling, the socket-option hooks and the sockaddr
 * format remain the IPv6 ones.
 */
1990 static struct tcp_func ipv6_mapped = {
1991 .queue_xmit = ip_queue_xmit,
1992 .send_check = tcp_v4_send_check,
1993 .rebuild_header = inet_sk_rebuild_header,
1994 .conn_request = tcp_v6_conn_request,
1995 .syn_recv_sock = tcp_v6_syn_recv_sock,
1996 .remember_stamp = tcp_v4_remember_stamp,
1997 .net_header_len = sizeof(struct iphdr),
1999 .setsockopt = ipv6_setsockopt,
2000 .getsockopt = ipv6_getsockopt,
2001 .addr2sockaddr = v6_addr2sockaddr,
2002 .sockaddr_len = sizeof(struct sockaddr_in6)
2007 /* NOTE: A lot of things set to zero explicitly by call to
2008 * sk_alloc() so need not be done here.
/*
 * .init hook of tcpv6_prot: set up the TCP state of a new IPv6 TCP socket.
 *
 * Initialises the out-of-order queue, the transmit timers and the
 * prequeue; sets rto and mdev to TCP_TIMEOUT_INIT; sets snd_ssthresh to
 * 0x7fffffff, snd_cwnd_clamp to ~0 and mss_cache to 536; takes reordering
 * from sysctl_tcp_reordering; puts the socket in TCP_CLOSE; selects
 * ipv6_specific as the address-family operations and
 * tcp_init_congestion_ops as the congestion-control operations; installs
 * sk_stream_write_space and the SOCK_USE_WRITE_QUEUE flag; takes the
 * send/receive buffer sizes from sysctl_tcp_wmem[1] / sysctl_tcp_rmem[1];
 * and increments tcp_sockets_allocated.
 *
 * NOTE(review): some lines (including the return statement) are not
 * visible in this listing.
 */
2010 static int tcp_v6_init_sock(struct sock *sk)
2012 struct tcp_sock *tp = tcp_sk(sk);
2014 skb_queue_head_init(&tp->out_of_order_queue);
2015 tcp_init_xmit_timers(sk);
2016 tcp_prequeue_init(tp);
2018 tp->rto = TCP_TIMEOUT_INIT;
2019 tp->mdev = TCP_TIMEOUT_INIT;
2021 /* So many TCP implementations out there (incorrectly) count the
2022 * initial SYN frame in their delayed-ACK and congestion control
2023 * algorithms that we must have the following bandaid to talk
2024 * efficiently to them. -DaveM
2028 /* See draft-stevens-tcpca-spec-01 for discussion of the
2029 * initialization of these values.
2031 tp->snd_ssthresh = 0x7fffffff;
2032 tp->snd_cwnd_clamp = ~0;
2033 tp->mss_cache = 536;
2035 tp->reordering = sysctl_tcp_reordering;
2037 sk->sk_state = TCP_CLOSE;
2039 tp->af_specific = &ipv6_specific;
2040 tp->ca_ops = &tcp_init_congestion_ops;
2041 sk->sk_write_space = sk_stream_write_space;
2042 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2044 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2045 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2047 atomic_inc(&tcp_sockets_allocated);
/*
 * .destroy hook of tcpv6_prot: run the IPv4 TCP teardown
 * (tcp_v4_destroy_sock(), whose return value is ignored), then return the
 * result of inet6_destroy_sock().
 */
2052 static int tcp_v6_destroy_sock(struct sock *sk)
2054 extern int tcp_v4_destroy_sock(struct sock *sk);
2056 tcp_v4_destroy_sock(sk);
2057 return inet6_destroy_sock(sk);
2060 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Format one line describing a pending connection request.
 *
 * @seq: output seq_file
 * @sk:  socket the request belongs to; supplies the local port
 * @req: the request sock; supplies both addresses and the remote port
 * @i:   presumably the entry number printed first -- TODO confirm
 * @uid: presumably the owner uid to print -- TODO confirm
 *
 * The time to expiry is computed as req->expires - jiffies and printed
 * through jiffies_to_clock_t().
 * NOTE(review): the output call itself and some of its arguments are not
 * visible in this listing.
 */
2061 static void get_openreq6(struct seq_file *seq,
2062 struct sock *sk, struct request_sock *req, int i, int uid)
2064 struct in6_addr *dest, *src;
2065 int ttd = req->expires - jiffies;
2070 src = &tcp6_rsk(req)->loc_addr;
2071 dest = &tcp6_rsk(req)->rmt_addr;
2073 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2074 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2076 src->s6_addr32[0], src->s6_addr32[1],
2077 src->s6_addr32[2], src->s6_addr32[3],
2078 ntohs(inet_sk(sk)->sport),
2079 dest->s6_addr32[0], dest->s6_addr32[1],
2080 dest->s6_addr32[2], dest->s6_addr32[3],
2081 ntohs(inet_rsk(req)->rmt_port),
2083 0,0, /* could print option size, but that is af dependent. */
2084 1, /* timers active (only the expire timer) */
2085 jiffies_to_clock_t(ttd),
2088 0, /* non standard timer */
2089 0, /* open_requests have no inode */
/*
 * Format one line describing a TCP/IPv6 socket; called from tcp6_seq_show()
 * for the LISTENING and ESTABLISHED iterator states.
 *
 * @seq: output seq_file
 * @sp:  socket to describe
 * @i:   presumably the entry number printed first -- TODO confirm
 *
 * The local address is np->rcv_saddr and the ports come from the inet
 * socket.  The timer expiry that is reported is tp->timeout when a
 * retransmit or zero-window-probe timer is pending, sp->sk_timer.expires
 * when sk_timer is pending, and jiffies (i.e. a zero remaining time)
 * otherwise.  snd_ssthresh is printed as -1 when it is >= 0xFFFF.
 * NOTE(review): the output call, the 'dest' assignment and several
 * arguments are not visible in this listing.
 */
2093 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2095 struct in6_addr *dest, *src;
2098 unsigned long timer_expires;
2099 struct inet_sock *inet = inet_sk(sp);
2100 struct tcp_sock *tp = tcp_sk(sp);
2101 struct ipv6_pinfo *np = inet6_sk(sp);
2104 src = &np->rcv_saddr;
2105 destp = ntohs(inet->dport);
2106 srcp = ntohs(inet->sport);
/* Pick the expiry time of whichever timer is currently pending. */
2107 if (tp->pending == TCP_TIME_RETRANS) {
2109 timer_expires = tp->timeout;
2110 } else if (tp->pending == TCP_TIME_PROBE0) {
2112 timer_expires = tp->timeout;
2113 } else if (timer_pending(&sp->sk_timer)) {
2115 timer_expires = sp->sk_timer.expires;
2118 timer_expires = jiffies;
2122 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2123 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2125 src->s6_addr32[0], src->s6_addr32[1],
2126 src->s6_addr32[2], src->s6_addr32[3], srcp,
2127 dest->s6_addr32[0], dest->s6_addr32[1],
2128 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2130 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2132 jiffies_to_clock_t(timer_expires - jiffies),
2137 atomic_read(&sp->sk_refcnt), sp,
2138 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2139 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Format one line describing a TIME_WAIT socket; called from
 * tcp6_seq_show() for the TIME_WAIT iterator state.
 *
 * @seq: output seq_file
 * @tw:  timewait socket to describe
 * @i:   presumably the entry number printed first -- TODO confirm
 *
 * Addresses come from the tcp6_timewait_sock view of @tw, ports from @tw
 * itself.  The state column is tw->tw_substate, the queue columns are 0,
 * the timer type column is the literal 3, and the remaining time is
 * tw->tw_ttd - jiffies converted with jiffies_to_clock_t().
 * NOTE(review): the output call and a few lines are not visible here.
 */
2143 static void get_timewait6_sock(struct seq_file *seq,
2144 struct inet_timewait_sock *tw, int i)
2146 struct in6_addr *dest, *src;
2148 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2149 int ttd = tw->tw_ttd - jiffies;
2154 dest = &tcp6tw->tw_v6_daddr;
2155 src = &tcp6tw->tw_v6_rcv_saddr;
2156 destp = ntohs(tw->tw_dport);
2157 srcp = ntohs(tw->tw_sport);
2160 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2161 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2163 src->s6_addr32[0], src->s6_addr32[1],
2164 src->s6_addr32[2], src->s6_addr32[3], srcp,
2165 dest->s6_addr32[0], dest->s6_addr32[1],
2166 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2167 tw->tw_substate, 0, 0,
2168 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2169 atomic_read(&tw->tw_refcnt), tw);
2172 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback (the .seq_show of tcp6_seq_afinfo).
 *
 * For SEQ_START_TOKEN the column header text is emitted.  Otherwise the
 * iterator state selects the formatter: get_tcp6_sock() for listening and
 * established sockets, get_openreq6() for pending requests (with
 * st->syn_wait_sk as the owning socket) and get_timewait6_sock() for
 * TIME_WAIT entries.
 * NOTE(review): the assignment of 'st', the break statements and the
 * return value are not visible in this listing.
 */
2173 static int tcp6_seq_show(struct seq_file *seq, void *v)
2175 struct tcp_iter_state *st;
2177 if (v == SEQ_START_TOKEN) {
2182 "st tx_queue rx_queue tr tm->when retrnsmt"
2183 " uid timeout inode\n");
2188 switch (st->state) {
2189 case TCP_SEQ_STATE_LISTENING:
2190 case TCP_SEQ_STATE_ESTABLISHED:
2191 get_tcp6_sock(seq, v, st->num);
2193 case TCP_SEQ_STATE_OPENREQ:
2194 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2196 case TCP_SEQ_STATE_TIME_WAIT:
2197 get_timewait6_sock(seq, v, st->num);
/*
 * Descriptor passed to tcp_proc_register() / tcp_proc_unregister(): it
 * names the owning module, the show callback and the file_operations
 * object to use.
 * NOTE(review): further initialisers (lines 2207-2208 of the original) are
 * not visible in this listing.
 */
2204 static struct file_operations tcp6_seq_fops;
2205 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2206 .owner = THIS_MODULE,
2209 .seq_show = tcp6_seq_show,
2210 .seq_fops = &tcp6_seq_fops,
/* Register the TCPv6 seq_file descriptor; returns tcp_proc_register()'s result. */
2213 int __init tcp6_proc_init(void)
2215 return tcp_proc_register(&tcp6_seq_afinfo);
/* Undo tcp6_proc_init(): unregister the TCPv6 seq_file descriptor. */
2218 void tcp6_proc_exit(void)
2220 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * Protocol operations table for TCP over IPv6, referenced by tcpv6_protosw.
 * Only connect, init, destroy, backlog_rcv, hash and get_port use
 * IPv6-specific functions; the remaining hooks, the memory accounting
 * pointers and the sysctl arrays are the generic TCP ones.  Object sizes
 * and the request-sock operations are the tcp6 variants.
 * NOTE(review): a few initialisers are not visible in this listing.
 */
2224 struct proto tcpv6_prot = {
2226 .owner = THIS_MODULE,
2228 .connect = tcp_v6_connect,
2229 .disconnect = tcp_disconnect,
2230 .accept = tcp_accept,
2232 .init = tcp_v6_init_sock,
2233 .destroy = tcp_v6_destroy_sock,
2234 .shutdown = tcp_shutdown,
2235 .setsockopt = tcp_setsockopt,
2236 .getsockopt = tcp_getsockopt,
2237 .sendmsg = tcp_sendmsg,
2238 .recvmsg = tcp_recvmsg,
2239 .backlog_rcv = tcp_v6_do_rcv,
2240 .hash = tcp_v6_hash,
2241 .unhash = tcp_unhash,
2242 .get_port = tcp_v6_get_port,
2243 .enter_memory_pressure = tcp_enter_memory_pressure,
2244 .sockets_allocated = &tcp_sockets_allocated,
2245 .memory_allocated = &tcp_memory_allocated,
2246 .memory_pressure = &tcp_memory_pressure,
2247 .sysctl_mem = sysctl_tcp_mem,
2248 .sysctl_wmem = sysctl_tcp_wmem,
2249 .sysctl_rmem = sysctl_tcp_rmem,
2250 .max_header = MAX_TCP_HEADER,
2251 .obj_size = sizeof(struct tcp6_sock),
2252 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2253 .rsk_prot = &tcp6_request_sock_ops,
/*
 * inet6 protocol descriptor for TCP, registered for IPPROTO_TCP by
 * tcpv6_init(): tcp_v6_rcv() receives packets, tcp_v6_err() is the error
 * handler, and the INET6_PROTO_NOPOLICY and INET6_PROTO_FINAL flags are set.
 */
2256 static struct inet6_protocol tcpv6_protocol = {
2257 .handler = tcp_v6_rcv,
2258 .err_handler = tcp_v6_err,
2259 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry tying SOCK_STREAM / IPPROTO_TCP to tcpv6_prot and the
 * inet6 stream socket operations; flagged INET_PROTOSW_PERMANENT.
 * Registered by tcpv6_init().
 * NOTE(review): two initialisers (lines 2269-2270 of the original) are not
 * visible in this listing.
 */
2262 extern struct proto_ops inet6_stream_ops;
2264 static struct inet_protosw tcpv6_protosw = {
2265 .type = SOCK_STREAM,
2266 .protocol = IPPROTO_TCP,
2267 .prot = &tcpv6_prot,
2268 .ops = &inet6_stream_ops,
2271 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time initialisation of TCP over IPv6: register the inet6 protocol
 * handler for IPPROTO_TCP and then the socket-switch entry.  A failure of
 * inet6_add_protocol() is only logged; registration of the protosw is
 * attempted regardless.
 */
2274 void __init tcpv6_init(void)
2276 /* register inet6 protocol */
2277 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2278 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2279 inet6_register_protosw(&tcpv6_protosw);