1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9 #include <linux/types.h>
10 #include <linux/timer.h>
11 #include <linux/module.h>
13 #include <linux/tcp.h>
14 #include <linux/spinlock.h>
15 #include <linux/skbuff.h>
16 #include <linux/ipv6.h>
17 #include <net/ip6_checksum.h>
18 #include <asm/unaligned.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/netfilter_ipv6.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_ecache.h>
28 #include <net/netfilter/nf_log.h>
29 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
/*
 * Compile-time defaults of three TCP tracking tunables: be_liberal (0),
 * loose pick-up of already established connections (1) and the
 * retransmission limit (3).
 * NOTE(review): tcp_new() and tcp_packet() read tn->tcp_loose and
 * tn->tcp_max_retrans from the per-net data; presumably those are seeded
 * from these values - confirm against the init code, which is not visible.
 */
32 /* "Be conservative in what you do,
33 be liberal in what you accept from others."
34 If it's non-zero, we mark only out of window RST segments as INVALID. */
35 static int nf_ct_tcp_be_liberal __read_mostly = 0;
37 /* If it is set to zero, we disable picking up already established
39 static int nf_ct_tcp_loose __read_mostly = 1;
41 /* Max number of the retransmitted packets without receiving an (acceptable)
42 ACK from the destination. If this number is reached, a shorter timer
44 static int nf_ct_tcp_max_retrans __read_mostly = 3;
46 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
47 closely. They're more complex. --RR */
/*
 * Printable names of the conntrack states, indexed by the state value
 * (see tcp_conntrack_names[state] in tcp_print_conntrack() and the
 * "invalid packet ignored in state" log message in tcp_packet()).
 * NOTE(review): the initialiser entries are not visible in this listing.
 */
49 static const char *const tcp_conntrack_names[] = {
/*
 * Postfix unit multipliers so that timeouts read naturally ("2 MINS",
 * "5 DAYS"). Each one expands to a trailing "* factor" chained onto the
 * next smaller unit, so it is only meaningful directly after a number.
 * NOTE(review): SECS is used here but its definition is not visible.
 */
63 #define MINS * 60 SECS
64 #define HOURS * 60 MINS
65 #define DAYS * 24 HOURS
/*
 * Default timeout for every conntrack state, written with the
 * SECS/MINS/DAYS postfix macros. Besides the real states there are two
 * extra slots, TCP_CONNTRACK_RETRANS and TCP_CONNTRACK_UNACK; tcp_packet()
 * uses the slots of the same name in its timeouts[] argument to shorten
 * the timeout after too many retransmissions or while data is
 * unacknowledged.
 * NOTE(review): presumably this table seeds the per-net timeouts returned
 * by tcp_get_timeouts() - the copy is not visible here.
 */
67 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
68 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
69 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
70 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
71 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
72 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
73 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
74 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
75 [TCP_CONNTRACK_CLOSE] = 10 SECS,
76 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
77 /* RFC1122 says the R2 limit should be at least 100 seconds.
78 Linux uses 15 packets as limit, which corresponds
79 to ~13-30min depending on RTO. */
80 [TCP_CONNTRACK_RETRANS] = 5 MINS,
81 [TCP_CONNTRACK_UNACK] = 5 MINS,
/*
 * Two-letter aliases for the conntrack states, used to keep the rows of
 * the tcp_conntracks transition table short. sIV (TCP_CONNTRACK_MAX) and
 * sIG (TCP_CONNTRACK_IGNORE) are not real states: tcp_packet() handles
 * them in dedicated switch cases.
 */
84 #define sNO TCP_CONNTRACK_NONE
85 #define sSS TCP_CONNTRACK_SYN_SENT
86 #define sSR TCP_CONNTRACK_SYN_RECV
87 #define sES TCP_CONNTRACK_ESTABLISHED
88 #define sFW TCP_CONNTRACK_FIN_WAIT
89 #define sCW TCP_CONNTRACK_CLOSE_WAIT
90 #define sLA TCP_CONNTRACK_LAST_ACK
91 #define sTW TCP_CONNTRACK_TIME_WAIT
92 #define sCL TCP_CONNTRACK_CLOSE
93 #define sS2 TCP_CONNTRACK_SYN_SENT2
94 #define sIV TCP_CONNTRACK_MAX
95 #define sIG TCP_CONNTRACK_IGNORE
97 /* What TCP flags are set from RST/SYN/FIN/ACK. */
108 * The TCP state transition table needs a few words...
110 * We are the man in the middle. All the packets go through us
111 * but might get lost in transit to the destination.
112 * It is assumed that the destinations can't receive segments
115 * The checked segment is in window, but our windows are *not*
116 * equivalent with the ones of the sender/receiver. We always
117 * try to guess the state of the current sender.
119 * The meanings of the states are:
121 * NONE: initial state
122 * SYN_SENT: SYN-only packet seen
123 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
124 * SYN_RECV: SYN-ACK packet seen
125 * ESTABLISHED: ACK packet seen
126 * FIN_WAIT: FIN packet seen
127 * CLOSE_WAIT: ACK seen (after FIN)
128 * LAST_ACK: FIN seen (after FIN)
129 * TIME_WAIT: last ACK seen
130 * CLOSE: closed connection (RST)
132 * Packets marked as IGNORED (sIG):
133 * if they may be either invalid or valid
134 * and the receiver may send back a connection
135 * closing RST or a SYN/ACK.
137 * Packets marked as INVALID (sIV):
138 * if we regard them as truly invalid packets
/*
 * State transition table, looked up as tcp_conntracks[dir][index][old_state]
 * (see tcp_packet() and tcp_new()):
 *   dir       - packet direction; the first group of rows is used by
 *               tcp_new() with dir 0,
 *   index     - flag class returned by get_conntrack_index(); rows are in
 *               the order syn, synack, fin, ack, rst, none,
 *   old_state - current state; columns are in the order given by the
 *               "sNO, sSS, ..., sS2" comment above each row.
 * Each entry is the next state. sIV and sIG are markers that tcp_packet()
 * handles specially (TCP_CONNTRACK_MAX / TCP_CONNTRACK_IGNORE cases).
 */
140 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
143 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
144 /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
146 * sNO -> sSS Initialize a new connection
147 * sSS -> sSS Retransmitted SYN
148 * sS2 -> sS2 Late retransmitted SYN
150 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
151 * are errors. Receiver will reply with RST
152 * and close the connection.
153 * Or we are not in sync and hold a dead connection.
157 * sTW -> sSS Reopened connection (RFC 1122).
160 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
161 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
163 * sNO -> sIV Too late and no reason to do anything
164 * sSS -> sIV Client can't send SYN and then SYN/ACK
165 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
166 * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
167 * sES -> sIV Invalid SYN/ACK packets sent by the client
174 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
175 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
177 * sNO -> sIV Too late and no reason to do anything...
178 * sSS -> sIV Client migth not send FIN in this state:
179 * we enforce waiting for a SYN/ACK reply first.
181 * sSR -> sFW Close started.
183 * sFW -> sLA FIN seen in both directions, waiting for
185 * Migth be a retransmitted FIN as well...
187 * sLA -> sLA Retransmitted FIN. Remain in the same state.
191 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
192 /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
194 * sNO -> sES Assumed.
195 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
197 * sSR -> sES Established state is reached.
199 * sFW -> sCW Normal close request answered by ACK.
201 * sLA -> sTW Last ACK detected.
202 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
205 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
206 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
207 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
211 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
212 /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
214 * sNO -> sIV Never reached.
215 * sSS -> sS2 Simultaneous open
216 * sS2 -> sS2 Retransmitted simultaneous SYN
217 * sSR -> sIV Invalid SYN packets sent by the server
222 * sTW -> sIV Reopened connection, but server may not do it.
225 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
226 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
228 * sSS -> sSR Standard open.
229 * sS2 -> sSR Simultaneous open
230 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
231 * sES -> sIG Late retransmitted SYN/ACK?
232 * sFW -> sIG Might be SYN/ACK answering ignored SYN
238 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
239 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
241 * sSS -> sIV Server might not send FIN in this state.
243 * sSR -> sFW Close started.
245 * sFW -> sLA FIN seen in both directions.
247 * sLA -> sLA Retransmitted FIN.
251 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
252 /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
254 * sSS -> sIG Might be a half-open connection.
256 * sSR -> sSR Might answer late resent SYN.
258 * sFW -> sCW Normal close request answered by ACK.
260 * sLA -> sTW Last ACK detected.
261 * sTW -> sTW Retransmitted last ACK.
264 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
265 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
266 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
/* Return the TCP tracker's per-network-namespace data embedded in @net. */
270 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
272 return &net->ct.nf_ct_proto.tcp;
/*
 * Fill the TCP part of @tuple from the packet: fetch the first 8 bytes of
 * the TCP header at @dataoff with skb_header_pointer() and copy the source
 * and destination ports as they appear in the header (tcp_print_tuple()
 * converts them with ntohs()).
 * NOTE(review): the failure check on hp and the bool return statements are
 * not visible in this listing.
 */
275 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
276 struct nf_conntrack_tuple *tuple)
278 const struct tcphdr *hp;
281 /* Actually only need first 8 bytes. */
282 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
286 tuple->src.u.tcp.port = hp->source;
287 tuple->dst.u.tcp.port = hp->dest;
/*
 * Build the TCP part of the reverse-direction tuple: @tuple gets the ports
 * of @orig with source and destination swapped.
 * NOTE(review): the bool return statement is not visible in this listing.
 */
292 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
293 const struct nf_conntrack_tuple *orig)
295 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
296 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
300 /* Print out the per-protocol part of the tuple. */
/*
 * Writes "sport=<n> dport=<n> " to @s with both ports converted to host
 * byte order, and returns whatever seq_printf() returns.
 */
301 static int tcp_print_tuple(struct seq_file *s,
302 const struct nf_conntrack_tuple *tuple)
304 return seq_printf(s, "sport=%hu dport=%hu ",
305 ntohs(tuple->src.u.tcp.port),
306 ntohs(tuple->dst.u.tcp.port));
309 /* Print out the private part of the conntrack. */
/*
 * The state is copied while holding ct->lock (bottom halves disabled) and
 * its name from tcp_conntrack_names[] is printed to @s after the lock has
 * been dropped. Returns the seq_printf() result.
 */
310 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
312 enum tcp_conntrack state;
314 spin_lock_bh(&ct->lock);
315 state = ct->proto.tcp.state;
316 spin_unlock_bh(&ct->lock);
318 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
/*
 * Classify a segment by its flags for the tcp_conntracks lookup. The first
 * match wins, in this order: RST, SYN (TCP_SYNACK_SET when ACK is also set,
 * otherwise TCP_SYN_SET), FIN, ACK; anything else is TCP_NONE_SET.
 */
321 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
323 if (tcph->rst) return TCP_RST_SET;
324 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
325 else if (tcph->fin) return TCP_FIN_SET;
326 else if (tcph->ack) return TCP_ACK_SET;
327 else return TCP_NONE_SET;
330 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
331 in IP Filter' by Guido van Rooij.
333 http://www.sane.nl/events/sane2000/papers.html
334 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
336 The boundaries and the conditions are changed according to RFC793:
337 the packet must intersect the window (i.e. segments may be
338 after the right or before the left edge) and thus receivers may ACK
339 segments after the right edge of the window.
341 td_maxend = max(sack + max(win,1)) seen in reply packets
342 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
343 td_maxwin += seq + len - sender.td_maxend
344 if seq + len > sender.td_maxend
345 td_end = max(seq + len) seen in sent packets
347 I. Upper bound for valid data: seq <= sender.td_maxend
348 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
349 III. Upper bound for valid (s)ack: sack <= receiver.td_end
350 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
352 where sack is the highest right edge of sack block found in the packet
353 or ack in the case of packet without SACK option.
355 The upper bound limit for a valid (s)ack is not ignored -
356 we don't have to deal with fragments.
/*
 * Sequence number just past the end of a segment: @seq plus the payload
 * length (len - @dataoff - TCP header length from doff) plus one for each
 * of SYN and FIN if set. Callers in this file pass skb->len as the length.
 * NOTE(review): the "len" parameter line is not visible in this listing.
 */
359 static inline __u32 segment_seq_plus_len(__u32 seq,
361 unsigned int dataoff,
362 const struct tcphdr *tcph)
364 /* XXX Should I use payload length field in IP/IPv6 header ?
366 return (seq + len - dataoff - tcph->doff*4
367 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
/*
 * MAXACKWINDOW(sender) is the width used for the lower bound of a valid
 * (s)ack (check IV in tcp_in_window()). It yields sender->td_maxwin when
 * that exceeds MAXACKWINCONST.
 * NOTE(review): the alternative arm of the conditional is not visible in
 * this listing - presumably MAXACKWINCONST; confirm.
 */
370 /* Fixme: what about big packets? */
371 #define MAXACKWINCONST 66000
372 #define MAXACKWINDOW(sender) \
373 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
/*
 * tcp_options - record the SACK-permitted and window-scale options of a
 * segment in @state.
 *
 * The option bytes following the fixed TCP header are fetched with
 * skb_header_pointer() into an on-stack buffer sized for the largest
 * possible option area (15 * 4 - sizeof(struct tcphdr)). A SACK-permitted
 * option of the right length sets IP_CT_TCP_FLAG_SACK_PERM; a window-scale
 * option of the right length stores the shift in state->td_scale, clamped
 * to 14, together with IP_CT_TCP_FLAG_WINDOW_SCALE. Parsing stops at an
 * option with opsize < 2 or one that would run past the option area.
 * NOTE(review): the loop header and several statements are not visible in
 * this listing.
 */
377 * Simplified tcp_parse_options routine from tcp_input.c
379 static void tcp_options(const struct sk_buff *skb,
380 unsigned int dataoff,
381 const struct tcphdr *tcph,
382 struct ip_ct_tcp_state *state)
384 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
385 const unsigned char *ptr;
386 int length = (tcph->doff*4) - sizeof(struct tcphdr);
391 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
405 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
410 if (opsize < 2) /* "silly options" */
413 return; /* don't parse partial options */
415 if (opcode == TCPOPT_SACK_PERM
416 && opsize == TCPOLEN_SACK_PERM)
417 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
418 else if (opcode == TCPOPT_WINDOW
419 && opsize == TCPOLEN_WINDOW) {
420 state->td_scale = *(u_int8_t *)ptr;
422 if (state->td_scale > 14) {
424 state->td_scale = 14;
427 IP_CT_TCP_FLAG_WINDOW_SCALE;
/*
 * tcp_sack - scan the TCP options for SACK blocks.
 *
 * @sack is in/out: the caller passes the plain ack value. For every SACK
 * block of a well-formed SACK option (length is the base size plus a whole
 * number of blocks) the second 32-bit word of the block is read with
 * get_unaligned_be32() and compared against *sack using after().
 * A header whose options are exactly the aligned timestamp option is
 * recognised up front as a fast path.
 * NOTE(review): the statement executed when after(tmp, *sack) holds and the
 * fast-path action are not visible in this listing - presumably *sack is
 * raised to tmp and the function returns early; confirm.
 */
435 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
436 const struct tcphdr *tcph, __u32 *sack)
438 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
439 const unsigned char *ptr;
440 int length = (tcph->doff*4) - sizeof(struct tcphdr);
446 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
450 /* Fast path for timestamp-only option */
451 if (length == TCPOLEN_TSTAMP_ALIGNED
452 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
454 | (TCPOPT_TIMESTAMP << 8)
455 | TCPOLEN_TIMESTAMP))
465 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
470 if (opsize < 2) /* "silly options" */
473 return; /* don't parse partial options */
475 if (opcode == TCPOPT_SACK
476 && opsize >= (TCPOLEN_SACK_BASE
477 + TCPOLEN_SACK_PERBLOCK)
478 && !((opsize - TCPOLEN_SACK_BASE)
479 % TCPOLEN_SACK_PERBLOCK)) {
481 i < (opsize - TCPOLEN_SACK_BASE);
482 i += TCPOLEN_SACK_PERBLOCK) {
483 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
485 if (after(tmp, *sack))
/*
 * NAT_OFFSET(ct, dir, seq): sequence-number adjustment for @seq in
 * direction @dir. With CONFIG_NF_NAT_NEEDED it calls nat_offset(), which
 * reads the nf_ct_nat_offset hook with rcu_dereference() and returns the
 * hook's result, or 0 when no hook is registered. A second definition of
 * the macro evaluates to 0.
 * NOTE(review): the #else/#endif lines are not visible in this listing.
 */
496 #ifdef CONFIG_NF_NAT_NEEDED
497 static inline s16 nat_offset(const struct nf_conn *ct,
498 enum ip_conntrack_dir dir,
501 typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
503 return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
505 #define NAT_OFFSET(ct, dir, seq) \
506 (nat_offset(ct, dir, seq))
508 #define NAT_OFFSET(ct, dir, seq) 0
/*
 * tcp_in_window - check a segment against the tracked windows of both ends.
 *
 * seq, ack and window are read from @tcph and the end of the segment is
 * computed with segment_seq_plus_len(). When the receiver has SACK
 * permitted, tcp_sack() is given the chance to update sack. ack and sack
 * are then corrected by NAT_OFFSET() for the opposite direction.
 *
 * If the sender has no window data yet (td_maxwin == 0), or the segment
 * ends beyond sender->td_end while in SYN_SENT (original direction) or
 * SYN_RECV (reply direction), the sender data is (re)initialised from this
 * packet.
 *
 * The segment is accepted when all four bounds hold:
 *   I.   seq  <= sender->td_maxend
 *   II.  end  >= sender->td_end - receiver->td_maxwin
 *   III. sack <= receiver->td_end
 *   IV.  sack >= receiver->td_end - MAXACKWINDOW(sender)
 * On acceptance the window is scaled by sender->td_scale and the tracked
 * td_end, td_maxwin, td_maxend and td_maxack values are advanced; for
 * index == TCP_ACK_SET the last_* fields are compared with this packet to
 * spot retransmissions. When the result is false and LOG_INVALID() is
 * enabled, the packet is logged with a message naming the first bound
 * that failed.
 *
 * NOTE(review): several lines of this function (among them the index and
 * pf parameters, braces and the return) are not visible in this listing;
 * the summary covers only the visible statements.
 */
511 static bool tcp_in_window(const struct nf_conn *ct,
512 struct ip_ct_tcp *state,
513 enum ip_conntrack_dir dir,
515 const struct sk_buff *skb,
516 unsigned int dataoff,
517 const struct tcphdr *tcph,
520 struct net *net = nf_ct_net(ct);
521 struct nf_tcp_net *tn = tcp_pernet(net);
522 struct ip_ct_tcp_state *sender = &state->seen[dir];
523 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
524 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
525 __u32 seq, ack, sack, end, win, swin;
530 * Get the required data from the packet.
532 seq = ntohl(tcph->seq);
533 ack = sack = ntohl(tcph->ack_seq);
534 win = ntohs(tcph->window);
535 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
537 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
538 tcp_sack(skb, dataoff, tcph, &sack);
540 /* Take into account NAT sequence number mangling */
541 receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
542 ack -= receiver_offset;
543 sack -= receiver_offset;
545 pr_debug("tcp_in_window: START\n");
546 pr_debug("tcp_in_window: ");
547 nf_ct_dump_tuple(tuple);
548 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
549 seq, ack, receiver_offset, sack, receiver_offset, win, end);
550 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
551 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
552 sender->td_end, sender->td_maxend, sender->td_maxwin,
554 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
557 if (sender->td_maxwin == 0) {
559 * Initialize sender data.
563 * SYN-ACK in reply to a SYN
564 * or SYN from reply direction in simultaneous open.
567 sender->td_maxend = end;
568 sender->td_maxwin = (win == 0 ? 1 : win);
570 tcp_options(skb, dataoff, tcph, sender);
573 * Both sides must send the Window Scale option
574 * to enable window scaling in either direction.
576 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
577 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
579 receiver->td_scale = 0;
581 /* Simultaneous open */
585 * We are in the middle of a connection,
586 * its history is lost for us.
587 * Let's try to use the data from the packet.
589 sender->td_end = end;
590 swin = win << sender->td_scale;
591 sender->td_maxwin = (swin == 0 ? 1 : swin);
592 sender->td_maxend = end + sender->td_maxwin;
594 * We haven't seen traffic in the other direction yet
595 * but we have to tweak window tracking to pass III
596 * and IV until that happens.
598 if (receiver->td_maxwin == 0)
599 receiver->td_end = receiver->td_maxend = sack;
601 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
602 && dir == IP_CT_DIR_ORIGINAL)
603 || (state->state == TCP_CONNTRACK_SYN_RECV
604 && dir == IP_CT_DIR_REPLY))
605 && after(end, sender->td_end)) {
607 * RFC 793: "if a TCP is reinitialized ... then it need
608 * not wait at all; it must only be sure to use sequence
609 * numbers larger than those recently used."
612 sender->td_maxend = end;
613 sender->td_maxwin = (win == 0 ? 1 : win);
615 tcp_options(skb, dataoff, tcph, sender);
620 * If there is no ACK, just pretend it was set and OK.
622 ack = sack = receiver->td_end;
623 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
624 (TCP_FLAG_ACK|TCP_FLAG_RST))
627 * Broken TCP stacks, that set ACK in RST packets as well
628 * with zero ack value.
630 ack = sack = receiver->td_end;
633 if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
635 * RST sent answering SYN.
637 seq = end = sender->td_end;
639 pr_debug("tcp_in_window: ");
640 nf_ct_dump_tuple(tuple);
641 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
642 seq, ack, receiver_offset, sack, receiver_offset, win, end);
643 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
644 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
645 sender->td_end, sender->td_maxend, sender->td_maxwin,
647 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
650 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
651 before(seq, sender->td_maxend + 1),
652 after(end, sender->td_end - receiver->td_maxwin - 1),
653 before(sack, receiver->td_end + 1),
654 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
656 if (before(seq, sender->td_maxend + 1) &&
657 after(end, sender->td_end - receiver->td_maxwin - 1) &&
658 before(sack, receiver->td_end + 1) &&
659 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
661 * Take into account window scaling (RFC 1323).
664 win <<= sender->td_scale;
667 * Update sender data.
669 swin = win + (sack - ack);
670 if (sender->td_maxwin < swin)
671 sender->td_maxwin = swin;
672 if (after(end, sender->td_end)) {
673 sender->td_end = end;
674 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
677 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
678 sender->td_maxack = ack;
679 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
680 } else if (after(ack, sender->td_maxack))
681 sender->td_maxack = ack;
685 * Update receiver data.
687 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
688 receiver->td_maxwin += end - sender->td_maxend;
689 if (after(sack + win, receiver->td_maxend - 1)) {
690 receiver->td_maxend = sack + win;
692 receiver->td_maxend++;
694 if (ack == receiver->td_end)
695 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
698 * Check retransmissions.
700 if (index == TCP_ACK_SET) {
701 if (state->last_dir == dir
702 && state->last_seq == seq
703 && state->last_ack == ack
704 && state->last_end == end
705 && state->last_win == win)
708 state->last_dir = dir;
709 state->last_seq = seq;
710 state->last_ack = ack;
711 state->last_end = end;
712 state->last_win = win;
719 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
722 if (!res && LOG_INVALID(net, IPPROTO_TCP))
723 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
725 before(seq, sender->td_maxend + 1) ?
726 after(end, sender->td_end - receiver->td_maxwin - 1) ?
727 before(sack, receiver->td_end + 1) ?
728 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
729 : "ACK is under the lower bound (possible overly delayed ACK)"
730 : "ACK is over the upper bound (ACKed data not seen yet)"
731 : "SEQ is under the lower bound (already ACKed data retransmitted)"
732 : "SEQ is over the upper bound (over the window of the receiver)");
735 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
736 "receiver end=%u maxend=%u maxwin=%u\n",
737 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
738 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
/*
 * Lookup table used by tcp_error(): the index is the TCP flag byte with
 * ECE, CWR and PSH masked out, and a non-zero entry means the flag
 * combination is acceptable. Combinations without an initialiser are zero
 * and therefore rejected.
 * NOTE(review): part of the size expression and some entries are not
 * visible in this listing.
 */
743 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
744 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
748 [TCPHDR_SYN|TCPHDR_URG] = 1,
749 [TCPHDR_SYN|TCPHDR_ACK] = 1,
751 [TCPHDR_RST|TCPHDR_ACK] = 1,
752 [TCPHDR_FIN|TCPHDR_ACK] = 1,
753 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
755 [TCPHDR_ACK|TCPHDR_URG] = 1,
758 /* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
/*
 * tcp_error - sanity-check a TCP packet before it is tracked.
 *
 * Checks, in order:
 *   1. a complete fixed TCP header can be read at @dataoff;
 *   2. the data offset field covers at least the fixed header and does not
 *      exceed the remaining packet length (skb->len - @dataoff);
 *   3. the checksum, only when net->ct.sysctl_checksum is set and the
 *      packet is seen at NF_INET_PRE_ROUTING;
 *   4. the flag combination (ECE, CWR and PSH ignored) is marked valid in
 *      tcp_valid_flags[].
 * Each failure is logged through nf_log_packet() when LOG_INVALID() is
 * enabled for TCP.
 * NOTE(review): the skb and pf parameter lines and all return statements
 * are not visible in this listing.
 */
759 static int tcp_error(struct net *net, struct nf_conn *tmpl,
761 unsigned int dataoff,
762 enum ip_conntrack_info *ctinfo,
764 unsigned int hooknum)
766 const struct tcphdr *th;
768 unsigned int tcplen = skb->len - dataoff;
771 /* Smaller than minimal TCP header? */
772 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
774 if (LOG_INVALID(net, IPPROTO_TCP))
775 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
776 "nf_ct_tcp: short packet ");
780 /* Not whole TCP header or malformed packet */
781 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
782 if (LOG_INVALID(net, IPPROTO_TCP))
783 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
784 "nf_ct_tcp: truncated/malformed packet ");
788 /* Checksum invalid? Ignore.
789 * We skip checking packets on the outgoing path
790 * because the checksum is assumed to be correct.
792 /* FIXME: Source route IP option packets --RR */
793 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
794 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
795 if (LOG_INVALID(net, IPPROTO_TCP))
796 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
797 "nf_ct_tcp: bad TCP checksum ");
801 /* Check TCP flags. */
802 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
803 if (!tcp_valid_flags[tcpflags]) {
804 if (LOG_INVALID(net, IPPROTO_TCP))
805 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
806 "nf_ct_tcp: invalid TCP flag combination ");
/* Return the timeouts array stored in the per-net TCP data of @net. */
813 static unsigned int *tcp_get_timeouts(struct net *net)
815 return tcp_pernet(net)->timeouts;
818 /* Returns verdict for packet, or -1 for invalid. */
/*
 * tcp_packet - run one packet of a tracked connection through the TCP
 * state machine.
 *
 * Under ct->lock the next state is looked up in
 * tcp_conntracks[dir][index][old_state]. Some results get special
 * treatment before the window check:
 *   - TCP_CONNTRACK_SYN_SENT: when either side has IP_CT_TCP_FLAG_CLOSE_INIT
 *     set, or the last packet in this direction was an RST, the SYN is
 *     treated as an attempt to reopen a closed/aborted connection;
 *   - TCP_CONNTRACK_IGNORE: the packet's index, direction, seq, end and
 *     window are remembered in the last_* fields (plus window-scale and
 *     SACK-permitted options for a SYN in the original direction), so that
 *     a later SYN/ACK acknowledging last_end can resynchronise the entry
 *     as SYN_SENT -> SYN_RECV;
 *   - TCP_CONNTRACK_MAX: invalid transition, logged when LOG_INVALID();
 *   - TCP_CONNTRACK_CLOSE: an RST whose seq is before the other side's
 *     td_maxack is logged as an invalid RST; an RST answering a SYN or ACK
 *     that was let through earlier is accepted without window checking.
 * Other packets must pass tcp_in_window().
 *
 * Afterwards the new state is stored and the timeout is chosen: the
 * RETRANS timeout when ct->proto.tcp.retrans has reached
 * tn->tcp_max_retrans, else the UNACK timeout when either direction has
 * unacknowledged data (in both cases only if shorter than the state's own
 * timeout), else timeouts[new_state]. A state change raises an
 * IPCT_PROTOINFO event; IPS_ASSURED is set on an ACK that keeps or moves
 * the connection into ESTABLISHED from SYN_RECV/ESTABLISHED; finally the
 * conntrack is refreshed with the chosen timeout.
 *
 * NOTE(review): many lines (the pf parameter, switch header, returns,
 * braces) are not visible in this listing; the summary covers only the
 * visible statements.
 */
819 static int tcp_packet(struct nf_conn *ct,
820 const struct sk_buff *skb,
821 unsigned int dataoff,
822 enum ip_conntrack_info ctinfo,
824 unsigned int hooknum,
825 unsigned int *timeouts)
827 struct net *net = nf_ct_net(ct);
828 struct nf_tcp_net *tn = tcp_pernet(net);
829 struct nf_conntrack_tuple *tuple;
830 enum tcp_conntrack new_state, old_state;
831 enum ip_conntrack_dir dir;
832 const struct tcphdr *th;
834 unsigned long timeout;
837 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
840 spin_lock_bh(&ct->lock);
841 old_state = ct->proto.tcp.state;
842 dir = CTINFO2DIR(ctinfo);
843 index = get_conntrack_index(th);
844 new_state = tcp_conntracks[dir][index][old_state];
845 tuple = &ct->tuplehash[dir].tuple;
848 case TCP_CONNTRACK_SYN_SENT:
849 if (old_state < TCP_CONNTRACK_TIME_WAIT)
851 /* RFC 1122: "When a connection is closed actively,
852 * it MUST linger in TIME-WAIT state for a time 2xMSL
853 * (Maximum Segment Lifetime). However, it MAY accept
854 * a new SYN from the remote TCP to reopen the connection
855 * directly from TIME-WAIT state, if..."
856 * We ignore the conditions because we are in the
857 * TIME-WAIT state anyway.
859 * Handle aborted connections: we and the server
860 * think there is an existing connection but the client
861 * aborts it and starts a new one.
863 if (((ct->proto.tcp.seen[dir].flags
864 | ct->proto.tcp.seen[!dir].flags)
865 & IP_CT_TCP_FLAG_CLOSE_INIT)
866 || (ct->proto.tcp.last_dir == dir
867 && ct->proto.tcp.last_index == TCP_RST_SET)) {
868 /* Attempt to reopen a closed/aborted connection.
869 * Delete this connection and look up again. */
870 spin_unlock_bh(&ct->lock);
872 /* Only repeat if we can actually remove the timer.
873 * Destruction may already be in progress in process
874 * context and we must give it a chance to terminate.
881 case TCP_CONNTRACK_IGNORE:
884 * Our connection entry may be out of sync, so ignore
885 * packets which may signal the real connection between
886 * the client and the server.
889 * b) SYN/ACK in REPLY
890 * c) ACK in reply direction after initial SYN in original.
892 * If the ignored packet is invalid, the receiver will send
893 * a RST we'll catch below.
895 if (index == TCP_SYNACK_SET
896 && ct->proto.tcp.last_index == TCP_SYN_SET
897 && ct->proto.tcp.last_dir != dir
898 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
899 /* b) This SYN/ACK acknowledges a SYN that we earlier
900 * ignored as invalid. This means that the client and
901 * the server are both in sync, while the firewall is
902 * not. We get in sync from the previously annotated
905 old_state = TCP_CONNTRACK_SYN_SENT;
906 new_state = TCP_CONNTRACK_SYN_RECV;
907 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
908 ct->proto.tcp.last_end;
909 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
910 ct->proto.tcp.last_end;
911 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
912 ct->proto.tcp.last_win == 0 ?
913 1 : ct->proto.tcp.last_win;
914 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
915 ct->proto.tcp.last_wscale;
916 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
917 ct->proto.tcp.last_flags;
918 memset(&ct->proto.tcp.seen[dir], 0,
919 sizeof(struct ip_ct_tcp_state));
922 ct->proto.tcp.last_index = index;
923 ct->proto.tcp.last_dir = dir;
924 ct->proto.tcp.last_seq = ntohl(th->seq);
925 ct->proto.tcp.last_end =
926 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
927 ct->proto.tcp.last_win = ntohs(th->window);
929 /* a) This is a SYN in ORIGINAL. The client and the server
930 * may be in sync but we are not. In that case, we annotate
931 * the TCP options and let the packet go through. If it is a
932 * valid SYN packet, the server will reply with a SYN/ACK, and
933 * then we'll get in sync. Otherwise, the server ignores it. */
934 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
935 struct ip_ct_tcp_state seen = {};
937 ct->proto.tcp.last_flags =
938 ct->proto.tcp.last_wscale = 0;
939 tcp_options(skb, dataoff, th, &seen);
940 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
941 ct->proto.tcp.last_flags |=
942 IP_CT_TCP_FLAG_WINDOW_SCALE;
943 ct->proto.tcp.last_wscale = seen.td_scale;
945 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
946 ct->proto.tcp.last_flags |=
947 IP_CT_TCP_FLAG_SACK_PERM;
950 spin_unlock_bh(&ct->lock);
951 if (LOG_INVALID(net, IPPROTO_TCP))
952 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
953 "nf_ct_tcp: invalid packet ignored in "
954 "state %s ", tcp_conntrack_names[old_state]);
956 case TCP_CONNTRACK_MAX:
958 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
959 dir, get_conntrack_index(th), old_state);
960 spin_unlock_bh(&ct->lock);
961 if (LOG_INVALID(net, IPPROTO_TCP))
962 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
963 "nf_ct_tcp: invalid state ");
965 case TCP_CONNTRACK_CLOSE:
966 if (index == TCP_RST_SET
967 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
968 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
970 spin_unlock_bh(&ct->lock);
971 if (LOG_INVALID(net, IPPROTO_TCP))
972 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
973 "nf_ct_tcp: invalid RST ");
976 if (index == TCP_RST_SET
977 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
978 && ct->proto.tcp.last_index == TCP_SYN_SET)
979 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
980 && ct->proto.tcp.last_index == TCP_ACK_SET))
981 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
982 /* RST sent to invalid SYN or ACK we had let through
983 * at a) and c) above:
985 * a) SYN was in window then
986 * c) we hold a half-open connection.
988 * Delete our connection entry.
989 * We skip window checking, because packet might ACK
990 * segments we ignored. */
993 /* Just fall through */
995 /* Keep compilers happy. */
999 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1000 skb, dataoff, th, pf)) {
1001 spin_unlock_bh(&ct->lock);
1005 /* From now on we have got in-window packets */
1006 ct->proto.tcp.last_index = index;
1007 ct->proto.tcp.last_dir = dir;
1009 pr_debug("tcp_conntracks: ");
1010 nf_ct_dump_tuple(tuple);
1011 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1012 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1013 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1014 old_state, new_state);
1016 ct->proto.tcp.state = new_state;
1017 if (old_state != new_state
1018 && new_state == TCP_CONNTRACK_FIN_WAIT)
1019 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1021 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1022 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1023 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1024 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1025 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1026 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1027 timeout = timeouts[TCP_CONNTRACK_UNACK];
1029 timeout = timeouts[new_state];
1030 spin_unlock_bh(&ct->lock);
1032 if (new_state != old_state)
1033 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1035 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1036 /* If only reply is a RST, we can consider ourselves not to
1037 have an established connection: this is a fairly common
1038 problem case, so we can delete the conntrack
1039 immediately. --RR */
1041 nf_ct_kill_acct(ct, ctinfo, skb);
1044 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1045 && (old_state == TCP_CONNTRACK_SYN_RECV
1046 || old_state == TCP_CONNTRACK_ESTABLISHED)
1047 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1048 /* Set ASSURED if we see valid ack in ESTABLISHED
1049 after SYN_RECV or a valid answer for a picked up
1051 set_bit(IPS_ASSURED_BIT, &ct->status);
1052 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1054 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1059 /* Called when a new connection for this protocol found. */
/*
 * tcp_new - initialise ct->proto.tcp from the first packet of a connection.
 *
 * The initial state is tcp_conntracks[0][index][TCP_CONNTRACK_NONE]; a
 * result >= TCP_CONNTRACK_MAX means the packet cannot start a connection.
 *   - SYN_SENT: the tracking data is zeroed, the originator's td_end is
 *     the end of this segment, td_maxwin its window (at least 1),
 *     td_maxend equals td_end, and the TCP options are parsed.
 *   - any other state, unless tn->tcp_loose == 0: the connection is picked
 *     up mid-stream; td_maxend is td_end + td_maxwin and both directions
 *     are flagged IP_CT_TCP_FLAG_SACK_PERM | IP_CT_TCP_FLAG_BE_LIBERAL.
 * last_index is set to TCP_NONE_SET; tcp_packet() fills the rest in.
 * No locking is done because the conntrack is not yet visible to others.
 * NOTE(review): the bool return statements and a few argument lines are
 * not visible in this listing.
 */
1060 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1061 unsigned int dataoff, unsigned int *timeouts)
1063 enum tcp_conntrack new_state;
1064 const struct tcphdr *th;
1065 struct tcphdr _tcph;
1066 struct net *net = nf_ct_net(ct);
1067 struct nf_tcp_net *tn = tcp_pernet(net);
1068 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1069 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1071 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1074 /* Don't need lock here: this conntrack not in circulation yet */
1075 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1077 /* Invalid: delete conntrack */
1078 if (new_state >= TCP_CONNTRACK_MAX) {
1079 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1083 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1084 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1086 ct->proto.tcp.seen[0].td_end =
1087 segment_seq_plus_len(ntohl(th->seq), skb->len,
1089 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1090 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1091 ct->proto.tcp.seen[0].td_maxwin = 1;
1092 ct->proto.tcp.seen[0].td_maxend =
1093 ct->proto.tcp.seen[0].td_end;
1095 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1096 } else if (tn->tcp_loose == 0) {
1097 /* Don't try to pick up connections. */
1100 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1102 * We are in the middle of a connection,
1103 * its history is lost for us.
1104 * Let's try to use the data from the packet.
1106 ct->proto.tcp.seen[0].td_end =
1107 segment_seq_plus_len(ntohl(th->seq), skb->len,
1109 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1110 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1111 ct->proto.tcp.seen[0].td_maxwin = 1;
1112 ct->proto.tcp.seen[0].td_maxend =
1113 ct->proto.tcp.seen[0].td_end +
1114 ct->proto.tcp.seen[0].td_maxwin;
1116 /* We assume SACK and liberal window checking to handle
1118 ct->proto.tcp.seen[0].flags =
1119 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1120 IP_CT_TCP_FLAG_BE_LIBERAL;
1123 /* tcp_packet will set them */
1124 ct->proto.tcp.last_index = TCP_NONE_SET;
1126 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1127 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1128 sender->td_end, sender->td_maxend, sender->td_maxwin,
1130 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1131 receiver->td_scale);
1135 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1137 #include <linux/netfilter/nfnetlink.h>
1138 #include <linux/netfilter/nfnetlink_conntrack.h>
/*
 * tcp_to_nlattr - dump the TCP tracking state of @ct into a netlink message.
 *
 * While holding ct->lock, a nested CTA_PROTOINFO_TCP attribute is opened
 * and filled with the state, the window-scale factor of each direction
 * and the flags of each direction (wrapped in struct nf_ct_tcp_flags).
 * The lock is released before the nest is closed; a failed put jumps to
 * nla_put_failure, where the lock is released as well.
 * NOTE(review): the ct parameter line, the label line and the return
 * statements are not visible in this listing.
 */
1140 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1143 struct nlattr *nest_parms;
1144 struct nf_ct_tcp_flags tmp = {};
1146 spin_lock_bh(&ct->lock);
1147 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1149 goto nla_put_failure;
1151 if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1152 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1153 ct->proto.tcp.seen[0].td_scale) ||
1154 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1155 ct->proto.tcp.seen[1].td_scale))
1156 goto nla_put_failure;
1158 tmp.flags = ct->proto.tcp.seen[0].flags;
1159 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1160 sizeof(struct nf_ct_tcp_flags), &tmp))
1161 goto nla_put_failure;
1163 tmp.flags = ct->proto.tcp.seen[1].flags;
1164 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1165 sizeof(struct nf_ct_tcp_flags), &tmp))
1166 goto nla_put_failure;
1167 spin_unlock_bh(&ct->lock);
1169 nla_nest_end(skb, nest_parms);
1174 spin_unlock_bh(&ct->lock);
/*
 * Netlink policy for the attributes nested inside CTA_PROTOINFO_TCP.
 * nlattr_to_tcp() hands it to nla_parse_nested(), and tcp_nlattr_size()
 * feeds it to nla_policy_len().
 *
 * State and the two window-scale values are u8; the two flag attributes are
 * sized after struct nf_ct_tcp_flags.
 */
1178 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1179 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1180 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1181 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1182 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1183 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
/*
 * nlattr_to_tcp - apply TCP tracking state received over netlink to @ct.
 * @cda: attribute table; only cda[CTA_PROTOINFO_TCP] is consulted
 * @ct:  conntrack entry whose ct->proto.tcp is updated
 *
 * The nested attribute is parsed against tcp_nla_policy.  A state value of
 * TCP_CONNTRACK_MAX or above is checked for before the lock is taken.  The
 * updates themselves are made with ct->lock held via spin_lock_bh().
 */
1186 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1188 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1189 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1192 /* updates could not contain anything about the private
1193 * protocol info, in that case skip the parsing */
1197 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
/* Range-check the requested state before touching the entry. */
1201 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1202 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1205 spin_lock_bh(&ct->lock);
1206 if (tb[CTA_PROTOINFO_TCP_STATE])
1207 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
/*
 * Flag updates are masked: bits selected by attr->mask are first cleared and
 * then set from attr->flags; bits outside the mask keep their current value.
 */
1209 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1210 struct nf_ct_tcp_flags *attr =
1211 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1212 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1213 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1216 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1217 struct nf_ct_tcp_flags *attr =
1218 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1219 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1220 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
/*
 * Window scales are taken only as a pair: both attributes must be present
 * and both directions must have IP_CT_TCP_FLAG_WINDOW_SCALE set.  The flags
 * tested here already include any update applied just above.
 */
1223 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1224 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1225 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1226 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1227 ct->proto.tcp.seen[0].td_scale =
1228 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1229 ct->proto.tcp.seen[1].td_scale =
1230 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1232 spin_unlock_bh(&ct->lock);
/*
 * tcp_nlattr_size - size computed for the TCP protocol-info attributes:
 * an empty CTA_PROTOINFO_TCP container (nla_total_size(0)) plus the length
 * nla_policy_len() derives from tcp_nla_policy.
 */
1237 static int tcp_nlattr_size(void)
1239 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1240 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
/*
 * tcp_nlattr_tuple_size - size of the tuple attributes, taken from the
 * shared port policy nf_ct_port_nla_policy via nla_policy_len().
 */
1243 static int tcp_nlattr_tuple_size(void)
1245 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1249 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1251 #include <linux/netfilter/nfnetlink.h>
1252 #include <linux/netfilter/nfnetlink_cttimeout.h>
/*
 * tcp_timeout_nlattr_to_obj - fill a TCP timeout array from netlink attributes.
 * @tb:   attribute table indexed by CTA_TIMEOUT_TCP_*
 * @net:  network namespace whose current TCP timeouts provide the defaults
 * @data: destination, used as an array of unsigned int indexed by
 *        TCP_CONNTRACK_*; TCP_CONNTRACK_TIMEOUT_MAX slots are written
 *
 * Every slot starts as a copy of the namespace's value and is overwritten
 * only when the matching attribute is present.  Attribute payloads are read
 * as big-endian 32-bit numbers and multiplied by HZ before being stored.
 */
1254 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1255 struct net *net, void *data)
1257 unsigned int *timeouts = data;
1258 struct nf_tcp_net *tn = tcp_pernet(net);
1261 /* set default TCP timeouts. */
1262 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1263 timeouts[i] = tn->timeouts[i];
/* Per-state overrides: one optional attribute for each timeout slot. */
1265 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1266 timeouts[TCP_CONNTRACK_SYN_SENT] =
1267 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1269 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1270 timeouts[TCP_CONNTRACK_SYN_RECV] =
1271 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1273 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1274 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1275 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1277 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1278 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1279 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1281 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1282 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1283 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1285 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1286 timeouts[TCP_CONNTRACK_LAST_ACK] =
1287 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1289 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1290 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1291 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1293 if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1294 timeouts[TCP_CONNTRACK_CLOSE] =
1295 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1297 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1298 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1299 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1301 if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1302 timeouts[TCP_CONNTRACK_RETRANS] =
1303 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1305 if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1306 timeouts[TCP_CONNTRACK_UNACK] =
1307 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
/*
 * tcp_timeout_obj_to_nlattr - emit a TCP timeout array as netlink attributes.
 * @skb:  message being built
 * @data: source, read as an array of unsigned int indexed by TCP_CONNTRACK_*
 *
 * Each stored value is divided by HZ and written as a big-endian 32-bit
 * CTA_TIMEOUT_TCP_* attribute.  The puts are chained with ||, so the first
 * one that fails stops the sequence and control goes to nla_put_failure.
 */
1313 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1315 const unsigned int *timeouts = data;
1317 if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1318 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1319 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1320 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1321 nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1322 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1323 nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1324 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1325 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1326 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1327 nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1328 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1329 nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1330 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1331 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1332 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1333 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1334 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1335 nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1336 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1337 nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1338 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1339 goto nla_put_failure;
1346 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1347 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
1348 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
1349 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
1350 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
1351 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
1352 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
1353 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1354 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1355 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
1357 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1359 #ifdef CONFIG_SYSCTL
/*
 * Template for the nf_conntrack_tcp_* sysctl entries.
 *
 * tcp_kmemdup_sysctl_table() duplicates this table and fills in .data by
 * position (slots 0-12), so the order of the entries here has to stay in
 * step with the index assignments made there.  The ten timeout entries use
 * proc_dointvec_jiffies; the three tuning knobs at the end use proc_dointvec.
 */
1360 static struct ctl_table tcp_sysctl_table[] = {
/* Slots 0-9: per-state timeouts. */
1362 .procname = "nf_conntrack_tcp_timeout_syn_sent",
1363 .maxlen = sizeof(unsigned int),
1365 .proc_handler = proc_dointvec_jiffies,
1368 .procname = "nf_conntrack_tcp_timeout_syn_recv",
1369 .maxlen = sizeof(unsigned int),
1371 .proc_handler = proc_dointvec_jiffies,
1374 .procname = "nf_conntrack_tcp_timeout_established",
1375 .maxlen = sizeof(unsigned int),
1377 .proc_handler = proc_dointvec_jiffies,
1380 .procname = "nf_conntrack_tcp_timeout_fin_wait",
1381 .maxlen = sizeof(unsigned int),
1383 .proc_handler = proc_dointvec_jiffies,
1386 .procname = "nf_conntrack_tcp_timeout_close_wait",
1387 .maxlen = sizeof(unsigned int),
1389 .proc_handler = proc_dointvec_jiffies,
1392 .procname = "nf_conntrack_tcp_timeout_last_ack",
1393 .maxlen = sizeof(unsigned int),
1395 .proc_handler = proc_dointvec_jiffies,
1398 .procname = "nf_conntrack_tcp_timeout_time_wait",
1399 .maxlen = sizeof(unsigned int),
1401 .proc_handler = proc_dointvec_jiffies,
1404 .procname = "nf_conntrack_tcp_timeout_close",
1405 .maxlen = sizeof(unsigned int),
1407 .proc_handler = proc_dointvec_jiffies,
/* Slot 8 is a timeout (bound to TCP_CONNTRACK_RETRANS), unlike slot 12 below. */
1410 .procname = "nf_conntrack_tcp_timeout_max_retrans",
1411 .maxlen = sizeof(unsigned int),
1413 .proc_handler = proc_dointvec_jiffies,
1416 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
1417 .maxlen = sizeof(unsigned int),
1419 .proc_handler = proc_dointvec_jiffies,
/* Slots 10-12: plain integer settings (tcp_loose, tcp_be_liberal, tcp_max_retrans). */
1422 .procname = "nf_conntrack_tcp_loose",
1423 .maxlen = sizeof(unsigned int),
1425 .proc_handler = proc_dointvec,
1428 .procname = "nf_conntrack_tcp_be_liberal",
1429 .maxlen = sizeof(unsigned int),
1431 .proc_handler = proc_dointvec,
1434 .procname = "nf_conntrack_tcp_max_retrans",
1435 .maxlen = sizeof(unsigned int),
1437 .proc_handler = proc_dointvec,
1442 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
/*
 * Template for the ip_conntrack_tcp_* sysctl entries, compiled only under
 * CONFIG_NF_CONNTRACK_PROC_COMPAT.
 *
 * tcp_kmemdup_compat_sysctl_table() duplicates this table and fills in .data
 * by position (slots 0-12).  The layout differs from tcp_sysctl_table: a
 * syn_sent2 timeout sits in slot 1 and there is no "unacknowledged" entry.
 */
1443 static struct ctl_table tcp_compat_sysctl_table[] = {
/* Slots 0-9: per-state timeouts. */
1445 .procname = "ip_conntrack_tcp_timeout_syn_sent",
1446 .maxlen = sizeof(unsigned int),
1448 .proc_handler = proc_dointvec_jiffies,
1451 .procname = "ip_conntrack_tcp_timeout_syn_sent2",
1452 .maxlen = sizeof(unsigned int),
1454 .proc_handler = proc_dointvec_jiffies,
1457 .procname = "ip_conntrack_tcp_timeout_syn_recv",
1458 .maxlen = sizeof(unsigned int),
1460 .proc_handler = proc_dointvec_jiffies,
1463 .procname = "ip_conntrack_tcp_timeout_established",
1464 .maxlen = sizeof(unsigned int),
1466 .proc_handler = proc_dointvec_jiffies,
1469 .procname = "ip_conntrack_tcp_timeout_fin_wait",
1470 .maxlen = sizeof(unsigned int),
1472 .proc_handler = proc_dointvec_jiffies,
1475 .procname = "ip_conntrack_tcp_timeout_close_wait",
1476 .maxlen = sizeof(unsigned int),
1478 .proc_handler = proc_dointvec_jiffies,
1481 .procname = "ip_conntrack_tcp_timeout_last_ack",
1482 .maxlen = sizeof(unsigned int),
1484 .proc_handler = proc_dointvec_jiffies,
1487 .procname = "ip_conntrack_tcp_timeout_time_wait",
1488 .maxlen = sizeof(unsigned int),
1490 .proc_handler = proc_dointvec_jiffies,
1493 .procname = "ip_conntrack_tcp_timeout_close",
1494 .maxlen = sizeof(unsigned int),
1496 .proc_handler = proc_dointvec_jiffies,
1499 .procname = "ip_conntrack_tcp_timeout_max_retrans",
1500 .maxlen = sizeof(unsigned int),
1502 .proc_handler = proc_dointvec_jiffies,
/* Slots 10-12: plain integer settings (tcp_loose, tcp_be_liberal, tcp_max_retrans). */
1505 .procname = "ip_conntrack_tcp_loose",
1506 .maxlen = sizeof(unsigned int),
1508 .proc_handler = proc_dointvec,
1511 .procname = "ip_conntrack_tcp_be_liberal",
1512 .maxlen = sizeof(unsigned int),
1514 .proc_handler = proc_dointvec,
1517 .procname = "ip_conntrack_tcp_max_retrans",
1518 .maxlen = sizeof(unsigned int),
1520 .proc_handler = proc_dointvec,
1524 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1525 #endif /* CONFIG_SYSCTL */
/*
 * tcp_kmemdup_sysctl_table - build a private copy of tcp_sysctl_table.
 * @pn: receives the duplicated table in pn->ctl_table
 * @tn: TCP state whose timeouts[] slots and tuning fields the entries point at
 *
 * The table handling sits inside #ifdef CONFIG_SYSCTL.  After the kmemdup()
 * each entry's .data is aimed at the corresponding member of @tn; the
 * indices used below follow the entry order of tcp_sysctl_table.
 */
1527 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
1528 struct nf_tcp_net *tn)
1530 #ifdef CONFIG_SYSCTL
1534 pn->ctl_table = kmemdup(tcp_sysctl_table,
1535 sizeof(tcp_sysctl_table),
/* Slots 0-9: timeouts. */
1540 pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1541 pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1542 pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1543 pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1544 pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1545 pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1546 pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1547 pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1548 pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1549 pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
/* Slots 10-12: tuning knobs. */
1550 pn->ctl_table[10].data = &tn->tcp_loose;
1551 pn->ctl_table[11].data = &tn->tcp_be_liberal;
1552 pn->ctl_table[12].data = &tn->tcp_max_retrans;
/*
 * tcp_kmemdup_compat_sysctl_table - build a private copy of
 * tcp_compat_sysctl_table.
 * @pn: receives the duplicated table in pn->ctl_compat_table
 * @tn: TCP state whose timeouts[] slots and tuning fields the entries point at
 *
 * The work is guarded by both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT.  The result of kmemdup() is tested for
 * NULL before the .data pointers are filled in; the indices used below
 * follow the entry order of tcp_compat_sysctl_table.
 */
1557 static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
1558 struct nf_tcp_net *tn)
1560 #ifdef CONFIG_SYSCTL
1561 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1562 pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
1563 sizeof(tcp_compat_sysctl_table),
1565 if (!pn->ctl_compat_table)
/* Slots 0-9: timeouts (slot 1 is SYN_SENT2, which the non-compat table lacks). */
1568 pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1569 pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
1570 pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1571 pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1572 pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1573 pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1574 pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1575 pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1576 pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1577 pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
/* Slots 10-12: tuning knobs. */
1578 pn->ctl_compat_table[10].data = &tn->tcp_loose;
1579 pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
1580 pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
/*
 * tcp_init_net - set up the TCP tracker's per-namespace state.
 * @net:   namespace being initialised
 * @proto: address family; AF_INET additionally gets the compat sysctl table
 *
 * Seeds tn->timeouts[] from tcp_timeouts[] and the loose / be_liberal /
 * max_retrans settings from the file-scope defaults, then duplicates the
 * sysctl table(s) for this namespace.
 */
1586 static int tcp_init_net(struct net *net, u_int16_t proto)
1589 struct nf_tcp_net *tn = tcp_pernet(net);
1590 struct nf_proto_net *pn = &tn->pn;
1595 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1596 tn->timeouts[i] = tcp_timeouts[i];
1598 tn->tcp_loose = nf_ct_tcp_loose;
1599 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1600 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
/* AF_INET: compat table first, then the regular one. */
1603 if (proto == AF_INET) {
1604 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1608 ret = tcp_kmemdup_sysctl_table(pn, tn);
/*
 * NOTE(review): releases the compat table set up above - presumably only when
 * the regular table could not be duplicated; the guarding test is not visible
 * in this listing.
 */
1610 nf_ct_kfree_compat_sysctl_table(pn);
/* Any other family: regular table only. */
1612 ret = tcp_kmemdup_sysctl_table(pn, tn);
/*
 * tcp_get_net_proto - return the address of the nf_proto_net embedded in
 * @net's TCP conntrack state (net->ct.nf_ct_proto.tcp.pn).
 */
1617 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1619 return &net->ct.nf_ct_proto.tcp.pn;
/*
 * Layer-4 tracker descriptor for TCP, exported as nf_conntrack_l4proto_tcp4.
 * It wires the handlers defined in this file into the conntrack core.
 *
 * NOTE(review): the symbol name suggests the IPv4 registration; the .l3proto
 * line is not visible in this listing - confirm.
 */
1622 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1625 .l4proto = IPPROTO_TCP,
/* Tuple handling, printing and per-packet processing. */
1627 .pkt_to_tuple = tcp_pkt_to_tuple,
1628 .invert_tuple = tcp_invert_tuple,
1629 .print_tuple = tcp_print_tuple,
1630 .print_conntrack = tcp_print_conntrack,
1631 .packet = tcp_packet,
1632 .get_timeouts = tcp_get_timeouts,
/* ctnetlink dump/parse hooks; tuples use the shared port helpers. */
1635 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1636 .to_nlattr = tcp_to_nlattr,
1637 .nlattr_size = tcp_nlattr_size,
1638 .from_nlattr = nlattr_to_tcp,
1639 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1640 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1641 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1642 .nla_policy = nf_ct_port_nla_policy,
/* Timeout-object hooks: one unsigned int per TCP_CONNTRACK_* timeout slot. */
1644 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1646 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1647 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1648 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1649 .obj_size = sizeof(unsigned int) *
1650 TCP_CONNTRACK_TIMEOUT_MAX,
1651 .nla_policy = tcp_timeout_nla_policy,
1653 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
/* Per-namespace setup and lookup. */
1654 .init_net = tcp_init_net,
1655 .get_net_proto = tcp_get_net_proto,
1657 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
/*
 * Layer-4 tracker descriptor for TCP over PF_INET6, exported as
 * nf_conntrack_l4proto_tcp6.  Apart from .l3proto it installs the same
 * handlers as nf_conntrack_l4proto_tcp4.
 */
1659 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1661 .l3proto = PF_INET6,
1662 .l4proto = IPPROTO_TCP,
/* Tuple handling, printing and per-packet processing. */
1664 .pkt_to_tuple = tcp_pkt_to_tuple,
1665 .invert_tuple = tcp_invert_tuple,
1666 .print_tuple = tcp_print_tuple,
1667 .print_conntrack = tcp_print_conntrack,
1668 .packet = tcp_packet,
1669 .get_timeouts = tcp_get_timeouts,
/* ctnetlink dump/parse hooks; tuples use the shared port helpers. */
1672 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1673 .to_nlattr = tcp_to_nlattr,
1674 .nlattr_size = tcp_nlattr_size,
1675 .from_nlattr = nlattr_to_tcp,
1676 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1677 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1678 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1679 .nla_policy = nf_ct_port_nla_policy,
/* Timeout-object hooks: one unsigned int per TCP_CONNTRACK_* timeout slot. */
1681 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1683 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1684 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1685 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1686 .obj_size = sizeof(unsigned int) *
1687 TCP_CONNTRACK_TIMEOUT_MAX,
1688 .nla_policy = tcp_timeout_nla_policy,
1690 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
/* Per-namespace setup and lookup. */
1691 .init_net = tcp_init_net,
1692 .get_net_proto = tcp_get_net_proto,
1694 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);