net: support marking accepting TCP sockets
author Lorenzo Colitti <lorenzo@google.com>
Tue, 13 May 2014 17:17:35 +0000 (10:17 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 13 May 2014 22:35:09 +0000 (18:35 -0400)
When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.

This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.

This patch adds a sysctl to mark TCP sockets based on the fwmark
of the incoming SYN packet. If enabled, and an unmarked socket
receives a SYN, then the SYN packet's fwmark is written to the
connection's inet_request_sock, and later written back to the
accepted socket when the connection is established.  If the
socket already has a nonzero mark, then the behaviour is the same
as it is today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
  mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
  incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/inet_sock.h
include/net/netns/ipv4.h
net/ipv4/inet_connection_sock.c
net/ipv4/syncookies.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv6/inet6_connection_sock.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c

index 1833c3f389ee64a0c6b3862d4f2fbc6db0984b0a..b1edf17bec01130f9751747c4d092e5de50aaeac 100644 (file)
@@ -90,6 +90,7 @@ struct inet_request_sock {
        kmemcheck_bitfield_end(flags);
        struct ip_options_rcu   *opt;
        struct sk_buff          *pktopts;
+       u32                     ir_mark;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -97,6 +98,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
        return (struct inet_request_sock *)sk;
 }
 
+static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+{
+       /* Marked listener or sysctl off: use the listener's mark. */
+       if (sk->sk_mark || !sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+               return sk->sk_mark;
+       /* Otherwise take the mark from the incoming packet. */
+       return skb->mark;
+}
+
 struct inet_cork {
        unsigned int            flags;
        __be32                  addr;
index a32fc4d705daacf1d4bacdf96f06f55ebf7ba2a5..2f0cfad666660abd57493e7658f1b045385ceda0 100644 (file)
@@ -78,6 +78,7 @@ struct netns_ipv4 {
        int sysctl_ip_fwd_use_pmtu;
 
        int sysctl_fwmark_reflect;
+       int sysctl_tcp_fwmark_accept;
 
        struct ping_group_range ping_group_range;
 
index a56b8e6e866a8c4327f86111adac947e9ffc2445..12e502cbfdc75356c9e6dd304cd6ff2092126065 100644 (file)
@@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
        struct net *net = sock_net(sk);
        int flags = inet_sk_flowi_flags(sk);
 
-       flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+       flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           sk->sk_protocol,
                           flags,
@@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 
        rcu_read_lock();
        opt = rcu_dereference(newinet->inet_opt);
-       flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+       flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
                inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
                newsk->sk_write_space = sk_stream_write_space;
 
+               newsk->sk_mark = inet_rsk(req)->ir_mark;
+
                newicsk->icsk_retransmits = 0;
                newicsk->icsk_backoff     = 0;
                newicsk->icsk_probes_out  = 0;
index f2ed13c2125f7d34820c9e92a3080678f30f46fd..c86624b36a62ece1dd34bf39561d52e34f467bd3 100644 (file)
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
        ireq->ir_rmt_port       = th->source;
        ireq->ir_loc_addr       = ip_hdr(skb)->daddr;
        ireq->ir_rmt_addr       = ip_hdr(skb)->saddr;
+       ireq->ir_mark           = inet_request_mark(sk, skb);
        ireq->ecn_ok            = ecn_ok;
        ireq->snd_wscale        = tcp_opt.snd_wscale;
        ireq->sack_ok           = tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
         * hasn't changed since we received the original syn, but I see
         * no easy way to do this.
         */
-       flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+       flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
                           inet_sk_flowi_flags(sk),
                           (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
index f50d518502856d67b70d9a2a63f2ed6b4ab979ec..a33b9fbc1d8001157ccc54004cd52a147de0e4ec 100644 (file)
@@ -845,6 +845,13 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "tcp_fwmark_accept",
+               .data           = &init_net.ipv4.sysctl_tcp_fwmark_accept,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        { }
 };
 
index a2780e5334c9ddef67a5be84ffe00aff4a25c102..77cccda1ad0c6dc62c8cb70d932eca2322304c81 100644 (file)
@@ -1318,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        ireq->ir_rmt_addr = saddr;
        ireq->no_srccheck = inet_sk(sk)->transparent;
        ireq->opt = tcp_v4_save_options(skb);
+       ireq->ir_mark = inet_request_mark(sk, skb);
 
        if (security_inet_conn_request(sk, skb, req))
                goto drop_and_free;
index d4ade34ab37566d8cca9e164f5fde5fb5a762fe6..a245e5ddffbd0450968c44de7d3fcd8a1dd055cf 100644 (file)
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
        final_p = fl6_update_dst(fl6, np->opt, &final);
        fl6->saddr = ireq->ir_v6_loc_addr;
        fl6->flowi6_oif = ireq->ir_iif;
-       fl6->flowi6_mark = sk->sk_mark;
+       fl6->flowi6_mark = ireq->ir_mark;
        fl6->fl6_dport = ireq->ir_rmt_port;
        fl6->fl6_sport = htons(ireq->ir_num);
        security_req_classify_flow(req, flowi6_to_flowi(fl6));
index bb53a5e73c1ab67c7a11430488b8418c4edbf98b..a822b880689b5fea5adeed30956afd2328a9c8b9 100644 (file)
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = inet6_iif(skb);
 
+       ireq->ir_mark = inet_request_mark(sk, skb);
+
        req->expires = 0UL;
        req->num_retrans = 0;
        ireq->ecn_ok            = ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
                final_p = fl6_update_dst(&fl6, np->opt, &final);
                fl6.saddr = ireq->ir_v6_loc_addr;
                fl6.flowi6_oif = sk->sk_bound_dev_if;
-               fl6.flowi6_mark = sk->sk_mark;
+               fl6.flowi6_mark = ireq->ir_mark;
                fl6.fl6_dport = ireq->ir_rmt_port;
                fl6.fl6_sport = inet_sk(sk)->inet_sport;
                security_req_classify_flow(req, flowi6_to_flowi(&fl6));
index c54976a44425578de71b4b26e043274cfa2b03b5..f07b2abba3592b729248dfce3c06ec34f3741c32 100644 (file)
@@ -1034,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
                TCP_ECN_create_request(req, skb, sock_net(sk));
 
        ireq->ir_iif = sk->sk_bound_dev_if;
+       ireq->ir_mark = inet_request_mark(sk, skb);
 
        /* So that link locals have meaning */
        if (!sk->sk_bound_dev_if &&