net: No more expensive sock_hold()/sock_put() on each tx

author Eric Dumazet <eric.dumazet@gmail.com>

Thu, 11 Jun 2009 09:55:43 +0000 (02:55 -0700)

committer David S. Miller <davem@davemloft.net>

Thu, 11 Jun 2009 09:55:43 +0000 (02:55 -0700)
author Eric Dumazet <eric.dumazet@gmail.com>
Thu, 11 Jun 2009 09:55:43 +0000 (02:55 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 11 Jun 2009 09:55:43 +0000 (02:55 -0700)
diff --git a/include/net/sock.h b/include/net/sock.h

index 4bb1ff9fd15bcc2b1bbc24c3cbd853fcd796da5c..010e14a93c9256a0d92e35232e2292e746e8f86c 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1217,9 +1217,13 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
  
  static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
  {
-       sock_hold(sk);
         skb->sk = sk;
         skb->destructor = sock_wfree;
+       /*
+        * We used to take a refcount on sk, but the following operation
+        * is enough to guarantee sk_free() won't free this sock until
+        * all in-flight packets are completed.
+        */
         atomic_add(skb->truesize, &sk->sk_wmem_alloc);
  }
  
diff --git a/net/core/sock.c b/net/core/sock.c

index 04e35eb2e736144a554dcd02354f38d943ba4e3e..06e26b77ad9e78437948592eb1781539b6947703 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1008,7 +1008,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
  }
  EXPORT_SYMBOL(sk_alloc);
  
-void sk_free(struct sock *sk)
+static void __sk_free(struct sock *sk)
  {
         struct sk_filter *filter;
  
@@ -1031,6 +1031,17 @@ void sk_free(struct sock *sk)
         put_net(sock_net(sk));
         sk_prot_free(sk->sk_prot_creator, sk);
  }
+
+void sk_free(struct sock *sk)
+{
+       /*
+        * We subtract one from sk_wmem_alloc to learn whether
+        * some packets are still in some tx queue.
+        * If it is not zero, sock_wfree() will call __sk_free(sk) later.
+        */
+       if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+               __sk_free(sk);
+}
  EXPORT_SYMBOL(sk_free);
  
  /*
@@ -1071,7 +1082,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                 newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
  
                 atomic_set(&newsk->sk_rmem_alloc, 0);
-               atomic_set(&newsk->sk_wmem_alloc, 0);
+               /*
+                * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+                */
+               atomic_set(&newsk->sk_wmem_alloc, 1);
                 atomic_set(&newsk->sk_omem_alloc, 0);
                 skb_queue_head_init(&newsk->sk_receive_queue);
                 skb_queue_head_init(&newsk->sk_write_queue);
@@ -1175,12 +1189,18 @@ void __init sk_init(void)
  void sock_wfree(struct sk_buff *skb)
  {
         struct sock *sk = skb->sk;
+       int res;
  
         /* In case it might be waiting for more memory. */
-       atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+       res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                 sk->sk_write_space(sk);
-       sock_put(sk);
+       /*
+        * If sk_wmem_alloc reached 0, we are the last user and should
+        * free this sock, as the sk_free() call could not do it.
+        */
+       if (res == 0)
+               __sk_free(sk);
  }
  EXPORT_SYMBOL(sock_wfree);
  
@@ -1819,6 +1839,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
         sk->sk_stamp = ktime_set(-1L, 0);
  
         atomic_set(&sk->sk_refcnt, 1);
+       atomic_set(&sk->sk_wmem_alloc, 1);
         atomic_set(&sk->sk_drops, 0);
  }
  EXPORT_SYMBOL(sock_init_data);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 9248d2807ba6787ba68ca168ff241a2693fbf2ed..2470262826694d122b5457d6ef4144b66429d7a2 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -498,7 +498,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
  
                         BUG_ON(frag->sk);
                         if (skb->sk) {
-                               sock_hold(skb->sk);
                                 frag->sk = skb->sk;
                                 frag->destructor = sock_wfree;
                                 truesizes += frag->truesize;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index db6c7224a8620fcee2fbbd877e69d8bab75b8115..7c76e3d1821551c42acc16d3e2210404a553b4d0 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -680,7 +680,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
  
                         BUG_ON(frag->sk);
                         if (skb->sk) {
-                               sock_hold(skb->sk);
                                 frag->sk = skb->sk;
                                 frag->destructor = sock_wfree;
                                 truesizes += frag->truesize;
author	Eric Dumazet <eric.dumazet@gmail.com>
	Thu, 11 Jun 2009 09:55:43 +0000 (02:55 -0700)
committer	David S. Miller <davem@davemloft.net>
	Thu, 11 Jun 2009 09:55:43 +0000 (02:55 -0700)
include/net/sock.h		patch \| blob \| history
net/core/sock.c		patch \| blob \| history
net/ipv4/ip_output.c		patch \| blob \| history
net/ipv6/ip6_output.c		patch \| blob \| history