net: convert TCP/DCCP ehash rwlocks to spinlocks
author Eric Dumazet <dada1@cosmosbay.com>
Fri, 21 Nov 2008 04:39:09 +0000 (20:39 -0800)
committer David S. Miller <davem@davemloft.net>
Fri, 21 Nov 2008 04:39:09 +0000 (20:39 -0800)
Now that TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks.

/proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'.

This should speed up writers, since spin_lock()/spin_unlock()
only use one atomic operation instead of two for write_lock()/write_unlock().

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/inet_hashtables.h
net/ipv4/inet_hashtables.c
net/ipv4/inet_timewait_sock.c
net/ipv4/tcp_ipv4.c
net/ipv6/inet6_hashtables.c

index 62d2dd0d7860ba8f3aa8ef0899b6af7709612914..28b3ee3e8d6d8c06a856af91f4dc77f1d753afa2 100644 (file)
@@ -116,7 +116,7 @@ struct inet_hashinfo {
         * TIME_WAIT sockets use a separate chain (twchain).
         */
        struct inet_ehash_bucket        *ehash;
-       rwlock_t                        *ehash_locks;
+       spinlock_t                      *ehash_locks;
        unsigned int                    ehash_size;
        unsigned int                    ehash_locks_mask;
 
@@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
        return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
-static inline rwlock_t *inet_ehash_lockp(
+static inline spinlock_t *inet_ehash_lockp(
        struct inet_hashinfo *hashinfo,
        unsigned int hash)
 {
@@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
                size = 4096;
        if (sizeof(rwlock_t) != 0) {
 #ifdef CONFIG_NUMA
-               if (size * sizeof(rwlock_t) > PAGE_SIZE)
-                       hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+               if (size * sizeof(spinlock_t) > PAGE_SIZE)
+                       hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
                else
 #endif
-               hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
+               hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
                                                GFP_KERNEL);
                if (!hashinfo->ehash_locks)
                        return ENOMEM;
                for (i = 0; i < size; i++)
-                       rwlock_init(&hashinfo->ehash_locks[i]);
+                       spin_lock_init(&hashinfo->ehash_locks[i]);
        }
        hashinfo->ehash_locks_mask = size - 1;
        return 0;
@@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
        if (hashinfo->ehash_locks) {
 #ifdef CONFIG_NUMA
                unsigned int size = (hashinfo->ehash_locks_mask + 1) *
-                                                       sizeof(rwlock_t);
+                                                       sizeof(spinlock_t);
                if (size > PAGE_SIZE)
                        vfree(hashinfo->ehash_locks);
                else
index 377d004e57234eaa5d88e70cc33e98cf8386f612..4c273a9981a659eaec7d7abff7c7b380a01ded21 100644 (file)
@@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
        struct net *net = sock_net(sk);
        unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-       rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+       spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw;
 
-       prefetch(head->chain.first);
-       write_lock(lock);
+       spin_lock(lock);
 
        /* Check TIME-WAIT sockets first. */
        sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -308,8 +307,8 @@ unique:
        sk->sk_hash = hash;
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
+       spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       write_unlock(lock);
 
        if (twp) {
                *twp = tw;
@@ -325,7 +324,7 @@ unique:
        return 0;
 
 not_unique:
-       write_unlock(lock);
+       spin_unlock(lock);
        return -EADDRNOTAVAIL;
 }
 
@@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct hlist_nulls_head *list;
-       rwlock_t *lock;
+       spinlock_t *lock;
        struct inet_ehash_bucket *head;
 
        WARN_ON(!sk_unhashed(sk));
@@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk)
        list = &head->chain;
        lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-       write_lock(lock);
+       spin_lock(lock);
        __sk_nulls_add_node_rcu(sk, list);
+       spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       write_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
@@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk)
                        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
                spin_unlock_bh(&ilb->lock);
        } else {
-               rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+               spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-               write_lock_bh(lock);
+               spin_lock_bh(lock);
                if (__sk_nulls_del_node_init_rcu(sk))
                        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-               write_unlock_bh(lock);
+               spin_unlock_bh(lock);
        }
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
index 60689951ecdbd84d46ec4db4e8e5a72fe7e13401..8554d0ea1719b7621667026a1641cb7da20ca2d4 100644 (file)
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
        struct inet_bind_hashbucket *bhead;
        struct inet_bind_bucket *tb;
        /* Unlink from established hashes. */
-       rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+       spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
 
-       write_lock(lock);
+       spin_lock(lock);
        if (hlist_nulls_unhashed(&tw->tw_node)) {
-               write_unlock(lock);
+               spin_unlock(lock);
                return;
        }
        hlist_nulls_del_rcu(&tw->tw_node);
        sk_nulls_node_init(&tw->tw_node);
-       write_unlock(lock);
+       spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
        bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
@@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
        const struct inet_sock *inet = inet_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
-       rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+       spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
        struct inet_bind_hashbucket *bhead;
        /* Step 1: Put TW into bind hash. Original socket stays there too.
           Note, that any socket with inet->num != 0 MUST be bound in
@@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
        inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
        spin_unlock(&bhead->lock);
 
-       write_lock(lock);
+       spin_lock(lock);
 
        /*
         * Step 2: Hash TW into TIMEWAIT chain.
@@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
        if (__sk_nulls_del_node_init_rcu(sk))
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-       write_unlock(lock);
+       spin_unlock(lock);
 }
 
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
        for (h = 0; h < (hashinfo->ehash_size); h++) {
                struct inet_ehash_bucket *head =
                        inet_ehash_bucket(hashinfo, h);
-               rwlock_t *lock = inet_ehash_lockp(hashinfo, h);
+               spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
 restart:
-               write_lock(lock);
+               spin_lock(lock);
                sk_nulls_for_each(sk, node, &head->twchain) {
 
                        tw = inet_twsk(sk);
@@ -438,13 +438,13 @@ restart:
                                continue;
 
                        atomic_inc(&tw->tw_refcnt);
-                       write_unlock(lock);
+                       spin_unlock(lock);
                        inet_twsk_deschedule(tw, twdr);
                        inet_twsk_put(tw);
 
                        goto restart;
                }
-               write_unlock(lock);
+               spin_unlock(lock);
        }
        local_bh_enable();
 }
index 330b08a12274c9462c2f23f9ee8938f21be83a3d..a81caa1be0cfe33baed4e3a1e0ded3f2c4a04f84 100644 (file)
@@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq)
                struct sock *sk;
                struct hlist_nulls_node *node;
                struct inet_timewait_sock *tw;
-               rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+               spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
                /* Lockless fast path for the common case of empty buckets */
                if (empty_bucket(st))
                        continue;
 
-               read_lock_bh(lock);
+               spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
                        if (sk->sk_family != st->family ||
                            !net_eq(sock_net(sk), net)) {
@@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)
                        rc = tw;
                        goto out;
                }
-               read_unlock_bh(lock);
+               spin_unlock_bh(lock);
                st->state = TCP_SEQ_STATE_ESTABLISHED;
        }
 out:
@@ -2023,7 +2023,7 @@ get_tw:
                        cur = tw;
                        goto out;
                }
-               read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+               spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                st->state = TCP_SEQ_STATE_ESTABLISHED;
 
                /* Look for next non empty bucket */
@@ -2033,7 +2033,7 @@ get_tw:
                if (st->bucket >= tcp_hashinfo.ehash_size)
                        return NULL;
 
-               read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+               spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
        } else
                sk = sk_nulls_next(sk);
@@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
        case TCP_SEQ_STATE_TIME_WAIT:
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
-                       read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+                       spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                break;
        }
 }
index 21544b9be25938e96a33d61c2fad439428794784..e0fd68187f83291046f58fb23f54b7c0295619cb 100644 (file)
@@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk)
        } else {
                unsigned int hash;
                struct hlist_nulls_head *list;
-               rwlock_t *lock;
+               spinlock_t *lock;
 
                sk->sk_hash = hash = inet6_sk_ehashfn(sk);
                list = &inet_ehash_bucket(hashinfo, hash)->chain;
                lock = inet_ehash_lockp(hashinfo, hash);
-               write_lock(lock);
+               spin_lock(lock);
                __sk_nulls_add_node_rcu(sk, list);
-               write_unlock(lock);
+               spin_unlock(lock);
        }
 
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
        const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
                                                inet->dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-       rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+       spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw;
 
-       prefetch(head->chain.first);
-       write_lock(lock);
+       spin_lock(lock);
 
        /* Check TIME-WAIT sockets first. */
        sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -230,8 +229,8 @@ unique:
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
        sk->sk_hash = hash;
+       spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       write_unlock(lock);
 
        if (twp != NULL) {
                *twp = tw;
@@ -246,7 +245,7 @@ unique:
        return 0;
 
 not_unique:
-       write_unlock(lock);
+       spin_unlock(lock);
        return -EADDRNOTAVAIL;
 }