void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb);
-struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
- .__tcp_lhash_lock = RW_LOCK_UNLOCKED,
- .__tcp_lhash_users = ATOMIC_INIT(0),
- .__tcp_lhash_wait
- = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
- .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED
+struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
+ .lhash_lock = RW_LOCK_UNLOCKED,
+ .lhash_users = ATOMIC_INIT(0),
+ .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
+ .portalloc_lock = SPIN_LOCK_UNLOCKED,
+ .port_rover = 1024 - 1,
};
/*
* 32768-61000
*/
int sysctl_local_port_range[2] = { 1024, 4999 };
-int tcp_port_rover = 1024 - 1;
-/* Allocate and initialize a new TCP local port bind bucket.
- * The bindhash mutex for snum's hash chain must be held here.
- */
-struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
- unsigned short snum)
-{
- struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
- SLAB_ATOMIC);
- if (tb) {
- tb->port = snum;
- tb->fastreuse = 0;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-/* Caller must hold hashbucket lock for this tb with local BH disabled */
-void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(tcp_bucket_cachep, tb);
- }
-}
-
-/* Caller must disable local BH processing. */
-static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
-{
- struct tcp_bind_hashbucket *head =
- &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
- struct tcp_bind_bucket *tb;
-
- spin_lock(&head->lock);
- tb = tcp_sk(sk)->bind_hash;
- sk_add_bind_node(child, &tb->owners);
- tcp_sk(child)->bind_hash = tb;
- spin_unlock(&head->lock);
-}
-
-inline void tcp_inherit_port(struct sock *sk, struct sock *child)
-{
- local_bh_disable();
- __tcp_inherit_port(sk, child);
- local_bh_enable();
-}
-
-void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
- unsigned short snum)
-{
- inet_sk(sk)->num = snum;
- sk_add_bind_node(sk, &tb->owners);
- tcp_sk(sk)->bind_hash = tb;
-}
-
-static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
+static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
{
const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
struct sock *sk2;
*/
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
- struct tcp_bind_hashbucket *head;
+ struct inet_bind_hashbucket *head;
struct hlist_node *node;
- struct tcp_bind_bucket *tb;
+ struct inet_bind_bucket *tb;
int ret;
local_bh_disable();
int remaining = (high - low) + 1;
int rover;
- spin_lock(&tcp_portalloc_lock);
- if (tcp_port_rover < low)
+ spin_lock(&tcp_hashinfo.portalloc_lock);
+ if (tcp_hashinfo.port_rover < low)
rover = low;
else
- rover = tcp_port_rover;
+ rover = tcp_hashinfo.port_rover;
do {
rover++;
if (rover > high)
rover = low;
- head = &tcp_bhash[tcp_bhashfn(rover)];
+ head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
spin_lock(&head->lock);
- tb_for_each(tb, node, &head->chain)
+ inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->port == rover)
goto next;
break;
next:
spin_unlock(&head->lock);
} while (--remaining > 0);
- tcp_port_rover = rover;
- spin_unlock(&tcp_portalloc_lock);
+ tcp_hashinfo.port_rover = rover;
+ spin_unlock(&tcp_hashinfo.portalloc_lock);
/* Exhausted local port range during search? It is not
* possible for us to be holding one of the bind hash
*/
snum = rover;
} else {
- head = &tcp_bhash[tcp_bhashfn(snum)];
+ head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
spin_lock(&head->lock);
- tb_for_each(tb, node, &head->chain)
+ inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->port == snum)
goto tb_found;
}
}
tb_not_found:
ret = 1;
- if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
+ if (!tb && (tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum)) == NULL)
goto fail_unlock;
if (hlist_empty(&tb->owners)) {
if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
(!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
tb->fastreuse = 0;
success:
- if (!tcp_sk(sk)->bind_hash)
- tcp_bind_hash(sk, tb, snum);
- BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
+ if (!inet_sk(sk)->bind_hash)
+ inet_bind_hash(sk, tb, snum);
+ BUG_TRAP(inet_sk(sk)->bind_hash == tb);
ret = 0;
fail_unlock:
return ret;
}
-/* Get rid of any references to a local port held by the
- * given sock.
- */
-static void __tcp_put_port(struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
- struct tcp_bind_bucket *tb;
-
- spin_lock(&head->lock);
- tb = tcp_sk(sk)->bind_hash;
- __sk_del_bind_node(sk);
- tcp_sk(sk)->bind_hash = NULL;
- inet->num = 0;
- tcp_bucket_destroy(tb);
- spin_unlock(&head->lock);
-}
-
-void tcp_put_port(struct sock *sk)
-{
- local_bh_disable();
- __tcp_put_port(sk);
- local_bh_enable();
-}
-
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-
-void tcp_listen_wlock(void)
-{
- write_lock(&tcp_lhash_lock);
-
- if (atomic_read(&tcp_lhash_users)) {
- DEFINE_WAIT(wait);
-
- for (;;) {
- prepare_to_wait_exclusive(&tcp_lhash_wait,
- &wait, TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&tcp_lhash_users))
- break;
- write_unlock_bh(&tcp_lhash_lock);
- schedule();
- write_lock_bh(&tcp_lhash_lock);
- }
-
- finish_wait(&tcp_lhash_wait, &wait);
- }
-}
-
-static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
-{
- struct hlist_head *list;
- rwlock_t *lock;
-
- BUG_TRAP(sk_unhashed(sk));
- if (listen_possible && sk->sk_state == TCP_LISTEN) {
- list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
- lock = &tcp_lhash_lock;
- tcp_listen_wlock();
- } else {
- sk->sk_hashent = inet_sk_ehashfn(sk, tcp_ehash_size);
- list = &tcp_ehash[sk->sk_hashent].chain;
- lock = &tcp_ehash[sk->sk_hashent].lock;
- write_lock(lock);
- }
- __sk_add_node(sk, list);
- sock_prot_inc_use(sk->sk_prot);
- write_unlock(lock);
- if (listen_possible && sk->sk_state == TCP_LISTEN)
- wake_up(&tcp_lhash_wait);
-}
-
static void tcp_v4_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __tcp_v4_hash(sk, 1);
+ __inet_hash(&tcp_hashinfo, sk, 1);
local_bh_enable();
}
}
if (sk->sk_state == TCP_LISTEN) {
local_bh_disable();
- tcp_listen_wlock();
- lock = &tcp_lhash_lock;
+ inet_listen_wlock(&tcp_hashinfo);
+ lock = &tcp_hashinfo.lhash_lock;
} else {
- struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
+ struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
lock = &head->lock;
write_lock_bh(&head->lock);
}
ende:
if (sk->sk_state == TCP_LISTEN)
- wake_up(&tcp_lhash_wait);
+ wake_up(&tcp_hashinfo.lhash_wait);
}
/* Don't inline this cruft. Here are some nice properties to
* connection. So always assume those are both wildcarded
* during the search since they can never be otherwise.
*/
-static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
- unsigned short hnum, int dif)
+static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head,
+ const u32 daddr,
+ const unsigned short hnum,
+ const int dif)
{
struct sock *result = NULL, *sk;
struct hlist_node *node;
}
/* Optimize the common listener case. */
-static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
- unsigned short hnum, int dif)
+static inline struct sock *tcp_v4_lookup_listener(const u32 daddr,
+ const unsigned short hnum,
+ const int dif)
{
struct sock *sk = NULL;
struct hlist_head *head;
- read_lock(&tcp_lhash_lock);
- head = &tcp_listening_hash[tcp_lhashfn(hnum)];
+ read_lock(&tcp_hashinfo.lhash_lock);
+ head = &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)];
if (!hlist_empty(head)) {
struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
sherry_cache:
sock_hold(sk);
}
- read_unlock(&tcp_lhash_lock);
+ read_unlock(&tcp_hashinfo.lhash_lock);
return sk;
}
* Local BH must be disabled here.
*/
-static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
- u32 daddr, u16 hnum,
- int dif)
+static inline struct sock *__tcp_v4_lookup_established(const u32 saddr,
+ const u16 sport,
+ const u32 daddr,
+ const u16 hnum,
+ const int dif)
{
- struct tcp_ehash_bucket *head;
+ struct inet_ehash_bucket *head;
TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
struct sock *sk;
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_ehash_size);
- head = &tcp_ehash[hash];
+ const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_hashinfo.ehash_size);
+ head = &tcp_hashinfo.ehash[hash];
read_lock(&head->lock);
sk_for_each(sk, node, &head->chain) {
if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
+ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit;
}
int dif = sk->sk_bound_dev_if;
TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
__u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
- const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size);
- struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+ const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
+ struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
struct sock *sk2;
struct hlist_node *node;
struct tcp_tw_bucket *tw;
write_lock(&head->lock);
/* Check TIME-WAIT sockets first. */
- sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
+ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
tw = (struct tcp_tw_bucket *)sk2;
if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
*/
static inline int tcp_v4_hash_connect(struct sock *sk)
{
- unsigned short snum = inet_sk(sk)->num;
- struct tcp_bind_hashbucket *head;
- struct tcp_bind_bucket *tb;
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
int ret;
if (!snum) {
local_bh_disable();
for (i = 1; i <= range; i++) {
port = low + (i + offset) % range;
- head = &tcp_bhash[tcp_bhashfn(port)];
+ head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
spin_lock(&head->lock);
/* Does not bother with rcv_saddr checks,
* because the established check is already
* unique enough.
*/
- tb_for_each(tb, node, &head->chain) {
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
if (tb->port == port) {
BUG_TRAP(!hlist_empty(&tb->owners));
if (tb->fastreuse >= 0)
}
}
- tb = tcp_bucket_create(head, port);
+ tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
if (!tb) {
spin_unlock(&head->lock);
break;
hint += i;
/* Head lock still held and bh's disabled */
- tcp_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->sport = htons(port);
- __tcp_v4_hash(sk, 0);
+ __inet_hash(&tcp_hashinfo, sk, 0);
}
spin_unlock(&head->lock);
goto out;
}
- head = &tcp_bhash[tcp_bhashfn(snum)];
- tb = tcp_sk(sk)->bind_hash;
+ head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
+ tb = inet_sk(sk)->bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- __tcp_v4_hash(sk, 0);
+ __inet_hash(&tcp_hashinfo, sk, 0);
spin_unlock_bh(&head->lock);
return 0;
} else {
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk);
- __tcp_v4_hash(newsk, 0);
- __tcp_inherit_port(sk, newsk);
+ __inet_hash(&tcp_hashinfo, newsk, 0);
+ __inet_inherit_port(&tcp_hashinfo, sk, newsk);
return newsk;
__skb_queue_purge(&tp->ucopy.prequeue);
/* Clean up a referenced TCP bind bucket. */
- if (tp->bind_hash)
- tcp_put_port(sk);
+ if (inet_sk(sk)->bind_hash)
+ inet_put_port(&tcp_hashinfo, sk);
/*
* If sendmsg cached page exists, toss it.
if (!sk) {
st->bucket = 0;
- sk = sk_head(&tcp_listening_hash[0]);
+ sk = sk_head(&tcp_hashinfo.listening_hash[0]);
goto get_sk;
}
}
read_unlock_bh(&tp->accept_queue.syn_wait_lock);
}
- if (++st->bucket < TCP_LHTABLE_SIZE) {
- sk = sk_head(&tcp_listening_hash[st->bucket]);
+ if (++st->bucket < INET_LHTABLE_SIZE) {
+ sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
goto get_sk;
}
cur = NULL;
struct tcp_iter_state* st = seq->private;
void *rc = NULL;
- for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
+ for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
struct sock *sk;
struct hlist_node *node;
struct tcp_tw_bucket *tw;
/* We can reschedule _before_ having picked the target: */
cond_resched_softirq();
- read_lock(&tcp_ehash[st->bucket].lock);
- sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
+ read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+ sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != st->family) {
continue;
}
}
st->state = TCP_SEQ_STATE_TIME_WAIT;
tw_for_each(tw, node,
- &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
+ &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
if (tw->tw_family != st->family) {
continue;
}
rc = tw;
goto out;
}
- read_unlock(&tcp_ehash[st->bucket].lock);
+ read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
st->state = TCP_SEQ_STATE_ESTABLISHED;
}
out:
cur = tw;
goto out;
}
- read_unlock(&tcp_ehash[st->bucket].lock);
+ read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
st->state = TCP_SEQ_STATE_ESTABLISHED;
/* We can reschedule between buckets: */
cond_resched_softirq();
- if (++st->bucket < tcp_ehash_size) {
- read_lock(&tcp_ehash[st->bucket].lock);
- sk = sk_head(&tcp_ehash[st->bucket].chain);
+ if (++st->bucket < tcp_hashinfo.ehash_size) {
+ read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+ sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
} else {
cur = NULL;
goto out;
}
st->state = TCP_SEQ_STATE_TIME_WAIT;
- tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
+ tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
goto get_tw;
found:
cur = sk;
void *rc;
struct tcp_iter_state* st = seq->private;
- tcp_listen_lock();
+ inet_listen_lock(&tcp_hashinfo);
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_idx(seq, &pos);
if (!rc) {
- tcp_listen_unlock();
+ inet_listen_unlock(&tcp_hashinfo);
local_bh_disable();
st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_idx(seq, pos);
case TCP_SEQ_STATE_LISTENING:
rc = listening_get_next(seq, v);
if (!rc) {
- tcp_listen_unlock();
+ inet_listen_unlock(&tcp_hashinfo);
local_bh_disable();
st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_first(seq);
}
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- tcp_listen_unlock();
+ inet_listen_unlock(&tcp_hashinfo);
break;
case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
- read_unlock(&tcp_ehash[st->bucket].lock);
+ read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
local_bh_enable();
break;
}
}
EXPORT_SYMBOL(ipv4_specific);
-EXPORT_SYMBOL(tcp_bind_hash);
-EXPORT_SYMBOL(tcp_bucket_create);
+EXPORT_SYMBOL(inet_bind_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_inherit_port);
-EXPORT_SYMBOL(tcp_listen_wlock);
-EXPORT_SYMBOL(tcp_port_rover);
EXPORT_SYMBOL(tcp_prot);
-EXPORT_SYMBOL(tcp_put_port);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);