Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 90504a0e42ab2e0e2309af3e0c19f2a417705bcb..2a3e9ba814c4125d877554b80bc9e7353e7a21e9 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3,6 +3,7 @@
  *
  *             Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
  *                             Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *                             Patrick McHardy <kaber@trash.net>
  *
  *             This program is free software; you can redistribute it and/or
  *             modify it under the terms of the GNU General Public License
@@ -110,12 +111,40 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
        return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
 }
 
+static void netlink_overrun(struct sock *sk)
+{
+       struct netlink_sock *nlk = nlk_sk(sk);
+
+       if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
+               if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+                       sk->sk_err = ENOBUFS;
+                       sk->sk_error_report(sk);
+               }
+       }
+       atomic_inc(&sk->sk_drops);
+}
+
+static void netlink_rcv_wake(struct sock *sk)
+{
+       struct netlink_sock *nlk = nlk_sk(sk);
+
+       if (skb_queue_empty(&sk->sk_receive_queue))
+               clear_bit(NETLINK_CONGESTED, &nlk->state);
+       if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+               wake_up_interruptible(&nlk->wait);
+}
+
 #ifdef CONFIG_NETLINK_MMAP
 static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
 {
        return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
 }
 
+static bool netlink_rx_is_mmaped(struct sock *sk)
+{
+       return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+}
+
 static bool netlink_tx_is_mmaped(struct sock *sk)
 {
        return nlk_sk(sk)->tx_ring.pg_vec != NULL;
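
netlink_overrun() above reports ENOBUFS on the receiving socket unless NETLINK_RECV_NO_ENOBUFS is set, in which case only the drop counter is bumped. Userspace opts into the quiet behaviour with the NETLINK_NO_ENOBUFS socket option; a minimal sketch, with the protocol argument and error handling purely illustrative:

#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270         /* from <linux/socket.h> */
#endif

static int open_netlink_no_enobufs(int protocol)
{
        int one = 1;
        int fd = socket(AF_NETLINK, SOCK_RAW, protocol);

        if (fd < 0)
                return -1;
        /* Sets NETLINK_RECV_NO_ENOBUFS on the kernel side, so
         * netlink_overrun() above only increments sk_drops. */
        if (setsockopt(fd, SOL_NETLINK, NETLINK_NO_ENOBUFS,
                       &one, sizeof(one)) < 0) {
                close(fd);
                return -1;
        }
        return fd;
}
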
@@ -436,15 +465,48 @@ static void netlink_forward_ring(struct netlink_ring *ring)
        } while (ring->head != head);
 }
 
+static bool netlink_dump_space(struct netlink_sock *nlk)
+{
+       struct netlink_ring *ring = &nlk->rx_ring;
+       struct nl_mmap_hdr *hdr;
+       unsigned int n;
+
+       hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+       if (hdr == NULL)
+               return false;
+
+       n = ring->head + ring->frame_max / 2;
+       if (n > ring->frame_max)
+               n -= ring->frame_max;
+
+       hdr = __netlink_lookup_frame(ring, n);
+
+       return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
+}
+
 static unsigned int netlink_poll(struct file *file, struct socket *sock,
                                 poll_table *wait)
 {
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int mask;
+       int err;
 
-       if (nlk->cb != NULL && nlk->rx_ring.pg_vec != NULL)
-               netlink_dump(sk);
+       if (nlk->rx_ring.pg_vec != NULL) {
+               /* Memory mapped sockets don't call recvmsg(), so flow control
+                * for dumps is performed here. A dump is allowed to continue
+                * if at least half the ring is unused.
+                */
+               while (nlk->cb != NULL && netlink_dump_space(nlk)) {
+                       err = netlink_dump(sk);
+                       if (err < 0) {
+                               sk->sk_err = -err; /* sk_err takes a positive errno */
+                               sk->sk_error_report(sk);
+                               break;
+                       }
+               }
+               netlink_rcv_wake(sk);
+       }
 
        mask = datagram_poll(file, sock, wait);
 
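
The comment in netlink_poll() above captures the design point: a memory-mapped reader consumes frames straight from the ring and never enters recvmsg(), so poll() is the kernel's only chance to push a pending dump forward, and it does so only while at least half the ring is unused. A rough sketch of the matching user-side loop, assuming the RX ring was configured via setsockopt(NETLINK_RX_RING) and mmap(), with process_msg() a hypothetical handler:

#include <stddef.h>
#include <poll.h>
#include <linux/netlink.h>

extern void process_msg(const void *buf, size_t len);   /* hypothetical */

static void mmap_rx_loop(int fd, void *ring,
                         unsigned int frame_size, unsigned int nframes)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        unsigned int frame = 0;

        for (;;) {
                struct nl_mmap_hdr *hdr = ring + frame * frame_size;

                /* Blocking here is what lets netlink_poll() above run
                 * netlink_dump() again once half the ring is unused. */
                while (hdr->nm_status == NL_MMAP_STATUS_UNUSED ||
                       hdr->nm_status == NL_MMAP_STATUS_RESERVED)
                        poll(&pfd, 1, -1);

                if (hdr->nm_status == NL_MMAP_STATUS_VALID)
                        process_msg((void *)hdr + NL_MMAP_HDRLEN,
                                    hdr->nm_len);
                /* NL_MMAP_STATUS_COPY handling: see the next sketch */

                hdr->nm_status = NL_MMAP_STATUS_UNUSED; /* return the frame */
                frame = (frame + 1) % nframes;
        }
}
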
@@ -589,8 +651,53 @@ out:
        mutex_unlock(&nlk->pg_vec_lock);
        return err;
 }
+
+static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
+{
+       struct nl_mmap_hdr *hdr;
+
+       hdr = netlink_mmap_hdr(skb);
+       hdr->nm_len     = skb->len;
+       hdr->nm_group   = NETLINK_CB(skb).dst_group;
+       hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
+       hdr->nm_uid     = NETLINK_CB(skb).creds.uid;
+       hdr->nm_gid     = NETLINK_CB(skb).creds.gid;
+       netlink_frame_flush_dcache(hdr);
+       netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+
+       NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
+       kfree_skb(skb);
+}
+
+static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
+{
+       struct netlink_sock *nlk = nlk_sk(sk);
+       struct netlink_ring *ring = &nlk->rx_ring;
+       struct nl_mmap_hdr *hdr;
+
+       spin_lock_bh(&sk->sk_receive_queue.lock);
+       hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+       if (hdr == NULL) {
+               spin_unlock_bh(&sk->sk_receive_queue.lock);
+               kfree_skb(skb);
+               netlink_overrun(sk);
+               return;
+       }
+       netlink_increment_head(ring);
+       __skb_queue_tail(&sk->sk_receive_queue, skb);
+       spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+       hdr->nm_len     = skb->len;
+       hdr->nm_group   = NETLINK_CB(skb).dst_group;
+       hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
+       hdr->nm_uid     = NETLINK_CB(skb).creds.uid;
+       hdr->nm_gid     = NETLINK_CB(skb).creds.gid;
+       netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
+}
+
 #else /* CONFIG_NETLINK_MMAP */
 #define netlink_skb_is_mmaped(skb)     false
+#define netlink_rx_is_mmaped(sk)       false
 #define netlink_tx_is_mmaped(sk)       false
 #define netlink_mmap                   sock_no_mmap
 #define netlink_poll                   datagram_poll
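
Delivery to a memory-mapped receiver thus has two outcomes: netlink_queue_mmaped_skb() completes a frame whose data already lives in the ring (status VALID), while netlink_ring_set_copied() queues the skb on sk_receive_queue as usual and marks the current frame COPY, telling userspace to fetch the message through recvmsg(). A sketch of the matching user-side dispatch, meant to slot into the receive loop above (buf, buflen and process_msg() remain hypothetical):

#include <stddef.h>
#include <sys/socket.h>
#include <linux/netlink.h>

extern void process_msg(const void *buf, size_t len);   /* hypothetical */

static void handle_frame(int fd, struct nl_mmap_hdr *hdr,
                         void *buf, size_t buflen)
{
        ssize_t len;

        switch (hdr->nm_status) {
        case NL_MMAP_STATUS_VALID:
                /* payload sits in the ring frame itself */
                process_msg((void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
                break;
        case NL_MMAP_STATUS_COPY:
                /* skb was queued normally by netlink_ring_set_copied(),
                 * so drain it through the regular syscall path */
                len = recv(fd, buf, buflen, 0);
                if (len > 0)
                        process_msg(buf, len);
                break;
        default:
                return;         /* frame not ready; leave status alone */
        }
        hdr->nm_status = NL_MMAP_STATUS_UNUSED; /* hand the frame back */
}
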
@@ -1278,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
        return 0;
 }
 
-static void netlink_overrun(struct sock *sk)
-{
-       struct netlink_sock *nlk = nlk_sk(sk);
-
-       if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
-               if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
-                       sk->sk_err = ENOBUFS;
-                       sk->sk_error_report(sk);
-               }
-       }
-       atomic_inc(&sk->sk_drops);
-}
-
 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 {
        struct sock *sock;
@@ -1381,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 {
        int len = skb->len;
 
-       skb_queue_tail(&sk->sk_receive_queue, skb);
+#ifdef CONFIG_NETLINK_MMAP
+       if (netlink_skb_is_mmaped(skb))
+               netlink_queue_mmaped_skb(sk, skb);
+       else if (netlink_rx_is_mmaped(sk))
+               netlink_ring_set_copied(sk, skb);
+       else
+#endif /* CONFIG_NETLINK_MMAP */
+               skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        return len;
 }
@@ -1426,16 +1527,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
        return skb;
 }
 
-static void netlink_rcv_wake(struct sock *sk)
-{
-       struct netlink_sock *nlk = nlk_sk(sk);
-
-       if (skb_queue_empty(&sk->sk_receive_queue))
-               clear_bit(NETLINK_CONGESTED, &nlk->state);
-       if (!test_bit(NETLINK_CONGESTED, &nlk->state))
-               wake_up_interruptible(&nlk->wait);
-}
-
 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
                                  struct sock *ssk)
 {
@@ -1492,6 +1583,69 @@ retry:
 }
 EXPORT_SYMBOL(netlink_unicast);
 
+struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
+                                 u32 dst_portid, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NETLINK_MMAP
+       struct sock *sk = NULL;
+       struct sk_buff *skb;
+       struct netlink_ring *ring;
+       struct nl_mmap_hdr *hdr;
+       unsigned int maxlen;
+
+       sk = netlink_getsockbyportid(ssk, dst_portid);
+       if (IS_ERR(sk))
+               goto out;
+
+       ring = &nlk_sk(sk)->rx_ring;
+       /* fast-path without atomic ops for common case: non-mmaped receiver */
+       if (ring->pg_vec == NULL)
+               goto out_put;
+
+       skb = alloc_skb_head(gfp_mask);
+       if (skb == NULL)
+               goto err1;
+
+       spin_lock_bh(&sk->sk_receive_queue.lock);
+       /* check again under lock */
+       if (ring->pg_vec == NULL)
+               goto out_free;
+
+       maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+       if (maxlen < size)
+               goto out_free;
+
+       netlink_forward_ring(ring);
+       hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+       if (hdr == NULL)
+               goto err2;
+       netlink_ring_setup_skb(skb, sk, ring, hdr);
+       netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+       atomic_inc(&ring->pending);
+       netlink_increment_head(ring);
+
+       spin_unlock_bh(&sk->sk_receive_queue.lock);
+       return skb;
+
+err2:
+       kfree_skb(skb);
+       spin_unlock_bh(&sk->sk_receive_queue.lock);
+       netlink_overrun(sk);
+err1:
+       sock_put(sk);
+       return NULL;
+
+out_free:
+       kfree_skb(skb);
+       spin_unlock_bh(&sk->sk_receive_queue.lock);
+out_put:
+       sock_put(sk);
+out:
+#endif
+       return alloc_skb(size, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
        int res = 0;
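
netlink_alloc_skb() gives message producers a single entry point that reserves a frame directly in the receiver's RX ring when one is mapped and falls back to a plain allocation otherwise; on error, a reserved frame is released through the skb destructor. A hypothetical caller, not taken from this patch (notify_value(), nl_sk, portid and the one-word payload are assumptions for illustration):

#include <net/netlink.h>

static int notify_value(struct sock *nl_sk, u32 portid, u32 value)
{
        struct sk_buff *skb;
        struct nlmsghdr *nlh;

        skb = netlink_alloc_skb(nl_sk, nlmsg_total_size(sizeof(value)),
                                portid, GFP_KERNEL);
        if (skb == NULL)
                return -ENOBUFS;

        nlh = nlmsg_put(skb, 0, 0, NLMSG_DONE, sizeof(value), 0);
        if (nlh == NULL) {
                /* the skb destructor releases a reserved ring frame */
                kfree_skb(skb);
                return -EMSGSIZE;
        }
        *(u32 *)nlmsg_data(nlh) = value;

        return netlink_unicast(nl_sk, skb, portid, MSG_DONTWAIT);
}

netlink_unicast() then reaches __netlink_sendskb() above, which picks the mmaped, copied, or plain queueing path for the receiver.
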
@@ -2270,9 +2424,13 @@ static int netlink_dump(struct sock *sk)
 
        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
 
-       skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+       if (!netlink_rx_is_mmaped(sk) &&
+           atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+               goto errout_skb;
+       skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
        if (!skb)
                goto errout_skb;
+       netlink_skb_set_owner_r(skb, sk);
 
        len = cb->dump(skb, cb);
 
@@ -2327,6 +2485,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
        if (cb == NULL)
                return -ENOBUFS;
 
+       /* Memory mapped dump requests need to be copied to avoid looping
+        * on the pending state in netlink_mmap_sendmsg() while the CB holds
+        * a reference to the skb.
+        */
+       if (netlink_skb_is_mmaped(skb)) {
+               skb = skb_copy(skb, GFP_KERNEL);
+               if (skb == NULL) {
+                       kfree(cb);
+                       return -ENOBUFS;
+               }
+       } else
+               atomic_inc(&skb->users);
+
        cb->dump = control->dump;
        cb->done = control->done;
        cb->nlh = nlh;
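
The copy is needed because the dump callback keeps a reference to the request skb until the dump finishes; if that skb were a TX ring frame, its pending state would never clear and the sender would spin in netlink_mmap_sendmsg(). For context, a hedged sketch of how such a request is submitted from userspace, following the memory-mapped netlink ABI (the ring variables are assumed set up via NETLINK_TX_RING; sendto() with a NULL buffer asks the kernel to scan the ring):

#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void send_dump_request(int fd, void *tx_ring,
                              unsigned int frame_size, unsigned int frame)
{
        struct nl_mmap_hdr *hdr = tx_ring + frame * frame_size;
        struct nlmsghdr *nlh = (void *)hdr + NL_MMAP_HDRLEN;
        struct rtgenmsg *gen = NLMSG_DATA(nlh);

        nlh->nlmsg_len    = NLMSG_LENGTH(sizeof(*gen));
        nlh->nlmsg_type   = RTM_GETLINK;
        nlh->nlmsg_flags  = NLM_F_REQUEST | NLM_F_DUMP;
        gen->rtgen_family = AF_UNSPEC;

        hdr->nm_len    = nlh->nlmsg_len;
        hdr->nm_status = NL_MMAP_STATUS_VALID;

        /* A NULL buffer tells the kernel to walk the TX ring; because
         * __netlink_dump_start() copies the request, this frame returns
         * to NL_MMAP_STATUS_UNUSED without waiting for the dump. */
        sendto(fd, NULL, 0, 0, NULL, 0);
}
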
@@ -2387,7 +2558,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
        if (err)
                payload += nlmsg_len(nlh);
 
-       skb = nlmsg_new(payload, GFP_KERNEL);
+       skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
+                               NETLINK_CB(in_skb).portid, GFP_KERNEL);
        if (!skb) {
                struct sock *sk;