vmscan: remove obsolete shrink_control comment
[firefly-linux-kernel-4.4.55.git] / ipc / mqueue.c
index 076399ce363ab9d7f80696128609672574aceabd..f8e54f5b90804ea58309da551fcd40dd8cf12333 100644 (file)
@@ -69,6 +69,7 @@ struct mqueue_inode_info {
        wait_queue_head_t wait_q;
 
        struct rb_root msg_tree;
+       struct posix_msg_tree_node *node_cache;
        struct mq_attr attr;
 
        struct sigevent notify;
@@ -134,15 +135,20 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
                else
                        p = &(*p)->rb_right;
        }
-       leaf = kzalloc(sizeof(*leaf), GFP_ATOMIC);
-       if (!leaf)
-               return -ENOMEM;
-       rb_init_node(&leaf->rb_node);
-       INIT_LIST_HEAD(&leaf->msg_list);
+       if (info->node_cache) {
+               leaf = info->node_cache;
+               info->node_cache = NULL;
+       } else {
+               leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
+               if (!leaf)
+                       return -ENOMEM;
+               rb_init_node(&leaf->rb_node);
+               INIT_LIST_HEAD(&leaf->msg_list);
+               info->qsize += sizeof(*leaf);
+       }
        leaf->priority = msg->m_type;
        rb_link_node(&leaf->rb_node, parent, p);
        rb_insert_color(&leaf->rb_node, &info->msg_tree);
-       info->qsize += sizeof(struct posix_msg_tree_node);
 insert_msg:
        info->attr.mq_curmsgs++;
        info->qsize += msg->m_ts;
@@ -177,13 +183,17 @@ try_again:
                return NULL;
        }
        leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
-       if (list_empty(&leaf->msg_list)) {
+       if (unlikely(list_empty(&leaf->msg_list))) {
                pr_warn_once("Inconsistency in POSIX message queue, "
                             "empty leaf node but we haven't implemented "
                             "lazy leaf delete!\n");
                rb_erase(&leaf->rb_node, &info->msg_tree);
-               info->qsize -= sizeof(struct posix_msg_tree_node);
-               kfree(leaf);
+               if (info->node_cache) {
+                       info->qsize -= sizeof(*leaf);
+                       kfree(leaf);
+               } else {
+                       info->node_cache = leaf;
+               }
                goto try_again;
        } else {
                msg = list_first_entry(&leaf->msg_list,
@@ -191,8 +201,12 @@ try_again:
                list_del(&msg->m_list);
                if (list_empty(&leaf->msg_list)) {
                        rb_erase(&leaf->rb_node, &info->msg_tree);
-                       info->qsize -= sizeof(struct posix_msg_tree_node);
-                       kfree(leaf);
+                       if (info->node_cache) {
+                               info->qsize -= sizeof(*leaf);
+                               kfree(leaf);
+                       } else {
+                               info->node_cache = leaf;
+                       }
                }
        }
        info->attr.mq_curmsgs--;
@@ -235,6 +249,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
                info->qsize = 0;
                info->user = NULL;      /* set when all is ok */
                info->msg_tree = RB_ROOT;
+               info->node_cache = NULL;
                memset(&info->attr, 0, sizeof(info->attr));
                info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
                                           ipc_ns->mq_msg_default);
@@ -367,6 +382,7 @@ static void mqueue_evict_inode(struct inode *inode)
        spin_lock(&info->lock);
        while ((msg = msg_get(info)) != NULL)
                free_msg(msg);
+       kfree(info->node_cache);
        spin_unlock(&info->lock);
 
        /* Total amount of bytes accounted for the mqueue */
@@ -397,7 +413,7 @@ static void mqueue_evict_inode(struct inode *inode)
 }
 
 static int mqueue_create(struct inode *dir, struct dentry *dentry,
-                               umode_t mode, struct nameidata *nd)
+                               umode_t mode, bool excl)
 {
        struct inode *inode;
        struct mq_attr *attr = dentry->d_fsdata;
@@ -680,33 +696,33 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
        unsigned long total_size;
 
        if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
-               return 0;
+               return -EINVAL;
        if (capable(CAP_SYS_RESOURCE)) {
                if (attr->mq_maxmsg > HARD_MSGMAX ||
                    attr->mq_msgsize > HARD_MSGSIZEMAX)
-                       return 0;
+                       return -EINVAL;
        } else {
                if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
                                attr->mq_msgsize > ipc_ns->mq_msgsize_max)
-                       return 0;
+                       return -EINVAL;
        }
        /* check for overflow */
        if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
-               return 0;
+               return -EOVERFLOW;
        mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) +
                min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) *
                sizeof(struct posix_msg_tree_node);
        total_size = attr->mq_maxmsg * attr->mq_msgsize;
        if (total_size + mq_treesize < total_size)
-               return 0;
-       return 1;
+               return -EOVERFLOW;
+       return 0;
 }
 
 /*
  * Invoked when creating a new queue via sys_mq_open
  */
-static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
-                       struct dentry *dentry, int oflag, umode_t mode,
+static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir,
+                       struct path *path, int oflag, umode_t mode,
                        struct mq_attr *attr)
 {
        const struct cred *cred = current_cred();
@@ -714,76 +730,65 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
        int ret;
 
        if (attr) {
-               if (!mq_attr_ok(ipc_ns, attr)) {
-                       ret = -EINVAL;
-                       goto out;
-               }
+               ret = mq_attr_ok(ipc_ns, attr);
+               if (ret)
+                       return ERR_PTR(ret);
                /* store for use during create */
-               dentry->d_fsdata = attr;
+               path->dentry->d_fsdata = attr;
+       } else {
+               struct mq_attr def_attr;
+
+               def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
+                                        ipc_ns->mq_msg_default);
+               def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
+                                         ipc_ns->mq_msgsize_default);
+               ret = mq_attr_ok(ipc_ns, &def_attr);
+               if (ret)
+                       return ERR_PTR(ret);
        }
 
        mode &= ~current_umask();
-       ret = mnt_want_write(ipc_ns->mq_mnt);
-       if (ret)
-               goto out;
-       ret = vfs_create(dir->d_inode, dentry, mode, NULL);
-       dentry->d_fsdata = NULL;
+       ret = mnt_want_write(path->mnt);
        if (ret)
-               goto out_drop_write;
-
-       result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
+               return ERR_PTR(ret);
+       ret = vfs_create(dir, path->dentry, mode, true);
+       path->dentry->d_fsdata = NULL;
+       if (!ret)
+               result = dentry_open(path, oflag, cred);
+       else
+               result = ERR_PTR(ret);
        /*
         * dentry_open() took a persistent mnt_want_write(),
         * so we can now drop this one.
         */
-       mnt_drop_write(ipc_ns->mq_mnt);
+       mnt_drop_write(path->mnt);
        return result;
-
-out_drop_write:
-       mnt_drop_write(ipc_ns->mq_mnt);
-out:
-       dput(dentry);
-       mntput(ipc_ns->mq_mnt);
-       return ERR_PTR(ret);
 }
 
 /* Opens existing queue */
-static struct file *do_open(struct ipc_namespace *ipc_ns,
-                               struct dentry *dentry, int oflag)
+static struct file *do_open(struct path *path, int oflag)
 {
-       int ret;
-       const struct cred *cred = current_cred();
-
        static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
                                                  MAY_READ | MAY_WRITE };
-
-       if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
-               ret = -EACCES;
-               goto err;
-       }
-
-       return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
-
-err:
-       dput(dentry);
-       mntput(ipc_ns->mq_mnt);
-       return ERR_PTR(ret);
+       int acc;
+       if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+               return ERR_PTR(-EINVAL);
+       acc = oflag2acc[oflag & O_ACCMODE];
+       if (inode_permission(path->dentry->d_inode, acc))
+               return ERR_PTR(-EACCES);
+       return dentry_open(path, oflag, current_cred());
 }
 
 SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
                struct mq_attr __user *, u_attr)
 {
-       struct dentry *dentry;
+       struct path path;
        struct file *filp;
        char *name;
        struct mq_attr attr;
        int fd, error;
        struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+       struct dentry *root = ipc_ns->mq_mnt->mnt_root;
 
        if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
                return -EFAULT;
@@ -797,52 +802,49 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
        if (fd < 0)
                goto out_putname;
 
-       mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
-       dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
-       if (IS_ERR(dentry)) {
-               error = PTR_ERR(dentry);
+       error = 0;
+       mutex_lock(&root->d_inode->i_mutex);
+       path.dentry = lookup_one_len(name, root, strlen(name));
+       if (IS_ERR(path.dentry)) {
+               error = PTR_ERR(path.dentry);
                goto out_putfd;
        }
-       mntget(ipc_ns->mq_mnt);
+       path.mnt = mntget(ipc_ns->mq_mnt);
 
        if (oflag & O_CREAT) {
-               if (dentry->d_inode) {  /* entry already exists */
-                       audit_inode(name, dentry);
+               if (path.dentry->d_inode) {     /* entry already exists */
+                       audit_inode(name, path.dentry);
                        if (oflag & O_EXCL) {
                                error = -EEXIST;
                                goto out;
                        }
-                       filp = do_open(ipc_ns, dentry, oflag);
+                       filp = do_open(&path, oflag);
                } else {
-                       filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
-                                               dentry, oflag, mode,
+                       filp = do_create(ipc_ns, root->d_inode,
+                                               &path, oflag, mode,
                                                u_attr ? &attr : NULL);
                }
        } else {
-               if (!dentry->d_inode) {
+               if (!path.dentry->d_inode) {
                        error = -ENOENT;
                        goto out;
                }
-               audit_inode(name, dentry);
-               filp = do_open(ipc_ns, dentry, oflag);
+               audit_inode(name, path.dentry);
+               filp = do_open(&path, oflag);
        }
 
-       if (IS_ERR(filp)) {
+       if (!IS_ERR(filp))
+               fd_install(fd, filp);
+       else
                error = PTR_ERR(filp);
-               goto out_putfd;
-       }
-
-       fd_install(fd, filp);
-       goto out_upsem;
-
 out:
-       dput(dentry);
-       mntput(ipc_ns->mq_mnt);
+       path_put(&path);
 out_putfd:
-       put_unused_fd(fd);
-       fd = error;
-out_upsem:
-       mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
+       if (error) {
+               put_unused_fd(fd);
+               fd = error;
+       }
+       mutex_unlock(&root->d_inode->i_mutex);
 out_putname:
        putname(name);
        return fd;
@@ -955,7 +957,8 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        struct mqueue_inode_info *info;
        ktime_t expires, *timeout = NULL;
        struct timespec ts;
-       int ret;
+       struct posix_msg_tree_node *new_leaf = NULL;
+       int ret = 0;
 
        if (u_abs_timeout) {
                int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1003,39 +1006,60 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        msg_ptr->m_ts = msg_len;
        msg_ptr->m_type = msg_prio;
 
+       /*
+        * msg_insert really wants us to have a valid, spare node struct so
+        * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
+        * fall back to that if necessary.
+        */
+       if (!info->node_cache)
+               new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
+
        spin_lock(&info->lock);
 
+       if (!info->node_cache && new_leaf) {
+               /* Save our speculative allocation into the cache */
+               rb_init_node(&new_leaf->rb_node);
+               INIT_LIST_HEAD(&new_leaf->msg_list);
+               info->node_cache = new_leaf;
+               info->qsize += sizeof(*new_leaf);
+               new_leaf = NULL;
+       } else {
+               kfree(new_leaf);
+       }
+
        if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
                if (filp->f_flags & O_NONBLOCK) {
-                       spin_unlock(&info->lock);
                        ret = -EAGAIN;
                } else {
                        wait.task = current;
                        wait.msg = (void *) msg_ptr;
                        wait.state = STATE_NONE;
                        ret = wq_sleep(info, SEND, timeout, &wait);
+                       /*
+                        * wq_sleep must be called with info->lock held, and
+                        * returns with the lock released
+                        */
+                       goto out_free;
                }
-               if (ret < 0)
-                       free_msg(msg_ptr);
        } else {
                receiver = wq_get_first_waiter(info, RECV);
                if (receiver) {
                        pipelined_send(info, msg_ptr, receiver);
                } else {
                        /* adds message to the queue */
-                       if (msg_insert(msg_ptr, info)) {
-                               free_msg(msg_ptr);
-                               ret = -ENOMEM;
-                               spin_unlock(&info->lock);
-                               goto out_fput;
-                       }
+                       ret = msg_insert(msg_ptr, info);
+                       if (ret)
+                               goto out_unlock;
                        __do_notify(info);
                }
                inode->i_atime = inode->i_mtime = inode->i_ctime =
                                CURRENT_TIME;
-               spin_unlock(&info->lock);
-               ret = 0;
        }
+out_unlock:
+       spin_unlock(&info->lock);
+out_free:
+       if (ret)
+               free_msg(msg_ptr);
 out_fput:
        fput(filp);
 out:
@@ -1054,6 +1078,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
        struct ext_wait_queue wait;
        ktime_t expires, *timeout = NULL;
        struct timespec ts;
+       struct posix_msg_tree_node *new_leaf = NULL;
 
        if (u_abs_timeout) {
                int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1089,7 +1114,26 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
                goto out_fput;
        }
 
+       /*
+        * msg_insert really wants us to have a valid, spare node struct so
+        * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
+        * fall back to that if necessary.
+        */
+       if (!info->node_cache)
+               new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
+
        spin_lock(&info->lock);
+
+       if (!info->node_cache && new_leaf) {
+               /* Save our speculative allocation into the cache */
+               rb_init_node(&new_leaf->rb_node);
+               INIT_LIST_HEAD(&new_leaf->msg_list);
+               info->node_cache = new_leaf;
+               info->qsize += sizeof(*new_leaf);
+       } else {
+               kfree(new_leaf);
+       }
+
        if (info->attr.mq_curmsgs == 0) {
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);