aio: block io_destroy() until all context requests are completed
[firefly-linux-kernel-4.4.55.git] / fs / aio.c
index 12a3de0ee6dacbdea873ec9ea28bdd88d1ea999d..4f078c054b41608fe3a8ed325c55d59898b049cb 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -112,6 +112,11 @@ struct kioctx {
 
        struct work_struct      free_work;
 
+       /*
+        * signals when all in-flight requests are done
+        */
+       struct completion *requests_done;
+
        struct {
                /*
                 * This counts the number of available slots in the ringbuffer,
@@ -472,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
 {
        kiocb_cancel_fn *old, *cancel;
 
@@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 {
        struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
 
+       /* At this point we know that there are no in-flight requests */
+       if (ctx->requests_done)
+               complete(ctx->requests_done);
+
        INIT_WORK(&ctx->free_work, free_ioctx);
        schedule_work(&ctx->free_work);
 }
@@ -529,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
                                       struct kiocb, ki_list);
 
                list_del_init(&req->ki_list);
-               kiocb_cancel(ctx, req);
+               kiocb_cancel(req);
        }
 
        spin_unlock_irq(&ctx->ctx_lock);
@@ -718,37 +727,42 @@ err:
  *     when the processes owning a context have all exited to encourage
  *     the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+               struct completion *requests_done)
 {
-       if (!atomic_xchg(&ctx->dead, 1)) {
-               struct kioctx_table *table;
+       struct kioctx_table *table;
 
-               spin_lock(&mm->ioctx_lock);
-               rcu_read_lock();
-               table = rcu_dereference(mm->ioctx_table);
+       if (atomic_xchg(&ctx->dead, 1))
+               return -EINVAL;
 
-               WARN_ON(ctx != table->table[ctx->id]);
-               table->table[ctx->id] = NULL;
-               rcu_read_unlock();
-               spin_unlock(&mm->ioctx_lock);
 
-               /* percpu_ref_kill() will do the necessary call_rcu() */
-               wake_up_all(&ctx->wait);
+       spin_lock(&mm->ioctx_lock);
+       rcu_read_lock();
+       table = rcu_dereference(mm->ioctx_table);
 
-               /*
-                * It'd be more correct to do this in free_ioctx(), after all
-                * the outstanding kiocbs have finished - but by then io_destroy
-                * has already returned, so io_setup() could potentially return
-                * -EAGAIN with no ioctxs actually in use (as far as userspace
-                *  could tell).
-                */
-               aio_nr_sub(ctx->max_reqs);
+       WARN_ON(ctx != table->table[ctx->id]);
+       table->table[ctx->id] = NULL;
+       rcu_read_unlock();
+       spin_unlock(&mm->ioctx_lock);
 
-               if (ctx->mmap_size)
-                       vm_munmap(ctx->mmap_base, ctx->mmap_size);
+       /* percpu_ref_kill() will do the necessary call_rcu() */
+       wake_up_all(&ctx->wait);
 
-               percpu_ref_kill(&ctx->users);
-       }
+       /*
+        * It'd be more correct to do this in free_ioctx(), after all
+        * the outstanding kiocbs have finished - but by then io_destroy
+        * has already returned, so io_setup() could potentially return
+        * -EAGAIN with no ioctxs actually in use (as far as userspace
+        *  could tell).
+        */
+       aio_nr_sub(ctx->max_reqs);
+
+       if (ctx->mmap_size)
+               vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+       ctx->requests_done = requests_done;
+       percpu_ref_kill(&ctx->users);
+       return 0;
 }
 
 /* wait_on_sync_kiocb:
@@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm)
                 */
                ctx->mmap_size = 0;
 
-               kill_ioctx(mm, ctx);
+               kill_ioctx(mm, ctx, NULL);
        }
 }
 
@@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
        if (!IS_ERR(ioctx)) {
                ret = put_user(ioctx->user_id, ctxp);
                if (ret)
-                       kill_ioctx(current->mm, ioctx);
+                       kill_ioctx(current->mm, ioctx, NULL);
                percpu_ref_put(&ioctx->users);
        }
 
@@ -1203,9 +1217,25 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
        struct kioctx *ioctx = lookup_ioctx(ctx);
        if (likely(NULL != ioctx)) {
-               kill_ioctx(current->mm, ioctx);
+               struct completion requests_done =
+                       COMPLETION_INITIALIZER_ONSTACK(requests_done);
+               int ret;
+
+               /* Pass requests_done to kill_ioctx() where it can be set
+                * in a thread-safe way. If we tried to set it here we would
+                * have a race condition when two io_destroy() calls run
+                * simultaneously.
+                */
+               ret = kill_ioctx(current->mm, ioctx, &requests_done);
                percpu_ref_put(&ioctx->users);
-               return 0;
+
+               /* Wait until all I/O for the context is done. Otherwise the
+                * kernel keeps using user-space buffers even though the user
+                * believes the context has been destroyed.
+                */
+               if (!ret)
+                       wait_for_completion(&requests_done);
+
+               return ret;
        }
        pr_debug("EINVAL: io_destroy: invalid context id\n");
        return -EINVAL;
@@ -1213,6 +1243,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 
 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
                            unsigned long, loff_t);
+typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
                                     int rw, char __user *buf,
@@ -1270,7 +1301,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
        int rw;
        fmode_t mode;
        aio_rw_op *rw_op;
+       rw_iter_op *iter_op;
        struct iovec inline_vec, *iovec = &inline_vec;
+       struct iov_iter iter;
 
        switch (opcode) {
        case IOCB_CMD_PREAD:
@@ -1278,6 +1311,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
                mode    = FMODE_READ;
                rw      = READ;
                rw_op   = file->f_op->aio_read;
+               iter_op = file->f_op->read_iter;
                goto rw_common;
 
        case IOCB_CMD_PWRITE:
@@ -1285,12 +1319,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
                mode    = FMODE_WRITE;
                rw      = WRITE;
                rw_op   = file->f_op->aio_write;
+               iter_op = file->f_op->write_iter;
                goto rw_common;
 rw_common:
                if (unlikely(!(file->f_mode & mode)))
                        return -EBADF;
 
-               if (!rw_op)
+               if (!rw_op && !iter_op)
                        return -EINVAL;
 
                ret = (opcode == IOCB_CMD_PREADV ||
@@ -1299,10 +1334,8 @@ rw_common:
                                                &iovec, compat)
                        : aio_setup_single_vector(req, rw, buf, &nr_segs,
                                                  iovec);
-               if (ret)
-                       return ret;
-
-               ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+               if (!ret)
+                       ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
                if (ret < 0) {
                        if (iovec != &inline_vec)
                                kfree(iovec);
@@ -1321,7 +1354,12 @@ rw_common:
                if (rw == WRITE)
                        file_start_write(file);
 
-               ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+               if (iter_op) {
+                       iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+                       ret = iter_op(req, &iter);
+               } else {
+                       ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+               }
 
                if (rw == WRITE)
                        file_end_write(file);
@@ -1559,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 
        kiocb = lookup_kiocb(ctx, iocb, key);
        if (kiocb)
-               ret = kiocb_cancel(ctx, kiocb);
+               ret = kiocb_cancel(kiocb);
        else
                ret = -EINVAL;