usb: dwc3: rockchip: fix possible circular deadlock

[firefly-linux-kernel-4.4.55.git] / fs / eventpoll.c
diff --git a/fs/eventpoll.c b/fs/eventpoll.c

index db10e00c971a71add3be6b140a40b4f9d68580f1..3ab9c68b8bcee45fee7386140194da2f7d7b859d 100644 (file)
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -42,6 +42,7 @@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  #include <linux/compat.h>
+#include <linux/rculist.h>
  
  /*
   * LOCKING:
@@ -134,8 +135,12 @@ struct nested_calls {
   * of these on a server and we do not want this to take another cache line.
   */
  struct epitem {
-       /* RB tree node used to link this structure to the eventpoll RB tree */
-       struct rb_node rbn;
+       union {
+               /* RB tree node links this structure to the eventpoll RB tree */
+               struct rb_node rbn;
+               /* Used to free the struct epitem */
+               struct rcu_head rcu;
+       };
  
         /* List header used to link this structure to the eventpoll ready list */
         struct list_head rdllink;
@@ -289,7 +294,7 @@ static LIST_HEAD(tfile_check_list);
  static long zero;
  static long long_max = LONG_MAX;
  
-ctl_table epoll_table[] = {
+struct ctl_table epoll_table[] = {
         {
                 .procname       = "max_user_watches",
                 .data           = &max_user_watches,
@@ -581,14 +586,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
   * @sproc: Pointer to the scan callback.
   * @priv: Private opaque data passed to the @sproc callback.
   * @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
   *
   * Returns: The same integer error code returned by the @sproc callback.
   */
  static int ep_scan_ready_list(struct eventpoll *ep,
                               int (*sproc)(struct eventpoll *,
                                            struct list_head *, void *),
-                             void *priv,
-                             int depth)
+                             void *priv, int depth, bool ep_locked)
  {
         int error, pwake = 0;
         unsigned long flags;
@@ -599,7 +604,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
          * We need to lock this because we could be hit by
          * eventpoll_release_file() and epoll_ctl().
          */
-       mutex_lock_nested(&ep->mtx, depth);
+
+       if (!ep_locked)
+               mutex_lock_nested(&ep->mtx, depth);
  
         /*
          * Steal the ready list, and re-init the original one to the
@@ -663,7 +670,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
         }
         spin_unlock_irqrestore(&ep->lock, flags);
  
-       mutex_unlock(&ep->mtx);
+       if (!ep_locked)
+               mutex_unlock(&ep->mtx);
  
         /* We have to call this outside the lock */
         if (pwake)
@@ -672,6 +680,12 @@ static int ep_scan_ready_list(struct eventpoll *ep,
         return error;
  }
  
+static void epi_rcu_free(struct rcu_head *head)
+{
+       struct epitem *epi = container_of(head, struct epitem, rcu);
+       kmem_cache_free(epi_cache, epi);
+}
+
  /*
   * Removes a "struct epitem" from the eventpoll RB tree and deallocates
   * all the associated resources. Must be called with "mtx" held.
@@ -693,8 +707,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
  
         /* Remove the current item from the list of epoll hooks */
         spin_lock(&file->f_lock);
-       if (ep_is_linked(&epi->fllink))
-               list_del_init(&epi->fllink);
+       list_del_rcu(&epi->fllink);
         spin_unlock(&file->f_lock);
  
         rb_erase(&epi->rbn, &ep->rbr);
@@ -705,9 +718,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
         spin_unlock_irqrestore(&ep->lock, flags);
  
         wakeup_source_unregister(ep_wakeup_source(epi));
-
-       /* At this point it is safe to free the eventpoll item */
-       kmem_cache_free(epi_cache, epi);
+       /*
+        * At this point it is safe to free the eventpoll item. Use the union
+        * field epi->rcu, since we are trying to minimize the size of
+        * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
+        * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
+        * use of the rbn field.
+        */
+       call_rcu(&epi->rcu, epi_rcu_free);
  
         atomic_long_dec(&ep->user->epoll_watches);
  
@@ -740,6 +758,7 @@ static void ep_free(struct eventpoll *ep)
                 epi = rb_entry(rbp, struct epitem, rbn);
  
                 ep_unregister_pollwait(ep, epi);
+               cond_resched();
         }
  
         /*
@@ -754,6 +773,7 @@ static void ep_free(struct eventpoll *ep)
         while ((rbp = rb_first(&ep->rbr)) != NULL) {
                 epi = rb_entry(rbp, struct epitem, rbn);
                 ep_remove(ep, epi);
+               cond_resched();
         }
         mutex_unlock(&ep->mtx);
  
@@ -806,15 +826,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
         return 0;
  }
  
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+                                poll_table *pt);
+
+struct readyevents_arg {
+       struct eventpoll *ep;
+       bool locked;
+};
+
  static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
  {
-       return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+       struct readyevents_arg *arg = priv;
+
+       return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+                                 call_nests + 1, arg->locked);
  }
  
  static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
  {
         int pollflags;
         struct eventpoll *ep = file->private_data;
+       struct readyevents_arg arg;
+
+       /*
+        * During ep_insert() we already hold the ep->mtx for the tfile.
+        * Prevent re-acquisition.
+        */
+       arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+       arg.ep = ep;
  
         /* Insert inside our poll wait queue */
         poll_wait(file, &ep->poll_wait, wait);
@@ -826,31 +865,28 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
          * could re-enter here.
          */
         pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
-                                  ep_poll_readyevents_proc, ep, ep, current);
+                                  ep_poll_readyevents_proc, &arg, ep, current);
  
         return pollflags != -1 ? pollflags : 0;
  }
  
  #ifdef CONFIG_PROC_FS
-static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+static void ep_show_fdinfo(struct seq_file *m, struct file *f)
  {
         struct eventpoll *ep = f->private_data;
         struct rb_node *rbp;
-       int ret = 0;
  
         mutex_lock(&ep->mtx);
         for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
                 struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
  
-               ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
-                                epi->ffd.fd, epi->event.events,
-                                (long long)epi->event.data);
-               if (ret)
+               seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+                          epi->ffd.fd, epi->event.events,
+                          (long long)epi->event.data);
+               if (seq_has_overflowed(m))
                         break;
         }
         mutex_unlock(&ep->mtx);
-
-       return ret;
  }
  #endif
  
@@ -871,9 +907,8 @@ static const struct file_operations eventpoll_fops = {
   */
  void eventpoll_release_file(struct file *file)
  {
-       struct list_head *lsthead = &file->f_ep_links;
         struct eventpoll *ep;
-       struct epitem *epi;
+       struct epitem *epi, *next;
  
         /*
          * We don't want to get "file->f_lock" because it is not
@@ -889,17 +924,12 @@ void eventpoll_release_file(struct file *file)
          * Besides, ep_remove() acquires the lock, so we can't hold it here.
          */
         mutex_lock(&epmutex);
-
-       while (!list_empty(lsthead)) {
-               epi = list_first_entry(lsthead, struct epitem, fllink);
-
+       list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) {
                 ep = epi->ep;
-               list_del_init(&epi->fllink);
                 mutex_lock_nested(&ep->mtx, 0);
                 ep_remove(ep, epi);
                 mutex_unlock(&ep->mtx);
         }
-
         mutex_unlock(&epmutex);
  }
  
@@ -1137,7 +1167,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
         struct file *child_file;
         struct epitem *epi;
  
-       list_for_each_entry(epi, &file->f_ep_links, fllink) {
+       /* CTL_DEL can remove links here, but that can't increase our count */
+       rcu_read_lock();
+       list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
                 child_file = epi->ep->file;
                 if (is_file_epoll(child_file)) {
                         if (list_empty(&child_file->f_ep_links)) {
@@ -1159,6 +1191,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
                                 "file is not an ep!\n");
                 }
         }
+       rcu_read_unlock();
         return error;
  }
  
@@ -1230,7 +1263,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
   * Must be called with "mtx" held.
   */
  static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-                    struct file *tfile, int fd)
+                    struct file *tfile, int fd, int full_check)
  {
         int error, revents, pwake = 0;
         unsigned long flags;
@@ -1285,7 +1318,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
  
         /* Add the current item to the list of active epoll hook for this file */
         spin_lock(&tfile->f_lock);
-       list_add_tail(&epi->fllink, &tfile->f_ep_links);
+       list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
         spin_unlock(&tfile->f_lock);
  
         /*
@@ -1296,7 +1329,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
  
         /* now check if we've created too many backpaths */
         error = -EINVAL;
-       if (reverse_path_check())
+       if (full_check && reverse_path_check())
                 goto error_remove_epi;
  
         /* We have to drop the new item inside our item list to keep track of it */
@@ -1326,8 +1359,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
  
  error_remove_epi:
         spin_lock(&tfile->f_lock);
-       if (ep_is_linked(&epi->fllink))
-               list_del_init(&epi->fllink);
+       list_del_rcu(&epi->fllink);
         spin_unlock(&tfile->f_lock);
  
         rb_erase(&epi->rbn, &ep->rbr);
@@ -1520,7 +1552,7 @@ static int ep_send_events(struct eventpoll *ep,
         esed.maxevents = maxevents;
         esed.events = events;
  
-       return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+       return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
  }
  
  static inline struct timespec ep_set_mstimeout(long ms)
@@ -1556,7 +1588,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
  {
         int res = 0, eavail, timed_out = 0;
         unsigned long flags;
-       long slack = 0;
+       u64 slack = 0;
         wait_queue_t wait;
         ktime_t expires, *to = NULL;
  
@@ -1609,9 +1641,9 @@ fetch_events:
  
                         spin_lock_irqsave(&ep->lock, flags);
                 }
-               __remove_wait_queue(&ep->wq, &wait);
  
-               set_current_state(TASK_RUNNING);
+               __remove_wait_queue(&ep->wq, &wait);
+               __set_current_state(TASK_RUNNING);
         }
  check_events:
         /* Is it worth to try to dig for events ? */
@@ -1791,35 +1823,36 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                 struct epoll_event __user *, event)
  {
         int error;
-       int did_lock_epmutex = 0;
-       struct file *file, *tfile;
+       int full_check = 0;
+       struct fd f, tf;
         struct eventpoll *ep;
         struct epitem *epi;
         struct epoll_event epds;
+       struct eventpoll *tep = NULL;
  
         error = -EFAULT;
         if (ep_op_has_event(op) &&
             copy_from_user(&epds, event, sizeof(struct epoll_event)))
                 goto error_return;
  
-       /* Get the "struct file *" for the eventpoll file */
         error = -EBADF;
-       file = fget(epfd);
-       if (!file)
+       f = fdget(epfd);
+       if (!f.file)
                 goto error_return;
  
         /* Get the "struct file *" for the target file */
-       tfile = fget(fd);
-       if (!tfile)
+       tf = fdget(fd);
+       if (!tf.file)
                 goto error_fput;
  
         /* The target file descriptor must support poll */
         error = -EPERM;
-       if (!tfile->f_op || !tfile->f_op->poll)
+       if (!tf.file->f_op->poll)
                 goto error_tgt_fput;
  
         /* Check if EPOLLWAKEUP is allowed */
-       ep_take_care_of_epollwakeup(&epds);
+       if (ep_op_has_event(op))
+               ep_take_care_of_epollwakeup(&epds);
  
         /*
          * We have to check that the file structure underneath the file descriptor
@@ -1827,14 +1860,14 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
          * adding an epoll file descriptor inside itself.
          */
         error = -EINVAL;
-       if (file == tfile || !is_file_epoll(file))
+       if (f.file == tf.file || !is_file_epoll(f.file))
                 goto error_tgt_fput;
  
         /*
          * At this point it is safe to assume that the "private_data" contains
          * our own data structure.
          */
-       ep = file->private_data;
+       ep = f.file->private_data;
  
         /*
          * When we insert an epoll file descriptor, inside another epoll file
@@ -1844,43 +1877,54 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
          * and hang them on the tfile_check_list, so we can check that we
          * haven't created too many possible wakeup paths.
          *
-        * We need to hold the epmutex across both ep_insert and ep_remove
-        * b/c we want to make sure we are looking at a coherent view of
-        * epoll network.
+        * We do not need to take the global 'epmutex' on EPOLL_CTL_ADD when
+        * the epoll file descriptor is attaching directly to a wakeup source,
+        * unless the epoll file descriptor is nested. The purpose of taking the
+        * 'epmutex' on add is to prevent complex topologies such as loops and
+        * deep wakeup paths from forming in parallel through multiple
+        * EPOLL_CTL_ADD operations.
          */
-       if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
-               mutex_lock(&epmutex);
-               did_lock_epmutex = 1;
-       }
+       mutex_lock_nested(&ep->mtx, 0);
         if (op == EPOLL_CTL_ADD) {
-               if (is_file_epoll(tfile)) {
-                       error = -ELOOP;
-                       if (ep_loop_check(ep, tfile) != 0) {
-                               clear_tfile_check_list();
-                               goto error_tgt_fput;
+               if (!list_empty(&f.file->f_ep_links) ||
+                                               is_file_epoll(tf.file)) {
+                       full_check = 1;
+                       mutex_unlock(&ep->mtx);
+                       mutex_lock(&epmutex);
+                       if (is_file_epoll(tf.file)) {
+                               error = -ELOOP;
+                               if (ep_loop_check(ep, tf.file) != 0) {
+                                       clear_tfile_check_list();
+                                       goto error_tgt_fput;
+                               }
+                       } else
+                               list_add(&tf.file->f_tfile_llink,
+                                                       &tfile_check_list);
+                       mutex_lock_nested(&ep->mtx, 0);
+                       if (is_file_epoll(tf.file)) {
+                               tep = tf.file->private_data;
+                               mutex_lock_nested(&tep->mtx, 1);
                         }
-               } else
-                       list_add(&tfile->f_tfile_llink, &tfile_check_list);
+               }
         }
  
-       mutex_lock_nested(&ep->mtx, 0);
-
         /*
          * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
          * above, we can be sure to be able to use the item looked up by
          * ep_find() till we release the mutex.
          */
-       epi = ep_find(ep, tfile, fd);
+       epi = ep_find(ep, tf.file, fd);
  
         error = -EINVAL;
         switch (op) {
         case EPOLL_CTL_ADD:
                 if (!epi) {
                         epds.events |= POLLERR | POLLHUP;
-                       error = ep_insert(ep, &epds, tfile, fd);
+                       error = ep_insert(ep, &epds, tf.file, fd, full_check);
                 } else
                         error = -EEXIST;
-               clear_tfile_check_list();
+               if (full_check)
+                       clear_tfile_check_list();
                 break;
         case EPOLL_CTL_DEL:
                 if (epi)
@@ -1896,15 +1940,17 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                         error = -ENOENT;
                 break;
         }
+       if (tep != NULL)
+               mutex_unlock(&tep->mtx);
         mutex_unlock(&ep->mtx);
  
  error_tgt_fput:
-       if (did_lock_epmutex)
+       if (full_check)
                 mutex_unlock(&epmutex);
  
-       fput(tfile);
+       fdput(tf);
  error_fput:
-       fput(file);
+       fdput(f);
  error_return:
  
         return error;
@@ -1976,8 +2022,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
                         return -EINVAL;
                 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
                         return -EFAULT;
-               sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
-               sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+               sigsaved = current->blocked;
+               set_current_blocked(&ksigmask);
         }
  
         error = sys_epoll_wait(epfd, events, maxevents, timeout);
@@ -1994,7 +2040,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
                                sizeof(sigsaved));
                         set_restore_sigmask();
                 } else
-                       sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+                       set_current_blocked(&sigsaved);
         }
  
         return error;
@@ -2021,8 +2067,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
                 if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
                         return -EFAULT;
                 sigset_from_compat(&ksigmask, &csigmask);
-               sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
-               sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+               sigsaved = current->blocked;
+               set_current_blocked(&ksigmask);
         }
  
         err = sys_epoll_wait(epfd, events, maxevents, timeout);
@@ -2039,7 +2085,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
                                sizeof(sigsaved));
                         set_restore_sigmask();
                 } else
-                       sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+                       set_current_blocked(&sigsaved);
         }
  
         return err;