epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()
authorOleg Nesterov <oleg@redhat.com>
Fri, 24 Feb 2012 19:07:11 +0000 (20:07 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 1 Mar 2012 00:34:34 +0000 (16:34 -0800)
commit d80e731ecab420ddcb79ee9d0ac427acbc187b4b upstream.

This patch is intentionally incomplete to simplify the review.
It ignores ep_unregister_pollwait() which plays with the same wqh.
See the next change.

epoll assumes that the EPOLL_CTL_ADD'ed file controls everything
f_op->poll() needs. In particular it assumes that the wait queue
can't go away until eventpoll_release(). This is not true in case
of signalfd, the task which does EPOLL_CTL_ADD uses its ->sighand
which is not connected to the file.

This patch adds the special event, POLLFREE, currently only for
epoll. It expects that init_poll_funcptr()'ed hook should do the
necessary cleanup. Perhaps it should be defined as EPOLLFREE in
eventpoll.

__cleanup_sighand() is changed to do wake_up_poll(POLLFREE) if
->signalfd_wqh is not empty, we add the new signalfd_cleanup()
helper.

ep_poll_callback(POLLFREE) simply does list_del_init(task_list).
This make this poll entry inconsistent, but we don't care. If you
share epoll fd which contains our sigfd with another process you
should blame yourself. signalfd is "really special". I simply do
not know how we can define the "right" semantics if it used with
epoll.

The main problem is, epoll calls signalfd_poll() once to establish
the connection with the wait queue, after that signalfd_poll(NULL)
returns the different/inconsistent results depending on who does
EPOLL_CTL_MOD/signalfd_read/etc. IOW: apart from sigmask, signalfd
has nothing to do with the file, it works with the current thread.

In short: this patch is the hack which tries to fix the symptoms.
It also assumes that nobody can take tasklist_lock under epoll
locks, this seems to be true.

Note:

- we do not have wake_up_all_poll() but wake_up_poll()
  is fine, poll/epoll doesn't use WQ_FLAG_EXCLUSIVE.

- signalfd_cleanup() uses POLLHUP along with POLLFREE,
  we need a couple of simple changes in eventpoll.c to
  make sure it can't be "lost".

Reported-by: Maxime Bizon <mbizon@freebox.fr>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/eventpoll.c
fs/signalfd.c
include/asm-generic/poll.h
include/linux/signalfd.h
kernel/fork.c

index 2acaf60f4e4da47cc07e8a09d09cae82dde0a23e..2ce761cf92032170607350db0b67dd348ca71bfd 100644 (file)
@@ -827,6 +827,10 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
        struct epitem *epi = ep_item_from_wait(wait);
        struct eventpoll *ep = epi->ep;
 
+       /* the caller holds eppoll_entry->whead->lock */
+       if ((unsigned long)key & POLLFREE)
+               list_del_init(&wait->task_list);
+
        spin_lock_irqsave(&ep->lock, flags);
 
        /*
index 492465b451ddd34f6f60214fb48224687649faa9..79c1eea98a3a057dc2152db68921ecb1774a58ac 100644 (file)
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
 
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+       wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+
+       if (likely(!waitqueue_active(wqh)))
+               return;
+
+       /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+       wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
 struct signalfd_ctx {
        sigset_t sigmask;
 };
index 44bce836d350a651284b325adc220d0ead84da58..9ce7f44aebd2ab64a91b2f21b436a3377e54f045 100644 (file)
@@ -28,6 +28,8 @@
 #define POLLRDHUP       0x2000
 #endif
 
+#define POLLFREE       0x4000  /* currently only for epoll */
+
 struct pollfd {
        int fd;
        short events;
index 3ff4961da9b514992cf0edabf07c1b08d2121027..247399b2979a9a331c976e9f413e4dd122ef7711 100644 (file)
@@ -61,13 +61,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig)
                wake_up(&tsk->sighand->signalfd_wqh);
 }
 
+extern void signalfd_cleanup(struct sighand_struct *sighand);
+
 #else /* CONFIG_SIGNALFD */
 
 static inline void signalfd_notify(struct task_struct *tsk, int sig) { }
 
+static inline void signalfd_cleanup(struct sighand_struct *sighand) { }
+
 #endif /* CONFIG_SIGNALFD */
 
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SIGNALFD_H */
-
index 0276c30401a081132d1e4fadac663f3afdf292c0..a4e453b14f7076cfbf5dd9c736cf8bccd921b440 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
+#include <linux/signalfd.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -917,8 +918,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-       if (atomic_dec_and_test(&sighand->count))
+       if (atomic_dec_and_test(&sighand->count)) {
+               signalfd_cleanup(sighand);
                kmem_cache_free(sighand_cachep, sighand);
+       }
 }