/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static kmem_cache_t *fuse_req_cachep;
static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}
static void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}
struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}
void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}
static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}
static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}
static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}
/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}
static void fuse_req_init_context(struct fuse_req *req)
{
	req->in.h.uid = current->fsuid;
	req->in.h.gid = current->fsgid;
	req->in.h.pid = current->pid;
}
struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
	struct fuse_req *req;
	sigset_t oldset;
	int intr;
	int err;

	atomic_inc(&fc->num_waiting);
	block_sigs(&oldset);
	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
	restore_sigs(&oldset);
	err = -EINTR;
	if (intr)
		goto out;

	err = -ENOTCONN;
	if (!fc->connected)
		goto out;

	req = fuse_request_alloc();
	err = -ENOMEM;
	if (!req)
		goto out;

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;

 out:
	atomic_dec(&fc->num_waiting);
	return ERR_PTR(err);
}
/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
					 struct file *file)
{
	struct fuse_req *req = NULL;
	struct fuse_file *ff = file->private_data;

	do {
		wait_event(fc->blocked_waitq, ff->reserved_req);
		spin_lock(&fc->lock);
		if (ff->reserved_req) {
			req = ff->reserved_req;
			ff->reserved_req = NULL;
			get_file(file);
			req->stolen_file = file;
		}
		spin_unlock(&fc->lock);
	} while (!req);

	return req;
}
/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct file *file = req->stolen_file;
	struct fuse_file *ff = file->private_data;

	spin_lock(&fc->lock);
	fuse_request_init(req);
	BUG_ON(ff->reserved_req);
	ff->reserved_req = req;
	wake_up(&fc->blocked_waitq);
	spin_unlock(&fc->lock);
	fput(file);
}
/*
 * Gets a request for a file operation, always succeeds
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
	struct fuse_req *req;

	atomic_inc(&fc->num_waiting);
	wait_event(fc->blocked_waitq, !fc->blocked);
	req = fuse_request_alloc();
	if (!req)
		req = get_reserved_req(fc, file);

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;
}
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		if (req->waiting)
			atomic_dec(&fc->num_waiting);

		if (req->stolen_file)
			put_reserved_req(fc, req);
		else
			fuse_request_free(req);
	}
}
/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
	req->end = NULL;
	list_del(&req->list);
	req->state = FUSE_REQ_FINISHED;
	if (req->background) {
		if (fc->num_background == FUSE_MAX_BACKGROUND) {
			fc->blocked = 0;
			wake_up_all(&fc->blocked_waitq);
		}
		fc->num_background--;
	}
	spin_unlock(&fc->lock);
	dput(req->dentry);
	mntput(req->vfsmount);
	if (req->file)
		fput(req->file);
	wake_up(&req->waitq);
	if (end)
		end(fc, req);
	else
		fuse_put_request(fc, req);
}
/* Called with fc->lock held.  Releases, and then reacquires it. */
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
{
	sigset_t oldset;

	spin_unlock(&fc->lock);
	if (req->force)
		wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
	else {
		block_sigs(&oldset);
		wait_event_interruptible(req->waitq,
					 req->state == FUSE_REQ_FINISHED);
		restore_sigs(&oldset);
	}
	spin_lock(&fc->lock);
	if (req->state == FUSE_REQ_FINISHED && !req->aborted)
		return;

	if (!req->aborted) {
		req->out.h.error = -EINTR;
		req->aborted = 1;
	}
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g. page fault), since that could lead
		   to deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
	if (req->state == FUSE_REQ_PENDING) {
		list_del(&req->list);
		__fuse_put_request(req);
	} else if (req->state == FUSE_REQ_SENT) {
		spin_unlock(&fc->lock);
		wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
		spin_lock(&fc->lock);
	}
}
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}
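/*
 * Worked example (illustrative, not normative): a LOOKUP request
 * carrying the single argument "foo" (four bytes including the
 * terminating NUL) gets
 *
 *	in.h.len = sizeof(struct fuse_in_header) + len_args(1, args)
 *		 = 40 + 4 = 44 bytes
 *
 * assuming the 40-byte fuse_in_header of this protocol generation;
 * queue_request() below stores exactly this value in the header, and
 * userspace must supply a read buffer at least that large.
 */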
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;
	req->in.h.unique = fc->reqctr;
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	if (!req->waiting) {
		req->waiting = 1;
		atomic_inc(&fc->num_waiting);
	}
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
/*
 * This can only be interrupted by a SIGKILL
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
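/*
 * Typical caller pattern, as a hedged sketch (FUSE_STATFS and struct
 * fuse_statfs_out come from the FUSE protocol headers; this is not a
 * verbatim copy of any caller in fs/fuse):
 *
 *	struct fuse_statfs_out outarg;
 *	struct fuse_req *req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *
 *	memset(&outarg, 0, sizeof(outarg));
 *	req->in.h.opcode = FUSE_STATFS;
 *	req->in.h.nodeid = get_node_id(inode);
 *	req->out.numargs = 1;
 *	req->out.args[0].size = sizeof(outarg);
 *	req->out.args[0].value = &outarg;
 *	request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */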
static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		req->background = 1;
		fc->num_background++;
		if (fc->num_background == FUSE_MAX_BACKGROUND)
			fc->blocked = 1;

		queue_request(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}
void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 0;
	request_send_nowait(fc, req);
}
void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	request_send_nowait(fc, req);
}
/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->aborted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}
/*
 * Unlock request.  If it was aborted during being locked, the
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->aborted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}
struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	void *mapaddr;
	void *buf;
	unsigned len;
};
static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write, struct fuse_req *req,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->req = req;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}
/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->mapaddr) {
		kunmap_atomic(cs->mapaddr, KM_USER0);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}
/*
 * Get another pagefull of userspace buffer, and map it to kernel
 * address space, and lock request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (!cs->seglen) {
		BUG_ON(!cs->nr_segs);
		cs->seglen = cs->iov[0].iov_len;
		cs->addr = (unsigned long) cs->iov[0].iov_base;
		cs->iov++;
		cs->nr_segs--;
	}
	down_read(&current->mm->mmap_sem);
	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
			     &cs->pg, NULL);
	up_read(&current->mm->mmap_sem);
	if (err < 0)
		return err;
	BUG_ON(err != 1);
	offset = cs->addr % PAGE_SIZE;
	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
	cs->buf = cs->mapaddr + offset;
	cs->len = min(PAGE_SIZE - offset, cs->seglen);
	cs->seglen -= cs->len;
	cs->addr += cs->len;

	return lock_request(cs->fc, cs->req);
}
/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}
/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
			  unsigned offset, unsigned count, int zeroing)
{
	if (page && zeroing && count < PAGE_SIZE) {
		void *mapaddr = kmap_atomic(page, KM_USER1);
		memset(mapaddr, 0, PAGE_SIZE);
		kunmap_atomic(mapaddr, KM_USER1);
	}
	while (count) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		if (page) {
			void *mapaddr = kmap_atomic(page, KM_USER1);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr, KM_USER1);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}
/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		struct page *page = req->pages[i];
		int err = fuse_copy_page(cs, page, offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}
/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}
/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}
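/*
 * Illustrative layout (derived from the helpers above, not a formal
 * spec): for a READ reply the data lives in req->pages, so the last
 * argument is treated specially:
 *
 *	args[0] .. args[numargs-2]	flat structs via fuse_copy_one()
 *	args[numargs-1]			page data via fuse_copy_pages()
 *					when argpages is set
 *
 * 'zeroing' makes a short reply appear zero-filled to the reader.
 */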
/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && list_empty(&fc->pending)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}
/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to userspace buffer.  If
 * no reply is needed (FORGET) or request has been aborted or there
 * was an error during the copying then it's finished by calling
 * request_end().  Otherwise add it to the processing list, and set
 * the 'sent' flag.
 */
static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
			      unsigned long nr_segs, loff_t *off)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	struct fuse_copy_state cs;
	unsigned reqsize;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    list_empty(&fc->pending))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (list_empty(&fc->pending))
		goto err_unlock;

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (iov_length(iov, nr_segs) < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err && req->aborted)
		err = -ENOENT;
	if (err) {
		if (!req->aborted)
			req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}
static ssize_t fuse_dev_read(struct file *file, char __user *buf,
			     size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = buf;
	return fuse_dev_readv(file, &iov, 1, off);
}
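/*
 * Userspace view (hedged sketch, not part of this driver): the
 * filesystem daemon loops reading whole requests from the device.
 * BUFSIZE is an assumed daemon-side constant; it must cover in->len
 * for every opcode or the read above fails the request:
 *
 *	char buf[BUFSIZE];
 *	ssize_t n = read(fuse_fd, buf, sizeof(buf));
 *	struct fuse_in_header *in = (struct fuse_in_header *) buf;
 *	// on success n == in->len; dispatch on in->opcode
 */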
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique)
			return req;
	}
	return NULL;
}
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}
/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling request_end()
 */
static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
			       unsigned long nr_segs, loff_t *off)
{
	int err;
	unsigned nbytes = iov_length(iov, nr_segs);
	struct fuse_req *req;
	struct fuse_out_header oh;
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(&cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;
	err = -EINVAL;
	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
	    oh.len != nbytes)
		goto err_finish;

	spin_lock(&fc->lock);
	err = -ENOENT;
	if (!fc->connected)
		goto err_unlock;

	req = request_find(fc, oh.unique);
	if (!req)
		goto err_unlock;

	if (req->aborted) {
		spin_unlock(&fc->lock);
		fuse_copy_finish(&cs);
		spin_lock(&fc->lock);
		request_end(fc, req);
		return -ENOENT;
	}
	list_move(&req->list, &fc->io);
	req->out.h = oh;
	req->locked = 1;
	cs.req = req;
	spin_unlock(&fc->lock);

	err = copy_out_args(&cs, &req->out, nbytes);
	fuse_copy_finish(&cs);

	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err) {
		if (req->aborted)
			err = -ENOENT;
	} else if (!req->aborted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fc->lock);
 err_finish:
	fuse_copy_finish(&cs);
	return err;
}
static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
			      size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = (char __user *) buf;
	return fuse_dev_writev(file, &iov, 1, off);
}
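/*
 * Userspace view of a reply (hedged sketch): the daemon echoes the
 * request's 'unique' in a fuse_out_header and writes header plus
 * argument data in one gathering write:
 *
 *	struct fuse_out_header oh = {
 *		.len	= sizeof(oh) + argsize,
 *		.error	= 0,
 *		.unique	= in->unique,
 *	};
 *	struct iovec iov[2] = {
 *		{ .iov_base = &oh,    .iov_len = sizeof(oh) },
 *		{ .iov_base = argbuf, .iov_len = argsize },
 *	};
 *	writev(fuse_fd, iov, 2);
 *
 * oh.len must equal the number of bytes written, or the checks above
 * reject the reply with EINVAL.
 */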
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	unsigned mask = POLLOUT | POLLWRNORM;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fc->lock);
	if (!fc->connected)
		mask = POLLERR;
	else if (!list_empty(&fc->pending))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fc->lock);

	return mask;
}
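/*
 * The device is poll(2)-capable, so a daemon can multiplex it with
 * other descriptors (hedged userspace sketch):
 *
 *	struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		n = read(fuse_fd, buf, sizeof(buf));
 *
 * POLLERR here means the connection was aborted or released.
 */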
/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fc->lock);
	}
}
/*
 * Abort requests under I/O
 *
 * The requests are set to aborted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
{
	while (!list_empty(&fc->io)) {
		struct fuse_req *req =
			list_entry(fc->io.next, struct fuse_req, list);
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

		req->aborted = 1;
		req->out.h.error = -ECONNABORTED;
		req->state = FUSE_REQ_FINISHED;
		list_del_init(&req->list);
		wake_up(&req->waitq);
		if (end) {
			req->end = NULL;
			/* The end function will consume this reference */
			__fuse_get_request(req);
			spin_unlock(&fc->lock);
			wait_event(req->waitq, !req->locked);
			end(fc, req);
			spin_lock(&fc->lock);
		}
	}
}
/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->aborted flag being true for these requests.
 * For this reason requests on the io list must be aborted first.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->connected = 0;
		fc->blocked = 0;
		end_io_requests(fc);
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		wake_up_all(&fc->waitq);
		wake_up_all(&fc->blocked_waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	}
	spin_unlock(&fc->lock);
}
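/*
 * As described in Documentation/filesystems/fuse.txt, this is
 * normally reached from userspace through the connection's sysfs
 * abort attribute (the exact path is version-dependent), roughly:
 *
 *	echo 1 > /sys/fs/fuse/connections/<id>/abort
 */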
static int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (fc) {
		spin_lock(&fc->lock);
		fc->connected = 0;
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		spin_unlock(&fc->lock);
		fasync_helper(-1, file, 0, &fc->fasync);
		kobject_put(&fc->kobj);
	}

	return 0;
}
static int fuse_dev_fasync(int fd, struct file *file, int on)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	/* No locking - fasync_helper does its own locking */
	return fasync_helper(fd, file, on, &fc->fasync);
}
const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= fuse_dev_read,
	.readv		= fuse_dev_readv,
	.write		= fuse_dev_write,
	.writev		= fuse_dev_writev,
	.poll		= fuse_dev_poll,
	.release	= fuse_dev_release,
	.fasync		= fuse_dev_fasync,
};
static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",
	.fops = &fuse_dev_operations,
};
int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}
void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}