ceph: refactor ceph_write_begin, fix ceph_page_mkwrite
author Yehuda Sadeh <yehuda@hq.newdream.net>
Tue, 9 Feb 2010 19:02:51 +0000 (11:02 -0800)
committer Sage Weil <sage@newdream.net>
Thu, 11 Feb 2010 19:48:50 +0000 (11:48 -0800)
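Originally ceph_page_mkwrite called ceph_write_begin, hoping that
the returned locked page would be the page that it was requested
to mkwrite. Factor the relevant part of ceph_write_begin out into
ceph_update_writeable_page, and have ceph_page_mkwrite lock the
faulting page itself and call that helper, so that we always
operate on the right page.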

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
fs/ceph/addr.c

index 92f482150742d88dd416c259421ec82ad1e25a14..89c5ff3b59d5c8485da90b3255ff4def75d3b3c3 100644 (file)
@@ -907,15 +907,13 @@ static int context_is_writeable_or_written(struct inode *inode,
  * We are only allowed to write into/dirty the page if the page is
  * clean, or already dirty within the same snap context.
  */
-static int ceph_write_begin(struct file *file, struct address_space *mapping,
-                           loff_t pos, unsigned len, unsigned flags,
-                           struct page **pagep, void **fsdata)
+static int ceph_update_writeable_page(struct file *file,
+                           loff_t pos, unsigned len,
+                           struct page *page)
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
-       struct page *page;
-       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        loff_t page_off = pos & PAGE_CACHE_MASK;
        int pos_in_page = pos & ~PAGE_CACHE_MASK;
        int end_in_page = pos_in_page + len;
@@ -923,16 +921,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
        struct ceph_snap_context *snapc;
        int r;
 
-       /* get a page*/
-retry:
-       page = grab_cache_page_write_begin(mapping, index, 0);
-       if (!page)
-               return -ENOMEM;
-       *pagep = page;
-
-       dout("write_begin file %p inode %p page %p %d~%d\n", file,
-            inode, page, (int)pos, (int)len);
-
 retry_locked:
        /* writepages currently holds page lock, but if we change that later, */
        wait_on_page_writeback(page);
@@ -964,7 +952,7 @@ retry_locked:
                        wait_event_interruptible(ci->i_cap_wq,
                               context_is_writeable_or_written(inode, snapc));
                        ceph_put_snap_context(snapc);
-                       goto retry;
+                       return -EAGAIN;
                }
 
                /* yay, writeable, do it now (without dropping page lock) */
@@ -1021,6 +1009,35 @@ fail_nosnap:
        return r;
 }
 
+/*
+ * Grab the page cache page covering pos and make it writeable via
+ * ceph_update_writeable_page(), grabbing the page again and retrying
+ * for as long as that helper returns -EAGAIN.
+ *
+ * We are only allowed to write into/dirty the page if the page is
+ * clean, or already dirty within the same snap context.
+ *
+ * Returns -ENOMEM if no page could be obtained; otherwise the result
+ * of ceph_update_writeable_page().  *pagep is set to the page most
+ * recently grabbed.
+ */
+static int ceph_write_begin(struct file *file, struct address_space *mapping,
+                           loff_t pos, unsigned len, unsigned flags,
+                           struct page **pagep, void **fsdata)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       struct page *page;
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+       int r;
+
+       do {
+               /* get the page cache page at the index that covers pos */
+               page = grab_cache_page_write_begin(mapping, index, 0);
+               if (!page)
+                       return -ENOMEM;
+               *pagep = page;
+
+               dout("write_begin file %p inode %p page %p %d~%d\n", file,
+               inode, page, (int)pos, (int)len);
+
+               r = ceph_update_writeable_page(file, pos, len, page);
+       } while (r == -EAGAIN);
+
+       return r;
+}
+
 /*
  * we don't do anything in here that simple_write_end doesn't do
  * except adjust dirty page accounting and drop read lock on
@@ -1104,8 +1121,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
        loff_t off = page->index << PAGE_CACHE_SHIFT;
        loff_t size, len;
-       struct page *locked_page = NULL;
-       void *fsdata = NULL;
        int ret;
 
        size = i_size_read(inode);
@@ -1116,23 +1131,30 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode,
             off, len, page, page->index);
-       ret = ceph_write_begin(vma->vm_file, inode->i_mapping, off, len, 0,
-                              &locked_page, &fsdata);
-       WARN_ON(page != locked_page);
-       if (!ret) {
-               /*
-                * doing the following, instead of calling
-                * ceph_write_end. Note that we keep the
-                * page locked
-                */
+
+       lock_page(page);
+
+       ret = VM_FAULT_NOPAGE;
+       if ((off > size) ||
+           (page->mapping != inode->i_mapping))
+               goto out;
+
+       ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
+       if (ret == 0) {
+               /* success.  we'll keep the page locked. */
                set_page_dirty(page);
                up_read(&mdsc->snap_rwsem);
-               page_cache_release(page);
                ret = VM_FAULT_LOCKED;
        } else {
-               ret = VM_FAULT_SIGBUS;
+               if (ret == -ENOMEM)
+                       ret = VM_FAULT_OOM;
+               else
+                       ret = VM_FAULT_SIGBUS;
        }
+out:
        dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret);
+       if (ret != VM_FAULT_LOCKED)
+               unlock_page(page);
        return ret;
 }