From 4af6b2257ee0eb8f4bf3b1dc8acb643c0e8a887f Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 9 Feb 2010 11:02:51 -0800 Subject: [PATCH] ceph: refactor ceph_write_begin, fix ceph_page_mkwrite Originally ceph_page_mkwrite called ceph_write_begin, hoping that the returned locked page would be the page that it was requested to mkwrite. Factored out relevant part of ceph_page_mkwrite and we lock the right page anyway. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/addr.c | 80 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 92f482150742..89c5ff3b59d5 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -907,15 +907,13 @@ static int context_is_writeable_or_written(struct inode *inode, * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. */ -static int ceph_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +static int ceph_update_writeable_page(struct file *file, + loff_t pos, unsigned len, + struct page *page) { struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; - struct page *page; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; loff_t page_off = pos & PAGE_CACHE_MASK; int pos_in_page = pos & ~PAGE_CACHE_MASK; int end_in_page = pos_in_page + len; @@ -923,16 +921,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct ceph_snap_context *snapc; int r; - /* get a page*/ -retry: - page = grab_cache_page_write_begin(mapping, index, 0); - if (!page) - return -ENOMEM; - *pagep = page; - - dout("write_begin file %p inode %p page %p %d~%d\n", file, - inode, page, (int)pos, (int)len); - retry_locked: /* writepages currently holds page lock, but if we change that later, */ wait_on_page_writeback(page); @@ -964,7 +952,7 @@ retry_locked: wait_event_interruptible(ci->i_cap_wq, context_is_writeable_or_written(inode, snapc)); ceph_put_snap_context(snapc); - goto retry; + return -EAGAIN; } /* yay, writeable, do it now (without dropping page lock) */ @@ -1021,6 +1009,35 @@ fail_nosnap: return r; } +/* + * We are only allowed to write into/dirty the page if the page is + * clean, or already dirty within the same snap context. + */ +static int ceph_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = file->f_dentry->d_inode; + struct page *page; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + int r; + + do { + /* get a page*/ + page = grab_cache_page_write_begin(mapping, index, 0); + if (!page) + return -ENOMEM; + *pagep = page; + + dout("write_begin file %p inode %p page %p %d~%d\n", file, + inode, page, (int)pos, (int)len); + + r = ceph_update_writeable_page(file, pos, len, page); + } while (r == -EAGAIN); + + return r; +} + /* * we don't do anything in here that simple_write_end doesn't do * except adjust dirty page accounting and drop read lock on @@ -1104,8 +1121,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; loff_t off = page->index << PAGE_CACHE_SHIFT; loff_t size, len; - struct page *locked_page = NULL; - void *fsdata = NULL; int ret; size = i_size_read(inode); @@ -1116,23 +1131,30 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode, off, len, page, page->index); - ret = ceph_write_begin(vma->vm_file, inode->i_mapping, off, len, 0, - &locked_page, &fsdata); - WARN_ON(page != locked_page); - if (!ret) { - /* - * doing the following, instead of calling - * ceph_write_end. Note that we keep the - * page locked - */ + + lock_page(page); + + ret = VM_FAULT_NOPAGE; + if ((off > size) || + (page->mapping != inode->i_mapping)) + goto out; + + ret = ceph_update_writeable_page(vma->vm_file, off, len, page); + if (ret == 0) { + /* success. we'll keep the page locked. */ set_page_dirty(page); up_read(&mdsc->snap_rwsem); - page_cache_release(page); ret = VM_FAULT_LOCKED; } else { - ret = VM_FAULT_SIGBUS; + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else + ret = VM_FAULT_SIGBUS; } +out: dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret); + if (ret != VM_FAULT_LOCKED) + unlock_page(page); return ret; } -- 2.34.1