ceph: use d_invalidate() to invalidate aliases
[firefly-linux-kernel-4.4.55.git] / fs / ceph / addr.c
index 38b5c1bc6776ceeeab66b77bb683c3acb23716e0..6df8bd481425379006912990ee6f9461eaf3cf1b 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/osd_client.h>
 
 /*
@@ -70,15 +71,16 @@ static int ceph_set_page_dirty(struct page *page)
        struct address_space *mapping = page->mapping;
        struct inode *inode;
        struct ceph_inode_info *ci;
-       int undo = 0;
        struct ceph_snap_context *snapc;
+       int ret;
 
        if (unlikely(!mapping))
                return !TestSetPageDirty(page);
 
-       if (TestSetPageDirty(page)) {
+       if (PageDirty(page)) {
                dout("%p set_page_dirty %p idx %lu -- already dirty\n",
                     mapping->host, page, page->index);
+               BUG_ON(!PagePrivate(page));
                return 0;
        }
 
@@ -107,35 +109,19 @@ static int ceph_set_page_dirty(struct page *page)
             snapc, snapc->seq, snapc->num_snaps);
        spin_unlock(&ci->i_ceph_lock);
 
-       /* now adjust page */
-       spin_lock_irq(&mapping->tree_lock);
-       if (page->mapping) {    /* Race with truncate? */
-               WARN_ON_ONCE(!PageUptodate(page));
-               account_page_dirtied(page, page->mapping);
-               radix_tree_tag_set(&mapping->page_tree,
-                               page_index(page), PAGECACHE_TAG_DIRTY);
-
-               /*
-                * Reference snap context in page->private.  Also set
-                * PagePrivate so that we get invalidatepage callback.
-                */
-               page->private = (unsigned long)snapc;
-               SetPagePrivate(page);
-       } else {
-               dout("ANON set_page_dirty %p (raced truncate?)\n", page);
-               undo = 1;
-       }
-
-       spin_unlock_irq(&mapping->tree_lock);
-
-       if (undo)
-               /* whoops, we failed to dirty the page */
-               ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+       /*
+        * Reference snap context in page->private.  Also set
+        * PagePrivate so that we get invalidatepage callback.
+        */
+       BUG_ON(PagePrivate(page));
+       page->private = (unsigned long)snapc;
+       SetPagePrivate(page);
 
-       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+       ret = __set_page_dirty_nobuffers(page);
+       WARN_ON(!PageLocked(page));
+       WARN_ON(!page->mapping);
 
-       BUG_ON(!PageDirty(page));
-       return 1;
+       return ret;
 }
 
 /*
@@ -150,11 +136,19 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
        struct ceph_inode_info *ci;
        struct ceph_snap_context *snapc = page_snap_context(page);
 
-       BUG_ON(!PageLocked(page));
-       BUG_ON(!PagePrivate(page));
-       BUG_ON(!page->mapping);
-
        inode = page->mapping->host;
+       ci = ceph_inode(inode);
+
+       if (offset != 0 || length != PAGE_CACHE_SIZE) {
+               dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
+                    inode, page, page->index, offset, length);
+               return;
+       }
+
+       ceph_invalidate_fscache_page(inode, page);
+
+       if (!PagePrivate(page))
+               return;
 
        /*
         * We can get non-dirty pages here due to races between
@@ -164,31 +158,28 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
        if (!PageDirty(page))
                pr_err("%p invalidatepage %p page not dirty\n", inode, page);
 
-       if (offset == 0 && length == PAGE_CACHE_SIZE)
-               ClearPageChecked(page);
+       ClearPageChecked(page);
 
-       ci = ceph_inode(inode);
-       if (offset == 0 && length == PAGE_CACHE_SIZE) {
-               dout("%p invalidatepage %p idx %lu full dirty page\n",
-                    inode, page, page->index);
-               ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-               ceph_put_snap_context(snapc);
-               page->private = 0;
-               ClearPagePrivate(page);
-       } else {
-               dout("%p invalidatepage %p idx %lu partial dirty page %u(%u)\n",
-                    inode, page, page->index, offset, length);
-       }
+       dout("%p invalidatepage %p idx %lu full dirty page\n",
+            inode, page, page->index);
+
+       ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+       ceph_put_snap_context(snapc);
+       page->private = 0;
+       ClearPagePrivate(page);
 }
 
-/* just a sanity check */
 static int ceph_releasepage(struct page *page, gfp_t g)
 {
        struct inode *inode = page->mapping ? page->mapping->host : NULL;
        dout("%p releasepage %p idx %lu\n", inode, page, page->index);
        WARN_ON(PageDirty(page));
-       WARN_ON(PagePrivate(page));
-       return 0;
+
+       /* Ask fscache first: a zero return from it means the page must be kept. */
+       if (!ceph_release_fscache_page(page, g))
+               return 0;
+
+       return !PagePrivate(page);      /* 1 only when PagePrivate is clear */
 }
 
 /*
@@ -198,11 +189,16 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 {
        struct inode *inode = file_inode(filp);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc = 
+       struct ceph_osd_client *osdc =
                &ceph_inode_to_client(inode)->client->osdc;
        int err = 0;
        u64 len = PAGE_CACHE_SIZE;
 
+       err = ceph_readpage_from_fscache(inode, page);
+
+       if (err == 0)
+               goto out;
+
        dout("readpage inode %p file %p page %p index %lu\n",
             inode, filp, page, page->index);
        err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
@@ -220,6 +216,9 @@ static int readpage_nounlock(struct file *filp, struct page *page)
        }
        SetPageUptodate(page);
 
+       if (err >= 0)   /* err > 0 is also success here; only errors skip the cache */
+               ceph_readpage_to_fscache(inode, page);
+
 out:
        return err < 0 ? err : 0;
 }
@@ -262,6 +261,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
                     page->index);
                flush_dcache_page(page);
                SetPageUptodate(page);
+               ceph_readpage_to_fscache(inode, page);
                unlock_page(page);
                page_cache_release(page);
                bytes -= PAGE_CACHE_SIZE;
@@ -331,11 +331,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
                page = list_entry(page_list->prev, struct page, lru);
                BUG_ON(PageLocked(page));
                list_del(&page->lru);
-               
+
                dout("start_read %p adding %p idx %lu\n", inode, page,
                     page->index);
                if (add_to_page_cache_lru(page, &inode->i_data, page->index,
                                          GFP_NOFS)) {
+                       ceph_fscache_uncache_page(inode, page);
                        page_cache_release(page);
                        dout("start_read %p add_to_page_cache failed %p\n",
                             inode, page);
@@ -378,6 +379,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
        int rc = 0;
        int max = 0;
 
+       rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
+                                        &nr_pages);
+
+       if (rc == 0)
+               goto out;
+
        if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
                max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
                        >> PAGE_SHIFT;
@@ -392,6 +399,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
                BUG_ON(rc == 0);
        }
 out:
+       ceph_fscache_readpages_cancel(inode, page_list);
+
        dout("readpages %p file %p ret %d\n", inode, file, rc);
        return rc;
 }
@@ -439,13 +448,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        struct ceph_inode_info *ci;
        struct ceph_fs_client *fsc;
        struct ceph_osd_client *osdc;
-       loff_t page_off = page_offset(page);
-       int len = PAGE_CACHE_SIZE;
-       loff_t i_size;
-       int err = 0;
        struct ceph_snap_context *snapc, *oldest;
-       u64 snap_size = 0;
+       loff_t page_off = page_offset(page);
        long writeback_stat;
+       u64 truncate_size, snap_size = 0;
+       u32 truncate_seq;
+       int err = 0, len = PAGE_CACHE_SIZE;
 
        dout("writepage %p idx %lu\n", page, page->index);
 
@@ -475,13 +483,20 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        }
        ceph_put_snap_context(oldest);
 
+       spin_lock(&ci->i_ceph_lock);
+       truncate_seq = ci->i_truncate_seq;
+       truncate_size = ci->i_truncate_size;
+       if (!snap_size)
+               snap_size = i_size_read(inode);
+       spin_unlock(&ci->i_ceph_lock);
+
        /* is this a partial page at end of file? */
-       if (snap_size)
-               i_size = snap_size;
-       else
-               i_size = i_size_read(inode);
-       if (i_size < page_off + len)
-               len = i_size - page_off;
+       if (page_off >= snap_size) {
+               dout("%p page eof %llu\n", page, snap_size);
+               goto out;
+       }
+       if (snap_size < page_off + len)
+               len = snap_size - page_off;
 
        dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
             inode, page, page->index, page_off, len, snapc);
@@ -491,11 +506,13 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
            CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
                set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
 
+       ceph_readpage_to_fscache(inode, page);
+
        set_page_writeback(page);
        err = ceph_osdc_writepages(osdc, ceph_vino(inode),
                                   &ci->i_layout, snapc,
                                   page_off, len,
-                                  ci->i_truncate_seq, ci->i_truncate_size,
+                                  truncate_seq, truncate_size,
                                   &inode->i_mtime, &page, 1);
        if (err < 0) {
                dout("writepage setting page/mapping error %d %p\n", err, page);
@@ -546,7 +563,6 @@ static void ceph_release_pages(struct page **pages, int num)
        pagevec_release(&pvec);
 }
 
-
 /*
  * async writeback completion handler.
  *
@@ -632,25 +648,6 @@ static void writepages_finish(struct ceph_osd_request *req,
        ceph_osdc_put_request(req);
 }
 
-static struct ceph_osd_request *
-ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len,
-                               struct ceph_snap_context *snapc, int num_ops)
-{
-       struct ceph_fs_client *fsc;
-       struct ceph_inode_info *ci;
-       struct ceph_vino vino;
-
-       fsc = ceph_inode_to_client(inode);
-       ci = ceph_inode(inode);
-       vino = ceph_vino(inode);
-       /* BUG_ON(vino.snap != CEPH_NOSNAP); */
-
-       return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-                       vino, offset, len, num_ops, CEPH_OSD_OP_WRITE,
-                       CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK,
-                       snapc, ci->i_truncate_seq, ci->i_truncate_size, true);
-}
-
 /*
  * initiate async writeback
  */
@@ -659,7 +656,8 @@ static int ceph_writepages_start(struct address_space *mapping,
 {
        struct inode *inode = mapping->host;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_fs_client *fsc;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_vino vino = ceph_vino(inode);
        pgoff_t index, start, end;
        int range_whole = 0;
        int should_loop = 1;
@@ -671,22 +669,22 @@ static int ceph_writepages_start(struct address_space *mapping,
        unsigned wsize = 1 << inode->i_blkbits;
        struct ceph_osd_request *req = NULL;
-       int do_sync;
-       u64 snap_size;
+       int do_sync = 0;        /* stays 0 unless the check below requests a sync op */
+       u64 truncate_size, snap_size;
+       u32 truncate_seq;
 
        /*
         * Include a 'sync' in the OSD request if this is a data
         * integrity write (e.g., O_SYNC write or fsync()), or if our
         * cap is being revoked.
         */
-       do_sync = wbc->sync_mode == WB_SYNC_ALL;
-       if (ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER))
+       if ((wbc->sync_mode == WB_SYNC_ALL) ||
+               ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER))
                do_sync = 1;
        dout("writepages_start %p dosync=%d (mode=%s)\n",
             inode, do_sync,
             wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
             (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-       fsc = ceph_inode_to_client(inode);
        if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
                pr_warning("writepage_start %p on forced umount\n", inode);
                return -EIO; /* we're in a forced umount, don't write! */
@@ -729,6 +727,14 @@ retry:
                snap_size = i_size_read(inode);
        dout(" oldest snapc is %p seq %lld (%d snaps)\n",
             snapc, snapc->seq, snapc->num_snaps);
+
+       spin_lock(&ci->i_ceph_lock);
+       truncate_seq = ci->i_truncate_seq;
+       truncate_size = ci->i_truncate_size;
+       if (!snap_size)
+               snap_size = i_size_read(inode);
+       spin_unlock(&ci->i_ceph_lock);
+
        if (last_snapc && snapc != last_snapc) {
                /* if we switched to a newer snapc, restart our scan at the
                 * start of the original file range. */
@@ -740,7 +746,6 @@ retry:
 
        while (!done && index <= end) {
                int num_ops = do_sync ? 2 : 1;
-               struct ceph_vino vino;
                unsigned i;
                int first;
                pgoff_t next;
@@ -834,17 +839,18 @@ get_more_pages:
                         * that it will use.
                         */
                        if (locked_pages == 0) {
-                               size_t size;
-
                                BUG_ON(pages);
-
                                /* prepare async write request */
                                offset = (u64)page_offset(page);
                                len = wsize;
-                               req = ceph_writepages_osd_request(inode,
-                                                       offset, &len, snapc,
-                                                       num_ops);
-
+                               req = ceph_osdc_new_request(&fsc->client->osdc,
+                                                       &ci->i_layout, vino,
+                                                       offset, &len, num_ops,
+                                                       CEPH_OSD_OP_WRITE,
+                                                       CEPH_OSD_FLAG_WRITE |
+                                                       CEPH_OSD_FLAG_ONDISK,
+                                                       snapc, truncate_seq,
+                                                       truncate_size, true);
                                if (IS_ERR(req)) {
                                        rc = PTR_ERR(req);
                                        unlock_page(page);
@@ -855,8 +861,8 @@ get_more_pages:
                                req->r_inode = inode;
 
                                max_pages = calc_pages_for(0, (u64)len);
-                               size = max_pages * sizeof (*pages);
-                               pages = kmalloc(size, GFP_NOFS);
+                               pages = kmalloc(max_pages * sizeof (*pages),
+                                               GFP_NOFS);
                                if (!pages) {
                                        pool = fsc->wb_pagevec_pool;
                                        pages = mempool_alloc(pool, GFP_NOFS);