ocfs2: always unmap in ocfs2_data_convert_worker()
authorMark Fasheh <mark.fasheh@oracle.com>
Mon, 11 Dec 2006 19:06:36 +0000 (11:06 -0800)
committerMark Fasheh <mark.fasheh@oracle.com>
Fri, 29 Dec 2006 00:38:59 +0000 (16:38 -0800)
Mmap-heavy clustered workloads were sometimes finding stale data on mmap
reads. The solution is to call unmap_mapping_range() on any down convert of
a data lock.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
fs/ocfs2/dlmglue.c

index e6220137bf691600be76ac97d86868347269d4d1..e335541727f9bad9130859678b21693f7e25e067 100644 (file)
@@ -2718,6 +2718,15 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
                inode = ocfs2_lock_res_inode(lockres);
        mapping = inode->i_mapping;
 
+       /*
+        * We need this before the filemap_fdatawrite() so that it can
+        * transfer the dirty bit from the PTE to the
+        * page. Unfortunately this means that even for EX->PR
+        * downconverts, we'll lose our mappings and have to build
+        * them up again.
+        */
+       unmap_mapping_range(mapping, 0, 0, 0);
+
        if (filemap_fdatawrite(mapping)) {
                mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
                     (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -2725,7 +2734,6 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
        sync_mapping_buffers(mapping);
        if (blocking == LKM_EXMODE) {
                truncate_inode_pages(mapping, 0);
-               unmap_mapping_range(mapping, 0, 0, 0);
        } else {
                /* We only need to wait on the I/O if we're not also
                 * truncating pages because truncate_inode_pages waits