fs/nfs/file.c

   1 /*
   2  *  linux/fs/nfs/file.c
   3  *
   4  *  Copyright (C) 1992  Rick Sladkey
   5  *
   6  *  Changes Copyright (C) 1994 by Florian La Roche
   7  *   - Do not copy data too often around in the kernel.
   8  *   - In nfs_file_read the return value of kmalloc wasn't checked.
   9  *   - Put in a better version of read look-ahead buffering. Original idea
  10  *     and implementation by Wai S Kok elekokws@ee.nus.sg.
  11  *
  12  *  Expire cache on write to a file by Wai S Kok (Oct 1994).
  13  *
  14  *  Total rewrite of read side for new NFS buffer cache.. Linus.
  15  *
  16  *  nfs regular file handling functions
  17  */
  18
  19 #include <linux/module.h>
  20 #include <linux/time.h>
  21 #include <linux/kernel.h>
  22 #include <linux/errno.h>
  23 #include <linux/fcntl.h>
  24 #include <linux/stat.h>
  25 #include <linux/nfs_fs.h>
  26 #include <linux/nfs_mount.h>
  27 #include <linux/mm.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/gfp.h>
  30 #include <linux/swap.h>
  31
  32 #include <asm/uaccess.h>
  33
  34 #include "delegation.h"
  35 #include "internal.h"
  36 #include "iostat.h"
  37 #include "fscache.h"
  38 #include "pnfs.h"
  39
  40 #include "nfstrace.h"
  41
  42 #define NFSDBG_FACILITY         NFSDBG_FILE
  43
  44 static const struct vm_operations_struct nfs_file_vm_ops;
  45
  46 /* Hack for future NFS swap support */
  47 #ifndef IS_SWAPFILE
  48 # define IS_SWAPFILE(inode)     (0)
  49 #endif
  50
  51 int nfs_check_flags(int flags)
  52 {
  53         if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
  54                 return -EINVAL;
  55
  56         return 0;
  57 }
  58 EXPORT_SYMBOL_GPL(nfs_check_flags);
  59
  60 /*
  61  * Open file
  62  */
  63 static int
  64 nfs_file_open(struct inode *inode, struct file *filp)
  65 {
  66         int res;
  67
  68         dprintk("NFS: open file(%pD2)\n", filp);
  69
  70         nfs_inc_stats(inode, NFSIOS_VFSOPEN);
  71         res = nfs_check_flags(filp->f_flags);
  72         if (res)
  73                 return res;
  74
  75         res = nfs_open(inode, filp);
  76         return res;
  77 }
  78
  79 int
  80 nfs_file_release(struct inode *inode, struct file *filp)
  81 {
  82         dprintk("NFS: release(%pD2)\n", filp);
  83
  84         nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
  85         return nfs_release(inode, filp);
  86 }
  87 EXPORT_SYMBOL_GPL(nfs_file_release);
  88
  89 /**
  90  * nfs_revalidate_size - Revalidate the file size
  91  * @inode - pointer to inode struct
  92  * @file - pointer to struct file
  93  *
  94  * Revalidates the file length. This is basically a wrapper around
  95  * nfs_revalidate_inode() that takes into account the fact that we may
  96  * have cached writes (in which case we don't care about the server's
  97  * idea of what the file length is), or O_DIRECT (in which case we
  98  * shouldn't trust the cache).
  99  */
 100 static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 101 {
 102         struct nfs_server *server = NFS_SERVER(inode);
 103         struct nfs_inode *nfsi = NFS_I(inode);
 104
 105         if (nfs_have_delegated_attributes(inode))
 106                 goto out_noreval;
 107
 108         if (filp->f_flags & O_DIRECT)
 109                 goto force_reval;
 110         if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
 111                 goto force_reval;
 112         if (nfs_attribute_timeout(inode))
 113                 goto force_reval;
 114 out_noreval:
 115         return 0;
 116 force_reval:
 117         return __nfs_revalidate_inode(server, inode);
 118 }
 119
 120 loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
 121 {
 122         dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
 123                         filp, offset, whence);
 124
 125         /*
 126          * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 127          * the cached file length
 128          */
 129         if (whence != SEEK_SET && whence != SEEK_CUR) {
 130                 struct inode *inode = filp->f_mapping->host;
 131
 132                 int retval = nfs_revalidate_file_size(inode, filp);
 133                 if (retval < 0)
 134                         return (loff_t)retval;
 135         }
 136
 137         return generic_file_llseek(filp, offset, whence);
 138 }
 139 EXPORT_SYMBOL_GPL(nfs_file_llseek);
 140
 141 /*
 142  * Flush all dirty pages, and check for write errors.
 143  */
 144 int
 145 nfs_file_flush(struct file *file, fl_owner_t id)
 146 {
 147         struct inode    *inode = file_inode(file);
 148
 149         dprintk("NFS: flush(%pD2)\n", file);
 150
 151         nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 152         if ((file->f_mode & FMODE_WRITE) == 0)
 153                 return 0;
 154
 155         /*
 156          * If we're holding a write delegation, then just start the i/o
 157          * but don't wait for completion (or send a commit).
 158          */
 159         if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
 160                 return filemap_fdatawrite(file->f_mapping);
 161
 162         /* Flush writes to the server and return any errors */
 163         return vfs_fsync(file, 0);
 164 }
 165 EXPORT_SYMBOL_GPL(nfs_file_flush);
 166
 167 ssize_t
 168 nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 169 {
 170         struct inode *inode = file_inode(iocb->ki_filp);
 171         ssize_t result;
 172
 173         if (iocb->ki_flags & IOCB_DIRECT)
 174                 return nfs_file_direct_read(iocb, to, iocb->ki_pos);
 175
 176         dprintk("NFS: read(%pD2, %zu@%lu)\n",
 177                 iocb->ki_filp,
 178                 iov_iter_count(to), (unsigned long) iocb->ki_pos);
 179
 180         result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
 181         if (!result) {
 182                 result = generic_file_read_iter(iocb, to);
 183                 if (result > 0)
 184                         nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 185         }
 186         return result;
 187 }
 188 EXPORT_SYMBOL_GPL(nfs_file_read);
 189
 190 ssize_t
 191 nfs_file_splice_read(struct file *filp, loff_t *ppos,
 192                      struct pipe_inode_info *pipe, size_t count,
 193                      unsigned int flags)
 194 {
 195         struct inode *inode = file_inode(filp);
 196         ssize_t res;
 197
 198         dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
 199                 filp, (unsigned long) count, (unsigned long long) *ppos);
 200
 201         res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
 202         if (!res) {
 203                 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 204                 if (res > 0)
 205                         nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
 206         }
 207         return res;
 208 }
 209 EXPORT_SYMBOL_GPL(nfs_file_splice_read);
 210
 211 int
 212 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 213 {
 214         struct inode *inode = file_inode(file);
 215         int     status;
 216
 217         dprintk("NFS: mmap(%pD2)\n", file);
 218
 219         /* Note: generic_file_mmap() returns ENOSYS on nommu systems
 220          *       so we call that before revalidating the mapping
 221          */
 222         status = generic_file_mmap(file, vma);
 223         if (!status) {
 224                 vma->vm_ops = &nfs_file_vm_ops;
 225                 status = nfs_revalidate_mapping(inode, file->f_mapping);
 226         }
 227         return status;
 228 }
 229 EXPORT_SYMBOL_GPL(nfs_file_mmap);
 230
 231 /*
 232  * Flush any dirty pages for this process, and check for write errors.
 233  * The return status from this call provides a reliable indication of
 234  * whether any write errors occurred for this process.
 235  *
 236  * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
 237  * disk, but it retrieves and clears ctx->error after synching, despite
 238  * the two being set at the same time in nfs_context_set_write_error().
 239  * This is because the former is used to notify the _next_ call to
 240  * nfs_file_write() that a write error occurred, and hence cause it to
 241  * fall back to doing a synchronous write.
 242  */
 243 int
 244 nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
 245 {
 246         struct nfs_open_context *ctx = nfs_file_open_context(file);
 247         struct inode *inode = file_inode(file);
 248         int have_error, do_resend, status;
 249         int ret = 0;
 250
 251         dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
 252
 253         nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 254         do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
 255         have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 256         status = nfs_commit_inode(inode, FLUSH_SYNC);
 257         have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 258         if (have_error) {
 259                 ret = xchg(&ctx->error, 0);
 260                 if (ret)
 261                         goto out;
 262         }
 263         if (status < 0) {
 264                 ret = status;
 265                 goto out;
 266         }
 267         do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
 268         if (do_resend)
 269                 ret = -EAGAIN;
 270 out:
 271         return ret;
 272 }
 273 EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
 274
 275 static int
 276 nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 277 {
 278         int ret;
 279         struct inode *inode = file_inode(file);
 280
 281         trace_nfs_fsync_enter(inode);
 282
 283         nfs_inode_dio_wait(inode);
 284         do {
 285                 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 286                 if (ret != 0)
 287                         break;
 288                 mutex_lock(&inode->i_mutex);
 289                 ret = nfs_file_fsync_commit(file, start, end, datasync);
 290                 mutex_unlock(&inode->i_mutex);
 291                 /*
 292                  * If nfs_file_fsync_commit detected a server reboot, then
 293                  * resend all dirty pages that might have been covered by
 294                  * the NFS_CONTEXT_RESEND_WRITES flag
 295                  */
 296                 start = 0;
 297                 end = LLONG_MAX;
 298         } while (ret == -EAGAIN);
 299
 300         trace_nfs_fsync_exit(inode, ret);
 301         return ret;
 302 }
 303
 304 /*
 305  * Decide whether a read/modify/write cycle may be more efficient
 306  * then a modify/write/read cycle when writing to a page in the
 307  * page cache.
 308  *
 309  * The modify/write/read cycle may occur if a page is read before
 310  * being completely filled by the writer.  In this situation, the
 311  * page must be completely written to stable storage on the server
 312  * before it can be refilled by reading in the page from the server.
 313  * This can lead to expensive, small, FILE_SYNC mode writes being
 314  * done.
 315  *
 316  * It may be more efficient to read the page first if the file is
 317  * open for reading in addition to writing, the page is not marked
 318  * as Uptodate, it is not dirty or waiting to be committed,
 319  * indicating that it was previously allocated and then modified,
 320  * that there were valid bytes of data in that range of the file,
 321  * and that the new data won't completely replace the old data in
 322  * that range of the file.
 323  */
 324 static int nfs_want_read_modify_write(struct file *file, struct page *page,
 325                         loff_t pos, unsigned len)
 326 {
 327         unsigned int pglen = nfs_page_length(page);
 328         unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
 329         unsigned int end = offset + len;
 330
 331         if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
 332                 if (!PageUptodate(page))
 333                         return 1;
 334                 return 0;
 335         }
 336
 337         if ((file->f_mode & FMODE_READ) &&      /* open for read? */
 338             !PageUptodate(page) &&              /* Uptodate? */
 339             !PagePrivate(page) &&               /* i/o request already? */
 340             pglen &&                            /* valid bytes of file? */
 341             (end < pglen || offset))            /* replace all valid bytes? */
 342                 return 1;
 343         return 0;
 344 }
 345
 346 /*
 347  * This does the "real" work of the write. We must allocate and lock the
 348  * page to be sent back to the generic routine, which then copies the
 349  * data from user space.
 350  *
 351  * If the writer ends up delaying the write, the writer needs to
 352  * increment the page use counts until he is done with the page.
 353  */
 354 static int nfs_write_begin(struct file *file, struct address_space *mapping,
 355                         loff_t pos, unsigned len, unsigned flags,
 356                         struct page **pagep, void **fsdata)
 357 {
 358         int ret;
 359         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 360         struct page *page;
 361         int once_thru = 0;
 362
 363         dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
 364                 file, mapping->host->i_ino, len, (long long) pos);
 365
 366 start:
 367         /*
 368          * Prevent starvation issues if someone is doing a consistency
 369          * sync-to-disk
 370          */
 371         ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
 372                                  nfs_wait_bit_killable, TASK_KILLABLE);
 373         if (ret)
 374                 return ret;
 375         /*
 376          * Wait for O_DIRECT to complete
 377          */
 378         nfs_inode_dio_wait(mapping->host);
 379
 380         page = grab_cache_page_write_begin(mapping, index, flags);
 381         if (!page)
 382                 return -ENOMEM;
 383         *pagep = page;
 384
 385         ret = nfs_flush_incompatible(file, page);
 386         if (ret) {
 387                 unlock_page(page);
 388                 page_cache_release(page);
 389         } else if (!once_thru &&
 390                    nfs_want_read_modify_write(file, page, pos, len)) {
 391                 once_thru = 1;
 392                 ret = nfs_readpage(file, page);
 393                 page_cache_release(page);
 394                 if (!ret)
 395                         goto start;
 396         }
 397         return ret;
 398 }
 399
 400 static int nfs_write_end(struct file *file, struct address_space *mapping,
 401                         loff_t pos, unsigned len, unsigned copied,
 402                         struct page *page, void *fsdata)
 403 {
 404         unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
 405         struct nfs_open_context *ctx = nfs_file_open_context(file);
 406         int status;
 407
 408         dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
 409                 file, mapping->host->i_ino, len, (long long) pos);
 410
 411         /*
 412          * Zero any uninitialised parts of the page, and then mark the page
 413          * as up to date if it turns out that we're extending the file.
 414          */
 415         if (!PageUptodate(page)) {
 416                 unsigned pglen = nfs_page_length(page);
 417                 unsigned end = offset + len;
 418
 419                 if (pglen == 0) {
 420                         zero_user_segments(page, 0, offset,
 421                                         end, PAGE_CACHE_SIZE);
 422                         SetPageUptodate(page);
 423                 } else if (end >= pglen) {
 424                         zero_user_segment(page, end, PAGE_CACHE_SIZE);
 425                         if (offset == 0)
 426                                 SetPageUptodate(page);
 427                 } else
 428                         zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
 429         }
 430
 431         status = nfs_updatepage(file, page, offset, copied);
 432
 433         unlock_page(page);
 434         page_cache_release(page);
 435
 436         if (status < 0)
 437                 return status;
 438         NFS_I(mapping->host)->write_io += copied;
 439
 440         if (nfs_ctx_key_to_expire(ctx)) {
 441                 status = nfs_wb_all(mapping->host);
 442                 if (status < 0)
 443                         return status;
 444         }
 445
 446         return copied;
 447 }
 448
 449 /*
 450  * Partially or wholly invalidate a page
 451  * - Release the private state associated with a page if undergoing complete
 452  *   page invalidation
 453  * - Called if either PG_private or PG_fscache is set on the page
 454  * - Caller holds page lock
 455  */
 456 static void nfs_invalidate_page(struct page *page, unsigned int offset,
 457                                 unsigned int length)
 458 {
 459         dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
 460                  page, offset, length);
 461
 462         if (offset != 0 || length < PAGE_CACHE_SIZE)
 463                 return;
 464         /* Cancel any unstarted writes on this page */
 465         nfs_wb_page_cancel(page_file_mapping(page)->host, page);
 466
 467         nfs_fscache_invalidate_page(page, page->mapping->host);
 468 }
 469
 470 /*
 471  * Attempt to release the private state associated with a page
 472  * - Called if either PG_private or PG_fscache is set on the page
 473  * - Caller holds page lock
 474  * - Return true (may release page) or false (may not)
 475  */
 476 static int nfs_release_page(struct page *page, gfp_t gfp)
 477 {
 478         struct address_space *mapping = page->mapping;
 479
 480         dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 481
 482         /* Always try to initiate a 'commit' if relevant, but only
 483          * wait for it if __GFP_WAIT is set.  Even then, only wait 1
 484          * second and only if the 'bdi' is not congested.
 485          * Waiting indefinitely can cause deadlocks when the NFS
 486          * server is on this machine, when a new TCP connection is
 487          * needed and in other rare cases.  There is no particular
 488          * need to wait extensively here.  A short wait has the
 489          * benefit that someone else can worry about the freezer.
 490          */
 491         if (mapping) {
 492                 struct nfs_server *nfss = NFS_SERVER(mapping->host);
 493                 nfs_commit_inode(mapping->host, 0);
 494                 if ((gfp & __GFP_WAIT) &&
 495                     !bdi_write_congested(&nfss->backing_dev_info)) {
 496                         wait_on_page_bit_killable_timeout(page, PG_private,
 497                                                           HZ);
 498                         if (PagePrivate(page))
 499                                 set_bdi_congested(&nfss->backing_dev_info,
 500                                                   BLK_RW_ASYNC);
 501                 }
 502         }
 503         /* If PagePrivate() is set, then the page is not freeable */
 504         if (PagePrivate(page))
 505                 return 0;
 506         return nfs_fscache_release_page(page, gfp);
 507 }
 508
 509 static void nfs_check_dirty_writeback(struct page *page,
 510                                 bool *dirty, bool *writeback)
 511 {
 512         struct nfs_inode *nfsi;
 513         struct address_space *mapping = page_file_mapping(page);
 514
 515         if (!mapping || PageSwapCache(page))
 516                 return;
 517
 518         /*
 519          * Check if an unstable page is currently being committed and
 520          * if so, have the VM treat it as if the page is under writeback
 521          * so it will not block due to pages that will shortly be freeable.
 522          */
 523         nfsi = NFS_I(mapping->host);
 524         if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
 525                 *writeback = true;
 526                 return;
 527         }
 528
 529         /*
 530          * If PagePrivate() is set, then the page is not freeable and as the
 531          * inode is not being committed, it's not going to be cleaned in the
 532          * near future so treat it as dirty
 533          */
 534         if (PagePrivate(page))
 535                 *dirty = true;
 536 }
 537
 538 /*
 539  * Attempt to clear the private state associated with a page when an error
 540  * occurs that requires the cached contents of an inode to be written back or
 541  * destroyed
 542  * - Called if either PG_private or fscache is set on the page
 543  * - Caller holds page lock
 544  * - Return 0 if successful, -error otherwise
 545  */
 546 static int nfs_launder_page(struct page *page)
 547 {
 548         struct inode *inode = page_file_mapping(page)->host;
 549         struct nfs_inode *nfsi = NFS_I(inode);
 550
 551         dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
 552                 inode->i_ino, (long long)page_offset(page));
 553
 554         nfs_fscache_wait_on_page_write(nfsi, page);
 555         return nfs_wb_page(inode, page);
 556 }
 557
 558 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 559                                                 sector_t *span)
 560 {
 561         struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
 562
 563         *span = sis->pages;
 564
 565         return rpc_clnt_swap_activate(clnt);
 566 }
 567
 568 static void nfs_swap_deactivate(struct file *file)
 569 {
 570         struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
 571
 572         rpc_clnt_swap_deactivate(clnt);
 573 }
 574
 575 const struct address_space_operations nfs_file_aops = {
 576         .readpage = nfs_readpage,
 577         .readpages = nfs_readpages,
 578         .set_page_dirty = __set_page_dirty_nobuffers,
 579         .writepage = nfs_writepage,
 580         .writepages = nfs_writepages,
 581         .write_begin = nfs_write_begin,
 582         .write_end = nfs_write_end,
 583         .invalidatepage = nfs_invalidate_page,
 584         .releasepage = nfs_release_page,
 585         .direct_IO = nfs_direct_IO,
 586         .migratepage = nfs_migrate_page,
 587         .launder_page = nfs_launder_page,
 588         .is_dirty_writeback = nfs_check_dirty_writeback,
 589         .error_remove_page = generic_error_remove_page,
 590         .swap_activate = nfs_swap_activate,
 591         .swap_deactivate = nfs_swap_deactivate,
 592 };
 593
 594 /*
 595  * Notification that a PTE pointing to an NFS page is about to be made
 596  * writable, implying that someone is about to modify the page through a
 597  * shared-writable mapping
 598  */
 599 static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 600 {
 601         struct page *page = vmf->page;
 602         struct file *filp = vma->vm_file;
 603         struct inode *inode = file_inode(filp);
 604         unsigned pagelen;
 605         int ret = VM_FAULT_NOPAGE;
 606         struct address_space *mapping;
 607
 608         dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
 609                 filp, filp->f_mapping->host->i_ino,
 610                 (long long)page_offset(page));
 611
 612         /* make sure the cache has finished storing the page */
 613         nfs_fscache_wait_on_page_write(NFS_I(inode), page);
 614
 615         wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
 616                         nfs_wait_bit_killable, TASK_KILLABLE);
 617
 618         lock_page(page);
 619         mapping = page_file_mapping(page);
 620         if (mapping != inode->i_mapping)
 621                 goto out_unlock;
 622
 623         wait_on_page_writeback(page);
 624
 625         pagelen = nfs_page_length(page);
 626         if (pagelen == 0)
 627                 goto out_unlock;
 628
 629         ret = VM_FAULT_LOCKED;
 630         if (nfs_flush_incompatible(filp, page) == 0 &&
 631             nfs_updatepage(filp, page, 0, pagelen) == 0)
 632                 goto out;
 633
 634         ret = VM_FAULT_SIGBUS;
 635 out_unlock:
 636         unlock_page(page);
 637 out:
 638         return ret;
 639 }
 640
 641 static const struct vm_operations_struct nfs_file_vm_ops = {
 642         .fault = filemap_fault,
 643         .map_pages = filemap_map_pages,
 644         .page_mkwrite = nfs_vm_page_mkwrite,
 645 };
 646
 647 static int nfs_need_sync_write(struct file *filp, struct inode *inode)
 648 {
 649         struct nfs_open_context *ctx;
 650
 651         if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
 652                 return 1;
 653         ctx = nfs_file_open_context(filp);
 654         if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
 655             nfs_ctx_key_to_expire(ctx))
 656                 return 1;
 657         return 0;
 658 }
 659
 660 ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 661 {
 662         struct file *file = iocb->ki_filp;
 663         struct inode *inode = file_inode(file);
 664         unsigned long written = 0;
 665         ssize_t result;
 666         size_t count = iov_iter_count(from);
 667
 668         result = nfs_key_timeout_notify(file, inode);
 669         if (result)
 670                 return result;
 671
 672         if (iocb->ki_flags & IOCB_DIRECT) {
 673                 result = generic_write_checks(iocb, from);
 674                 if (result <= 0)
 675                         return result;
 676                 return nfs_file_direct_write(iocb, from);
 677         }
 678
 679         dprintk("NFS: write(%pD2, %zu@%Ld)\n",
 680                 file, count, (long long) iocb->ki_pos);
 681
 682         result = -EBUSY;
 683         if (IS_SWAPFILE(inode))
 684                 goto out_swapfile;
 685         /*
 686          * O_APPEND implies that we must revalidate the file length.
 687          */
 688         if (iocb->ki_flags & IOCB_APPEND) {
 689                 result = nfs_revalidate_file_size(inode, file);
 690                 if (result)
 691                         goto out;
 692         }
 693
 694         result = count;
 695         if (!count)
 696                 goto out;
 697
 698         result = generic_file_write_iter(iocb, from);
 699         if (result > 0)
 700                 written = result;
 701
 702         /* Return error values for O_DSYNC and IS_SYNC() */
 703         if (result >= 0 && nfs_need_sync_write(file, inode)) {
 704                 int err = vfs_fsync(file, 0);
 705                 if (err < 0)
 706                         result = err;
 707         }
 708         if (result > 0)
 709                 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 710 out:
 711         return result;
 712
 713 out_swapfile:
 714         printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
 715         goto out;
 716 }
 717 EXPORT_SYMBOL_GPL(nfs_file_write);
 718
 719 static int
 720 do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 721 {
 722         struct inode *inode = filp->f_mapping->host;
 723         int status = 0;
 724         unsigned int saved_type = fl->fl_type;
 725
 726         /* Try local locking first */
 727         posix_test_lock(filp, fl);
 728         if (fl->fl_type != F_UNLCK) {
 729                 /* found a conflict */
 730                 goto out;
 731         }
 732         fl->fl_type = saved_type;
 733
 734         if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
 735                 goto out_noconflict;
 736
 737         if (is_local)
 738                 goto out_noconflict;
 739
 740         status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 741 out:
 742         return status;
 743 out_noconflict:
 744         fl->fl_type = F_UNLCK;
 745         goto out;
 746 }
 747
 748 static int do_vfs_lock(struct file *file, struct file_lock *fl)
 749 {
 750         int res = 0;
 751         switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
 752                 case FL_POSIX:
 753                         res = posix_lock_file_wait(file, fl);
 754                         break;
 755                 case FL_FLOCK:
 756                         res = flock_lock_file_wait(file, fl);
 757                         break;
 758                 default:
 759                         BUG();
 760         }
 761         return res;
 762 }
 763
 764 static int
 765 do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 766 {
 767         struct inode *inode = filp->f_mapping->host;
 768         struct nfs_lock_context *l_ctx;
 769         int status;
 770
 771         /*
 772          * Flush all pending writes before doing anything
 773          * with locks..
 774          */
 775         vfs_fsync(filp, 0);
 776
 777         l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
 778         if (!IS_ERR(l_ctx)) {
 779                 status = nfs_iocounter_wait(&l_ctx->io_count);
 780                 nfs_put_lock_context(l_ctx);
 781                 if (status < 0)
 782                         return status;
 783         }
 784
 785         /* NOTE: special case
 786          *      If we're signalled while cleaning up locks on process exit, we
 787          *      still need to complete the unlock.
 788          */
 789         /*
 790          * Use local locking if mounted with "-onolock" or with appropriate
 791          * "-olocal_lock="
 792          */
 793         if (!is_local)
 794                 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 795         else
 796                 status = do_vfs_lock(filp, fl);
 797         return status;
 798 }
 799
 800 static int
 801 is_time_granular(struct timespec *ts) {
 802         return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
 803 }
 804
 805 static int
 806 do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 807 {
 808         struct inode *inode = filp->f_mapping->host;
 809         int status;
 810
 811         /*
 812          * Flush all pending writes before doing anything
 813          * with locks..
 814          */
 815         status = nfs_sync_mapping(filp->f_mapping);
 816         if (status != 0)
 817                 goto out;
 818
 819         /*
 820          * Use local locking if mounted with "-onolock" or with appropriate
 821          * "-olocal_lock="
 822          */
 823         if (!is_local)
 824                 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 825         else
 826                 status = do_vfs_lock(filp, fl);
 827         if (status < 0)
 828                 goto out;
 829
 830         /*
 831          * Revalidate the cache if the server has time stamps granular
 832          * enough to detect subsecond changes.  Otherwise, clear the
 833          * cache to prevent missing any changes.
 834          *
 835          * This makes locking act as a cache coherency point.
 836          */
 837         nfs_sync_mapping(filp->f_mapping);
 838         if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
 839                 if (is_time_granular(&NFS_SERVER(inode)->time_delta))
 840                         __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 841                 else
 842                         nfs_zap_caches(inode);
 843         }
 844 out:
 845         return status;
 846 }
 847
 848 /*
 849  * Lock a (portion of) a file
 850  */
 851 int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 852 {
 853         struct inode *inode = filp->f_mapping->host;
 854         int ret = -ENOLCK;
 855         int is_local = 0;
 856
 857         dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
 858                         filp, fl->fl_type, fl->fl_flags,
 859                         (long long)fl->fl_start, (long long)fl->fl_end);
 860
 861         nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 862
 863         /* No mandatory locks over NFS */
 864         if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
 865                 goto out_err;
 866
 867         if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
 868                 is_local = 1;
 869
 870         if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
 871                 ret = NFS_PROTO(inode)->lock_check_bounds(fl);
 872                 if (ret < 0)
 873                         goto out_err;
 874         }
 875
 876         if (IS_GETLK(cmd))
 877                 ret = do_getlk(filp, cmd, fl, is_local);
 878         else if (fl->fl_type == F_UNLCK)
 879                 ret = do_unlk(filp, cmd, fl, is_local);
 880         else
 881                 ret = do_setlk(filp, cmd, fl, is_local);
 882 out_err:
 883         return ret;
 884 }
 885 EXPORT_SYMBOL_GPL(nfs_lock);
 886
 887 /*
 888  * Lock a (portion of) a file
 889  */
 890 int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 891 {
 892         struct inode *inode = filp->f_mapping->host;
 893         int is_local = 0;
 894
 895         dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
 896                         filp, fl->fl_type, fl->fl_flags);
 897
 898         if (!(fl->fl_flags & FL_FLOCK))
 899                 return -ENOLCK;
 900
 901         /*
 902          * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of
 903          * any standard. In principle we might be able to support LOCK_MAND
 904          * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the
 905          * NFS code is not set up for it.
 906          */
 907         if (fl->fl_type & LOCK_MAND)
 908                 return -EINVAL;
 909
 910         if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
 911                 is_local = 1;
 912
 913         /* We're simulating flock() locks using posix locks on the server */
 914         if (fl->fl_type == F_UNLCK)
 915                 return do_unlk(filp, cmd, fl, is_local);
 916         return do_setlk(filp, cmd, fl, is_local);
 917 }
 918 EXPORT_SYMBOL_GPL(nfs_flock);
 919
 920 const struct file_operations nfs_file_operations = {
 921         .llseek         = nfs_file_llseek,
 922         .read_iter      = nfs_file_read,
 923         .write_iter     = nfs_file_write,
 924         .mmap           = nfs_file_mmap,
 925         .open           = nfs_file_open,
 926         .flush          = nfs_file_flush,
 927         .release        = nfs_file_release,
 928         .fsync          = nfs_file_fsync,
 929         .lock           = nfs_lock,
 930         .flock          = nfs_flock,
 931         .splice_read    = nfs_file_splice_read,
 932         .splice_write   = iter_file_splice_write,
 933         .check_flags    = nfs_check_flags,
 934         .setlease       = simple_nosetlease,
 935 };
 936 EXPORT_SYMBOL_GPL(nfs_file_operations);