/*
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
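
/*
 * Map the POSIX open flags from the VFS into the NT access bits that are
 * requested in the SMB open/create request.
 */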
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can cause
		   an unnecessary access-denied error on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
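
/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combinations onto an NT create
 * disposition; the full mapping table is documented in cifs_nt_open()
 * below.
 */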
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
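
/*
 * Open a file using the SMB POSIX protocol extensions and, when the caller
 * supplied an inode pointer, instantiate or refresh the inode from the
 * FILE_UNIX_BASIC_INFO data returned by the server.
 */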
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
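
/*
 * Open a file the traditional (non-POSIX) way through the per-dialect
 * server->ops->open() method and then refresh the inode from the server.
 */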
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match for disposition
 *	FILE_SUPERSEDE (i.e. create whether or not the file exists);
 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 *	and the read/write flags match reasonably.  O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, &fid->netfid);

out:
	kfree(buf);
	return rc;
}
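
/*
 * Check whether any byte-range locks are currently recorded for this inode
 * on any of its open fids; used below to decide whether a read oplock may
 * be kept.
 */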
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}
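
/*
 * Allocate and initialize the cifsFileInfo for a freshly opened handle,
 * link it into the inode's and tcon's open file lists and attach it to the
 * struct file.
 */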
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (oplock == server->vals->oplock_read &&
	    cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if this is a readable file instance, put it first in the list */
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	file->private_data = cfile;
	return cfile;
}
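
/*
 * Take an extra reference on an open file handle; release it with
 * cifsFileInfo_put().
 */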
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = cifs_file->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	spin_lock(&cifs_file_list_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file_list_lock);
		return;
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 cifs_file->dentry->d_inode);
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			CIFS_I(inode)->invalid_mapping = true;
		cifs_set_oplock_level(cifsi, 0);
	}
	spin_unlock(&cifs_file_list_lock);

	cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
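
/*
 * The ->open() method for regular files: try a POSIX-extensions open first
 * when the server advertises support for it and fall back to the
 * traditional NT-style open otherwise.
 */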
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
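
/*
 * Reopen a file handle that was invalidated by a session reconnect. If
 * can_flush is true it is safe to flush dirty pages and refresh the inode
 * from the server once the handle is reestablished.
 */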
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem, can end up causing writepage to
	 * get called and if the server was down that means we end up here,
	 * and we can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}
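
/*
 * Push all cached byte-range locks for this fid to the server, batched into
 * arrays of LOCKING_ANDX_RANGE entries, with one pass per lock type.
 */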
static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		free_xid(xid);
		return -EINVAL;
	}

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = cfile->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	spin_unlock(&inode->i_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&inode->i_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
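
/*
 * Handle F_GETLK: report a conflicting lock in @flock if one exists. POSIX
 * semantics are used when the Unix extensions are negotiated; otherwise the
 * range is probed with a temporary mandatory lock/unlock pair.
 */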
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
static void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

static void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
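
/*
 * Unlock a byte range: cached locks that fall inside the request are either
 * dropped locally (while brlocks can still be cached) or batched into
 * LOCKING_ANDX_RANGE arrays and sent to the server.
 */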
static int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = cfile->dentry->d_inode;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_I(inode)->clientCanCacheAll &&
		    CIFS_I(inode)->clientCanCacheRead) {
			cifs_invalidate_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->clientCanCacheRead = false;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
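
/*
 * Write data from a kernel buffer through an open file handle, reopening
 * the handle and retrying on -EAGAIN, and update the cached end-of-file
 * position and inode size as bytes are written.
 */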
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
		 write_size, *offset, dentry->d_name.name);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   the server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min((size_t)cifs_sb->wsize,
				  write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, open_file, &io_parms,
						     &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}
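
/*
 * Find a usable open handle with read access for this inode and return it
 * with an extra reference held, or NULL if there is none.
 */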
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list, but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen, but we had reports of an oops
	   (due to it being zero) during stress testcases so we need to check
	   for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_file_list_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find a usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get_locked(inv_file);
	}

	spin_unlock(&cifs_file_list_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&cifs_file_list_lock);
			list_move_tail(&inv_file->flist,
				       &cifs_inode->openFileList);
			spin_unlock(&cifs_file_list_lock);
			cifsFileInfo_put(inv_file);
			spin_lock(&cifs_file_list_lock);
			++refind;
			goto refind_writable;
		}
	}

	return NULL;
}
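
/*
 * Write out the [from, to) range of a single page using any writable
 * handle for the inode; this is the workhorse for the writepage path.
 */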
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct TCP_Server_Info *server;
	struct page *page;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages;
		pgoff_t next = 0, tofind;
		struct page **pages;

		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
			     end - index) + 1;

		wdata = cifs_writedata_alloc((unsigned int)tofind,
					     cifs_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		/*
		 * find_get_pages_tag seems to return a max of 256 on each
		 * iteration, so we must call it several times in order to
		 * fill the array or the wsize is effectively limited to
		 * 256 * PAGE_CACHE_SIZE.
		 */
		found_pages = 0;
		pages = wdata->pages;
		do {
			nr_pages = find_get_pages_tag(mapping, &index,
						      PAGECACHE_TAG_DIRTY,
						      tofind, pages);
			found_pages += nr_pages;
			tofind -= nr_pages;
			pages += nr_pages;
		} while (nr_pages && tofind && index <= end);

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		nr_pages = 0;
		for (i = 0; i < found_pages; i++) {
			page = wdata->pages[i];
			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */

			if (nr_pages == 0)
				lock_page(page);
			else if (!trylock_page(page))
				break;

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				break;
			}

			if (!wbc->range_cyclic && page->index > end) {
				done = true;
				unlock_page(page);
				break;
			}

			if (next && (page->index != next)) {
				/* Not next consecutive page */
				unlock_page(page);
				break;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
			    !clear_page_dirty_for_io(page)) {
				unlock_page(page);
				break;
			}

			/*
			 * This actually clears the dirty bit in the radix tree.
			 * See cifs_writepage() for more commentary.
			 */
			set_page_writeback(page);

			if (page_offset(page) >= i_size_read(mapping->host)) {
				done = true;
				unlock_page(page);
				end_page_writeback(page);
				break;
			}

			wdata->pages[i] = page;
			next = page->index + 1;
			++nr_pages;
		}

		/* reset index to refind any pages skipped */
		if (nr_pages == 0)
			index = wdata->pages[0]->index + 1;

		/* put any pages we aren't going to use */
		for (i = nr_pages; i < found_pages; i++) {
			page_cache_release(wdata->pages[i]);
			wdata->pages[i] = NULL;
		}

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			continue;
		}

		wdata->sync_mode = wbc->sync_mode;
		wdata->nr_pages = nr_pages;
		wdata->offset = page_offset(wdata->pages[0]);
		wdata->pagesz = PAGE_CACHE_SIZE;
		wdata->tailsz =
			min(i_size_read(mapping->host) -
			    page_offset(wdata->pages[nr_pages - 1]),
			    (loff_t)PAGE_CACHE_SIZE);
		wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
			       wdata->tailsz;

		do {
			if (wdata->cfile != NULL)
				cifsFileInfo_put(wdata->cfile);
			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
							  false);
			if (!wdata->cfile) {
				cifs_dbg(VFS, "No writable handles for inode\n");
				rc = -EBADF;
				break;
			}
			wdata->pid = wdata->cfile->pid;
			server = tlink_tcon(wdata->cfile->tlink)->ses->server;
			rc = server->ops->async_writev(wdata);
		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
	/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}

static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}
static int cifs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle
		   is known, which we might as well leverage */
		/* BB check if anything else is missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);

	return rc;
}
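
/*
 * Allocate the page array for an uncached write; on failure any pages that
 * were already allocated are released and -ENOMEM is returned.
 */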
static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
	}
	return rc;
}

static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t num_pages;
	size_t clen;

	clen = min_t(const size_t, len, wsize);
	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

	if (cur_len)
		*cur_len = clen;

	return num_pages;
}
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	int i;
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = wdata->cfile->dentry->d_inode;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);

	if (wdata->result != -EAGAIN) {
		for (i = 0; i < wdata->nr_pages; i++)
			put_page(wdata->pages[i]);
	}

	kref_put(&wdata->refcount, cifs_writedata_release);
}

/* attempt to send write to server, retry on any -EAGAIN errors */
static int
cifs_uncached_retry_writev(struct cifs_writedata *wdata)
{
	int rc;
	struct TCP_Server_Info *server;

	server = tlink_tcon(wdata->cfile->tlink)->ses->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc != 0)
				continue;
		}
		rc = server->ops->async_writev(wdata);
	} while (rc == -EAGAIN);

	return rc;
}
2377 cifs_iovec_write(struct file *file, const struct iovec *iov,
2378 unsigned long nr_segs, loff_t *poffset)
2380 unsigned long nr_pages, i;
2381 size_t copied, len, cur_len;
2382 ssize_t total_written = 0;
2385 struct cifsFileInfo *open_file;
2386 struct cifs_tcon *tcon;
2387 struct cifs_sb_info *cifs_sb;
2388 struct cifs_writedata *wdata, *tmp;
2389 struct list_head wdata_list;
2393 len = iov_length(iov, nr_segs);
2397 rc = generic_write_checks(file, poffset, &len, 0);
2401 INIT_LIST_HEAD(&wdata_list);
2402 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2403 open_file = file->private_data;
2404 tcon = tlink_tcon(open_file->tlink);
2406 if (!tcon->ses->server->ops->async_writev)
2411 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2412 pid = open_file->pid;
2414 pid = current->tgid;
2416 iov_iter_init(&it, iov, nr_segs, len, 0);
2420 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2421 wdata = cifs_writedata_alloc(nr_pages,
2422 cifs_uncached_writev_complete);
2428 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2435 for (i = 0; i < nr_pages; i++) {
2436 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2437 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2440 iov_iter_advance(&it, copied);
2442 cur_len = save_len - cur_len;
2444 wdata->sync_mode = WB_SYNC_ALL;
2445 wdata->nr_pages = nr_pages;
2446 wdata->offset = (__u64)offset;
2447 wdata->cfile = cifsFileInfo_get(open_file);
2449 wdata->bytes = cur_len;
2450 wdata->pagesz = PAGE_SIZE;
2451 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
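/*
 * Worked example (assuming 4K pages): cur_len == 9000 gives
 * nr_pages == 3 and tailsz == 9000 - 2 * 4096 == 808 bytes of real
 * data in the final page.
 */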
2452 rc = cifs_uncached_retry_writev(wdata);
2454 kref_put(&wdata->refcount, cifs_writedata_release);
2458 list_add_tail(&wdata->list, &wdata_list);
* If at least one write was successfully sent, then discard any rc
* value from the later writes. If a later write succeeds, then
* we'll end up returning whatever was written. If it fails, then
* we'll get a new rc value from that.
2469 if (!list_empty(&wdata_list))
* Wait for and collect replies for any successful sends in order of
* increasing offset. If an error is hit or we get a fatal signal
* while waiting, return without waiting for any more replies.
2478 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2480 /* FIXME: freezable too? */
2481 rc = wait_for_completion_killable(&wdata->done);
2484 else if (wdata->result)
2487 total_written += wdata->bytes;
2489 /* resend call if it's a retryable error */
2490 if (rc == -EAGAIN) {
2491 rc = cifs_uncached_retry_writev(wdata);
2495 list_del_init(&wdata->list);
2496 kref_put(&wdata->refcount, cifs_writedata_release);
2499 if (total_written > 0)
2500 *poffset += total_written;
2502 cifs_stats_bytes_written(tcon, total_written);
2503 return total_written ? total_written : (ssize_t)rc;
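/*
 * In short, the uncached write path above chunks the iovec into
 * wsize-sized pieces, copies each chunk into freshly allocated pages,
 * sends the chunks asynchronously, and then collects the completions in
 * offset order, retrying any that fail with -EAGAIN.
 */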
2506 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2507 unsigned long nr_segs, loff_t pos)
2510 struct inode *inode;
2512 inode = file_inode(iocb->ki_filp);
2515 * BB - optimize the way when signing is disabled. We can drop this
* extra memory-to-memory copying and use iovec buffers for constructing
* the write request.
2520 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2522 CIFS_I(inode)->invalid_mapping = true;
2530 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2531 unsigned long nr_segs, loff_t pos)
2533 struct file *file = iocb->ki_filp;
2534 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2535 struct inode *inode = file->f_mapping->host;
2536 struct cifsInodeInfo *cinode = CIFS_I(inode);
2537 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2538 ssize_t rc = -EACCES;
2540 BUG_ON(iocb->ki_pos != pos);
* We need to hold the sem to be sure nobody modifies the lock list
* with a brlock that prevents writing.
2546 down_read(&cinode->lock_sem);
2547 if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
server->vals->exclusive_lock_type, NULL,
CIFS_WRITE_OP)) {
2550 mutex_lock(&inode->i_mutex);
rc = __generic_file_aio_write(iocb, iov, nr_segs,
&iocb->ki_pos);
2553 mutex_unlock(&inode->i_mutex);
2556 if (rc > 0 || rc == -EIOCBQUEUED) {
2559 err = generic_write_sync(file, pos, rc);
if (err < 0 && rc > 0)
rc = err;
2564 up_read(&cinode->lock_sem);
2569 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2570 unsigned long nr_segs, loff_t pos)
2572 struct inode *inode = file_inode(iocb->ki_filp);
2573 struct cifsInodeInfo *cinode = CIFS_I(inode);
2574 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2575 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2576 iocb->ki_filp->private_data;
2577 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2580 if (cinode->clientCanCacheAll) {
2581 if (cap_unix(tcon->ses) &&
2582 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2583 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2584 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2585 return cifs_writev(iocb, iov, nr_segs, pos);
* For non-oplocked files in strict cache mode we need to write the data
* to the server exactly from pos to pos+len-1 rather than flush all
* affected pages, because flushing may cause an error with mandatory
* locks on these pages but not on the region from pos to pos+len-1.
2593 written = cifs_user_writev(iocb, iov, nr_segs, pos);
2594 if (written > 0 && cinode->clientCanCacheRead) {
2596 * Windows 7 server can delay breaking level2 oplock if a write
* request comes - break it on the client to prevent reading stale data.
2600 cifs_invalidate_mapping(inode);
cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
inode);
2603 cinode->clientCanCacheRead = false;
2608 static struct cifs_readdata *
2609 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2611 struct cifs_readdata *rdata;
2613 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2615 if (rdata != NULL) {
2616 kref_init(&rdata->refcount);
2617 INIT_LIST_HEAD(&rdata->list);
2618 init_completion(&rdata->done);
2619 INIT_WORK(&rdata->work, complete);
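/* the pages[] array lives in the same allocation, just past the struct */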
2626 cifs_readdata_release(struct kref *refcount)
2628 struct cifs_readdata *rdata = container_of(refcount,
2629 struct cifs_readdata, refcount);
2632 cifsFileInfo_put(rdata->cfile);
2638 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2644 for (i = 0; i < nr_pages; i++) {
2645 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2650 rdata->pages[i] = page;
2654 for (i = 0; i < nr_pages; i++) {
2655 put_page(rdata->pages[i]);
2656 rdata->pages[i] = NULL;
2663 cifs_uncached_readdata_release(struct kref *refcount)
2665 struct cifs_readdata *rdata = container_of(refcount,
2666 struct cifs_readdata, refcount);
2669 for (i = 0; i < rdata->nr_pages; i++) {
2670 put_page(rdata->pages[i]);
2671 rdata->pages[i] = NULL;
2673 cifs_readdata_release(refcount);
2677 cifs_retry_async_readv(struct cifs_readdata *rdata)
2680 struct TCP_Server_Info *server;
2682 server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2685 if (rdata->cfile->invalidHandle) {
2686 rc = cifs_reopen_file(rdata->cfile, true);
2690 rc = server->ops->async_readv(rdata);
2691 } while (rc == -EAGAIN);
2697 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2698 * @rdata: the readdata response with list of pages holding data
2699 * @iov: vector in which we should copy the data
2700 * @nr_segs: number of segments in vector
2701 * @offset: offset into file of the first iovec
2702 * @copied: used to return the amount of data copied to the iov
2704 * This function copies data from a list of pages in a readdata response into
2705 * an array of iovecs. It will first calculate where the data should go
2706 * based on the info in the readdata and then copy the data into that spot.
2709 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2710 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2714 size_t pos = rdata->offset - offset;
2715 ssize_t remaining = rdata->bytes;
2716 unsigned char *pdata;
2719 /* set up iov_iter and advance to the correct offset */
2720 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2721 iov_iter_advance(&ii, pos);
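/*
 * e.g. if the caller asked to read at file offset 4096 and this rdata
 * covers offset 20480, the iterator is advanced by 16384 so the copy
 * lands at the right spot in the caller's iovec.
 */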
2724 for (i = 0; i < rdata->nr_pages; i++) {
2726 struct page *page = rdata->pages[i];
2728 /* copy a whole page or whatever's left */
2729 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2731 /* ...but limit it to whatever space is left in the iov */
2732 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2734 /* go while there's data to be copied and no errors */
rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
copy);
2743 iov_iter_advance(&ii, copy);
2752 cifs_uncached_readv_complete(struct work_struct *work)
2754 struct cifs_readdata *rdata = container_of(work,
2755 struct cifs_readdata, work);
2757 complete(&rdata->done);
2758 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2762 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2763 struct cifs_readdata *rdata, unsigned int len)
2765 int total_read = 0, result = 0;
2767 unsigned int nr_pages = rdata->nr_pages;
2770 rdata->tailsz = PAGE_SIZE;
2771 for (i = 0; i < nr_pages; i++) {
2772 struct page *page = rdata->pages[i];
2774 if (len >= PAGE_SIZE) {
2775 /* enough data to fill the page */
2776 iov.iov_base = kmap(page);
2777 iov.iov_len = PAGE_SIZE;
2778 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2779 i, iov.iov_base, iov.iov_len);
2781 } else if (len > 0) {
2782 /* enough for partial page, fill and zero the rest */
iov.iov_base = kmap(page);
iov.iov_len = len;
2785 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2786 i, iov.iov_base, iov.iov_len);
2787 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2788 rdata->tailsz = len;
2791 /* no need to hold page hostage */
2792 rdata->pages[i] = NULL;
2798 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2803 total_read += result;
2806 return total_read > 0 ? total_read : result;
2810 cifs_iovec_read(struct file *file, const struct iovec *iov,
2811 unsigned long nr_segs, loff_t *poffset)
2814 size_t len, cur_len;
2815 ssize_t total_read = 0;
2816 loff_t offset = *poffset;
2817 unsigned int npages;
2818 struct cifs_sb_info *cifs_sb;
2819 struct cifs_tcon *tcon;
2820 struct cifsFileInfo *open_file;
2821 struct cifs_readdata *rdata, *tmp;
2822 struct list_head rdata_list;
2828 len = iov_length(iov, nr_segs);
2832 INIT_LIST_HEAD(&rdata_list);
2833 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2834 open_file = file->private_data;
2835 tcon = tlink_tcon(open_file->tlink);
2837 if (!tcon->ses->server->ops->async_readv)
2840 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2841 pid = open_file->pid;
2843 pid = current->tgid;
2845 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2846 cifs_dbg(FYI, "attempting read on write only file instance\n");
2849 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2850 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2852 /* allocate a readdata struct */
2853 rdata = cifs_readdata_alloc(npages,
2854 cifs_uncached_readv_complete);
2860 rc = cifs_read_allocate_pages(rdata, npages);
2864 rdata->cfile = cifsFileInfo_get(open_file);
2865 rdata->nr_pages = npages;
2866 rdata->offset = offset;
2867 rdata->bytes = cur_len;
2869 rdata->pagesz = PAGE_SIZE;
2870 rdata->read_into_pages = cifs_uncached_read_into_pages;
2872 rc = cifs_retry_async_readv(rdata);
2875 kref_put(&rdata->refcount,
2876 cifs_uncached_readdata_release);
2880 list_add_tail(&rdata->list, &rdata_list);
/* if at least one read request was successfully sent, then reset rc */
2886 if (!list_empty(&rdata_list))
2889 /* the loop below should proceed in the order of increasing offsets */
2891 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2895 /* FIXME: freezable sleep too? */
2896 rc = wait_for_completion_killable(&rdata->done);
2899 else if (rdata->result)
rc = cifs_readdata_to_iov(rdata, iov,
nr_segs, *poffset,
&copied);
2905 total_read += copied;
2908 /* resend call if it's a retryable error */
2909 if (rc == -EAGAIN) {
2910 rc = cifs_retry_async_readv(rdata);
2914 list_del_init(&rdata->list);
2915 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2918 cifs_stats_bytes_read(tcon, total_read);
2919 *poffset += total_read;
/* mask nodata case */
if (rc == -ENODATA)
rc = 0;
2925 return total_read ? total_read : rc;
2928 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2929 unsigned long nr_segs, loff_t pos)
2933 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2941 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2942 unsigned long nr_segs, loff_t pos)
2944 struct inode *inode = file_inode(iocb->ki_filp);
2945 struct cifsInodeInfo *cinode = CIFS_I(inode);
2946 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2947 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2948 iocb->ki_filp->private_data;
2949 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
* In strict cache mode we need to read from the server all the time
* if we don't have a level II oplock, because the server can delay the
* mtime change - so we can't make a decision about invalidating the
* inode. We can also fail with page reading if there are mandatory
* locks on pages affected by this read but not on the region from pos
* to pos+len-1.
2960 if (!cinode->clientCanCacheRead)
2961 return cifs_user_readv(iocb, iov, nr_segs, pos);
2963 if (cap_unix(tcon->ses) &&
2964 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2965 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2966 return generic_file_aio_read(iocb, iov, nr_segs, pos);
* We need to hold the sem to be sure nobody modifies the lock list
* with a brlock that prevents reading.
2972 down_read(&cinode->lock_sem);
2973 if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2974 tcon->ses->server->vals->shared_lock_type,
2975 NULL, CIFS_READ_OP))
2976 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2977 up_read(&cinode->lock_sem);
2982 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2985 unsigned int bytes_read = 0;
2986 unsigned int total_read;
2987 unsigned int current_read_size;
2989 struct cifs_sb_info *cifs_sb;
2990 struct cifs_tcon *tcon;
2991 struct TCP_Server_Info *server;
2994 struct cifsFileInfo *open_file;
2995 struct cifs_io_parms io_parms;
2996 int buf_type = CIFS_NO_BUFFER;
3000 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3002 /* FIXME: set up handlers for larger reads and/or convert to async */
3003 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3005 if (file->private_data == NULL) {
3010 open_file = file->private_data;
3011 tcon = tlink_tcon(open_file->tlink);
3012 server = tcon->ses->server;
3014 if (!server->ops->sync_read) {
3019 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3020 pid = open_file->pid;
3022 pid = current->tgid;
3024 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3025 cifs_dbg(FYI, "attempting read on write only file instance\n");
3027 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3028 total_read += bytes_read, cur_offset += bytes_read) {
3029 current_read_size = min_t(uint, read_size - total_read, rsize);
* For Windows ME and 9x we do not want to request more than it
* negotiated, since it will then refuse the read.
3034 if ((tcon->ses) && !(tcon->ses->capabilities &
3035 tcon->ses->server->vals->cap_large_files)) {
current_read_size = min_t(uint, current_read_size,
CIFSMaxBufSize);
}
rc = -EAGAIN;
3040 while (rc == -EAGAIN) {
3041 if (open_file->invalidHandle) {
3042 rc = cifs_reopen_file(open_file, true);
io_parms.pid = pid;
io_parms.tcon = tcon;
3048 io_parms.offset = *offset;
3049 io_parms.length = current_read_size;
3050 rc = server->ops->sync_read(xid, open_file, &io_parms,
&bytes_read, &cur_offset,
&buf_type);
3054 if (rc || (bytes_read == 0)) {
3062 cifs_stats_bytes_read(tcon, total_read);
3063 *offset += bytes_read;
3071 * If the page is mmap'ed into a process' page tables, then we need to make
3072 * sure that it doesn't change while being written back.
3075 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct page *page = vmf->page;

lock_page(page);
3080 return VM_FAULT_LOCKED;
3083 static struct vm_operations_struct cifs_file_vm_ops = {
3084 .fault = filemap_fault,
3085 .page_mkwrite = cifs_page_mkwrite,
3086 .remap_pages = generic_file_remap_pages,
3089 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3092 struct inode *inode = file_inode(file);
3096 if (!CIFS_I(inode)->clientCanCacheRead) {
3097 rc = cifs_invalidate_mapping(inode);
3102 rc = generic_file_mmap(file, vma);
3104 vma->vm_ops = &cifs_file_vm_ops;
3109 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3114 rc = cifs_revalidate_file(file);
cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
rc);
3121 rc = generic_file_mmap(file, vma);
3123 vma->vm_ops = &cifs_file_vm_ops;
3129 cifs_readv_complete(struct work_struct *work)
3132 struct cifs_readdata *rdata = container_of(work,
3133 struct cifs_readdata, work);
3135 for (i = 0; i < rdata->nr_pages; i++) {
3136 struct page *page = rdata->pages[i];
3138 lru_cache_add_file(page);
3140 if (rdata->result == 0) {
3141 flush_dcache_page(page);
SetPageUptodate(page);
}

unlock_page(page);
3147 if (rdata->result == 0)
3148 cifs_readpage_to_fscache(rdata->mapping->host, page);
3150 page_cache_release(page);
3151 rdata->pages[i] = NULL;
3153 kref_put(&rdata->refcount, cifs_readdata_release);
3157 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3158 struct cifs_readdata *rdata, unsigned int len)
3160 int total_read = 0, result = 0;
3164 unsigned int nr_pages = rdata->nr_pages;
3167 /* determine the eof that the server (probably) has */
3168 eof = CIFS_I(rdata->mapping->host)->server_eof;
3169 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
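/* e.g. (assuming 4K pages) eof == 10000 -> eof_index == (10000 - 1) >> 12 == 2 */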
3170 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3172 rdata->tailsz = PAGE_CACHE_SIZE;
3173 for (i = 0; i < nr_pages; i++) {
3174 struct page *page = rdata->pages[i];
3176 if (len >= PAGE_CACHE_SIZE) {
3177 /* enough data to fill the page */
3178 iov.iov_base = kmap(page);
3179 iov.iov_len = PAGE_CACHE_SIZE;
3180 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3181 i, page->index, iov.iov_base, iov.iov_len);
3182 len -= PAGE_CACHE_SIZE;
3183 } else if (len > 0) {
3184 /* enough for partial page, fill and zero the rest */
iov.iov_base = kmap(page);
iov.iov_len = len;
3187 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3188 i, page->index, iov.iov_base, iov.iov_len);
3189 memset(iov.iov_base + len,
3190 '\0', PAGE_CACHE_SIZE - len);
3191 rdata->tailsz = len;
3193 } else if (page->index > eof_index) {
3195 * The VFS will not try to do readahead past the
3196 * i_size, but it's possible that we have outstanding
3197 * writes with gaps in the middle and the i_size hasn't
3198 * caught up yet. Populate those with zeroed out pages
3199 * to prevent the VFS from repeatedly attempting to
3200 * fill them until the writes are flushed.
3202 zero_user(page, 0, PAGE_CACHE_SIZE);
3203 lru_cache_add_file(page);
3204 flush_dcache_page(page);
SetPageUptodate(page);
unlock_page(page);
3207 page_cache_release(page);
3208 rdata->pages[i] = NULL;
3212 /* no need to hold page hostage */
3213 lru_cache_add_file(page);
3215 page_cache_release(page);
3216 rdata->pages[i] = NULL;
3221 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3226 total_read += result;
3229 return total_read > 0 ? total_read : result;
3232 static int cifs_readpages(struct file *file, struct address_space *mapping,
3233 struct list_head *page_list, unsigned num_pages)
3236 struct list_head tmplist;
3237 struct cifsFileInfo *open_file = file->private_data;
3238 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3239 unsigned int rsize = cifs_sb->rsize;
* Give up immediately if rsize is too small to read an entire page.
* The VFS will fall back to readpage. We should never reach this
* point, however, since we set ra_pages to 0 when the rsize is
* smaller than a cache page.
3248 if (unlikely(rsize < PAGE_CACHE_SIZE))
* Reads as many pages as possible from fscache. Returns -ENOBUFS
* immediately if the cookie is negative.
3255 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3260 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3261 pid = open_file->pid;
3263 pid = current->tgid;
3266 INIT_LIST_HEAD(&tmplist);
3268 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3269 __func__, file, mapping, num_pages);
3272 * Start with the page at end of list and move it to private
3273 * list. Do the same with any following pages until we hit
3274 * the rsize limit, hit an index discontinuity, or run out of
3275 * pages. Issue the async read and then start the loop again
3276 * until the list is empty.
3278 * Note that list order is important. The page_list is in
3279 * the order of declining indexes. When we put the pages in
3280 * the rdata->pages, then we want them in increasing order.
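*
* For example, a page_list arriving as indexes 7,6,5,3 yields one
* request for page 3 alone, then a second covering pages 5-7
* (rsize permitting).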
3282 while (!list_empty(page_list)) {
3284 unsigned int bytes = PAGE_CACHE_SIZE;
3285 unsigned int expected_index;
3286 unsigned int nr_pages = 1;
3288 struct page *page, *tpage;
3289 struct cifs_readdata *rdata;
3291 page = list_entry(page_list->prev, struct page, lru);
3294 * Lock the page and put it in the cache. Since no one else
3295 * should have access to this page, we're safe to simply set
3296 * PG_locked without checking it first.
3298 __set_page_locked(page);
3299 rc = add_to_page_cache_locked(page, mapping,
3300 page->index, GFP_KERNEL);
3302 /* give up if we can't stick it in the cache */
3304 __clear_page_locked(page);
3308 /* move first page to the tmplist */
3309 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3310 list_move_tail(&page->lru, &tmplist);
3312 /* now try and add more pages onto the request */
3313 expected_index = page->index + 1;
3314 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
/* discontinuity? */
3316 if (page->index != expected_index)
3319 /* would this page push the read over the rsize? */
3320 if (bytes + PAGE_CACHE_SIZE > rsize)
3323 __set_page_locked(page);
3324 if (add_to_page_cache_locked(page, mapping,
3325 page->index, GFP_KERNEL)) {
3326 __clear_page_locked(page);
3329 list_move_tail(&page->lru, &tmplist);
3330 bytes += PAGE_CACHE_SIZE;
3335 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3337 /* best to give up if we're out of mem */
3338 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3339 list_del(&page->lru);
3340 lru_cache_add_file(page);
3342 page_cache_release(page);
3348 rdata->cfile = cifsFileInfo_get(open_file);
3349 rdata->mapping = mapping;
3350 rdata->offset = offset;
3351 rdata->bytes = bytes;
3353 rdata->pagesz = PAGE_CACHE_SIZE;
3354 rdata->read_into_pages = cifs_readpages_read_into_pages;
3356 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3357 list_del(&page->lru);
3358 rdata->pages[rdata->nr_pages++] = page;
3361 rc = cifs_retry_async_readv(rdata);
3363 for (i = 0; i < rdata->nr_pages; i++) {
3364 page = rdata->pages[i];
3365 lru_cache_add_file(page);
3367 page_cache_release(page);
3369 kref_put(&rdata->refcount, cifs_readdata_release);
3373 kref_put(&rdata->refcount, cifs_readdata_release);
3379 static int cifs_readpage_worker(struct file *file, struct page *page,
3385 /* Is the page cached? */
3386 rc = cifs_readpage_from_fscache(file_inode(file), page);
3390 page_cache_get(page);
3391 read_data = kmap(page);
/* for reads over a certain size we could initiate async read ahead */
3394 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3399 cifs_dbg(FYI, "Bytes read %d\n", rc);
3401 file_inode(file)->i_atime =
3402 current_fs_time(file_inode(file)->i_sb);
3404 if (PAGE_CACHE_SIZE > rc)
3405 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3407 flush_dcache_page(page);
3408 SetPageUptodate(page);
3410 /* send this page to the cache */
3411 cifs_readpage_to_fscache(file_inode(file), page);
3417 page_cache_release(page);
3423 static int cifs_readpage(struct file *file, struct page *page)
3425 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3431 if (file->private_data == NULL) {
3437 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3438 page, (int)offset, (int)offset);
3440 rc = cifs_readpage_worker(file, page, &offset);
3448 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3450 struct cifsFileInfo *open_file;
3452 spin_lock(&cifs_file_list_lock);
3453 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3454 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3455 spin_unlock(&cifs_file_list_lock);
3459 spin_unlock(&cifs_file_list_lock);
/*
* We do not want to update the file size from the server for inodes
* open for write - to avoid races with writepage extending the file.
* In the future we could consider allowing refreshing the inode only
* on increases in the file size, but this is tricky to do without
* racing with writebehind page caching in the current Linux kernel
* design.
*/
3469 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3474 if (is_inode_writable(cifsInode)) {
3475 /* This inode is open for write at least once */
3476 struct cifs_sb_info *cifs_sb;
3478 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3479 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
/* since there is no page cache to corrupt on directio,
we can change the size safely */
3485 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3493 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3494 loff_t pos, unsigned len, unsigned flags,
3495 struct page **pagep, void **fsdata)
3497 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3498 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3499 loff_t page_start = pos & PAGE_MASK;
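/* e.g. (assuming 4K pages) pos == 5000 -> index == 1, offset == 904, page_start == 4096 */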
3504 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3506 page = grab_cache_page_write_begin(mapping, index, flags);
3512 if (PageUptodate(page))
* If we write a full page it will be up to date, no need to read from
* the server. If the write is short, we'll end up doing a sync write
* instead.
3520 if (len == PAGE_CACHE_SIZE)
3524 * optimize away the read when we have an oplock, and we're not
3525 * expecting to use any of the data we'd be reading in. That
3526 * is, when the page lies beyond the EOF, or straddles the EOF
3527 * and the write will cover all of the existing data.
3529 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3530 i_size = i_size_read(mapping->host);
3531 if (page_start >= i_size ||
3532 (offset == 0 && (pos + len) >= i_size)) {
3533 zero_user_segments(page, 0, offset,
3537 * PageChecked means that the parts of the page
3538 * to which we're not writing are considered up
3539 * to date. Once the data is copied to the
3540 * page, it can be set uptodate.
3542 SetPageChecked(page);
3547 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3549 * might as well read a page, it is fast enough. If we get
3550 * an error, we don't need to return it. cifs_write_end will
3551 * do a sync write instead since PG_uptodate isn't set.
3553 cifs_readpage_worker(file, page, &page_start);
/* we could try using another file handle if there is one -
but how would we lock it to prevent a close of that handle
racing with this read? In any case,
this will be written out by write_end, so it is fine */
3565 static int cifs_release_page(struct page *page, gfp_t gfp)
3567 if (PagePrivate(page))
3570 return cifs_fscache_release_page(page, gfp);
3573 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3574 unsigned int length)
3576 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3578 if (offset == 0 && length == PAGE_CACHE_SIZE)
3579 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3582 static int cifs_launder_page(struct page *page)
3585 loff_t range_start = page_offset(page);
3586 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3587 struct writeback_control wbc = {
3588 .sync_mode = WB_SYNC_ALL,
3590 .range_start = range_start,
3591 .range_end = range_end,
3594 cifs_dbg(FYI, "Launder page: %p\n", page);
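/* synchronously write the page back if it is dirty, then drop the fscache copy */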
3596 if (clear_page_dirty_for_io(page))
3597 rc = cifs_writepage_locked(page, &wbc);
3599 cifs_fscache_invalidate_page(page, page->mapping->host);
3603 void cifs_oplock_break(struct work_struct *work)
3605 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3607 struct inode *inode = cfile->dentry->d_inode;
3608 struct cifsInodeInfo *cinode = CIFS_I(inode);
3609 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3612 if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3613 cifs_has_mand_locks(cinode)) {
cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
inode);
3616 cinode->clientCanCacheRead = false;
3619 if (inode && S_ISREG(inode->i_mode)) {
3620 if (cinode->clientCanCacheRead)
3621 break_lease(inode, O_RDONLY);
3623 break_lease(inode, O_WRONLY);
3624 rc = filemap_fdatawrite(inode->i_mapping);
3625 if (cinode->clientCanCacheRead == 0) {
3626 rc = filemap_fdatawait(inode->i_mapping);
3627 mapping_set_error(inode->i_mapping, rc);
3628 cifs_invalidate_mapping(inode);
3630 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3633 rc = cifs_push_locks(cfile);
3635 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
* Releasing a stale oplock after a recent reconnect of the SMB session
* (using a now incorrect file handle) is not a data integrity issue,
* but do not bother sending an oplock release if the session to the
* server is still disconnected, since the oplock has already been
* released by the server in that case.
3643 if (!cfile->oplock_break_cancelled) {
3644 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3646 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3650 const struct address_space_operations cifs_addr_ops = {
3651 .readpage = cifs_readpage,
3652 .readpages = cifs_readpages,
3653 .writepage = cifs_writepage,
3654 .writepages = cifs_writepages,
3655 .write_begin = cifs_write_begin,
3656 .write_end = cifs_write_end,
3657 .set_page_dirty = __set_page_dirty_nobuffers,
3658 .releasepage = cifs_release_page,
3659 .invalidatepage = cifs_invalidate_page,
3660 .launder_page = cifs_launder_page,
3664 * cifs_readpages requires the server to support a buffer large enough to
3665 * contain the header plus one complete page of data. Otherwise, we need
3666 * to leave cifs_readpages out of the address space operations.
3668 const struct address_space_operations cifs_addr_ops_smallbuf = {
3669 .readpage = cifs_readpage,
3670 .writepage = cifs_writepage,
3671 .writepages = cifs_writepages,
3672 .write_begin = cifs_write_begin,
3673 .write_end = cifs_write_end,
3674 .set_page_dirty = __set_page_dirty_nobuffers,
3675 .releasepage = cifs_release_page,
3676 .invalidatepage = cifs_invalidate_page,
3677 .launder_page = cifs_launder_page,