4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * cifs_convert_flags - translate POSIX open flags (O_ACCMODE bits) into an
 * NT desired-access mask for the SMB open request.
 * For O_RDWR we deliberately request GENERIC_READ | GENERIC_WRITE rather
 * than GENERIC_ALL, since over-requesting rights can cause spurious
 * access-denied errors on create.
 * NOTE(review): the O_RDONLY/O_WRONLY return statements are elided in this
 * excerpt — presumably GENERIC_READ / GENERIC_WRITE; confirm against full file.
 */
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
/* fallback: minimal attribute/EA/data rights when no O_ACCMODE arm matched */
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * cifs_posix_convert_flags - map local open flags to the SMB_O_* flag set
 * used by the CIFS POSIX-extensions open call (CIFSPOSIXCreate).
 * Access mode, O_CREAT/O_EXCL, O_TRUNC, O_SYNC, O_DIRECTORY, O_NOFOLLOW
 * and O_DIRECT all have direct SMB_O_* equivalents.
 */
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
79 posix_flags |= SMB_O_EXCL;
/* O_EXCL without O_CREAT is meaningless per POSIX; warn and ignore it */
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
94 posix_flags |= SMB_O_DIRECT;
/*
 * cifs_get_disposition - map O_CREAT/O_EXCL/O_TRUNC combinations onto the
 * SMB create-disposition values (see the mapping table above cifs_nt_open):
 * O_CREAT|O_EXCL -> FILE_CREATE (elided here), O_CREAT|O_TRUNC ->
 * FILE_OVERWRITE_IF, O_CREAT -> FILE_OPEN_IF, O_TRUNC -> FILE_OVERWRITE,
 * otherwise FILE_OPEN.
 */
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
/*
 * cifs_posix_open - open @full_path using the CIFS POSIX extensions.
 * On success fills *poplock and *pnetfid from the server response and,
 * unless the caller passed a NULL info buffer path, refreshes or creates
 * the inode (*pinode) from the returned FILE_UNIX_BASIC_INFO.
 * Caller supplies the translation of f_flags via cifs_posix_convert_flags.
 */
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
/* zeroed response buffer for the server's unix info reply */
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
/* apply the process umask before sending the create mode to the server */
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_sb->mnt_cifs_flags &
144 CIFS_MOUNT_MAP_SPECIAL_CHR);
145 cifs_put_tlink(tlink);
/* Type == -1 means the server returned no usable file info */
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
154 goto posix_open_ret; /* caller does not need info */
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
/* existing inode: just refresh its attributes from the server data */
167 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * cifs_nt_open - open @full_path the "NT way" (non-POSIX path), via the
 * server's ops->open.  Builds desired access and create disposition from
 * f_flags, fills @oparms and, after a successful open, refreshes the inode
 * (unix or non-unix variant of get_inode_info depending on tcon).
 */
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
183 int create_options = CREATE_NOT_DIR;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
/* protocol-specific open op is mandatory for this path */
188 if (!server->ops->open)
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
/* buffer for the returned FILE_ALL_INFO used to refresh the inode below */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
229 oparms.cifs_sb = cifs_sb;
230 oparms.desired_access = desired_access;
231 oparms.create_options = create_options;
232 oparms.disposition = disposition;
233 oparms.path = full_path;
235 oparms.reconnect = false;
237 rc = server->ops->open(xid, &oparms, oplock, buf);
/* refresh inode: unix-extension servers vs. plain NT info path */
243 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
246 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * cifs_has_mand_locks - return true if any open fid on this inode holds
 * at least one cached byte-range (mandatory-style) lock.  Walks the
 * per-fid lock lists on cinode->llist under a read-held lock_sem.
 */
255 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
257 struct cifs_fid_locks *cur;
258 bool has_locks = false;
260 down_read(&cinode->lock_sem);
261 list_for_each_entry(cur, &cinode->llist, llist) {
262 if (!list_empty(&cur->locks)) {
267 up_read(&cinode->lock_sem);
/*
 * cifs_new_fileinfo - allocate and initialize the per-open cifsFileInfo,
 * attach its lock list to the inode, register it on the tcon and inode
 * open-file lists, and resolve the final oplock level (pending lease
 * breaks and existing mandatory brlocks can downgrade it).
 * Returns the new cifsFileInfo, also stored in file->private_data.
 */
271 struct cifsFileInfo *
272 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
273 struct tcon_link *tlink, __u32 oplock)
275 struct dentry *dentry = file->f_path.dentry;
276 struct inode *inode = dentry->d_inode;
277 struct cifsInodeInfo *cinode = CIFS_I(inode);
278 struct cifsFileInfo *cfile;
279 struct cifs_fid_locks *fdlocks;
280 struct cifs_tcon *tcon = tlink_tcon(tlink);
281 struct TCP_Server_Info *server = tcon->ses->server;
283 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
287 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
/* hook this fid's (initially empty) lock list onto the inode */
293 INIT_LIST_HEAD(&fdlocks->locks);
294 fdlocks->cfile = cfile;
295 cfile->llist = fdlocks;
296 down_write(&cinode->lock_sem);
297 list_add(&fdlocks->llist, &cinode->llist);
298 up_write(&cinode->lock_sem);
301 cfile->pid = current->tgid;
302 cfile->uid = current_fsuid();
/* cfile holds references on the dentry, tlink and superblock until put */
303 cfile->dentry = dget(dentry);
304 cfile->f_flags = file->f_flags;
305 cfile->invalidHandle = false;
306 cfile->tlink = cifs_get_tlink(tlink);
307 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
308 mutex_init(&cfile->fh_mutex);
310 cifs_sb_active(inode->i_sb);
313 * If the server returned a read oplock and we have mandatory brlocks,
314 * set oplock level to None.
316 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
317 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
321 spin_lock(&cifs_file_list_lock);
/* a lease break that raced the open wins over the value we were given */
322 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
323 oplock = fid->pending_open->oplock;
324 list_del(&fid->pending_open->olist);
326 fid->purge_cache = false;
327 server->ops->set_fid(cfile, fid, oplock);
329 list_add(&cfile->tlist, &tcon->openFileList);
330 /* if readable file instance put first in list*/
331 if (file->f_mode & FMODE_READ)
332 list_add(&cfile->flist, &cinode->openFileList);
334 list_add_tail(&cfile->flist, &cinode->openFileList);
335 spin_unlock(&cifs_file_list_lock);
/* set_fid may have flagged that cached pages are now stale */
337 if (fid->purge_cache)
338 cifs_zap_mapping(inode);
340 file->private_data = cfile;
/*
 * cifsFileInfo_get - take a reference on @cifs_file under
 * cifs_file_list_lock; pairs with cifsFileInfo_put.
 */
344 struct cifsFileInfo *
345 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
347 spin_lock(&cifs_file_list_lock);
348 cifsFileInfo_get_locked(cifs_file);
349 spin_unlock(&cifs_file_list_lock);
354 * Release a reference on the file private data. This may involve closing
355 * the filehandle out on the server. Must be called without holding
356 * cifs_file_list_lock.
358 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
360 struct inode *inode = cifs_file->dentry->d_inode;
361 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
362 struct TCP_Server_Info *server = tcon->ses->server;
363 struct cifsInodeInfo *cifsi = CIFS_I(inode);
364 struct super_block *sb = inode->i_sb;
365 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
366 struct cifsLockInfo *li, *tmp;
368 struct cifs_pending_open open;
369 bool oplock_break_cancelled;
/* drop one reference; bail out early if others remain */
371 spin_lock(&cifs_file_list_lock);
372 if (--cifs_file->count > 0) {
373 spin_unlock(&cifs_file_list_lock);
377 if (server->ops->get_lease_key)
378 server->ops->get_lease_key(inode, &fid);
380 /* store open in pending opens to make sure we don't miss lease break */
381 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
383 /* remove it from the lists */
384 list_del(&cifs_file->flist);
385 list_del(&cifs_file->tlist);
387 if (list_empty(&cifsi->openFileList)) {
388 cifs_dbg(FYI, "closing last open instance for inode %p\n",
389 cifs_file->dentry->d_inode);
391 * In strict cache mode we need invalidate mapping on the last
392 * close because it may cause a error when we open this file
393 * again and get at least level II oplock.
395 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
396 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
397 cifs_set_oplock_level(cifsi, 0);
399 spin_unlock(&cifs_file_list_lock);
/* make sure no oplock-break work races with the close below */
401 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
/* only send the close to the server if the handle is still valid */
403 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
404 struct TCP_Server_Info *server = tcon->ses->server;
408 if (server->ops->close)
409 server->ops->close(xid, tcon, &cifs_file->fid);
/* finish the oplock-break bookkeeping the cancelled work would have done */
413 if (oplock_break_cancelled)
414 cifs_done_oplock_break(cifsi);
416 cifs_del_pending_open(&open);
419 * Delete any outstanding lock records. We'll lose them when the file
422 down_write(&cifsi->lock_sem);
423 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
424 list_del(&li->llist);
425 cifs_del_lock_waiters(li);
428 list_del(&cifs_file->llist->llist);
429 kfree(cifs_file->llist);
430 up_write(&cifsi->lock_sem);
/* release the references taken in cifs_new_fileinfo */
432 cifs_put_tlink(cifs_file->tlink);
433 dput(cifs_file->dentry);
434 cifs_sb_deactive(sb);
/*
 * cifs_open - VFS ->open for regular files.  Tries a POSIX-extensions open
 * first when the tcon supports it, falling back to cifs_nt_open on
 * unsupported/transient errors; then builds the cifsFileInfo and (for new
 * unix-extension creates) pushes the mode the create could not set.
 */
438 int cifs_open(struct inode *inode, struct file *file)
444 struct cifs_sb_info *cifs_sb;
445 struct TCP_Server_Info *server;
446 struct cifs_tcon *tcon;
447 struct tcon_link *tlink;
448 struct cifsFileInfo *cfile = NULL;
449 char *full_path = NULL;
450 bool posix_open_ok = false;
452 struct cifs_pending_open open;
456 cifs_sb = CIFS_SB(inode->i_sb);
457 tlink = cifs_sb_tlink(cifs_sb);
460 return PTR_ERR(tlink);
462 tcon = tlink_tcon(tlink);
463 server = tcon->ses->server;
465 full_path = build_path_from_dentry(file->f_path.dentry);
466 if (full_path == NULL) {
471 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
472 inode, file->f_flags, full_path);
/* strict-IO + O_DIRECT: switch to the direct-I/O file_operations */
474 if (file->f_flags & O_DIRECT &&
475 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
476 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
477 file->f_op = &cifs_file_direct_nobrl_ops;
479 file->f_op = &cifs_file_direct_ops;
/* try POSIX open first when the server advertises POSIX path ops */
487 if (!tcon->broken_posix_open && tcon->unix_ext &&
488 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
489 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
490 /* can not refresh inode info since size could be stale */
491 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
492 cifs_sb->mnt_file_mode /* ignored */,
493 file->f_flags, &oplock, &fid.netfid, xid);
495 cifs_dbg(FYI, "posix open succeeded\n");
496 posix_open_ok = true;
497 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
/* server rejected the op outright: stop trying POSIX opens on this tcon */
498 if (tcon->ses->serverNOS)
499 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
500 tcon->ses->serverName,
501 tcon->ses->serverNOS);
502 tcon->broken_posix_open = true;
503 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
504 (rc != -EOPNOTSUPP)) /* path not found or net err */
507 * Else fallthrough to retry open the old way on network i/o
/* record the open so a racing lease break is not missed */
512 if (server->ops->get_lease_key)
513 server->ops->get_lease_key(inode, &fid);
515 cifs_add_pending_open(&fid, tlink, &open);
517 if (!posix_open_ok) {
518 if (server->ops->get_lease_key)
519 server->ops->get_lease_key(inode, &fid);
521 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
522 file->f_flags, &oplock, &fid, xid);
524 cifs_del_pending_open(&open);
529 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* fileinfo allocation failed: close the server handle we just opened */
531 if (server->ops->close)
532 server->ops->close(xid, tcon, &fid);
533 cifs_del_pending_open(&open);
538 cifs_fscache_set_inode_cookie(inode, file);
540 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
542 * Time to set mode which we can not set earlier due to
543 * problems creating new read-only files.
545 struct cifs_unix_set_info_args args = {
546 .mode = inode->i_mode,
547 .uid = INVALID_UID, /* no change */
548 .gid = INVALID_GID, /* no change */
549 .ctime = NO_CHANGE_64,
550 .atime = NO_CHANGE_64,
551 .mtime = NO_CHANGE_64,
554 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
561 cifs_put_tlink(tlink);
565 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
568 * Try to reacquire byte range locks that were released when session
569 * to server was lost.
572 cifs_relock_file(struct cifsFileInfo *cfile)
574 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
575 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
576 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
579 down_read(&cinode->lock_sem);
580 if (cinode->can_cache_brlcks) {
581 /* can cache locks - no need to relock */
582 up_read(&cinode->lock_sem);
/* choose posix vs. mandatory push based on unix caps and mount flags */
586 if (cap_unix(tcon->ses) &&
587 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
588 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
589 rc = cifs_push_posix_locks(cfile);
591 rc = tcon->ses->server->ops->push_mand_locks(cfile);
593 up_read(&cinode->lock_sem);
/*
 * cifs_reopen_file - re-establish a server handle for @cfile after it was
 * invalidated (e.g. by reconnect).  Serialized on cfile->fh_mutex.  Tries
 * a POSIX reopen first when supported, otherwise ops->open with the
 * original flags minus O_CREAT/O_EXCL/O_TRUNC semantics; if @can_flush,
 * writes back dirty pages and refreshes inode info before resuming, then
 * re-pushes byte-range locks when this was a reconnect.
 */
598 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
603 struct cifs_sb_info *cifs_sb;
604 struct cifs_tcon *tcon;
605 struct TCP_Server_Info *server;
606 struct cifsInodeInfo *cinode;
608 char *full_path = NULL;
610 int disposition = FILE_OPEN;
611 int create_options = CREATE_NOT_DIR;
612 struct cifs_open_parms oparms;
615 mutex_lock(&cfile->fh_mutex);
/* someone else already reopened the handle while we waited */
616 if (!cfile->invalidHandle) {
617 mutex_unlock(&cfile->fh_mutex);
623 inode = cfile->dentry->d_inode;
624 cifs_sb = CIFS_SB(inode->i_sb);
625 tcon = tlink_tcon(cfile->tlink);
626 server = tcon->ses->server;
629 * Can not grab rename sem here because various ops, including those
630 * that already have the rename sem can end up causing writepage to get
631 * called and if the server was down that means we end up here, and we
632 * can never tell if the caller already has the rename_sem.
634 full_path = build_path_from_dentry(cfile->dentry);
635 if (full_path == NULL) {
637 mutex_unlock(&cfile->fh_mutex);
642 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
643 inode, cfile->f_flags, full_path);
645 if (tcon->ses->server->oplocks)
650 if (tcon->unix_ext && cap_unix(tcon->ses) &&
651 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
652 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
654 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
655 * original open. Must mask them off for a reopen.
657 unsigned int oflags = cfile->f_flags &
658 ~(O_CREAT | O_EXCL | O_TRUNC);
660 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
661 cifs_sb->mnt_file_mode /* ignored */,
662 oflags, &oplock, &cfile->fid.netfid, xid);
664 cifs_dbg(FYI, "posix reopen succeeded\n");
665 oparms.reconnect = true;
669 * fallthrough to retry open the old way on errors, especially
670 * in the reconnect path it is important to retry hard
674 desired_access = cifs_convert_flags(cfile->f_flags);
676 if (backup_cred(cifs_sb))
677 create_options |= CREATE_OPEN_BACKUP_INTENT;
679 if (server->ops->get_lease_key)
680 server->ops->get_lease_key(inode, &cfile->fid);
683 oparms.cifs_sb = cifs_sb;
684 oparms.desired_access = desired_access;
685 oparms.create_options = create_options;
686 oparms.disposition = disposition;
687 oparms.path = full_path;
688 oparms.fid = &cfile->fid;
689 oparms.reconnect = true;
692 * Can not refresh inode by passing in file_info buf to be returned by
693 * ops->open and then calling get_inode_info with returned buf since
694 * file might have write behind data that needs to be flushed and server
695 * version of file size can be stale. If we knew for sure that inode was
696 * not dirty locally we could do this.
698 rc = server->ops->open(xid, &oparms, &oplock, NULL);
699 if (rc == -ENOENT && oparms.reconnect == false) {
700 /* durable handle timeout is expired - open the file again */
701 rc = server->ops->open(xid, &oparms, &oplock, NULL);
702 /* indicate that we need to relock the file */
703 oparms.reconnect = true;
707 mutex_unlock(&cfile->fh_mutex);
708 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
709 cifs_dbg(FYI, "oplock: %d\n", oplock);
710 goto reopen_error_exit;
/* handle is valid again; safe to let other users proceed */
714 cfile->invalidHandle = false;
715 mutex_unlock(&cfile->fh_mutex);
716 cinode = CIFS_I(inode);
/* flush write-behind data so the server-side size is authoritative */
719 rc = filemap_write_and_wait(inode->i_mapping);
720 mapping_set_error(inode->i_mapping, rc);
723 rc = cifs_get_inode_info_unix(&inode, full_path,
726 rc = cifs_get_inode_info(&inode, full_path, NULL,
727 inode->i_sb, xid, NULL);
730 * Else we are writing out data to server already and could deadlock if
731 * we tried to flush data, and since we do not know if we have data that
732 * would invalidate the current end of file on the server we can not go
733 * to the server to get the new inode info.
736 server->ops->set_fid(cfile, &cfile->fid, oplock);
737 if (oparms.reconnect)
738 cifs_relock_file(cfile);
/*
 * cifs_close - VFS ->release for regular files; drops this file's
 * reference on the cifsFileInfo (which may close the server handle).
 */
746 int cifs_close(struct inode *inode, struct file *file)
748 if (file->private_data != NULL) {
749 cifsFileInfo_put(file->private_data);
750 file->private_data = NULL;
753 /* return code from the ->release op is always ignored */
/*
 * cifs_closedir - VFS ->release for directories.  Closes the search handle
 * on the server when still needed, frees any leftover network buffer from
 * an uncompleted readdir, and releases the tlink and private data.
 */
757 int cifs_closedir(struct inode *inode, struct file *file)
761 struct cifsFileInfo *cfile = file->private_data;
762 struct cifs_tcon *tcon;
763 struct TCP_Server_Info *server;
766 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
772 tcon = tlink_tcon(cfile->tlink);
773 server = tcon->ses->server;
775 cifs_dbg(FYI, "Freeing private data in close dir\n");
776 spin_lock(&cifs_file_list_lock);
/* mark handle invalid before dropping the lock, then close on the server */
777 if (server->ops->dir_needs_close(cfile)) {
778 cfile->invalidHandle = true;
779 spin_unlock(&cifs_file_list_lock);
780 if (server->ops->close_dir)
781 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
784 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
785 /* not much we can do if it fails anyway, ignore rc */
788 spin_unlock(&cifs_file_list_lock);
/* free the search-response buffer left over from a partial readdir */
790 buf = cfile->srch_inf.ntwrk_buf_start;
792 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
793 cfile->srch_inf.ntwrk_buf_start = NULL;
794 if (cfile->srch_inf.smallBuf)
795 cifs_small_buf_release(buf);
797 cifs_buf_release(buf);
800 cifs_put_tlink(cfile->tlink);
801 kfree(file->private_data);
802 file->private_data = NULL;
803 /* BB can we lock the filestruct while this is going on? */
/*
 * cifs_lock_init - allocate a cifsLockInfo for [offset, offset+length)
 * of the given @type, owned by the current thread group, with an empty
 * blocked-waiters list and initialized wait queue.
 */
808 static struct cifsLockInfo *
809 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
811 struct cifsLockInfo *lock =
812 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
815 lock->offset = offset;
816 lock->length = length;
818 lock->pid = current->tgid;
819 INIT_LIST_HEAD(&lock->blist);
820 init_waitqueue_head(&lock->block_q);
/*
 * cifs_del_lock_waiters - detach every lock blocked on @lock and wake its
 * waiter so it can retry acquiring the range.
 */
825 cifs_del_lock_waiters(struct cifsLockInfo *lock)
827 struct cifsLockInfo *li, *tmp;
828 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
829 list_del_init(&li->blist);
830 wake_up(&li->block_q);
834 #define CIFS_LOCK_OP 0
835 #define CIFS_READ_OP 1
836 #define CIFS_WRITE_OP 2
838 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * cifs_find_fid_lock_conflict - scan one fid's cached lock list for a lock
 * overlapping [offset, offset+length) that conflicts with the requested
 * @type for the given operation kind (@rw_check, CIFS_*_OP above).
 * Same-owner/same-fid locks are compatible except that a shared lock
 * still blocks a write through the same fid.
 */
840 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
841 __u64 length, __u8 type, struct cifsFileInfo *cfile,
842 struct cifsLockInfo **conf_lock, int rw_check)
844 struct cifsLockInfo *li;
845 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
846 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
848 list_for_each_entry(li, &fdlocks->locks, llist) {
/* ranges that do not overlap can never conflict */
849 if (offset + length <= li->offset ||
850 offset >= li->offset + li->length)
852 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
853 server->ops->compare_fids(cfile, cur_cfile)) {
854 /* shared lock prevents write op through the same fid */
855 if (!(li->type & server->vals->shared_lock_type) ||
856 rw_check != CIFS_WRITE_OP)
/* shared request only conflicts with a foreign or differently-typed lock */
859 if ((type & server->vals->shared_lock_type) &&
860 ((server->ops->compare_fids(cfile, cur_cfile) &&
861 current->tgid == li->pid) || type == li->type))
/*
 * cifs_find_lock_conflict - check every fid's lock list on the inode for a
 * conflict with the requested range/type, delegating the per-fid check to
 * cifs_find_fid_lock_conflict.  Caller must hold cinode->lock_sem.
 */
871 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
872 __u8 type, struct cifsLockInfo **conf_lock,
876 struct cifs_fid_locks *cur;
877 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 list_for_each_entry(cur, &cinode->llist, llist) {
880 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
881 cfile, conf_lock, rw_check);
890 * Check if there is another lock that prevents us to set the lock (mandatory
891 * style). If such a lock exists, update the flock structure with its
892 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
893 * or leave it the same if we can't. Returns 0 if we don't need to request to
894 * the server or 1 otherwise.
897 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
898 __u8 type, struct file_lock *flock)
901 struct cifsLockInfo *conf_lock;
902 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
903 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
906 down_read(&cinode->lock_sem);
908 exist = cifs_find_lock_conflict(cfile, offset, length, type,
909 &conf_lock, CIFS_LOCK_OP);
/* conflict found: report the conflicting lock's range/owner/type to caller */
911 flock->fl_start = conf_lock->offset;
912 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
913 flock->fl_pid = conf_lock->pid;
914 if (conf_lock->type & server->vals->shared_lock_type)
915 flock->fl_type = F_RDLCK;
917 flock->fl_type = F_WRLCK;
918 } else if (!cinode->can_cache_brlcks)
921 flock->fl_type = F_UNLCK;
923 up_read(&cinode->lock_sem);
/*
 * cifs_lock_add - append @lock to this fid's cached lock list under a
 * write-held lock_sem (no conflict checking; see cifs_lock_add_if).
 */
928 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
930 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
931 down_write(&cinode->lock_sem);
932 list_add_tail(&lock->llist, &cfile->llist->locks);
933 up_write(&cinode->lock_sem);
937 * Set the byte-range lock (mandatory style). Returns:
938 * 1) 0, if we set the lock and don't need to request to the server;
939 * 2) 1, if no locks prevent us but we need to request to the server;
940 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
943 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
946 struct cifsLockInfo *conf_lock;
947 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
953 down_write(&cinode->lock_sem);
955 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
956 lock->type, &conf_lock, CIFS_LOCK_OP);
/* no conflict and we may cache: take the lock purely locally */
957 if (!exist && cinode->can_cache_brlcks) {
958 list_add_tail(&lock->llist, &cfile->llist->locks);
959 up_write(&cinode->lock_sem);
/* conflict + wait: queue on the conflicting lock and sleep until woken */
968 list_add_tail(&lock->blist, &conf_lock->blist);
969 up_write(&cinode->lock_sem);
970 rc = wait_event_interruptible(lock->block_q,
971 (lock->blist.prev == &lock->blist) &&
972 (lock->blist.next == &lock->blist));
/* re-take the semaphore and retry (or clean up after a signal) */
975 down_write(&cinode->lock_sem);
976 list_del_init(&lock->blist);
979 up_write(&cinode->lock_sem);
984 * Check if there is another lock that prevents us to set the lock (posix
985 * style). If such a lock exists, update the flock structure with its
986 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
987 * or leave it the same if we can't. Returns 0 if we don't need to request to
988 * the server or 1 otherwise.
991 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
994 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
995 unsigned char saved_type = flock->fl_type;
997 if ((flock->fl_flags & FL_POSIX) == 0)
1000 down_read(&cinode->lock_sem);
/* let the generic VFS posix lock code do the local conflict test */
1001 posix_test_lock(file, flock);
/* no local conflict but locks aren't cacheable: must still ask the server */
1003 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1004 flock->fl_type = saved_type;
1008 up_read(&cinode->lock_sem);
1013 * Set the byte-range lock (posix style). Returns:
1014 * 1) 0, if we set the lock and don't need to request to the server;
1015 * 2) 1, if we need to request to the server;
1016 * 3) <0, if the error occurs while setting the lock.
1019 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1021 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1024 if ((flock->fl_flags & FL_POSIX) == 0)
1028 down_write(&cinode->lock_sem);
/* locks can't be cached locally: the caller must go to the server */
1029 if (!cinode->can_cache_brlcks) {
1030 up_write(&cinode->lock_sem);
1034 rc = posix_lock_file(file, flock, NULL);
1035 up_write(&cinode->lock_sem);
/* blocked on another lock: wait for it to be released, then loop */
1036 if (rc == FILE_LOCK_DEFERRED) {
1037 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1040 posix_unblock_lock(flock);
/*
 * cifs_push_mandatory_locks - send all cached byte-range locks for @cfile
 * to the server, batching them into LOCKING_ANDX_RANGE arrays sized by the
 * server's maxBuf, one pass for exclusive and one for shared lock types.
 */
1046 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1049 int rc = 0, stored_rc;
1050 struct cifsLockInfo *li, *tmp;
1051 struct cifs_tcon *tcon;
1052 unsigned int num, max_num, max_buf;
1053 LOCKING_ANDX_RANGE *buf, *cur;
1054 int types[] = {LOCKING_ANDX_LARGE_FILES,
1055 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1059 tcon = tlink_tcon(cfile->tlink);
1062 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1063 * and check it for zero before using.
1065 max_buf = tcon->ses->server->maxBuf;
/* how many lock ranges fit in one SMB after the header */
1071 max_num = (max_buf - sizeof(struct smb_hdr)) /
1072 sizeof(LOCKING_ANDX_RANGE);
1073 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
/* pass 0: exclusive locks, pass 1: shared locks */
1079 for (i = 0; i < 2; i++) {
1082 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1083 if (li->type != types[i])
1085 cur->Pid = cpu_to_le16(li->pid);
1086 cur->LengthLow = cpu_to_le32((u32)li->length);
1087 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1088 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1089 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* buffer full: flush this batch to the server and start a new one */
1090 if (++num == max_num) {
1091 stored_rc = cifs_lockv(xid, tcon,
1093 (__u8)li->type, 0, num,
/* flush any remaining partial batch for this lock type */
1104 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1105 (__u8)types[i], 0, num, buf);
1116 /* copied from fs/locks.c with a name change */
1117 #define cifs_for_each_lock(inode, lockp) \
1118 for (lockp = &inode->i_flock; *lockp != NULL; \
1119 lockp = &(*lockp)->fl_next)
/* bookkeeping entry for one posix lock queued for transmission */
1121 struct lock_to_push {
1122 struct list_head llist;
/*
 * cifs_push_posix_locks - replay all cached FL_POSIX locks on the inode to
 * the server.  Counts the locks under i_lock, pre-allocates lock_to_push
 * entries outside the spinlock, then walks the list again to fill them and
 * finally issues one CIFSSMBPosixLock per entry.
 */
1131 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1133 struct inode *inode = cfile->dentry->d_inode;
1134 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1135 struct file_lock *flock, **before;
1136 unsigned int count = 0, i = 0;
1137 int rc = 0, xid, type;
1138 struct list_head locks_to_send, *el;
1139 struct lock_to_push *lck, *tmp;
/* first pass: count the posix locks we will need to push */
1144 spin_lock(&inode->i_lock);
1145 cifs_for_each_lock(inode, before) {
1146 if ((*before)->fl_flags & FL_POSIX)
1149 spin_unlock(&inode->i_lock);
1151 INIT_LIST_HEAD(&locks_to_send);
1154 * Allocating count locks is enough because no FL_POSIX locks can be
1155 * added to the list while we are holding cinode->lock_sem that
1156 * protects locking operations of this inode.
1158 for (; i < count; i++) {
1159 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1164 list_add_tail(&lck->llist, &locks_to_send);
/* second pass: copy each posix lock into a pre-allocated entry */
1167 el = locks_to_send.next;
1168 spin_lock(&inode->i_lock);
1169 cifs_for_each_lock(inode, before) {
1171 if ((flock->fl_flags & FL_POSIX) == 0)
1173 if (el == &locks_to_send) {
1175 * The list ended. We don't have enough allocated
1176 * structures - something is really wrong.
1178 cifs_dbg(VFS, "Can't push all brlocks!\n");
1181 length = 1 + flock->fl_end - flock->fl_start;
1182 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1186 lck = list_entry(el, struct lock_to_push, llist);
1187 lck->pid = flock->fl_pid;
1188 lck->netfid = cfile->fid.netfid;
1189 lck->length = length;
1191 lck->offset = flock->fl_start;
1194 spin_unlock(&inode->i_lock);
/* now that no spinlock is held, send each lock to the server */
1196 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1199 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1200 lck->offset, lck->length, NULL,
1204 list_del(&lck->llist);
/* error path: free any entries that were never sent */
1212 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1213 list_del(&lck->llist);
/*
 * cifs_push_locks - flush all locally-cached byte-range locks to the
 * server (posix or mandatory flavor depending on capabilities/mount
 * options) and mark the inode as no longer able to cache brlocks.
 */
1220 cifs_push_locks(struct cifsFileInfo *cfile)
1222 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1223 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1224 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1227 /* we are going to update can_cache_brlcks here - need a write access */
1228 down_write(&cinode->lock_sem);
1229 if (!cinode->can_cache_brlcks) {
1230 up_write(&cinode->lock_sem);
1234 if (cap_unix(tcon->ses) &&
1235 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1236 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1237 rc = cifs_push_posix_locks(cfile);
1239 rc = tcon->ses->server->ops->push_mand_locks(cfile);
/* from here on, new locks must be sent to the server immediately */
1241 cinode->can_cache_brlcks = false;
1242 up_write(&cinode->lock_sem);
/*
 * cifs_read_flock - decode a VFS file_lock into the CIFS lock parameters:
 * *type gets the server's lock-type bits, and (per the elided assignments)
 * *lock/*unlock/*wait_flag are set from the flock type and FL_SLEEP.
 * Unknown flags and unimplemented classes (FL_ACCESS, FL_LEASE) are only
 * logged.
 */
1247 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1248 bool *wait_flag, struct TCP_Server_Info *server)
1250 if (flock->fl_flags & FL_POSIX)
1251 cifs_dbg(FYI, "Posix\n");
1252 if (flock->fl_flags & FL_FLOCK)
1253 cifs_dbg(FYI, "Flock\n");
1254 if (flock->fl_flags & FL_SLEEP) {
1255 cifs_dbg(FYI, "Blocking lock\n");
1258 if (flock->fl_flags & FL_ACCESS)
1259 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1260 if (flock->fl_flags & FL_LEASE)
1261 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1262 if (flock->fl_flags &
1263 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1264 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1265 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1267 *type = server->vals->large_lock_type;
1268 if (flock->fl_type == F_WRLCK) {
1269 cifs_dbg(FYI, "F_WRLCK\n");
1270 *type |= server->vals->exclusive_lock_type;
1272 } else if (flock->fl_type == F_UNLCK) {
1273 cifs_dbg(FYI, "F_UNLCK\n");
1274 *type |= server->vals->unlock_lock_type;
1276 /* Check if unlock includes more than one lock range */
1277 } else if (flock->fl_type == F_RDLCK) {
1278 cifs_dbg(FYI, "F_RDLCK\n");
1279 *type |= server->vals->shared_lock_type;
1281 } else if (flock->fl_type == F_EXLCK) {
1282 cifs_dbg(FYI, "F_EXLCK\n");
1283 *type |= server->vals->exclusive_lock_type;
1285 } else if (flock->fl_type == F_SHLCK) {
1286 cifs_dbg(FYI, "F_SHLCK\n");
1287 *type |= server->vals->shared_lock_type;
1290 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * cifs_getlk - F_GETLK handler.  Tests the lock locally first (posix or
 * mandatory cache); if the server must be consulted it probes by taking a
 * temporary lock of the requested type and, on success, unlocking it again
 * and reporting F_UNLCK.  A failed exclusive probe is retried as shared to
 * distinguish F_WRLCK from F_RDLCK conflicts.
 */
1294 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1295 bool wait_flag, bool posix_lck, unsigned int xid)
1298 __u64 length = 1 + flock->fl_end - flock->fl_start;
1299 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1300 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1301 struct TCP_Server_Info *server = tcon->ses->server;
1302 __u16 netfid = cfile->fid.netfid;
1305 int posix_lock_type;
/* posix path: local test, then ask the server with CIFSSMBPosixLock */
1307 rc = cifs_posix_lock_test(file, flock);
1311 if (type & server->vals->shared_lock_type)
1312 posix_lock_type = CIFS_RDLCK;
1314 posix_lock_type = CIFS_WRLCK;
1315 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1316 flock->fl_start, length, flock,
1317 posix_lock_type, wait_flag);
/* mandatory path: consult the locally cached lock lists first */
1321 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1325 /* BB we could chain these into one lock request BB */
1326 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
/* probe succeeded: release the temporary lock and report no conflict */
1329 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1331 flock->fl_type = F_UNLCK;
1333 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* shared request already failed: a writer must hold the range */
1338 if (type & server->vals->shared_lock_type) {
1339 flock->fl_type = F_WRLCK;
/* retry the probe as a shared lock to classify the conflict */
1343 type &= ~server->vals->exclusive_lock_type;
1345 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1346 type | server->vals->shared_lock_type,
1349 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1350 type | server->vals->shared_lock_type, 0, 1, false);
1351 flock->fl_type = F_RDLCK;
1353 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1356 flock->fl_type = F_WRLCK;
/*
 * Move every entry from @source onto @dest (used to restore lock
 * entries to a file's list when an unlock request fails).
 */
1362 cifs_move_llist(struct list_head *source, struct list_head *dest)
1364 struct list_head *li, *tmp;
/* _safe variant: list_move() unlinks li, so tmp holds the next node. */
1365 list_for_each_safe(li, tmp, source)
1366 list_move(li, dest);
/*
 * Free every cifsLockInfo entry on @llist, waking any waiters blocked
 * on each lock before unlinking it.
 * NOTE(review): the kfree of each entry appears on a line elided from
 * this excerpt — confirm against the full source.
 */
1370 cifs_free_llist(struct list_head *llist)
1372 struct cifsLockInfo *li, *tmp;
1373 list_for_each_entry_safe(li, tmp, llist, llist) {
1374 cifs_del_lock_waiters(li);
1375 list_del(&li->llist);
/*
 * Unlock all cached mandatory byte-range locks that fall inside the
 * range described by @flock, batching up to max_num ranges per
 * LOCKING_ANDX request via cifs_lockv().
 *
 * Locks are moved onto a temporary list before each wire request so
 * they can be restored to the file's list if the request fails.
 * Runs two passes: non-shared then shared lock types (types[]).
 */
1381 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1384 int rc = 0, stored_rc;
1385 int types[] = {LOCKING_ANDX_LARGE_FILES,
1386 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1388 unsigned int max_num, num, max_buf;
1389 LOCKING_ANDX_RANGE *buf, *cur;
1390 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1391 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1392 struct cifsLockInfo *li, *tmp;
/* fl_end is inclusive, hence the +1. */
1393 __u64 length = 1 + flock->fl_end - flock->fl_start;
1394 struct list_head tmp_llist;
1396 INIT_LIST_HEAD(&tmp_llist);
1399 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1400 * and check it for zero before using.
1402 max_buf = tcon->ses->server->maxBuf;
/* How many LOCKING_ANDX_RANGE entries fit in one SMB buffer. */
1406 max_num = (max_buf - sizeof(struct smb_hdr)) /
1407 sizeof(LOCKING_ANDX_RANGE);
1408 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
/* lock_sem write-held: we mutate the file's lock list below. */
1412 down_write(&cinode->lock_sem);
1413 for (i = 0; i < 2; i++) {
1416 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* Skip locks not fully contained in the unlock range. */
1417 if (flock->fl_start > li->offset ||
1418 (flock->fl_start + length) <
1419 (li->offset + li->length))
/* Only unlock locks owned by this thread group and of this pass's type. */
1421 if (current->tgid != li->pid)
1423 if (types[i] != li->type)
1425 if (cinode->can_cache_brlcks) {
1427 * We can cache brlock requests - simply remove
1428 * a lock from the file's list.
1430 list_del(&li->llist);
1431 cifs_del_lock_waiters(li);
/* Fill the next wire range entry (64-bit offset/length split). */
1435 cur->Pid = cpu_to_le16(li->pid);
1436 cur->LengthLow = cpu_to_le32((u32)li->length);
1437 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1438 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1439 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1441 * We need to save a lock here to let us add it again to
1442 * the file's list if the unlock range request fails on
1445 list_move(&li->llist, &tmp_llist);
/* Buffer full: flush this batch of unlock ranges to the server. */
1446 if (++num == max_num) {
1447 stored_rc = cifs_lockv(xid, tcon,
1449 li->type, num, 0, buf);
1452 * We failed on the unlock range
1453 * request - add all locks from the tmp
1454 * list to the head of the file's list.
1456 cifs_move_llist(&tmp_llist,
1457 &cfile->llist->locks);
1461 * The unlock range request succeed -
1462 * free the tmp list.
1464 cifs_free_llist(&tmp_llist);
/* Flush any partial final batch for this lock type. */
1471 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1472 types[i], num, 0, buf);
1474 cifs_move_llist(&tmp_llist,
1475 &cfile->llist->locks);
1478 cifs_free_llist(&tmp_llist);
1482 up_write(&cinode->lock_sem);
/*
 * Handle an F_SETLK/F_SETLKW-style request: set or clear a byte-range
 * lock on the server and track it locally.
 *
 * POSIX-capable mounts go through cifs_posix_lock_set() +
 * CIFSSMBPosixLock(); mandatory-lock mounts allocate a cifsLockInfo,
 * check for conflicts, send the lock via server->ops->mand_lock, and
 * route unlocks through server->ops->mand_unlock_range.
 */
1488 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1489 bool wait_flag, bool posix_lck, int lock, int unlock,
/* fl_end is inclusive, hence the +1. */
1493 __u64 length = 1 + flock->fl_end - flock->fl_start;
1494 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1495 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1496 struct TCP_Server_Info *server = tcon->ses->server;
1497 struct inode *inode = cfile->dentry->d_inode;
1500 int posix_lock_type;
/* POSIX extensions path: record locally, then tell the server. */
1502 rc = cifs_posix_lock_set(file, flock);
1506 if (type & server->vals->shared_lock_type)
1507 posix_lock_type = CIFS_RDLCK;
1509 posix_lock_type = CIFS_WRLCK;
1512 posix_lock_type = CIFS_UNLCK;
1514 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1515 current->tgid, flock->fl_start, length,
1516 NULL, posix_lock_type, wait_flag);
/* Mandatory-lock path. */
1521 struct cifsLockInfo *lock;
1523 lock = cifs_lock_init(flock->fl_start, length, type);
/* Check local conflicts / queue behind existing waiters. */
1527 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1536 * Windows 7 server can delay breaking lease from read to None
1537 * if we set a byte-range lock on a file - break it explicitly
1538 * before sending the lock to the server to be sure the next
1539 * read won't conflict with non-overlapted locks due to
/* Read-cached but not write-cached: drop cached pages + oplock now. */
1542 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1543 CIFS_CACHE_READ(CIFS_I(inode))) {
1544 cifs_zap_mapping(inode);
1545 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1547 CIFS_I(inode)->oplock = 0;
1550 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1551 type, 1, 0, wait_flag);
/* Server accepted the lock: remember it in the file's lock list. */
1557 cifs_lock_add(cfile, lock);
1559 rc = server->ops->mand_unlock_range(cfile, flock, xid);
/* Keep the local VFS posix lock state in sync on success. */
1562 if (flock->fl_flags & FL_POSIX)
1563 posix_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point for CIFS files.
 *
 * Parses the request with cifs_read_flock(), decides whether the mount
 * supports POSIX (unix extension) byte-range locks, then dispatches to
 * cifs_getlk() for queries or cifs_setlk() for lock/unlock requests.
 */
1567 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1570 int lock = 0, unlock = 0;
1571 bool wait_flag = false;
1572 bool posix_lck = false;
1573 struct cifs_sb_info *cifs_sb;
1574 struct cifs_tcon *tcon;
1575 struct cifsInodeInfo *cinode;
1576 struct cifsFileInfo *cfile;
1583 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1584 cmd, flock->fl_flags, flock->fl_type,
1585 flock->fl_start, flock->fl_end);
1587 cfile = (struct cifsFileInfo *)file->private_data;
1588 tcon = tlink_tcon(cfile->tlink);
/* Decode flags/type into CIFS terms (also sets lock/unlock/wait_flag). */
1590 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1593 cifs_sb = CIFS_FILE_SB(file);
1594 netfid = cfile->fid.netfid;
1595 cinode = CIFS_I(file_inode(file));
/* POSIX brlocks require unix extensions, FCNTL cap, and no nobrl mount opt. */
1597 if (cap_unix(tcon->ses) &&
1598 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1599 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1602 * BB add code here to normalize offset and length to account for
1603 * negative length which we can not accept over the wire.
1605 if (IS_GETLK(cmd)) {
1606 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1611 if (!lock && !unlock) {
1613 * if no lock or unlock then nothing to do since we do not
1620 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1627 * update the file size (if needed) after a write. Should be called with
1628 * the inode->i_lock held
1631 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1632 unsigned int bytes_written)
1634 loff_t end_of_write = offset + bytes_written;
/* Only ever grows server_eof; shrinking is handled elsewhere. */
1636 if (end_of_write > cifsi->server_eof)
1637 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data to the server
 * at *@offset, retrying on -EAGAIN (reopening an invalidated handle as
 * needed) and chunking by server->ops->wp_retry_size().
 *
 * On success advances *offset, updates the cached EOF and i_size, and
 * returns the total number of bytes written.
 */
1641 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1642 size_t write_size, loff_t *offset)
1645 unsigned int bytes_written = 0;
1646 unsigned int total_written;
1647 struct cifs_sb_info *cifs_sb;
1648 struct cifs_tcon *tcon;
1649 struct TCP_Server_Info *server;
1651 struct dentry *dentry = open_file->dentry;
1652 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1653 struct cifs_io_parms io_parms;
1655 cifs_sb = CIFS_SB(dentry->d_sb);
1657 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1658 write_size, *offset, dentry);
1660 tcon = tlink_tcon(open_file->tlink);
1661 server = tcon->ses->server;
/* This path requires a synchronous write op for the dialect. */
1663 if (!server->ops->sync_write)
/* Outer loop advances through the buffer one server-sized chunk at a time. */
1668 for (total_written = 0; write_size > total_written;
1669 total_written += bytes_written) {
/* Inner loop retries a single chunk while the server says -EAGAIN. */
1671 while (rc == -EAGAIN) {
1675 if (open_file->invalidHandle) {
1676 /* we could deadlock if we called
1677 filemap_fdatawait from here so tell
1678 reopen_file not to flush data to
1680 rc = cifs_reopen_file(open_file, false);
1685 len = min(server->ops->wp_retry_size(dentry->d_inode),
1686 (unsigned int)write_size - total_written);
1687 /* iov[0] is reserved for smb header */
1688 iov[1].iov_base = (char *)write_data + total_written;
1689 iov[1].iov_len = len;
1691 io_parms.tcon = tcon;
1692 io_parms.offset = *offset;
1693 io_parms.length = len;
1694 rc = server->ops->sync_write(xid, &open_file->fid,
1695 &io_parms, &bytes_written, iov, 1);
/* Error or zero-byte write: stop (short write would otherwise loop). */
1697 if (rc || (bytes_written == 0)) {
/* i_lock protects the server_eof update against concurrent writers. */
1705 spin_lock(&dentry->d_inode->i_lock);
1706 cifs_update_eof(cifsi, *offset, bytes_written);
1707 spin_unlock(&dentry->d_inode->i_lock);
1708 *offset += bytes_written;
1712 cifs_stats_bytes_written(tcon, total_written);
/* Extend the VFS-visible size if we wrote past it. */
1714 if (total_written > 0) {
1715 spin_lock(&dentry->d_inode->i_lock);
1716 if (*offset > dentry->d_inode->i_size)
1717 i_size_write(dentry->d_inode, *offset);
1718 spin_unlock(&dentry->d_inode->i_lock);
1720 mark_inode_dirty_sync(dentry->d_inode);
1722 return total_written;
/*
 * Find an open file handle on @cifs_inode that is readable and not
 * invalidated, optionally restricted to the current fsuid (multiuser
 * mounts).  Returns a referenced cifsFileInfo (caller must put it),
 * or NULL if none is found.
 */
1725 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1728 struct cifsFileInfo *open_file = NULL;
1729 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1731 /* only filter by fsuid on multiuser mounts */
1732 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1735 spin_lock(&cifs_file_list_lock);
1736 /* we could simply get the first_list_entry since write-only entries
1737 are always at the end of the list but since the first entry might
1738 have a close pending, we go through the whole list */
1739 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1740 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1742 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1743 if (!open_file->invalidHandle) {
1744 /* found a good file */
1745 /* lock it so it will not be closed on us */
/* _locked variant: we already hold cifs_file_list_lock here. */
1746 cifsFileInfo_get_locked(open_file);
1747 spin_unlock(&cifs_file_list_lock);
1749 } /* else might as well continue, and look for
1750 another, or simply have the caller reopen it
1751 again rather than trying to fix this handle */
1752 } else /* write only file */
1753 break; /* write only files are last so must be done */
1755 spin_unlock(&cifs_file_list_lock);
/*
 * Find a writable open handle on @cifs_inode, preferring one owned by
 * the current thread group.  If only an invalidated handle exists, try
 * to reopen it (up to MAX_REOPEN_ATT attempts) before giving up.
 * Returns a referenced cifsFileInfo or NULL.
 */
1759 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1762 struct cifsFileInfo *open_file, *inv_file = NULL;
1763 struct cifs_sb_info *cifs_sb;
1764 bool any_available = false;
1766 unsigned int refind = 0;
1768 /* Having a null inode here (because mapping->host was set to zero by
1769 the VFS or MM) should not happen but we had reports of on oops (due to
1770 it being zero) during stress testcases so we need to check for it */
1772 if (cifs_inode == NULL) {
1773 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1778 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1780 /* only filter by fsuid on multiuser mounts */
1781 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1784 spin_lock(&cifs_file_list_lock);
/* Bail out after too many reopen attempts to avoid looping forever. */
1786 if (refind > MAX_REOPEN_ATT) {
1787 spin_unlock(&cifs_file_list_lock);
1790 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* First pass: only handles belonging to this thread group. */
1791 if (!any_available && open_file->pid != current->tgid)
1793 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1796 if (!open_file->invalidHandle) {
1797 /* found a good writable file */
1798 cifsFileInfo_get_locked(open_file);
1799 spin_unlock(&cifs_file_list_lock);
/* Remember an invalidated candidate in case nothing better shows up. */
1803 inv_file = open_file;
1807 /* couldn't find useable FH with same pid, try any available */
1808 if (!any_available) {
1809 any_available = true;
1810 goto refind_writable;
/* Fall back to the invalidated handle: take a ref and try to reopen it. */
1814 any_available = false;
1815 cifsFileInfo_get_locked(inv_file);
1818 spin_unlock(&cifs_file_list_lock);
1821 rc = cifs_reopen_file(inv_file, false);
/* Reopen failed: demote this handle to list tail, drop ref, retry scan. */
1825 spin_lock(&cifs_file_list_lock);
1826 list_move_tail(&inv_file->flist,
1827 &cifs_inode->openFileList);
1828 spin_unlock(&cifs_file_list_lock);
1829 cifsFileInfo_put(inv_file);
1830 spin_lock(&cifs_file_list_lock);
1832 goto refind_writable;
/*
 * Write the byte range [from, to) of @page to the server using any
 * writable handle on the inode.  Clamps the range so it never extends
 * the file, and silently succeeds if the page now lies entirely past
 * EOF (racing with truncate).
 */
1839 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1841 struct address_space *mapping = page->mapping;
1842 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1845 int bytes_written = 0;
1846 struct inode *inode;
1847 struct cifsFileInfo *open_file;
1849 if (!mapping || !mapping->host)
1852 inode = page->mapping->host;
1854 offset += (loff_t)from;
/* NOTE(review): kmap with no visible kunmap here — the matching kunmap
 * is presumably on a line elided from this excerpt; confirm. */
1855 write_data = kmap(page);
/* Sanity-check the requested sub-page range. */
1858 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1863 /* racing with truncate? */
1864 if (offset > mapping->host->i_size) {
1866 return 0; /* don't care */
1869 /* check to make sure that we are not extending the file */
1870 if (mapping->host->i_size - offset < (loff_t)to)
1871 to = (unsigned)(mapping->host->i_size - offset);
1873 open_file = find_writable_file(CIFS_I(mapping->host), false);
1875 bytes_written = cifs_write(open_file, open_file->pid,
1876 write_data, to - from, &offset);
1877 cifsFileInfo_put(open_file);
1878 /* Does mm or vfs already set times? */
1879 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1880 if ((bytes_written > 0) && (offset))
1882 else if (bytes_written < 0)
1885 cifs_dbg(FYI, "No writeable filehandles for inode\n");
/*
 * Allocate a cifs_writedata sized for up to @tofind pages and fill its
 * page array with dirty pages found in @mapping starting at *@index.
 * *@found_pages is incremented by the number of pages collected.
 */
1893 static struct cifs_writedata *
1894 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1895 pgoff_t end, pgoff_t *index,
1896 unsigned int *found_pages)
1898 unsigned int nr_pages;
1899 struct page **pages;
1900 struct cifs_writedata *wdata;
1902 wdata = cifs_writedata_alloc((unsigned int)tofind,
1903 cifs_writev_complete);
1908 * find_get_pages_tag seems to return a max of 256 on each
1909 * iteration, so we must call it several times in order to
1910 * fill the array or the wsize is effectively limited to
1911 * 256 * PAGE_CACHE_SIZE.
1914 pages = wdata->pages;
/* Repeated lookups; each call advances *index past the pages returned. */
1916 nr_pages = find_get_pages_tag(mapping, index,
1917 PAGECACHE_TAG_DIRTY, tofind,
1919 *found_pages += nr_pages;
1922 } while (nr_pages && tofind && *index <= end);
/*
 * Lock, validate and mark-for-writeback a consecutive run of the pages
 * gathered in @wdata, compacting the kept pages to the front of
 * wdata->pages.  Pages that were truncated, re-dirtied, out of range,
 * non-consecutive, or past EOF are dropped.  Returns the number of
 * pages kept (return statement elided in this excerpt).
 */
1928 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1929 struct address_space *mapping,
1930 struct writeback_control *wbc,
1931 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1933 unsigned int nr_pages = 0, i;
1936 for (i = 0; i < found_pages; i++) {
1937 page = wdata->pages[i];
1939 * At this point we hold neither mapping->tree_lock nor
1940 * lock on the page itself: the page may be truncated or
1941 * invalidated (changing page->mapping to NULL), or even
1942 * swizzled back from swapper_space to tmpfs file
/* First page may block on lock; the rest use trylock to avoid stalls. */
1948 else if (!trylock_page(page))
1951 if (unlikely(page->mapping != mapping)) {
1956 if (!wbc->range_cyclic && page->index > end) {
/* Only accept a strictly consecutive run of page indices. */
1962 if (*next && (page->index != *next)) {
1963 /* Not next consecutive page */
1968 if (wbc->sync_mode != WB_SYNC_NONE)
1969 wait_on_page_writeback(page);
1971 if (PageWriteback(page) ||
1972 !clear_page_dirty_for_io(page)) {
1978 * This actually clears the dirty bit in the radix tree.
1979 * See cifs_writepage() for more commentary.
1981 set_page_writeback(page);
/* Page entirely beyond EOF: nothing to write for it. */
1982 if (page_offset(page) >= i_size_read(mapping->host)) {
1985 end_page_writeback(page);
1989 wdata->pages[i] = page;
1990 *next = page->index + 1;
1994 /* reset index to refind any pages skipped */
1996 *index = wdata->pages[0]->index + 1;
1998 /* put any pages we aren't going to use */
1999 for (i = nr_pages; i < found_pages; i++) {
2000 page_cache_release(wdata->pages[i]);
2001 wdata->pages[i] = NULL;
/*
 * Finish filling in @wdata (offset, sizes, tail size, target handle)
 * and submit it with the dialect's async_writev op.  On failure the
 * collected pages are unlocked before returning.
 */
2008 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2009 struct address_space *mapping, struct writeback_control *wbc)
2012 struct TCP_Server_Info *server;
2015 wdata->sync_mode = wbc->sync_mode;
2016 wdata->nr_pages = nr_pages;
2017 wdata->offset = page_offset(wdata->pages[0]);
2018 wdata->pagesz = PAGE_CACHE_SIZE;
/* Last page may be partial if it straddles EOF. */
2019 wdata->tailsz = min(i_size_read(mapping->host) -
2020 page_offset(wdata->pages[nr_pages - 1]),
2021 (loff_t)PAGE_CACHE_SIZE);
2022 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
/* Drop any stale handle reference before looking up a fresh one. */
2024 if (wdata->cfile != NULL)
2025 cifsFileInfo_put(wdata->cfile);
2026 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2027 if (!wdata->cfile) {
2028 cifs_dbg(VFS, "No writable handles for inode\n");
2031 wdata->pid = wdata->cfile->pid;
2032 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2033 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2036 for (i = 0; i < nr_pages; ++i)
2037 unlock_page(wdata->pages[i]);
/*
 * ->writepages implementation: push dirty pages to the server in
 * wsize-sized batches.  Reserves send credits per batch, gathers and
 * prepares consecutive dirty pages, submits them asynchronously, and
 * handles retry (-EAGAIN), error propagation and wrap-around for
 * range_cyclic writeback.
 */
2042 static int cifs_writepages(struct address_space *mapping,
2043 struct writeback_control *wbc)
2045 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2046 struct TCP_Server_Info *server;
2047 bool done = false, scanned = false, range_whole = false;
2049 struct cifs_writedata *wdata;
2053 * If wsize is smaller than the page cache size, default to writing
2054 * one page at a time via cifs_writepage
2056 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2057 return generic_writepages(mapping, wbc);
2059 if (wbc->range_cyclic) {
2060 index = mapping->writeback_index; /* Start from prev offset */
2063 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2064 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2065 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2069 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2071 while (!done && index <= end) {
2072 unsigned int i, nr_pages, found_pages, wsize, credits;
2073 pgoff_t next = 0, tofind, saved_index = index;
/* Block until enough send credits for up to one wsize write. */
2075 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
/* -1/+1 keeps tofind within wsize worth of pages and >= 1. */
2080 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2082 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2086 add_credits_and_wake_if(server, credits, 0);
2090 if (found_pages == 0) {
2091 kref_put(&wdata->refcount, cifs_writedata_release);
2092 add_credits_and_wake_if(server, credits, 0);
2096 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2097 end, &index, &next, &done);
2099 /* nothing to write? */
2100 if (nr_pages == 0) {
2101 kref_put(&wdata->refcount, cifs_writedata_release);
2102 add_credits_and_wake_if(server, credits, 0);
2106 wdata->credits = credits;
2108 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2110 /* send failure -- clean up the mess */
2112 add_credits_and_wake_if(server, wdata->credits, 0);
2113 for (i = 0; i < nr_pages; ++i) {
/* -EAGAIN: page stays dirty for retry; other errors mark it failed. */
2115 redirty_page_for_writepage(wbc,
2118 SetPageError(wdata->pages[i]);
2119 end_page_writeback(wdata->pages[i]);
2120 page_cache_release(wdata->pages[i]);
2123 mapping_set_error(mapping, rc);
2125 kref_put(&wdata->refcount, cifs_writedata_release);
/* Data-integrity writeback must retry the same range on -EAGAIN. */
2127 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2128 index = saved_index;
2132 wbc->nr_to_write -= nr_pages;
2133 if (wbc->nr_to_write <= 0)
2139 if (!scanned && !done) {
2141 * We hit the last page and there is more work to be done: wrap
2142 * back to the start of the file
2149 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2150 mapping->writeback_index = index;
/*
 * Write a single locked page to the server via cifs_partialpagewrite().
 * Retries internally for WB_SYNC_ALL on -EAGAIN; otherwise re-dirties
 * the page so normal writeback will try again later.
 */
2156 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2162 /* BB add check for wbc flags */
/* Hold a page reference across the (possibly blocking) network write. */
2163 page_cache_get(page);
2164 if (!PageUptodate(page))
2165 cifs_dbg(FYI, "ppw - page not up to date\n");
2168 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2170 * A writepage() implementation always needs to do either this,
2171 * or re-dirty the page with "redirty_page_for_writepage()" in
2172 * the case of a failure.
2174 * Just unlocking the page will cause the radix tree tag-bits
2175 * to fail to update with the state of the page correctly.
2177 set_page_writeback(page);
2179 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2180 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2182 else if (rc == -EAGAIN)
2183 redirty_page_for_writepage(wbc, page);
2187 SetPageUptodate(page);
2188 end_page_writeback(page);
2189 page_cache_release(page);
/* ->writepage wrapper: write the page, then (on elided lines) unlock it. */
2194 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2196 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end implementation.  For an up-to-date page just mark it
 * dirty and let writeback send it later; for a partially filled,
 * non-up-to-date page, write the copied bytes to the server
 * synchronously via cifs_write().  Updates i_size if the write
 * extended the file.
 */
2201 static int cifs_write_end(struct file *file, struct address_space *mapping,
2202 loff_t pos, unsigned len, unsigned copied,
2203 struct page *page, void *fsdata)
2206 struct inode *inode = mapping->host;
2207 struct cifsFileInfo *cfile = file->private_data;
2208 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* rwpidforward mount option: use the pid that opened the file. */
2211 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2214 pid = current->tgid;
2216 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
/* PageChecked is set by write_begin when the whole page will be covered. */
2219 if (PageChecked(page)) {
2221 SetPageUptodate(page);
2222 ClearPageChecked(page);
2223 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2224 SetPageUptodate(page);
2226 if (!PageUptodate(page)) {
2228 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2232 /* this is probably better than directly calling
2233 partialpage_write since in this function the file handle is
2234 known which we might as well leverage */
2235 /* BB check if anything else missing out of ppw
2236 such as updating last write time */
2237 page_data = kmap(page);
2238 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2239 /* if (rc < 0) should we set writebehind rc? */
2246 set_page_dirty(page);
/* Grow i_size under i_lock if this write extended the file. */
2250 spin_lock(&inode->i_lock);
2251 if (pos > inode->i_size)
2252 i_size_write(inode, pos);
2253 spin_unlock(&inode->i_lock);
2257 page_cache_release(page);
/*
 * fsync for strict cache mode: flush dirty pages, zap the page cache
 * if we no longer hold a read (oplock/lease) cache, then ask the
 * server to flush via server->ops->flush (unless nostrictsync).
 */
2262 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2267 struct cifs_tcon *tcon;
2268 struct TCP_Server_Info *server;
2269 struct cifsFileInfo *smbfile = file->private_data;
2270 struct inode *inode = file_inode(file);
2271 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2273 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2276 mutex_lock(&inode->i_mutex);
2280 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
/* No read cache: cached pages may be stale, so invalidate them. */
2283 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2284 rc = cifs_zap_mapping(inode);
2286 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2287 rc = 0; /* don't care about it in fsync */
2291 tcon = tlink_tcon(smbfile->tlink);
2292 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2293 server = tcon->ses->server;
2294 if (server->ops->flush)
2295 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2301 mutex_unlock(&inode->i_mutex);
/*
 * Plain fsync: flush dirty pages for the range, then request a server
 * flush via server->ops->flush (unless the nostrictsync mount option
 * suppresses it).  Unlike cifs_strict_fsync, never zaps the mapping.
 */
2305 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2309 struct cifs_tcon *tcon;
2310 struct TCP_Server_Info *server;
2311 struct cifsFileInfo *smbfile = file->private_data;
2312 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2313 struct inode *inode = file->f_mapping->host;
2315 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2318 mutex_lock(&inode->i_mutex);
2322 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2325 tcon = tlink_tcon(smbfile->tlink);
2326 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2327 server = tcon->ses->server;
2328 if (server->ops->flush)
2329 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2335 mutex_unlock(&inode->i_mutex);
2340 * As file closes, flush all cached write data for this inode checking
2341 * for write behind errors.
2343 int cifs_flush(struct file *file, fl_owner_t id)
2345 struct inode *inode = file_inode(file);
/* Only writable opens can have dirty write-behind data to flush. */
2348 if (file->f_mode & FMODE_WRITE)
2349 rc = filemap_write_and_wait(inode->i_mapping);
2351 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
/*
 * Allocate @num_pages highmem-capable pages into @pages[].  On
 * allocation failure the error path (partially elided here) records
 * how many were obtained and returns ENOMEM; the final loop frees
 * pages on that failure path.
 */
2357 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2362 for (i = 0; i < num_pages; i++) {
2363 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2366 * save number of pages we have already allocated and
2367 * return with ENOMEM error
2376 for (i = 0; i < num_pages; i++)
/*
 * Clamp @len to @wsize (result stored in *cur_len on an elided line)
 * and return the number of pages needed to hold that many bytes.
 */
2383 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2388 clen = min_t(const size_t, len, wsize);
2389 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * kref release for uncached (direct) writedata: drop the page
 * references this writedata owns, then release the common writedata.
 */
2398 cifs_uncached_writedata_release(struct kref *refcount)
2401 struct cifs_writedata *wdata = container_of(refcount,
2402 struct cifs_writedata, refcount);
2404 for (i = 0; i < wdata->nr_pages; i++)
2405 put_page(wdata->pages[i]);
2406 cifs_writedata_release(refcount);
/*
 * Work handler run when an uncached async write finishes: update the
 * cached server EOF / i_size, signal the waiter via wdata->done, and
 * drop this completion's writedata reference.
 */
2410 cifs_uncached_writev_complete(struct work_struct *work)
2412 struct cifs_writedata *wdata = container_of(work,
2413 struct cifs_writedata, work);
2414 struct inode *inode = wdata->cfile->dentry->d_inode;
2415 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2417 spin_lock(&inode->i_lock);
2418 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
/* Propagate a grown server EOF into the VFS-visible size. */
2419 if (cifsi->server_eof > inode->i_size)
2420 i_size_write(inode, cifsi->server_eof);
2421 spin_unlock(&inode->i_lock);
2423 complete(&wdata->done);
2425 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *len bytes of user data from @from into wdata's pages.
 * On return *len holds the bytes actually copied and *num_pages the
 * pages actually used; returns -EFAULT if nothing could be copied
 * (error return paths partially elided in this excerpt).
 */
2429 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2430 size_t *len, unsigned long *num_pages)
2432 size_t save_len, copied, bytes, cur_len = *len;
2433 unsigned long i, nr_pages = *num_pages;
2436 for (i = 0; i < nr_pages; i++) {
2437 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2438 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2441 * If we didn't copy as much as we expected, then that
2442 * may mean we trod into an unmapped area. Stop copying
2443 * at that point. On the next pass through the big
2444 * loop, we'll likely end up getting a zero-length
2445 * write and bailing out of it.
/* save_len - cur_len = total bytes successfully copied. */
2450 cur_len = save_len - cur_len;
2454 * If we have no data to send, then that probably means that
2455 * the copy above failed altogether. That's most likely because
2456 * the address in the iovec was bogus. Return -EFAULT and let
2457 * the caller free anything we allocated and bail out.
2463 * i + 1 now represents the number of pages we actually used in
2464 * the copy phase above.
/*
 * Break an uncached write of @len bytes starting at @offset into
 * wsize-sized async requests.  Each iteration reserves credits,
 * allocates a writedata + pages, copies user data in, and submits via
 * server->ops->async_writev; successfully submitted writedata are
 * queued on @wdata_list for the caller to wait on.  On -EAGAIN the
 * iterator is rewound from @saved_from so the chunk can be resent.
 */
2471 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2472 struct cifsFileInfo *open_file,
2473 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2477 unsigned long nr_pages, num_pages, i;
2478 struct cifs_writedata *wdata;
2479 struct iov_iter saved_from;
2480 loff_t saved_offset = offset;
2482 struct TCP_Server_Info *server;
2484 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2485 pid = open_file->pid;
2487 pid = current->tgid;
2489 server = tlink_tcon(open_file->tlink)->ses->server;
/* Snapshot the iterator so a retry can rewind to the original position. */
2490 memcpy(&saved_from, from, sizeof(struct iov_iter));
2493 unsigned int wsize, credits;
2495 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2500 nr_pages = get_numpages(wsize, len, &cur_len);
2501 wdata = cifs_writedata_alloc(nr_pages,
2502 cifs_uncached_writev_complete);
2505 add_credits_and_wake_if(server, credits, 0);
2509 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2512 add_credits_and_wake_if(server, credits, 0);
2516 num_pages = nr_pages;
2517 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
/* Copy-in failed: release every page and the reserved credits. */
2519 for (i = 0; i < nr_pages; i++)
2520 put_page(wdata->pages[i]);
2522 add_credits_and_wake_if(server, credits, 0);
2527 * Bring nr_pages down to the number of pages we actually used,
2528 * and free any pages that we didn't use.
2530 for ( ; nr_pages > num_pages; nr_pages--)
2531 put_page(wdata->pages[nr_pages - 1]);
2533 wdata->sync_mode = WB_SYNC_ALL;
2534 wdata->nr_pages = nr_pages;
2535 wdata->offset = (__u64)offset;
2536 wdata->cfile = cifsFileInfo_get(open_file);
2538 wdata->bytes = cur_len;
2539 wdata->pagesz = PAGE_SIZE;
2540 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2541 wdata->credits = credits;
/* Reopen a stale handle before submitting, if needed. */
2543 if (!wdata->cfile->invalidHandle ||
2544 !cifs_reopen_file(wdata->cfile, false))
2545 rc = server->ops->async_writev(wdata,
2546 cifs_uncached_writedata_release);
2548 add_credits_and_wake_if(server, wdata->credits, 0);
2549 kref_put(&wdata->refcount,
2550 cifs_uncached_writedata_release);
/* Retryable failure: rewind the source iterator past what was sent. */
2551 if (rc == -EAGAIN) {
2552 memcpy(from, &saved_from,
2553 sizeof(struct iov_iter));
2554 iov_iter_advance(from, offset - saved_offset);
2560 list_add_tail(&wdata->list, wdata_list);
/*
 * Top level of the uncached write path: validate the request, fan out
 * the data with cifs_write_from_iter(), then wait for each completion
 * in offset order, resending any chunk that failed with -EAGAIN.
 * Returns total bytes written, or the error if nothing was written.
 */
2569 cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2572 ssize_t total_written = 0;
2573 struct cifsFileInfo *open_file;
2574 struct cifs_tcon *tcon;
2575 struct cifs_sb_info *cifs_sb;
2576 struct cifs_writedata *wdata, *tmp;
2577 struct list_head wdata_list;
2578 struct iov_iter saved_from;
2581 len = iov_iter_count(from);
/* Standard VFS checks (limits, O_APPEND handling, etc.). */
2582 rc = generic_write_checks(file, poffset, &len, 0);
2589 iov_iter_truncate(from, len);
2591 INIT_LIST_HEAD(&wdata_list);
2592 cifs_sb = CIFS_FILE_SB(file);
2593 open_file = file->private_data;
2594 tcon = tlink_tcon(open_file->tlink);
2596 if (!tcon->ses->server->ops->async_writev)
/* Keep an unadvanced copy for per-chunk resends below. */
2599 memcpy(&saved_from, from, sizeof(struct iov_iter));
2601 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2605 * If at least one write was successfully sent, then discard any rc
2606 * value from the later writes. If the other write succeeds, then
2607 * we'll end up returning whatever was written. If it fails, then
2608 * we'll get a new rc value from that.
2610 if (!list_empty(&wdata_list))
2614 * Wait for and collect replies for any successful sends in order of
2615 * increasing offset. Once an error is hit or we get a fatal signal
2616 * while waiting, then return without waiting for any more replies.
2619 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2621 /* FIXME: freezable too? */
2622 rc = wait_for_completion_killable(&wdata->done);
2625 else if (wdata->result)
2628 total_written += wdata->bytes;
2630 /* resend call if it's a retryable error */
2631 if (rc == -EAGAIN) {
2632 struct list_head tmp_list;
2633 struct iov_iter tmp_from;
2635 INIT_LIST_HEAD(&tmp_list);
2636 list_del_init(&wdata->list);
/* Re-derive the iterator position for just this failed chunk. */
2638 memcpy(&tmp_from, &saved_from,
2639 sizeof(struct iov_iter));
2640 iov_iter_advance(&tmp_from,
2641 wdata->offset - *poffset);
2643 rc = cifs_write_from_iter(wdata->offset,
2644 wdata->bytes, &tmp_from,
2645 open_file, cifs_sb, &tmp_list);
/* Splice resent requests back so the loop waits on them too. */
2647 list_splice(&tmp_list, &wdata_list);
2649 kref_put(&wdata->refcount,
2650 cifs_uncached_writedata_release);
2654 list_del_init(&wdata->list);
2655 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2658 if (total_written > 0)
2659 *poffset += total_written;
2661 cifs_stats_bytes_written(tcon, total_written);
2662 return total_written ? total_written : (ssize_t)rc;
/*
 * ->write_iter for uncached I/O: delegates to cifs_iovec_write() and,
 * on success, marks the page-cache mapping invalid so subsequent
 * cached reads refetch from the server.
 */
2665 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2668 struct inode *inode;
2669 loff_t pos = iocb->ki_pos;
2671 inode = file_inode(iocb->ki_filp);
2674 * BB - optimize the way when signing is disabled. We can drop this
2675 * extra memory-to-memory copying and use iovec buffers for constructing
2679 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2681 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
/*
 * Cached write used when a write oplock/lease is held but POSIX
 * brlocks are unavailable: take lock_sem shared to freeze the brlock
 * list, refuse the write if it conflicts with a mandatory exclusive
 * lock, otherwise go through the generic cached write path and sync
 * if required.
 */
2689 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2691 struct file *file = iocb->ki_filp;
2692 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2693 struct inode *inode = file->f_mapping->host;
2694 struct cifsInodeInfo *cinode = CIFS_I(inode);
2695 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2696 ssize_t rc = -EACCES;
2697 loff_t lock_pos = iocb->ki_pos;
2700 * We need to hold the sem to be sure nobody modifies lock list
2701 * with a brlock that prevents writing.
2703 down_read(&cinode->lock_sem);
2704 mutex_lock(&inode->i_mutex);
/* O_APPEND writes land at EOF, so that is where conflicts must be checked. */
2705 if (file->f_flags & O_APPEND)
2706 lock_pos = i_size_read(inode);
2707 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2708 server->vals->exclusive_lock_type, NULL,
2710 rc = __generic_file_write_iter(iocb, from);
2711 mutex_unlock(&inode->i_mutex);
/* O_SYNC et al.: flush what was just written. */
2716 err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2721 mutex_unlock(&inode->i_mutex);
2723 up_read(&cinode->lock_sem);
/*
 * Strict-cache write: with a write oplock use the cached path (generic
 * path when POSIX byte-range locks apply, otherwise cifs_writev with
 * mandatory-lock checks); without an oplock fall back to the uncached
 * cifs_user_writev and drop stale cached data afterwards.
 */
2728 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2730 struct inode *inode = file_inode(iocb->ki_filp);
2731 struct cifsInodeInfo *cinode = CIFS_I(inode);
2732 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2733 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2734 iocb->ki_filp->private_data;
2735 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2738 written = cifs_get_writer(cinode);
2742 if (CIFS_CACHE_WRITE(cinode)) {
2743 if (cap_unix(tcon->ses) &&
2744 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2745 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2746 written = generic_file_write_iter(iocb, from);
2749 written = cifs_writev(iocb, from);
2753 * For non-oplocked files in strict cache mode we need to write the data
2754 * to the server exactly from the pos to pos+len-1 rather than flush all
2755 * affected pages because it may cause an error with mandatory locks on
2756 * these pages but not on the region from pos to pos+len-1.
2758 written = cifs_user_writev(iocb, from);
2759 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2761 * Windows 7 server can delay breaking level2 oplock if a write
2762 * request comes - break it on the client to prevent reading
2765 cifs_zap_mapping(inode);
2766 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2771 cifs_put_writer(cinode);
/*
 * Allocate a refcounted cifs_readdata with a trailing array of
 * nr_pages page pointers; initializes the kref, list linkage,
 * completion and the work item that runs @complete.
 */
2775 static struct cifs_readdata *
2776 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2778 struct cifs_readdata *rdata;
2780 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2782 if (rdata != NULL) {
2783 kref_init(&rdata->refcount);
2784 INIT_LIST_HEAD(&rdata->list);
2785 init_completion(&rdata->done);
2786 INIT_WORK(&rdata->work, complete);
/*
 * kref release callback for a cifs_readdata: drop the open-file
 * reference (elided lines presumably free the struct — verify in
 * the full source).
 */
2793 cifs_readdata_release(struct kref *refcount)
2795 struct cifs_readdata *rdata = container_of(refcount,
2796 struct cifs_readdata, refcount);
2799 cifsFileInfo_put(rdata->cfile);
/*
 * Allocate nr_pages highmem-capable pages into rdata->pages.
 * The second loop is the failure path: it releases every allocated
 * page and clears the slots.
 */
2805 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2811 for (i = 0; i < nr_pages; i++) {
2812 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2817 rdata->pages[i] = page;
/* cleanup on allocation failure: put everything allocated so far */
2821 for (i = 0; i < nr_pages; i++) {
2822 put_page(rdata->pages[i]);
2823 rdata->pages[i] = NULL;
/*
 * kref release for the uncached read path: drop all page references,
 * then delegate to cifs_readdata_release for the common teardown.
 */
2830 cifs_uncached_readdata_release(struct kref *refcount)
2832 struct cifs_readdata *rdata = container_of(refcount,
2833 struct cifs_readdata, refcount);
2836 for (i = 0; i < rdata->nr_pages; i++) {
2837 put_page(rdata->pages[i]);
2838 rdata->pages[i] = NULL;
2840 cifs_readdata_release(refcount);
2844 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2845 * @rdata: the readdata response with list of pages holding data
2846 * @iter: destination for our data
2848 * This function copies data from a list of pages in a readdata response into
2849 * an array of iovecs. It will first calculate where the data should go
2850 * based on the info in the readdata and then copy the data into that spot.
2853 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2855 size_t remaining = rdata->got_bytes;
2858 for (i = 0; i < rdata->nr_pages; i++) {
2859 struct page *page = rdata->pages[i];
2860 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2861 size_t written = copy_page_to_iter(page, 0, copy, iter);
2862 remaining -= written;
/* short copy while the iterator still has room means a fault */
2863 if (written < copy && iov_iter_count(iter) > 0)
/* -EFAULT if some received bytes could not be copied into @iter */
2866 return remaining ? -EFAULT : 0;
/*
 * Work item run when an uncached async read finishes: wake the waiter
 * in cifs_user_readv and drop the read's reference on the readdata.
 */
2870 cifs_uncached_readv_complete(struct work_struct *work)
2872 struct cifs_readdata *rdata = container_of(work,
2873 struct cifs_readdata, work);
2875 complete(&rdata->done);
2876 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Receive up to @len bytes from the server socket into rdata's pages.
 * A partially-filled trailing page is zero-padded and its valid length
 * recorded in rdata->tailsz; fully unused pages are released.
 * Returns bytes received, or the socket error; note that -ECONNABORTED
 * is reported even when some bytes were already received.
 */
2880 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2881 struct cifs_readdata *rdata, unsigned int len)
2885 unsigned int nr_pages = rdata->nr_pages;
2888 rdata->got_bytes = 0;
2889 rdata->tailsz = PAGE_SIZE;
2890 for (i = 0; i < nr_pages; i++) {
2891 struct page *page = rdata->pages[i];
2893 if (len >= PAGE_SIZE) {
2894 /* enough data to fill the page */
2895 iov.iov_base = kmap(page);
2896 iov.iov_len = PAGE_SIZE;
2897 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2898 i, iov.iov_base, iov.iov_len);
2900 } else if (len > 0) {
2901 /* enough for partial page, fill and zero the rest */
2902 iov.iov_base = kmap(page);
2904 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2905 i, iov.iov_base, iov.iov_len);
2906 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2907 rdata->tailsz = len;
2910 /* no need to hold page hostage */
2911 rdata->pages[i] = NULL;
2917 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2922 rdata->got_bytes += result;
2925 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2926 rdata->got_bytes : result;
/*
 * Build and dispatch async read(s) for [offset, offset+len): reserve
 * rsize-bounded credits, allocate a readdata plus pages, fill in the
 * request fields and call the server's async_readv. On success the
 * readdata is queued on @rdata_list; on dispatch failure the credits
 * are returned and the readdata reference dropped.
 */
2930 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2931 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2933 struct cifs_readdata *rdata;
2934 unsigned int npages, rsize, credits;
2938 struct TCP_Server_Info *server;
2940 server = tlink_tcon(open_file->tlink)->ses->server;
/* forward the opener's pid to the server when mount option asks for it */
2942 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2943 pid = open_file->pid;
2945 pid = current->tgid;
2948 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2953 cur_len = min_t(const size_t, len, rsize);
2954 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2956 /* allocate a readdata struct */
2957 rdata = cifs_readdata_alloc(npages,
2958 cifs_uncached_readv_complete);
2960 add_credits_and_wake_if(server, credits, 0);
2965 rc = cifs_read_allocate_pages(rdata, npages);
2969 rdata->cfile = cifsFileInfo_get(open_file);
2970 rdata->nr_pages = npages;
2971 rdata->offset = offset;
2972 rdata->bytes = cur_len;
2974 rdata->pagesz = PAGE_SIZE;
2975 rdata->read_into_pages = cifs_uncached_read_into_pages;
2976 rdata->credits = credits;
/* reopen a stale handle before issuing the read */
2978 if (!rdata->cfile->invalidHandle ||
2979 !cifs_reopen_file(rdata->cfile, true))
2980 rc = server->ops->async_readv(rdata);
/* dispatch failed: give the credits back and drop our reference */
2983 add_credits_and_wake_if(server, rdata->credits, 0);
2984 kref_put(&rdata->refcount,
2985 cifs_uncached_readdata_release);
2991 list_add_tail(&rdata->list, rdata_list);
/*
 * Uncached read(2): fan out async reads over the requested range, then
 * collect completions in increasing-offset order, copying received
 * bytes into @to. An -EAGAIN result is resent starting from the bytes
 * already received; a short read discards the remainder of that chunk.
 */
2999 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3001 struct file *file = iocb->ki_filp;
3004 ssize_t total_read = 0;
3005 loff_t offset = iocb->ki_pos;
3006 struct cifs_sb_info *cifs_sb;
3007 struct cifs_tcon *tcon;
3008 struct cifsFileInfo *open_file;
3009 struct cifs_readdata *rdata, *tmp;
3010 struct list_head rdata_list;
3012 len = iov_iter_count(to);
3016 INIT_LIST_HEAD(&rdata_list);
3017 cifs_sb = CIFS_FILE_SB(file);
3018 open_file = file->private_data;
3019 tcon = tlink_tcon(open_file->tlink);
/* this path requires the async read op */
3021 if (!tcon->ses->server->ops->async_readv)
3024 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3025 cifs_dbg(FYI, "attempting read on write only file instance\n");
3027 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3029 /* if at least one read request send succeeded, then reset rc */
3030 if (!list_empty(&rdata_list))
3033 len = iov_iter_count(to);
3034 /* the loop below should proceed in the order of increasing offsets */
3036 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3038 /* FIXME: freezable sleep too? */
3039 rc = wait_for_completion_killable(&rdata->done);
3042 else if (rdata->result == -EAGAIN) {
3043 /* resend call if it's a retryable error */
3044 struct list_head tmp_list;
3045 unsigned int got_bytes = rdata->got_bytes;
3047 list_del_init(&rdata->list);
3048 INIT_LIST_HEAD(&tmp_list);
3051 * Got a part of data and then reconnect has
3052 * happened -- fill the buffer and continue
3055 if (got_bytes && got_bytes < rdata->bytes) {
3056 rc = cifs_readdata_to_iov(rdata, to);
3058 kref_put(&rdata->refcount,
3059 cifs_uncached_readdata_release);
/* reissue only the portion not yet received */
3064 rc = cifs_send_async_read(
3065 rdata->offset + got_bytes,
3066 rdata->bytes - got_bytes,
3067 rdata->cfile, cifs_sb,
3070 list_splice(&tmp_list, &rdata_list);
3072 kref_put(&rdata->refcount,
3073 cifs_uncached_readdata_release);
3075 } else if (rdata->result)
3078 rc = cifs_readdata_to_iov(rdata, to);
3080 /* if there was a short read -- discard anything left */
3081 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3084 list_del_init(&rdata->list);
3085 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/* bytes actually consumed by the iterator */
3088 total_read = len - iov_iter_count(to);
3090 cifs_stats_bytes_read(tcon, total_read);
3092 /* mask nodata case */
3097 iocb->ki_pos += total_read;
/*
 * Strict-cache read: without a read oplock always go to the server
 * (uncached); with POSIX byte-range lock semantics the generic cached
 * path is safe; otherwise check for mandatory-lock conflicts under
 * lock_sem before reading through the page cache.
 */
3104 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3106 struct inode *inode = file_inode(iocb->ki_filp);
3107 struct cifsInodeInfo *cinode = CIFS_I(inode);
3108 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3109 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3110 iocb->ki_filp->private_data;
3111 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3115 * In strict cache mode we need to read from the server all the time
3116 * if we don't have level II oplock because the server can delay mtime
3117 * change - so we can't make a decision about inode invalidating.
3118 * And we can also fail with pagereading if there are mandatory locks
3119 * on pages affected by this read but not on the region from pos to
3122 if (!CIFS_CACHE_READ(cinode))
3123 return cifs_user_readv(iocb, to);
3125 if (cap_unix(tcon->ses) &&
3126 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3127 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3128 return generic_file_read_iter(iocb, to);
3131 * We need to hold the sem to be sure nobody modifies lock list
3132 * with a brlock that prevents reading.
3134 down_read(&cinode->lock_sem);
3135 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3136 tcon->ses->server->vals->shared_lock_type,
3137 NULL, CIFS_READ_OP))
3138 rc = generic_file_read_iter(iocb, to);
3139 up_read(&cinode->lock_sem);
/*
 * Synchronous read helper: loop issuing server->ops->sync_read for
 * rsize-bounded chunks into @read_data, retrying -EAGAIN (reopening an
 * invalidated handle first) and advancing *offset by the bytes read.
 */
3144 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3147 unsigned int bytes_read = 0;
3148 unsigned int total_read;
3149 unsigned int current_read_size;
3151 struct cifs_sb_info *cifs_sb;
3152 struct cifs_tcon *tcon;
3153 struct TCP_Server_Info *server;
3156 struct cifsFileInfo *open_file;
3157 struct cifs_io_parms io_parms;
3158 int buf_type = CIFS_NO_BUFFER;
3162 cifs_sb = CIFS_FILE_SB(file);
3164 /* FIXME: set up handlers for larger reads and/or convert to async */
3165 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3167 if (file->private_data == NULL) {
3172 open_file = file->private_data;
3173 tcon = tlink_tcon(open_file->tlink);
3174 server = tcon->ses->server;
3176 if (!server->ops->sync_read) {
3181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3182 pid = open_file->pid;
3184 pid = current->tgid;
3186 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3187 cifs_dbg(FYI, "attempting read on write only file instance\n");
3189 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3190 total_read += bytes_read, cur_offset += bytes_read) {
3192 current_read_size = min_t(uint, read_size - total_read,
3195 * For windows me and 9x we do not want to request more
3196 * than it negotiated since it will refuse the read
3199 if ((tcon->ses) && !(tcon->ses->capabilities &
3200 tcon->ses->server->vals->cap_large_files)) {
3201 current_read_size = min_t(uint,
3202 current_read_size, CIFSMaxBufSize);
3204 if (open_file->invalidHandle) {
3205 rc = cifs_reopen_file(open_file, true);
3210 io_parms.tcon = tcon;
3211 io_parms.offset = *offset;
3212 io_parms.length = current_read_size;
3213 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3214 &bytes_read, &cur_offset,
3216 } while (rc == -EAGAIN);
/* stop on error or EOF */
3218 if (rc || (bytes_read == 0)) {
3226 cifs_stats_bytes_read(tcon, total_read);
3227 *offset += bytes_read;
3235 * If the page is mmap'ed into a process' page tables, then we need to make
3236 * sure that it doesn't change while being written back.
3239 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3241 struct page *page = vmf->page;
/* VM_FAULT_LOCKED: the page is returned still locked to the VM */
3244 return VM_FAULT_LOCKED;
/* VM ops for cifs mmaps: generic fault paths plus our mkwrite hook */
3247 static struct vm_operations_struct cifs_file_vm_ops = {
3248 .fault = filemap_fault,
3249 .map_pages = filemap_map_pages,
3250 .page_mkwrite = cifs_page_mkwrite,
3251 .remap_pages = generic_file_remap_pages,
/*
 * mmap for strict cache mode: if we hold no read oplock, zap the
 * (possibly stale) cached pages first, then do a generic mmap and
 * install our vm_ops.
 */
3254 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3257 struct inode *inode = file_inode(file);
3261 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3262 rc = cifs_zap_mapping(inode);
3267 rc = generic_file_mmap(file, vma);
3269 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Plain mmap: revalidate the file first (failure is logged; the
 * subsequent handling of rc is elided in this listing), then generic
 * mmap with our vm_ops.
 */
3274 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3279 rc = cifs_revalidate_file(file);
3281 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3286 rc = generic_file_mmap(file, vma);
3288 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Completion work for readpages: for each page of a successful (or
 * partially successful -EAGAIN) read, add it to the file LRU, mark it
 * uptodate, push it to fscache, then release our reference.
 */
3294 cifs_readv_complete(struct work_struct *work)
3296 unsigned int i, got_bytes;
3297 struct cifs_readdata *rdata = container_of(work,
3298 struct cifs_readdata, work);
3300 got_bytes = rdata->got_bytes;
3301 for (i = 0; i < rdata->nr_pages; i++) {
3302 struct page *page = rdata->pages[i];
3304 lru_cache_add_file(page);
3306 if (rdata->result == 0 ||
3307 (rdata->result == -EAGAIN && got_bytes)) {
3308 flush_dcache_page(page);
3309 SetPageUptodate(page);
3314 if (rdata->result == 0 ||
3315 (rdata->result == -EAGAIN && got_bytes))
3316 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* consume up to one page of the received byte count */
3318 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3320 page_cache_release(page);
3321 rdata->pages[i] = NULL;
3323 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Receive @len bytes from the socket into the readahead pages. A
 * partial trailing page is zero-padded (tailsz records its length);
 * pages past the server's EOF are zero-filled and marked uptodate so
 * the VFS stops re-requesting them; remaining unused pages are simply
 * released. -ECONNABORTED is reported even after partial progress.
 */
3327 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3328 struct cifs_readdata *rdata, unsigned int len)
3334 unsigned int nr_pages = rdata->nr_pages;
3337 /* determine the eof that the server (probably) has */
3338 eof = CIFS_I(rdata->mapping->host)->server_eof;
3339 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3340 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3342 rdata->got_bytes = 0;
3343 rdata->tailsz = PAGE_CACHE_SIZE;
3344 for (i = 0; i < nr_pages; i++) {
3345 struct page *page = rdata->pages[i];
3347 if (len >= PAGE_CACHE_SIZE) {
3348 /* enough data to fill the page */
3349 iov.iov_base = kmap(page);
3350 iov.iov_len = PAGE_CACHE_SIZE;
3351 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3352 i, page->index, iov.iov_base, iov.iov_len);
3353 len -= PAGE_CACHE_SIZE;
3354 } else if (len > 0) {
3355 /* enough for partial page, fill and zero the rest */
3356 iov.iov_base = kmap(page);
3358 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3359 i, page->index, iov.iov_base, iov.iov_len);
3360 memset(iov.iov_base + len,
3361 '\0', PAGE_CACHE_SIZE - len);
3362 rdata->tailsz = len;
3364 } else if (page->index > eof_index) {
3366 * The VFS will not try to do readahead past the
3367 * i_size, but it's possible that we have outstanding
3368 * writes with gaps in the middle and the i_size hasn't
3369 * caught up yet. Populate those with zeroed out pages
3370 * to prevent the VFS from repeatedly attempting to
3371 * fill them until the writes are flushed.
3373 zero_user(page, 0, PAGE_CACHE_SIZE);
3374 lru_cache_add_file(page);
3375 flush_dcache_page(page);
3376 SetPageUptodate(page);
3378 page_cache_release(page);
3379 rdata->pages[i] = NULL;
3383 /* no need to hold page hostage */
3384 lru_cache_add_file(page);
3386 page_cache_release(page);
3387 rdata->pages[i] = NULL;
3392 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3397 rdata->got_bytes += result;
3400 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3401 rdata->got_bytes : result;
/*
 * Peel a batch of pages off the tail of @page_list into @tmplist:
 * take the first page, then keep taking pages with consecutive indexes
 * until a discontinuity or the rsize limit. Each taken page is locked
 * and inserted into the page cache; *offset/*bytes describe the
 * resulting contiguous byte range.
 */
3405 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3406 unsigned int rsize, struct list_head *tmplist,
3407 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3409 struct page *page, *tpage;
3410 unsigned int expected_index;
3413 INIT_LIST_HEAD(tmplist);
/* page_list is in declining index order; the lowest index is at prev */
3415 page = list_entry(page_list->prev, struct page, lru);
3418 * Lock the page and put it in the cache. Since no one else
3419 * should have access to this page, we're safe to simply set
3420 * PG_locked without checking it first.
3422 __set_page_locked(page);
3423 rc = add_to_page_cache_locked(page, mapping,
3424 page->index, GFP_KERNEL);
3426 /* give up if we can't stick it in the cache */
3428 __clear_page_locked(page);
3432 /* move first page to the tmplist */
3433 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3434 *bytes = PAGE_CACHE_SIZE;
3436 list_move_tail(&page->lru, tmplist);
3438 /* now try and add more pages onto the request */
3439 expected_index = page->index + 1;
3440 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3441 /* discontinuity ? */
3442 if (page->index != expected_index)
3445 /* would this page push the read over the rsize? */
3446 if (*bytes + PAGE_CACHE_SIZE > rsize)
3449 __set_page_locked(page);
3450 if (add_to_page_cache_locked(page, mapping, page->index,
3452 __clear_page_locked(page);
3455 list_move_tail(&page->lru, tmplist);
3456 (*bytes) += PAGE_CACHE_SIZE;
/*
 * .readpages: satisfy what we can from fscache, then repeatedly
 * reserve credits, batch contiguous pages via readpages_get_pages and
 * dispatch an async read per batch. On any failure the batched pages
 * are put back on the LRU and released so the VFS falls back to
 * single-page readpage; leftover fscache-marked pages are uncached.
 */
3463 static int cifs_readpages(struct file *file, struct address_space *mapping,
3464 struct list_head *page_list, unsigned num_pages)
3467 struct list_head tmplist;
3468 struct cifsFileInfo *open_file = file->private_data;
3469 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3470 struct TCP_Server_Info *server;
3474 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3475 * immediately if the cookie is negative
3477 * After this point, every page in the list might have PG_fscache set,
3478 * so we will need to clean that up off of every page we don't use.
3480 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3485 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3486 pid = open_file->pid;
3488 pid = current->tgid;
3491 server = tlink_tcon(open_file->tlink)->ses->server;
3493 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3494 __func__, file, mapping, num_pages);
3497 * Start with the page at end of list and move it to private
3498 * list. Do the same with any following pages until we hit
3499 * the rsize limit, hit an index discontinuity, or run out of
3500 * pages. Issue the async read and then start the loop again
3501 * until the list is empty.
3503 * Note that list order is important. The page_list is in
3504 * the order of declining indexes. When we put the pages in
3505 * the rdata->pages, then we want them in increasing order.
3507 while (!list_empty(page_list)) {
3508 unsigned int i, nr_pages, bytes, rsize;
3510 struct page *page, *tpage;
3511 struct cifs_readdata *rdata;
3514 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3520 * Give up immediately if rsize is too small to read an entire
3521 * page. The VFS will fall back to readpage. We should never
3522 * reach this point however since we set ra_pages to 0 when the
3523 * rsize is smaller than a cache page.
3525 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3526 add_credits_and_wake_if(server, credits, 0);
3530 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3531 &nr_pages, &offset, &bytes);
3533 add_credits_and_wake_if(server, credits, 0);
3537 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3539 /* best to give up if we're out of mem */
3540 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3541 list_del(&page->lru);
3542 lru_cache_add_file(page);
3544 page_cache_release(page);
3547 add_credits_and_wake_if(server, credits, 0);
3551 rdata->cfile = cifsFileInfo_get(open_file);
3552 rdata->mapping = mapping;
3553 rdata->offset = offset;
3554 rdata->bytes = bytes;
3556 rdata->pagesz = PAGE_CACHE_SIZE;
3557 rdata->read_into_pages = cifs_readpages_read_into_pages;
3558 rdata->credits = credits;
3560 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3561 list_del(&page->lru);
3562 rdata->pages[rdata->nr_pages++] = page;
3565 if (!rdata->cfile->invalidHandle ||
3566 !cifs_reopen_file(rdata->cfile, true))
3567 rc = server->ops->async_readv(rdata);
3569 add_credits_and_wake_if(server, rdata->credits, 0);
3570 for (i = 0; i < rdata->nr_pages; i++) {
3571 page = rdata->pages[i];
3572 lru_cache_add_file(page);
3574 page_cache_release(page);
3576 /* Fallback to the readpage in error/reconnect cases */
3577 kref_put(&rdata->refcount, cifs_readdata_release);
3581 kref_put(&rdata->refcount, cifs_readdata_release);
3584 /* Any pages that have been shown to fscache but didn't get added to
3585 * the pagecache must be uncached before they get returned to the
3588 cifs_fscache_readpages_cancel(mapping->host, page_list);
3593 * cifs_readpage_worker must be called with the page pinned
3595 static int cifs_readpage_worker(struct file *file, struct page *page,
/* try fscache first; fall back to a synchronous server read */
3601 /* Is the page cached? */
3602 rc = cifs_readpage_from_fscache(file_inode(file), page);
3606 read_data = kmap(page);
3607 /* for reads over a certain size could initiate async read ahead */
3609 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3614 cifs_dbg(FYI, "Bytes read %d\n", rc);
3616 file_inode(file)->i_atime =
3617 current_fs_time(file_inode(file)->i_sb);
/* zero the tail of the page after a short read */
3619 if (PAGE_CACHE_SIZE > rc)
3620 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3622 flush_dcache_page(page);
3623 SetPageUptodate(page);
3625 /* send this page to the cache */
3626 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * .readpage: compute the page's byte offset and delegate to
 * cifs_readpage_worker after validating private_data.
 */
3638 static int cifs_readpage(struct file *file, struct page *page)
3640 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3646 if (file->private_data == NULL) {
3652 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3653 page, (int)offset, (int)offset);
3655 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Return whether any open handle on the inode was opened with write
 * mode; walks openFileList under cifs_file_list_lock.
 */
3661 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3663 struct cifsFileInfo *open_file;
3665 spin_lock(&cifs_file_list_lock);
3666 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3667 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3668 spin_unlock(&cifs_file_list_lock);
3672 spin_unlock(&cifs_file_list_lock);
3676 /* We do not want to update the file size from server for inodes
3677 open for write - to avoid races with writepage extending
3678 the file - in the future we could consider allowing
3679 refreshing the inode only on increases in the file size
3680 but this is tricky to do without racing with writebehind
3681 page caching in the current Linux kernel design */
3682 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3687 if (is_inode_writable(cifsInode)) {
3688 /* This inode is open for write at least once */
3689 struct cifs_sb_info *cifs_sb;
3691 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3692 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3693 /* since no page cache to corrupt on directio
3694 we can change size safely */
/* a server EOF beyond our cached size is also not safe to apply */
3698 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * .write_begin: grab/lock the target page and decide whether the
 * not-to-be-written parts need to be read in first. Skip the read for
 * an already-uptodate page, a full-page write, or (with a read oplock)
 * a page at/past EOF — the untouched parts are zeroed and PG_checked
 * set instead. Otherwise read the page in via cifs_readpage_worker.
 */
3706 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3707 loff_t pos, unsigned len, unsigned flags,
3708 struct page **pagep, void **fsdata)
3711 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3712 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3713 loff_t page_start = pos & PAGE_MASK;
3718 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3721 page = grab_cache_page_write_begin(mapping, index, flags);
3727 if (PageUptodate(page))
3731 * If we write a full page it will be up to date, no need to read from
3732 * the server. If the write is short, we'll end up doing a sync write
3735 if (len == PAGE_CACHE_SIZE)
3739 * optimize away the read when we have an oplock, and we're not
3740 * expecting to use any of the data we'd be reading in. That
3741 * is, when the page lies beyond the EOF, or straddles the EOF
3742 * and the write will cover all of the existing data.
3744 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3745 i_size = i_size_read(mapping->host);
3746 if (page_start >= i_size ||
3747 (offset == 0 && (pos + len) >= i_size)) {
3748 zero_user_segments(page, 0, offset,
3752 * PageChecked means that the parts of the page
3753 * to which we're not writing are considered up
3754 * to date. Once the data is copied to the
3755 * page, it can be set uptodate.
3757 SetPageChecked(page);
3762 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3764 * might as well read a page, it is fast enough. If we get
3765 * an error, we don't need to return it. cifs_write_end will
3766 * do a sync write instead since PG_uptodate isn't set.
3768 cifs_readpage_worker(file, page, &page_start);
3769 page_cache_release(page);
3773 /* we could try using another file handle if there is one -
3774 but how would we lock it to prevent close of that handle
3775 racing with this read? In any case
3776 this will be written out by write_end so is fine */
/*
 * .releasepage: refuse to free a page with private data; otherwise let
 * fscache decide whether the page can be released.
 */
3783 static int cifs_release_page(struct page *page, gfp_t gfp)
3785 if (PagePrivate(page))
3788 return cifs_fscache_release_page(page, gfp);
/*
 * .invalidatepage: only a whole-page invalidation is propagated to
 * fscache; partial invalidations need no fscache action here.
 */
3791 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3792 unsigned int length)
3794 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3796 if (offset == 0 && length == PAGE_CACHE_SIZE)
3797 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * .launder_page: synchronously write back a dirty page (WB_SYNC_ALL
 * over exactly this page's byte range) before it is invalidated, then
 * drop it from fscache.
 */
3800 static int cifs_launder_page(struct page *page)
3803 loff_t range_start = page_offset(page);
3804 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3805 struct writeback_control wbc = {
3806 .sync_mode = WB_SYNC_ALL,
3808 .range_start = range_start,
3809 .range_end = range_end,
3812 cifs_dbg(FYI, "Launder page: %p\n", page);
3814 if (clear_page_dirty_for_io(page))
3815 rc = cifs_writepage_locked(page, &wbc);
3817 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Oplock break worker: wait for in-flight writers, downgrade the
 * client-side oplock state, break any lease, flush (and, if the read
 * cache is lost, wait for and zap) cached data, re-push byte-range
 * locks, and acknowledge the break to the server unless it was
 * cancelled.
 */
3821 void cifs_oplock_break(struct work_struct *work)
3823 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3825 struct inode *inode = cfile->dentry->d_inode;
3826 struct cifsInodeInfo *cinode = CIFS_I(inode);
3827 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3828 struct TCP_Server_Info *server = tcon->ses->server;
3831 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3832 TASK_UNINTERRUPTIBLE);
3834 server->ops->downgrade_oplock(server, cinode,
3835 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
/* mandatory locks cannot be cached read-only — drop read caching too */
3837 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3838 cifs_has_mand_locks(cinode)) {
3839 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3844 if (inode && S_ISREG(inode->i_mode)) {
3845 if (CIFS_CACHE_READ(cinode))
3846 break_lease(inode, O_RDONLY);
3848 break_lease(inode, O_WRONLY);
3849 rc = filemap_fdatawrite(inode->i_mapping);
3850 if (!CIFS_CACHE_READ(cinode)) {
3851 rc = filemap_fdatawait(inode->i_mapping);
3852 mapping_set_error(inode->i_mapping, rc);
3853 cifs_zap_mapping(inode);
3855 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3858 rc = cifs_push_locks(cfile);
3860 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3863 * releasing stale oplock after recent reconnect of smb session using
3864 * a now incorrect file handle is not a data integrity issue but do
3865 * not bother sending an oplock release if session to server still is
3866 * disconnected since oplock already released by the server
3868 if (!cfile->oplock_break_cancelled) {
3869 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3871 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3873 cifs_done_oplock_break(cinode);
3877 * The presence of cifs_direct_io() in the address space ops vector
3878 * allows open() O_DIRECT flags which would have failed otherwise.
3880 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3881 * so this method should never be called.
3883 * Direct IO is not yet supported in the cached mode.
3886 cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
3891 * Eventually need to support direct IO for non forcedirectio mounts
/* address_space operations for the normal (cached) mount mode */
3897 const struct address_space_operations cifs_addr_ops = {
3898 .readpage = cifs_readpage,
3899 .readpages = cifs_readpages,
3900 .writepage = cifs_writepage,
3901 .writepages = cifs_writepages,
3902 .write_begin = cifs_write_begin,
3903 .write_end = cifs_write_end,
3904 .set_page_dirty = __set_page_dirty_nobuffers,
3905 .releasepage = cifs_release_page,
3906 .direct_IO = cifs_direct_io,
3907 .invalidatepage = cifs_invalidate_page,
3908 .launder_page = cifs_launder_page,
3912 * cifs_readpages requires the server to support a buffer large enough to
3913 * contain the header plus one complete page of data. Otherwise, we need
3914 * to leave cifs_readpages out of the address space operations.
/* same as cifs_addr_ops but without .readpages (and no .direct_IO) */
3916 const struct address_space_operations cifs_addr_ops_smallbuf = {
3917 .readpage = cifs_readpage,
3918 .writepage = cifs_writepage,
3919 .writepages = cifs_writepages,
3920 .write_begin = cifs_write_begin,
3921 .write_end = cifs_write_end,
3922 .set_page_dirty = __set_page_dirty_nobuffers,
3923 .releasepage = cifs_release_page,
3924 .invalidatepage = cifs_invalidate_page,
3925 .launder_page = cifs_launder_page,