4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map the POSIX O_ACCMODE portion of open(2) flags to NT/CIFS
 * desired-access bits for an SMB open request.
 * NOTE(review): this listing has dropped lines (the O_RDONLY/O_WRONLY
 * return statements, braces, and the tail of the final return) --
 * fragment kept byte-identical; consult the full source before editing.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback for unusual access modes: request the individual rights. */
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate open(2) flags into the SMB_O_* flag set used by the CIFS
 * POSIX-extension open call (CIFSPOSIXCreate).
 * NOTE(review): the guarding "if (flags & O_CREAT)" style lines for
 * several assignments are missing from this listing -- each bare
 * "posix_flags |= ..." below presumably sits under such a test; verify
 * against the full source.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * Choose the SMB create disposition from open(2) flags:
 * O_CREAT|O_EXCL -> create-only, O_CREAT|O_TRUNC -> overwrite-or-create,
 * O_CREAT -> open-or-create, O_TRUNC -> overwrite existing.
 * NOTE(review): the FILE_CREATE / FILE_OPEN_IF return lines and the
 * default FILE_OPEN return are missing from this listing.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * Open @full_path using the CIFS POSIX extensions (CIFSPOSIXCreate).
 * On success returns the server file handle in *pnetfid and the granted
 * oplock in *poplock; if *pinode is NULL a new inode is instantiated
 * from the returned FILE_UNIX_BASIC_INFO, otherwise the existing inode
 * attributes are refreshed.  @mode is masked by the caller's umask.
 * NOTE(review): error-path lines (rc checks, -ENOMEM return, the
 * posix_open_ret label and kfree of presp_data) are missing from this
 * listing -- fragment kept byte-identical.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
/* Type of -1 means the server did not return file metadata. */
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open @full_path with the classic (non-POSIX) SMB open path: convert
 * the VFS flags to NT desired access + create disposition, then issue
 * CIFSSMBOpen (or SMBLegacyOpen for pre-NT servers) and finally refresh
 * inode metadata from the open response.
 * NOTE(review): the return-type line, buf declaration, rc checks and
 * the kfree(buf)/return tail are missing from this listing.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
178 int create_options = CREATE_NOT_DIR;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
/* Backup-intent mounts request CREATE_OPEN_BACKUP_INTENT rights. */
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
/* else: legacy (pre-NT) server -- fall back to SMBLegacyOpen. */
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
/* Refresh inode info; unix path vs. generic path chosen by tcon. */
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialise a cifsFileInfo for an already-opened server
 * handle @fileHandle, link it onto the tcon's open-file list and the
 * inode's open-file list (readable instances first), record the granted
 * oplock level, and stash the result in file->private_data.
 * Returns the new cifsFileInfo, or NULL on allocation failure
 * (NOTE(review): the NULL-return line is missing from this listing).
 */
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
/* Initial refcount of 1 is dropped by cifsFileInfo_put(). */
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
281 file->private_data = pCifsFile;
/* Forward declaration: needed by the lock-record cleanup loop below. */
285 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
288 * Release a reference on the file private data. This may involve closing
289 * the filehandle out on the server. Must be called without holding
290 * cifs_file_list_lock.
292 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
294 struct inode *inode = cifs_file->dentry->d_inode;
295 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
296 struct cifsInodeInfo *cifsi = CIFS_I(inode);
297 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
298 struct cifsLockInfo *li, *tmp;
/* Drop a reference under the list lock; last ref tears down state. */
300 spin_lock(&cifs_file_list_lock);
301 if (--cifs_file->count > 0) {
302 spin_unlock(&cifs_file_list_lock);
306 /* remove it from the lists */
307 list_del(&cifs_file->flist);
308 list_del(&cifs_file->tlist);
310 if (list_empty(&cifsi->openFileList)) {
311 cFYI(1, "closing last open instance for inode %p",
312 cifs_file->dentry->d_inode);
314 /* in strict cache mode we need invalidate mapping on the last
315 close because it may cause a error when we open this file
316 again and get at least level II oplock */
317 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
318 CIFS_I(inode)->invalid_mapping = true;
320 cifs_set_oplock_level(cifsi, 0);
322 spin_unlock(&cifs_file_list_lock);
/* Ensure no oplock-break work races with the teardown below. */
324 cancel_work_sync(&cifs_file->oplock_break);
326 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
330 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
334 /* Delete any outstanding lock records. We'll lose them when the file
337 mutex_lock(&cifsi->lock_mutex);
338 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
339 list_del(&li->llist);
340 cifs_del_lock_waiters(li);
343 mutex_unlock(&cifsi->lock_mutex);
/* Drop the tlink and dentry references taken in cifs_new_fileinfo. */
345 cifs_put_tlink(cifs_file->tlink);
346 dput(cifs_file->dentry);
/*
 * VFS ->open for regular files.  Tries the CIFS POSIX-extension open
 * first (when the tcon advertises CAP_UNIX + POSIX path ops and the
 * share has not been marked broken_posix_open); otherwise falls back to
 * cifs_nt_open.  On success builds the cifsFileInfo, sets the fscache
 * cookie, and -- for newly created files on unix-ext shares opened via
 * the NT path -- pushes the intended mode to the server afterwards.
 * NOTE(review): several lines are missing from this listing (xid
 * get/free, rc checks, the out: label, oplock enable assignment).
 */
350 int cifs_open(struct inode *inode, struct file *file)
355 struct cifs_sb_info *cifs_sb;
356 struct cifs_tcon *tcon;
357 struct tcon_link *tlink;
358 struct cifsFileInfo *pCifsFile = NULL;
359 char *full_path = NULL;
360 bool posix_open_ok = false;
365 cifs_sb = CIFS_SB(inode->i_sb);
366 tlink = cifs_sb_tlink(cifs_sb);
369 return PTR_ERR(tlink);
371 tcon = tlink_tcon(tlink);
373 full_path = build_path_from_dentry(file->f_path.dentry);
374 if (full_path == NULL) {
379 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
380 inode, file->f_flags, full_path);
382 if (tcon->ses->server->oplocks)
/* Attempt POSIX open only when the server supports the extensions. */
387 if (!tcon->broken_posix_open && tcon->unix_ext &&
388 (tcon->ses->capabilities & CAP_UNIX) &&
389 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
390 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
391 /* can not refresh inode info since size could be stale */
392 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
393 cifs_sb->mnt_file_mode /* ignored */,
394 file->f_flags, &oplock, &netfid, xid);
396 cFYI(1, "posix open succeeded");
397 posix_open_ok = true;
398 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
399 if (tcon->ses->serverNOS)
400 cERROR(1, "server %s of type %s returned"
401 " unexpected error on SMB posix open"
402 ", disabling posix open support."
403 " Check if server update available.",
404 tcon->ses->serverName,
405 tcon->ses->serverNOS);
406 tcon->broken_posix_open = true;
407 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
408 (rc != -EOPNOTSUPP)) /* path not found or net err */
410 /* else fallthrough to retry open the old way on network i/o
414 if (!posix_open_ok) {
415 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
416 file->f_flags, &oplock, &netfid, xid);
421 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
422 if (pCifsFile == NULL) {
/* Could not track the handle locally -- close it on the server. */
423 CIFSSMBClose(xid, tcon, netfid);
428 cifs_fscache_set_inode_cookie(inode, file);
430 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
431 /* time to set mode which we can not set earlier due to
432 problems creating new read-only files */
433 struct cifs_unix_set_info_args args = {
434 .mode = inode->i_mode,
437 .ctime = NO_CHANGE_64,
438 .atime = NO_CHANGE_64,
439 .mtime = NO_CHANGE_64,
442 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
449 cifs_put_tlink(tlink);
453 /* Try to reacquire byte range locks that were released when session */
454 /* to server was lost */
/* Currently a stub: the relock-on-reconnect work is still a BB/TODO. */
455 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
459 /* BB list all locks open on this file and relock */
/*
 * Re-open a server file handle that was invalidated (e.g. by session
 * reconnect).  Serialised on fh_mutex; bails out early if another
 * thread already revalidated the handle.  Tries POSIX reopen first
 * (with O_CREAT/O_EXCL/O_TRUNC masked off, since they already took
 * effect at the original open), then the classic CIFSSMBOpen path.
 * When @can_flush, dirty pages are written back and inode metadata is
 * refreshed before byte-range locks are re-acquired.
 * NOTE(review): multiple lines are missing from this listing (xid
 * handling, rc checks, reopen_error_exit label, kfree(full_path)).
 */
464 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
469 struct cifs_sb_info *cifs_sb;
470 struct cifs_tcon *tcon;
471 struct cifsInodeInfo *pCifsInode;
473 char *full_path = NULL;
475 int disposition = FILE_OPEN;
476 int create_options = CREATE_NOT_DIR;
480 mutex_lock(&pCifsFile->fh_mutex);
481 if (!pCifsFile->invalidHandle) {
/* Someone else already reopened it while we waited for the mutex. */
482 mutex_unlock(&pCifsFile->fh_mutex);
488 inode = pCifsFile->dentry->d_inode;
489 cifs_sb = CIFS_SB(inode->i_sb);
490 tcon = tlink_tcon(pCifsFile->tlink);
492 /* can not grab rename sem here because various ops, including
493 those that already have the rename sem can end up causing writepage
494 to get called and if the server was down that means we end up here,
495 and we can never tell if the caller already has the rename_sem */
496 full_path = build_path_from_dentry(pCifsFile->dentry);
497 if (full_path == NULL) {
499 mutex_unlock(&pCifsFile->fh_mutex);
504 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
505 inode, pCifsFile->f_flags, full_path);
507 if (tcon->ses->server->oplocks)
512 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
513 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
514 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
517 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
518 * original open. Must mask them off for a reopen.
520 unsigned int oflags = pCifsFile->f_flags &
521 ~(O_CREAT | O_EXCL | O_TRUNC);
523 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
524 cifs_sb->mnt_file_mode /* ignored */,
525 oflags, &oplock, &netfid, xid);
527 cFYI(1, "posix reopen succeeded");
530 /* fallthrough to retry open the old way on errors, especially
531 in the reconnect path it is important to retry hard */
534 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
536 if (backup_cred(cifs_sb))
537 create_options |= CREATE_OPEN_BACKUP_INTENT;
539 /* Can not refresh inode by passing in file_info buf to be returned
540 by SMBOpen and then calling get_inode_info with returned buf
541 since file might have write behind data that needs to be flushed
542 and server version of file size can be stale. If we knew for sure
543 that inode was not dirty locally we could do this */
545 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
546 create_options, &netfid, &oplock, NULL,
547 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
548 CIFS_MOUNT_MAP_SPECIAL_CHR);
550 mutex_unlock(&pCifsFile->fh_mutex);
551 cFYI(1, "cifs_open returned 0x%x", rc);
552 cFYI(1, "oplock: %d", oplock);
553 goto reopen_error_exit;
/* Reopen succeeded: publish the new handle before further work. */
557 pCifsFile->netfid = netfid;
558 pCifsFile->invalidHandle = false;
559 mutex_unlock(&pCifsFile->fh_mutex);
560 pCifsInode = CIFS_I(inode);
563 rc = filemap_write_and_wait(inode->i_mapping);
564 mapping_set_error(inode->i_mapping, rc);
567 rc = cifs_get_inode_info_unix(&inode,
568 full_path, inode->i_sb, xid);
570 rc = cifs_get_inode_info(&inode,
571 full_path, NULL, inode->i_sb,
573 } /* else we are writing out data to server already
574 and could deadlock if we tried to flush data, and
575 since we do not know if we have data that would
576 invalidate the current end of file on the server
577 we can not go to the server to get the new inod
580 cifs_set_oplock_level(pCifsInode, oplock);
582 cifs_relock_file(pCifsFile);
/*
 * VFS ->release for regular files: drop this file's reference on its
 * cifsFileInfo (may close the server handle on last reference).
 */
590 int cifs_close(struct inode *inode, struct file *file)
592 if (file->private_data != NULL) {
593 cifsFileInfo_put(file->private_data);
594 file->private_data = NULL;
597 /* return code from the ->release op is always ignored */
/*
 * VFS ->release for directories: close any uncompleted FindFirst/
 * FindNext search on the server, free the buffered search response,
 * drop the tlink reference and free the private data.
 * NOTE(review): some lines (rc declaration, ptmp declaration, NULL
 * checks, FreeXid) are missing from this listing.
 */
601 int cifs_closedir(struct inode *inode, struct file *file)
605 struct cifsFileInfo *pCFileStruct = file->private_data;
608 cFYI(1, "Closedir inode = 0x%p", inode);
613 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
615 cFYI(1, "Freeing private data in close dir");
616 spin_lock(&cifs_file_list_lock);
617 if (!pCFileStruct->srch_inf.endOfSearch &&
618 !pCFileStruct->invalidHandle) {
619 pCFileStruct->invalidHandle = true;
/* Drop the spinlock around the blocking network close call. */
620 spin_unlock(&cifs_file_list_lock);
621 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
622 cFYI(1, "Closing uncompleted readdir with rc %d",
624 /* not much we can do if it fails anyway, ignore rc */
627 spin_unlock(&cifs_file_list_lock);
628 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
630 cFYI(1, "closedir free smb buf in srch struct");
631 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
/* Small vs. big SMB response buffers go back to different pools. */
632 if (pCFileStruct->srch_inf.smallBuf)
633 cifs_small_buf_release(ptmp);
635 cifs_buf_release(ptmp);
637 cifs_put_tlink(pCFileStruct->tlink);
638 kfree(file->private_data);
639 file->private_data = NULL;
641 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialise a cifsLockInfo for a byte-range lock owned by
 * the current thread group.  Returns NULL on allocation failure
 * (NOTE(review): the NULL check, type assignment, and return line are
 * missing from this listing).
 */
646 static struct cifsLockInfo *
647 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
649 struct cifsLockInfo *lock =
650 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
653 lock->offset = offset;
654 lock->length = length;
656 lock->pid = current->tgid;
657 INIT_LIST_HEAD(&lock->blist);
658 init_waitqueue_head(&lock->block_q);
/*
 * Wake every lock request blocked on @lock and unlink it from the
 * blocked list, so waiters in cifs_lock_add_if() can retry.
 */
663 cifs_del_lock_waiters(struct cifsLockInfo *lock)
665 struct cifsLockInfo *li, *tmp;
666 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
667 list_del_init(&li->blist);
668 wake_up(&li->block_q);
/*
 * Scan one open file's lock list for a lock overlapping
 * [offset, offset+length).  Overlapping shared locks owned by the same
 * fid/tgid (or of the same type) are not conflicts.
 * NOTE(review): the lines that record *conf_lock and the return
 * statements are missing from this listing -- presumably it returns
 * true and fills *conf_lock on conflict; verify against full source.
 */
673 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
674 __u64 length, __u8 type, struct cifsFileInfo *cur,
675 struct cifsLockInfo **conf_lock)
677 struct cifsLockInfo *li;
678 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
680 list_for_each_entry(li, &cfile->llist, llist) {
681 if (offset + length <= li->offset ||
682 offset >= li->offset + li->length)
684 else if ((type & server->vals->shared_lock_type) &&
685 ((server->ops->compare_fids(cur, cfile) &&
686 current->tgid == li->pid) || type == li->type))
/*
 * Check every open instance of the inode for a conflicting byte-range
 * lock, under cifs_file_list_lock.  Delegates the per-fid scan to
 * cifs_find_fid_lock_conflict().
 */
697 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
698 __u8 type, struct cifsLockInfo **conf_lock)
701 struct cifsFileInfo *fid, *tmp;
702 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
704 spin_lock(&cifs_file_list_lock);
705 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
706 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
711 spin_unlock(&cifs_file_list_lock);
717 * Check if there is another lock that prevents us to set the lock (mandatory
718 * style). If such a lock exists, update the flock structure with its
719 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
720 * or leave it the same if we can't. Returns 0 if we don't need to request to
721 * the server or 1 otherwise.
724 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
725 __u8 type, struct file_lock *flock)
728 struct cifsLockInfo *conf_lock;
729 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
730 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
733 mutex_lock(&cinode->lock_mutex);
735 exist = cifs_find_lock_conflict(cfile, offset, length, type,
/* Conflict found: report the conflicting lock's range/owner/type. */
738 flock->fl_start = conf_lock->offset;
739 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
740 flock->fl_pid = conf_lock->pid;
741 if (conf_lock->type & server->vals->shared_lock_type)
742 flock->fl_type = F_RDLCK;
744 flock->fl_type = F_WRLCK;
745 } else if (!cinode->can_cache_brlcks)
/* Can't cache brlocks locally -- the caller must ask the server. */
748 flock->fl_type = F_UNLCK;
750 mutex_unlock(&cinode->lock_mutex);
/* Append @lock to the file's lock list under the inode's lock_mutex. */
755 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
757 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
758 mutex_lock(&cinode->lock_mutex);
759 list_add_tail(&lock->llist, &cfile->llist);
760 mutex_unlock(&cinode->lock_mutex);
764 * Set the byte-range lock (mandatory style). Returns:
765 * 1) 0, if we set the lock and don't need to request to the server;
766 * 2) 1, if no locks prevent us but we need to request to the server;
767 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
770 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
773 struct cifsLockInfo *conf_lock;
774 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
/* Retry loop: on conflict we block on the conflicting lock's queue. */
780 mutex_lock(&cinode->lock_mutex);
782 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
783 lock->type, &conf_lock);
784 if (!exist && cinode->can_cache_brlcks) {
785 list_add_tail(&lock->llist, &cfile->llist);
786 mutex_unlock(&cinode->lock_mutex);
/* Queue ourselves behind the conflicting lock and sleep until woken
   (the wait condition checks that we were unlinked from blist). */
795 list_add_tail(&lock->blist, &conf_lock->blist);
796 mutex_unlock(&cinode->lock_mutex);
797 rc = wait_event_interruptible(lock->block_q,
798 (lock->blist.prev == &lock->blist) &&
799 (lock->blist.next == &lock->blist));
802 mutex_lock(&cinode->lock_mutex);
803 list_del_init(&lock->blist);
806 mutex_unlock(&cinode->lock_mutex);
811 * Check if there is another lock that prevents us to set the lock (posix
812 * style). If such a lock exists, update the flock structure with its
813 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
814 * or leave it the same if we can't. Returns 0 if we don't need to request to
815 * the server or 1 otherwise.
818 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
821 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
822 unsigned char saved_type = flock->fl_type;
824 if ((flock->fl_flags & FL_POSIX) == 0)
827 mutex_lock(&cinode->lock_mutex);
/* Test against the local VFS lock table first. */
828 posix_test_lock(file, flock);
830 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
/* Local table is not authoritative here -- restore the type and
   let the caller query the server. */
831 flock->fl_type = saved_type;
835 mutex_unlock(&cinode->lock_mutex);
840 * Set the byte-range lock (posix style). Returns:
841 * 1) 0, if we set the lock and don't need to request to the server;
842 * 2) 1, if we need to request to the server;
843 * 3) <0, if the error occurs while setting the lock.
846 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
848 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
851 if ((flock->fl_flags & FL_POSIX) == 0)
855 mutex_lock(&cinode->lock_mutex);
856 if (!cinode->can_cache_brlcks) {
857 mutex_unlock(&cinode->lock_mutex);
861 rc = posix_lock_file(file, flock, NULL);
862 mutex_unlock(&cinode->lock_mutex);
863 if (rc == FILE_LOCK_DEFERRED) {
/* Lock was queued: wait for it to be granted, then retry. */
864 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
867 locks_delete_block(flock);
/*
 * Flush all locally-cached mandatory byte-range locks for @cfile to the
 * server, batching up to max_num LOCKING_ANDX_RANGE entries per
 * LOCKING_ANDX request; exclusive and shared locks are sent in separate
 * passes (types[] loop).  Clears can_cache_brlcks when done.
 * NOTE(review): this listing is missing lines (num/i declarations,
 * max_buf zero-check branch, the per-batch rc bookkeeping, kfree(buf),
 * FreeXid and final return).
 */
873 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
875 int xid, rc = 0, stored_rc;
876 struct cifsLockInfo *li, *tmp;
877 struct cifs_tcon *tcon;
878 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 unsigned int num, max_num, max_buf;
880 LOCKING_ANDX_RANGE *buf, *cur;
881 int types[] = {LOCKING_ANDX_LARGE_FILES,
882 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
886 tcon = tlink_tcon(cfile->tlink);
888 mutex_lock(&cinode->lock_mutex);
889 if (!cinode->can_cache_brlcks) {
890 mutex_unlock(&cinode->lock_mutex);
896 * Accessing maxBuf is racy with cifs_reconnect - need to store value
897 * and check it for zero before using.
899 max_buf = tcon->ses->server->maxBuf;
901 mutex_unlock(&cinode->lock_mutex);
906 max_num = (max_buf - sizeof(struct smb_hdr)) /
907 sizeof(LOCKING_ANDX_RANGE);
908 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
910 mutex_unlock(&cinode->lock_mutex);
915 for (i = 0; i < 2; i++) {
918 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
919 if (li->type != types[i])
/* Encode this lock's pid and 64-bit offset/length into the batch. */
921 cur->Pid = cpu_to_le16(li->pid);
922 cur->LengthLow = cpu_to_le32((u32)li->length);
923 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
924 cur->OffsetLow = cpu_to_le32((u32)li->offset);
925 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
926 if (++num == max_num) {
927 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
928 (__u8)li->type, 0, num,
/* Send any final partial batch for this lock type. */
939 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
940 (__u8)types[i], 0, num, buf);
946 cinode->can_cache_brlcks = false;
947 mutex_unlock(&cinode->lock_mutex);
954 /* copied from fs/locks.c with a name change */
/* Walk the inode's i_flock singly-linked lock list via pointer slots. */
955 #define cifs_for_each_lock(inode, lockp) \
956 for (lockp = &inode->i_flock; *lockp != NULL; \
957 lockp = &(*lockp)->fl_next)
/*
 * Snapshot of one POSIX lock to be pushed to the server.
 * NOTE(review): the struct's field declarations (offset, length, pid,
 * netfid, type) and closing brace are missing from this listing.
 */
959 struct lock_to_push {
960 struct list_head llist;
/*
 * Push all cached POSIX (fcntl-style) locks on the inode to the server.
 * Counts the FL_POSIX locks first, pre-allocates that many
 * lock_to_push entries (safe because lock_mutex blocks new FL_POSIX
 * locks meanwhile), snapshots each lock, then replays them via
 * CIFSSMBPosixLock.  Clears can_cache_brlcks afterwards.
 * NOTE(review): this listing is missing lines (count++ in the first
 * walk, type assignment, error/cleanup labels, FreeXid, returns).
 */
969 cifs_push_posix_locks(struct cifsFileInfo *cfile)
971 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
972 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
973 struct file_lock *flock, **before;
974 unsigned int count = 0, i = 0;
975 int rc = 0, xid, type;
976 struct list_head locks_to_send, *el;
977 struct lock_to_push *lck, *tmp;
982 mutex_lock(&cinode->lock_mutex);
983 if (!cinode->can_cache_brlcks) {
984 mutex_unlock(&cinode->lock_mutex);
/* First pass: count the FL_POSIX locks we must push. */
990 cifs_for_each_lock(cfile->dentry->d_inode, before) {
991 if ((*before)->fl_flags & FL_POSIX)
996 INIT_LIST_HEAD(&locks_to_send);
999 * Allocating count locks is enough because no FL_POSIX locks can be
1000 * added to the list while we are holding cinode->lock_mutex that
1001 * protects locking operations of this inode.
1003 for (; i < count; i++) {
1004 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1009 list_add_tail(&lck->llist, &locks_to_send);
/* Second pass: fill the pre-allocated entries from the lock list. */
1012 el = locks_to_send.next;
1014 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1016 if ((flock->fl_flags & FL_POSIX) == 0)
1018 if (el == &locks_to_send) {
1020 * The list ended. We don't have enough allocated
1021 * structures - something is really wrong.
1023 cERROR(1, "Can't push all brlocks!");
1026 length = 1 + flock->fl_end - flock->fl_start;
1027 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1031 lck = list_entry(el, struct lock_to_push, llist);
1032 lck->pid = flock->fl_pid;
1033 lck->netfid = cfile->netfid;
1034 lck->length = length;
1036 lck->offset = flock->fl_start;
/* Replay each snapshotted lock on the server. */
1041 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1042 struct file_lock tmp_lock;
1045 tmp_lock.fl_start = lck->offset;
1046 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1047 0, lck->length, &tmp_lock,
1051 list_del(&lck->llist);
1056 cinode->can_cache_brlcks = false;
1057 mutex_unlock(&cinode->lock_mutex);
/* Error path: free any remaining snapshot entries. */
1062 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1063 list_del(&lck->llist);
/*
 * Dispatch cached-lock pushing: POSIX-style when the share supports
 * CIFS unix fcntl locks and the mount did not disable them
 * (noposixbrl), mandatory-style otherwise.
 */
1070 cifs_push_locks(struct cifsFileInfo *cfile)
1072 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1073 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1075 if ((tcon->ses->capabilities & CAP_UNIX) &&
1076 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1077 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1078 return cifs_push_posix_locks(cfile);
1080 return cifs_push_mandatory_locks(cfile);
/*
 * Decode a struct file_lock into CIFS terms: whether this is a POSIX or
 * flock request, whether it may block (*wait_flag), and the server
 * lock-type bits (*type) plus the lock/unlock direction.
 * NOTE(review): the assignments setting *lock/*unlock/*wait_flag under
 * each branch are missing from this listing.
 */
1084 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1085 bool *wait_flag, struct TCP_Server_Info *server)
1087 if (flock->fl_flags & FL_POSIX)
1089 if (flock->fl_flags & FL_FLOCK)
1091 if (flock->fl_flags & FL_SLEEP) {
1092 cFYI(1, "Blocking lock");
1095 if (flock->fl_flags & FL_ACCESS)
1096 cFYI(1, "Process suspended by mandatory locking - "
1097 "not implemented yet");
1098 if (flock->fl_flags & FL_LEASE)
1099 cFYI(1, "Lease on file - not implemented yet");
1100 if (flock->fl_flags &
1101 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1102 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1104 *type = server->vals->large_lock_type;
1105 if (flock->fl_type == F_WRLCK) {
1106 cFYI(1, "F_WRLCK ");
1107 *type |= server->vals->exclusive_lock_type;
1109 } else if (flock->fl_type == F_UNLCK) {
1111 *type |= server->vals->unlock_lock_type;
1113 /* Check if unlock includes more than one lock range */
1114 } else if (flock->fl_type == F_RDLCK) {
1116 *type |= server->vals->shared_lock_type;
1118 } else if (flock->fl_type == F_EXLCK) {
1120 *type |= server->vals->exclusive_lock_type;
1122 } else if (flock->fl_type == F_SHLCK) {
1124 *type |= server->vals->shared_lock_type;
1127 cFYI(1, "Unknown type of lock");
/*
 * Thin wrapper issuing a single mandatory byte-range (un)lock request
 * for the current thread group via CIFSSMBLock.
 */
1131 cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset,
1132 __u64 length, __u32 type, int lock, int unlock, bool wait)
1134 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1135 current->tgid, length, offset, unlock, lock,
1136 (__u8)type, wait, 0);
/*
 * F_GETLK handling.  For POSIX-capable shares, test locally and then
 * query the server with CIFSSMBPosixLock.  For mandatory semantics,
 * probe by actually taking the lock on the server: if the lock (or, for
 * shared requests, the exclusive variant) succeeds it is immediately
 * released and flock reports no conflict; if it fails, the conflicting
 * type is reported.  NOTE(review): rc checks, returns and some
 * intermediate lines are missing from this listing.
 */
1140 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1141 bool wait_flag, bool posix_lck, int xid)
1144 __u64 length = 1 + flock->fl_end - flock->fl_start;
1145 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1146 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1147 struct TCP_Server_Info *server = tcon->ses->server;
1148 __u16 netfid = cfile->netfid;
1151 int posix_lock_type;
1153 rc = cifs_posix_lock_test(file, flock);
1157 if (type & server->vals->shared_lock_type)
1158 posix_lock_type = CIFS_RDLCK;
1160 posix_lock_type = CIFS_WRLCK;
1161 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1162 1 /* get */, length, flock,
1163 posix_lock_type, wait_flag);
1167 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1171 /* BB we could chain these into one lock request BB */
/* Probe: try to take the lock, then undo it if it succeeded. */
1172 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1175 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1177 flock->fl_type = F_UNLCK;
1179 cERROR(1, "Error unlocking previously locked "
1180 "range %d during test of lock", rc);
/* Shared request failed outright: report an exclusive conflict. */
1184 if (type & server->vals->shared_lock_type) {
1185 flock->fl_type = F_WRLCK;
/* For a shared request, also probe the exclusive variant to
   distinguish read conflicts from write conflicts. */
1189 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1190 type | server->vals->shared_lock_type, 1, 0,
1193 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1194 type | server->vals->shared_lock_type,
1196 flock->fl_type = F_RDLCK;
1198 cERROR(1, "Error unlocking previously locked "
1199 "range %d during test of lock", rc);
1201 flock->fl_type = F_WRLCK;
/* Move every entry from @source onto the tail of @dest. */
1207 cifs_move_llist(struct list_head *source, struct list_head *dest)
1209 struct list_head *li, *tmp;
1210 list_for_each_safe(li, tmp, source)
1211 list_move(li, dest);
/*
 * Free every cifsLockInfo on @llist, first waking any requests blocked
 * on each lock (NOTE(review): the kfree(li) line is missing from this
 * listing).
 */
1215 cifs_free_llist(struct list_head *llist)
1217 struct cifsLockInfo *li, *tmp;
1218 list_for_each_entry_safe(li, tmp, llist, llist) {
1219 cifs_del_lock_waiters(li);
1220 list_del(&li->llist);
/*
 * Remove every cached lock wholly contained in the flock's range that
 * belongs to the current tgid.  If brlocks are not cacheable, the
 * corresponding unlock ranges are batched into LOCKING_ANDX requests
 * (per lock type); removed entries are parked on tmp_llist so they can
 * be restored to the file's list if a server unlock batch fails.
 * NOTE(review): i/num/cur initialisation, max_buf zero-check, some
 * rc bookkeeping, kfree(buf) and the return are missing here.
 */
1226 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1228 int rc = 0, stored_rc;
1229 int types[] = {LOCKING_ANDX_LARGE_FILES,
1230 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1232 unsigned int max_num, num, max_buf;
1233 LOCKING_ANDX_RANGE *buf, *cur;
1234 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1235 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1236 struct cifsLockInfo *li, *tmp;
1237 __u64 length = 1 + flock->fl_end - flock->fl_start;
1238 struct list_head tmp_llist;
1240 INIT_LIST_HEAD(&tmp_llist);
1243 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1244 * and check it for zero before using.
1246 max_buf = tcon->ses->server->maxBuf;
1250 max_num = (max_buf - sizeof(struct smb_hdr)) /
1251 sizeof(LOCKING_ANDX_RANGE);
1252 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1256 mutex_lock(&cinode->lock_mutex);
1257 for (i = 0; i < 2; i++) {
1260 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
/* Skip locks not fully inside the requested unlock range. */
1261 if (flock->fl_start > li->offset ||
1262 (flock->fl_start + length) <
1263 (li->offset + li->length))
1265 if (current->tgid != li->pid)
1267 if (types[i] != li->type)
1269 if (!cinode->can_cache_brlcks) {
1270 cur->Pid = cpu_to_le16(li->pid);
1271 cur->LengthLow = cpu_to_le32((u32)li->length);
1273 cpu_to_le32((u32)(li->length>>32));
1274 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1276 cpu_to_le32((u32)(li->offset>>32));
1278 * We need to save a lock here to let us add
1279 * it again to the file's list if the unlock
1280 * range request fails on the server.
1282 list_move(&li->llist, &tmp_llist);
1283 if (++num == max_num) {
1284 stored_rc = cifs_lockv(xid, tcon,
1290 * We failed on the unlock range
1291 * request - add all locks from
1292 * the tmp list to the head of
1295 cifs_move_llist(&tmp_llist,
1300 * The unlock range request
1301 * succeed - free the tmp list.
1303 cifs_free_llist(&tmp_llist);
1310 * We can cache brlock requests - simply remove
1311 * a lock from the file's list.
1313 list_del(&li->llist);
1314 cifs_del_lock_waiters(li);
/* Flush any final partial batch for this lock type. */
1319 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1320 types[i], num, 0, buf);
1322 cifs_move_llist(&tmp_llist, &cfile->llist);
1325 cifs_free_llist(&tmp_llist);
1329 mutex_unlock(&cinode->lock_mutex);
/*
 * Apply (lock != 0) or release (unlock != 0) a byte-range lock on an open
 * file.  When posix_lck is set and the server supports POSIX semantics the
 * lock is set via CIFSSMBPosixLock; otherwise CIFS mandatory locking is
 * used (cifs_mandatory_lock / cifs_unlock_range).
 * NOTE(review): several source lines are elided in this excerpt.
 */
1335 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1336 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
/* fl_end is inclusive, hence the +1 when computing the byte length */
1339 __u64 length = 1 + flock->fl_end - flock->fl_start;
1340 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1341 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1342 struct TCP_Server_Info *server = tcon->ses->server;
1343 __u16 netfid = cfile->netfid;
1346 int posix_lock_type;
/* try to satisfy/cache the POSIX lock locally first */
1348 rc = cifs_posix_lock_set(file, flock);
/* map the generic lock type onto the CIFS POSIX lock type */
1352 if (type & server->vals->shared_lock_type)
1353 posix_lock_type = CIFS_RDLCK;
1355 posix_lock_type = CIFS_WRLCK;
1358 posix_lock_type = CIFS_UNLCK;
/* send the POSIX lock/unlock request to the server */
1360 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1361 0 /* set */, length, flock,
1362 posix_lock_type, wait_flag);
1367 struct cifsLockInfo *lock;
1369 lock = cifs_lock_init(flock->fl_start, length, type);
/* check for conflicts with locks we already hold before asking server */
1373 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1379 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1380 type, 1, 0, wait_flag);
/* server granted the mandatory lock - record it on the file's list */
1386 cifs_lock_add(cfile, lock);
1388 rc = cifs_unlock_range(cfile, flock, xid);
/* on success keep the local VFS posix-lock state in sync */
1391 if (flock->fl_flags & FL_POSIX)
1392 posix_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point: parse the file_lock request, decide between
 * POSIX and mandatory lock handling, then dispatch to cifs_getlk (F_GETLK)
 * or cifs_setlk (F_SETLK/F_SETLKW).
 * NOTE(review): several source lines are elided in this excerpt.
 */
1396 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1399 int lock = 0, unlock = 0;
1400 bool wait_flag = false;
1401 bool posix_lck = false;
1402 struct cifs_sb_info *cifs_sb;
1403 struct cifs_tcon *tcon;
1404 struct cifsInodeInfo *cinode;
1405 struct cifsFileInfo *cfile;
1412 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1413 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1414 flock->fl_start, flock->fl_end);
1416 cfile = (struct cifsFileInfo *)file->private_data;
1417 tcon = tlink_tcon(cfile->tlink);
/* decode fl_flags/fl_type into lock/unlock/wait_flag/type */
1419 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1422 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1423 netfid = cfile->netfid;
1424 cinode = CIFS_I(file->f_path.dentry->d_inode);
/*
 * POSIX byte-range locks are usable only on a unix-extensions-capable
 * server and when the mount did not disable them (NOPOSIXBRL).
 */
1426 if ((tcon->ses->capabilities & CAP_UNIX) &&
1427 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1428 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1431 * BB add code here to normalize offset and length to account for
1432 * negative length which we can not accept over the wire.
1434 if (IS_GETLK(cmd)) {
1435 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1440 if (!lock && !unlock) {
1442 * if no lock or unlock then nothing to do since we do not
1449 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1456 * update the file size (if needed) after a write. Should be called with
1457 * the inode->i_lock held
1460 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1461 unsigned int bytes_written)
1463 loff_t end_of_write = offset + bytes_written;
/* server_eof only ever grows here; truncation is handled elsewhere */
1465 if (end_of_write > cifsi->server_eof)
1466 cifsi->server_eof = end_of_write;
/*
 * Synchronous write of write_size bytes at *poffset using an already-open
 * file handle.  Loops until everything is written or an error occurs,
 * retrying on -EAGAIN (reopening an invalidated handle first), and updates
 * the cached server EOF and inode size on success.
 * Returns the number of bytes written.
 * NOTE(review): several source lines are elided in this excerpt.
 */
1469 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1470 const char *write_data, size_t write_size,
1474 unsigned int bytes_written = 0;
1475 unsigned int total_written;
1476 struct cifs_sb_info *cifs_sb;
1477 struct cifs_tcon *pTcon;
1479 struct dentry *dentry = open_file->dentry;
1480 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1481 struct cifs_io_parms io_parms;
1483 cifs_sb = CIFS_SB(dentry->d_sb);
1485 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1486 *poffset, dentry->d_name.name);
1488 pTcon = tlink_tcon(open_file->tlink);
/* outer loop: advance through the buffer one wsize chunk at a time */
1492 for (total_written = 0; write_size > total_written;
1493 total_written += bytes_written) {
/* inner loop: retry the current chunk while the server says -EAGAIN */
1495 while (rc == -EAGAIN) {
1499 if (open_file->invalidHandle) {
1500 /* we could deadlock if we called
1501 filemap_fdatawait from here so tell
1502 reopen_file not to flush data to
1504 rc = cifs_reopen_file(open_file, false);
/* never send more than the negotiated wsize in one request */
1509 len = min((size_t)cifs_sb->wsize,
1510 write_size - total_written);
1511 /* iov[0] is reserved for smb header */
1512 iov[1].iov_base = (char *)write_data + total_written;
1513 iov[1].iov_len = len;
1514 io_parms.netfid = open_file->netfid;
1516 io_parms.tcon = pTcon;
1517 io_parms.offset = *poffset;
1518 io_parms.length = len;
1519 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
/* error or zero-byte write ends the transfer */
1522 if (rc || (bytes_written == 0)) {
/* i_lock protects the eof/size updates below */
1530 spin_lock(&dentry->d_inode->i_lock);
1531 cifs_update_eof(cifsi, *poffset, bytes_written);
1532 spin_unlock(&dentry->d_inode->i_lock);
1533 *poffset += bytes_written;
1537 cifs_stats_bytes_written(pTcon, total_written);
/* extend the cached inode size if we wrote past it */
1539 if (total_written > 0) {
1540 spin_lock(&dentry->d_inode->i_lock);
1541 if (*poffset > dentry->d_inode->i_size)
1542 i_size_write(dentry->d_inode, *poffset);
1543 spin_unlock(&dentry->d_inode->i_lock);
1545 mark_inode_dirty_sync(dentry->d_inode);
1547 return total_written;
/*
 * Find an open file handle on this inode usable for reading, optionally
 * restricted to the current fsuid (multiuser mounts).  Takes a reference
 * on the returned cifsFileInfo; caller must cifsFileInfo_put() it.
 * Returns NULL if no suitable handle exists.
 */
1550 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1553 struct cifsFileInfo *open_file = NULL;
1554 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1556 /* only filter by fsuid on multiuser mounts */
1557 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
/* cifs_file_list_lock guards the per-inode openFileList */
1560 spin_lock(&cifs_file_list_lock);
1561 /* we could simply get the first_list_entry since write-only entries
1562 are always at the end of the list but since the first entry might
1563 have a close pending, we go through the whole list */
1564 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1565 if (fsuid_only && open_file->uid != current_fsuid())
1567 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1568 if (!open_file->invalidHandle) {
1569 /* found a good file */
1570 /* lock it so it will not be closed on us */
1571 cifsFileInfo_get(open_file);
1572 spin_unlock(&cifs_file_list_lock);
1574 } /* else might as well continue, and look for
1575 another, or simply have the caller reopen it
1576 again rather than trying to fix this handle */
1577 } else /* write only file */
1578 break; /* write only files are last so must be done */
1580 spin_unlock(&cifs_file_list_lock);
/*
 * Find an open handle on this inode usable for writing.  Prefers a handle
 * belonging to the current tgid; if none, retries accepting any pid
 * (any_available).  A handle whose server fid was invalidated (inv_file)
 * is reopened, up to MAX_REOPEN_ATT attempts.  Takes a reference on the
 * returned handle; returns NULL on failure.
 * NOTE(review): several source lines are elided in this excerpt.
 */
1584 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1587 struct cifsFileInfo *open_file, *inv_file = NULL;
1588 struct cifs_sb_info *cifs_sb;
1589 bool any_available = false;
1591 unsigned int refind = 0;
1593 /* Having a null inode here (because mapping->host was set to zero by
1594 the VFS or MM) should not happen but we had reports of on oops (due to
1595 it being zero) during stress testcases so we need to check for it */
1597 if (cifs_inode == NULL) {
1598 cERROR(1, "Null inode passed to cifs_writeable_file");
1603 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1605 /* only filter by fsuid on multiuser mounts */
1606 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1609 spin_lock(&cifs_file_list_lock);
/* give up after too many reopen attempts to avoid looping forever */
1611 if (refind > MAX_REOPEN_ATT) {
1612 spin_unlock(&cifs_file_list_lock);
1615 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1616 if (!any_available && open_file->pid != current->tgid)
1618 if (fsuid_only && open_file->uid != current_fsuid())
1620 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1621 if (!open_file->invalidHandle) {
1622 /* found a good writable file */
1623 cifsFileInfo_get(open_file);
1624 spin_unlock(&cifs_file_list_lock);
/* remember an invalidated handle as a fallback candidate */
1628 inv_file = open_file;
1632 /* couldn't find useable FH with same pid, try any available */
1633 if (!any_available) {
1634 any_available = true;
1635 goto refind_writable;
1639 any_available = false;
1640 cifsFileInfo_get(inv_file);
/* drop the list lock before the (blocking) reopen call */
1643 spin_unlock(&cifs_file_list_lock);
1646 rc = cifs_reopen_file(inv_file, false);
1650 spin_lock(&cifs_file_list_lock);
1651 list_move_tail(&inv_file->flist,
1652 &cifs_inode->openFileList);
1653 spin_unlock(&cifs_file_list_lock);
1654 cifsFileInfo_put(inv_file);
1655 spin_lock(&cifs_file_list_lock);
1657 goto refind_writable;
/*
 * Write the [from, to) byte range of a page back to the server using any
 * writable handle on the inode.  Clamps the range so the write never
 * extends the file, and tolerates races with truncate.
 * NOTE(review): several source lines are elided in this excerpt.
 */
1664 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1666 struct address_space *mapping = page->mapping;
1667 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1670 int bytes_written = 0;
1671 struct inode *inode;
1672 struct cifsFileInfo *open_file;
1674 if (!mapping || !mapping->host)
1677 inode = page->mapping->host;
1679 offset += (loff_t)from;
1680 write_data = kmap(page);
/* sanity-check the requested sub-page range */
1683 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1688 /* racing with truncate? */
1689 if (offset > mapping->host->i_size) {
1691 return 0; /* don't care */
1694 /* check to make sure that we are not extending the file */
1695 if (mapping->host->i_size - offset < (loff_t)to)
1696 to = (unsigned)(mapping->host->i_size - offset);
1698 open_file = find_writable_file(CIFS_I(mapping->host), false);
1700 bytes_written = cifs_write(open_file, open_file->pid,
1701 write_data, to - from, &offset);
1702 cifsFileInfo_put(open_file);
1703 /* Does mm or vfs already set times? */
1704 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1705 if ((bytes_written > 0) && (offset))
1707 else if (bytes_written < 0)
1710 cFYI(1, "No writeable filehandles for inode");
1719 * Marshal up the iov array, reserving the first one for the header. Also,
1723 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1726 struct inode *inode = wdata->cfile->dentry->d_inode;
1727 loff_t size = i_size_read(inode);
1729 /* marshal up the pages into iov array */
1731 for (i = 0; i < wdata->nr_pages; i++) {
/* the final page may be partial: cap its length at the inode size */
1732 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1733 (loff_t)PAGE_CACHE_SIZE);
/* pages stay kmapped until the write completes */
1734 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1735 wdata->bytes += iov[i + 1].iov_len;
/*
 * ->writepages: gather runs of consecutive dirty pages (up to wsize worth)
 * and send each run to the server as one async write (cifs_async_writev).
 * Falls back to per-page generic_writepages when wsize < page size.
 * NOTE(review): several source lines are elided in this excerpt.
 */
1739 static int cifs_writepages(struct address_space *mapping,
1740 struct writeback_control *wbc)
1742 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1743 bool done = false, scanned = false, range_whole = false;
1745 struct cifs_writedata *wdata;
1750 * If wsize is smaller than the page cache size, default to writing
1751 * one page at a time via cifs_writepage
1753 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1754 return generic_writepages(mapping, wbc);
/* work out the page-index window to scan from the wbc */
1756 if (wbc->range_cyclic) {
1757 index = mapping->writeback_index; /* Start from prev offset */
1760 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1761 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1762 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
/* main loop: one wdata (one server write) per iteration */
1767 while (!done && index <= end) {
1768 unsigned int i, nr_pages, found_pages;
1769 pgoff_t next = 0, tofind;
1770 struct page **pages;
/* at most wsize/PAGE_SIZE - 1 pages per request (hdr uses one iov) */
1772 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1775 wdata = cifs_writedata_alloc((unsigned int)tofind,
1776 cifs_writev_complete);
1783 * find_get_pages_tag seems to return a max of 256 on each
1784 * iteration, so we must call it several times in order to
1785 * fill the array or the wsize is effectively limited to
1786 * 256 * PAGE_CACHE_SIZE.
1789 pages = wdata->pages;
1791 nr_pages = find_get_pages_tag(mapping, &index,
1792 PAGECACHE_TAG_DIRTY,
1794 found_pages += nr_pages;
1797 } while (nr_pages && tofind && index <= end);
1799 if (found_pages == 0) {
1800 kref_put(&wdata->refcount, cifs_writedata_release);
/* filter the found pages: keep only a locked, still-dirty,
 * consecutive run that belongs to this mapping */
1805 for (i = 0; i < found_pages; i++) {
1806 page = wdata->pages[i];
1808 * At this point we hold neither mapping->tree_lock nor
1809 * lock on the page itself: the page may be truncated or
1810 * invalidated (changing page->mapping to NULL), or even
1811 * swizzled back from swapper_space to tmpfs file
1817 else if (!trylock_page(page))
1820 if (unlikely(page->mapping != mapping)) {
1825 if (!wbc->range_cyclic && page->index > end) {
1831 if (next && (page->index != next)) {
1832 /* Not next consecutive page */
1837 if (wbc->sync_mode != WB_SYNC_NONE)
1838 wait_on_page_writeback(page);
1840 if (PageWriteback(page) ||
1841 !clear_page_dirty_for_io(page)) {
1847 * This actually clears the dirty bit in the radix tree.
1848 * See cifs_writepage() for more commentary.
1850 set_page_writeback(page);
/* page lies entirely beyond EOF - nothing to write */
1852 if (page_offset(page) >= mapping->host->i_size) {
1855 end_page_writeback(page);
1859 wdata->pages[i] = page;
1860 next = page->index + 1;
1864 /* reset index to refind any pages skipped */
1866 index = wdata->pages[0]->index + 1;
1868 /* put any pages we aren't going to use */
1869 for (i = nr_pages; i < found_pages; i++) {
1870 page_cache_release(wdata->pages[i]);
1871 wdata->pages[i] = NULL;
1874 /* nothing to write? */
1875 if (nr_pages == 0) {
1876 kref_put(&wdata->refcount, cifs_writedata_release);
/* fill in the write descriptor and issue the async write */
1880 wdata->sync_mode = wbc->sync_mode;
1881 wdata->nr_pages = nr_pages;
1882 wdata->offset = page_offset(wdata->pages[0]);
1883 wdata->marshal_iov = cifs_writepages_marshal_iov;
/* retry loop: re-acquire a writable handle on each -EAGAIN */
1886 if (wdata->cfile != NULL)
1887 cifsFileInfo_put(wdata->cfile);
1888 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1890 if (!wdata->cfile) {
1891 cERROR(1, "No writable handles for inode");
1895 wdata->pid = wdata->cfile->pid;
1896 rc = cifs_async_writev(wdata);
1897 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1899 for (i = 0; i < nr_pages; ++i)
1900 unlock_page(wdata->pages[i]);
1902 /* send failure -- clean up the mess */
1904 for (i = 0; i < nr_pages; ++i) {
1906 redirty_page_for_writepage(wbc,
1909 SetPageError(wdata->pages[i]);
1910 end_page_writeback(wdata->pages[i]);
1911 page_cache_release(wdata->pages[i]);
1914 mapping_set_error(mapping, rc);
1916 kref_put(&wdata->refcount, cifs_writedata_release);
1918 wbc->nr_to_write -= nr_pages;
1919 if (wbc->nr_to_write <= 0)
1925 if (!scanned && !done) {
1927 * We hit the last page and there is more work to be done: wrap
1928 * back to the start of the file
1935 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1936 mapping->writeback_index = index;
/*
 * Write a single locked page back to the server via
 * cifs_partialpagewrite, handling -EAGAIN by redirtying the page
 * (or retrying under WB_SYNC_ALL).
 * NOTE(review): several source lines are elided in this excerpt.
 */
1942 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1948 /* BB add check for wbc flags */
1949 page_cache_get(page);
1950 if (!PageUptodate(page))
1951 cFYI(1, "ppw - page not up to date");
1954 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1956 * A writepage() implementation always needs to do either this,
1957 * or re-dirty the page with "redirty_page_for_writepage()" in
1958 * the case of a failure.
1960 * Just unlocking the page will cause the radix tree tag-bits
1961 * to fail to update with the state of the page correctly.
1963 set_page_writeback(page);
1965 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1966 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1968 else if (rc == -EAGAIN)
1969 redirty_page_for_writepage(wbc, page);
1973 SetPageUptodate(page);
1974 end_page_writeback(page);
/* drop the reference taken at the top of this function */
1975 page_cache_release(page);
/* ->writepage: thin wrapper delegating to cifs_writepage_locked */
1980 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1982 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end: finalize a buffered write.  Marks the page up to date when
 * fully covered; for a non-uptodate partial page, writes the copied bytes
 * straight to the server via cifs_write.  Updates i_size if the write
 * extended the file.
 * NOTE(review): several source lines are elided in this excerpt.
 */
1987 static int cifs_write_end(struct file *file, struct address_space *mapping,
1988 loff_t pos, unsigned len, unsigned copied,
1989 struct page *page, void *fsdata)
1992 struct inode *inode = mapping->host;
1993 struct cifsFileInfo *cfile = file->private_data;
1994 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* choose pid for the server request: forwarded opener pid or our tgid */
1997 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2000 pid = current->tgid;
2002 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2005 if (PageChecked(page)) {
2007 SetPageUptodate(page);
2008 ClearPageChecked(page);
2009 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2010 SetPageUptodate(page);
2012 if (!PageUptodate(page)) {
2014 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2018 /* this is probably better than directly calling
2019 partialpage_write since in this function the file handle is
2020 known which we might as well leverage */
2021 /* BB check if anything else missing out of ppw
2022 such as updating last write time */
2023 page_data = kmap(page);
2024 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2025 /* if (rc < 0) should we set writebehind rc? */
/* uptodate path: just dirty the page for later writeback */
2032 set_page_dirty(page);
2036 spin_lock(&inode->i_lock);
2037 if (pos > inode->i_size)
2038 i_size_write(inode, pos);
2039 spin_unlock(&inode->i_lock);
2043 page_cache_release(page);
/*
 * fsync for strict-cache mounts: flush dirty pages, invalidate the page
 * cache when we lack a read oplock, then send an SMB Flush unless the
 * mount disabled it (NOSSYNC).
 * NOTE(review): several source lines are elided in this excerpt.
 */
2048 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2053 struct cifs_tcon *tcon;
2054 struct cifsFileInfo *smbfile = file->private_data;
2055 struct inode *inode = file->f_path.dentry->d_inode;
2056 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2058 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2061 mutex_lock(&inode->i_mutex);
2065 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2066 file->f_path.dentry->d_name.name, datasync);
/* without a read oplock our cached pages may be stale - drop them */
2068 if (!CIFS_I(inode)->clientCanCacheRead) {
2069 rc = cifs_invalidate_mapping(inode);
2071 cFYI(1, "rc: %d during invalidate phase", rc);
2072 rc = 0; /* don't care about it in fsync */
2076 tcon = tlink_tcon(smbfile->tlink);
2077 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2078 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2081 mutex_unlock(&inode->i_mutex);
/*
 * fsync for regular mounts: flush dirty pages for the range, then send an
 * SMB Flush to the server unless the mount disabled it (NOSSYNC).
 * NOTE(review): several source lines are elided in this excerpt.
 */
2085 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2089 struct cifs_tcon *tcon;
2090 struct cifsFileInfo *smbfile = file->private_data;
2091 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2092 struct inode *inode = file->f_mapping->host;
2094 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2097 mutex_lock(&inode->i_mutex);
2101 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2102 file->f_path.dentry->d_name.name, datasync);
2104 tcon = tlink_tcon(smbfile->tlink);
2105 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2106 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2109 mutex_unlock(&inode->i_mutex);
2114 * As file closes, flush all cached write data for this inode checking
2115 * for write behind errors.
2117 int cifs_flush(struct file *file, fl_owner_t id)
2119 struct inode *inode = file->f_path.dentry->d_inode;
/* only files opened for write can have pending dirty data */
2122 if (file->f_mode & FMODE_WRITE)
2123 rc = filemap_write_and_wait(inode->i_mapping);
2125 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * Allocate num_pages highmem-capable pages into the pages[] array.
 * On allocation failure the caller learns how many were obtained
 * (partial success is reported, per the comment below).
 * NOTE(review): several source lines are elided in this excerpt.
 */
2131 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2136 for (i = 0; i < num_pages; i++) {
2137 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2140 * save number of pages we have already allocated and
2141 * return with ENOMEM error
2150 for (i = 0; i < num_pages; i++)
/*
 * Compute how many pages are needed for the next chunk of an uncached
 * write: chunk length is min(len, wsize), returned via *cur_len.
 * NOTE(review): the return statement is elided in this excerpt.
 */
2157 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2162 clen = min_t(const size_t, len, wsize);
2163 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * Marshal an uncached write's pages into the iov array (iov[0] is
 * reserved for the SMB header).  The last iov may be shorter than a
 * page when wdata->bytes is not page-aligned.
 */
2172 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2175 size_t bytes = wdata->bytes;
2177 /* marshal up the pages into iov array */
2178 for (i = 0; i < wdata->nr_pages; i++) {
2179 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2180 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2181 bytes -= iov[i + 1].iov_len;
/*
 * Work-queue completion for an uncached async write: update the cached
 * server EOF / inode size, signal waiters, release the pages (unless the
 * request will be retried on -EAGAIN) and drop the wdata reference.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2186 cifs_uncached_writev_complete(struct work_struct *work)
2189 struct cifs_writedata *wdata = container_of(work,
2190 struct cifs_writedata, work);
2191 struct inode *inode = wdata->cfile->dentry->d_inode;
2192 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2194 spin_lock(&inode->i_lock);
2195 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2196 if (cifsi->server_eof > inode->i_size)
2197 i_size_write(inode, cifsi->server_eof);
2198 spin_unlock(&inode->i_lock);
/* wake up cifs_iovec_write() waiting on this request */
2200 complete(&wdata->done);
/* on -EAGAIN the pages are kept so the request can be resent */
2202 if (wdata->result != -EAGAIN) {
2203 for (i = 0; i < wdata->nr_pages; i++)
2204 put_page(wdata->pages[i]);
2207 kref_put(&wdata->refcount, cifs_writedata_release);
2210 /* attempt to send write to server, retry on any -EAGAIN errors */
2212 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
/* reopen the handle first if the server invalidated it */
2217 if (wdata->cfile->invalidHandle) {
2218 rc = cifs_reopen_file(wdata->cfile, false);
2222 rc = cifs_async_writev(wdata);
2223 } while (rc == -EAGAIN);
/*
 * Uncached (O_DIRECT-style) vectored write.  Copies user data into
 * freshly allocated pages, issues one async write per wsize-sized chunk,
 * then waits for each completion in offset order, resending any chunk
 * that fails with -EAGAIN.  Returns bytes written, or an error if
 * nothing was written.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2229 cifs_iovec_write(struct file *file, const struct iovec *iov,
2230 unsigned long nr_segs, loff_t *poffset)
2232 unsigned long nr_pages, i;
2233 size_t copied, len, cur_len;
2234 ssize_t total_written = 0;
2237 struct cifsFileInfo *open_file;
2238 struct cifs_tcon *tcon;
2239 struct cifs_sb_info *cifs_sb;
2240 struct cifs_writedata *wdata, *tmp;
2241 struct list_head wdata_list;
2245 len = iov_length(iov, nr_segs);
/* standard VFS checks (limits, O_APPEND, etc.) before touching data */
2249 rc = generic_write_checks(file, poffset, &len, 0);
2253 INIT_LIST_HEAD(&wdata_list);
2254 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2255 open_file = file->private_data;
2256 tcon = tlink_tcon(open_file->tlink);
2259 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2260 pid = open_file->pid;
2262 pid = current->tgid;
2264 iov_iter_init(&it, iov, nr_segs, len, 0);
/* issue loop: one wdata per wsize chunk */
2268 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2269 wdata = cifs_writedata_alloc(nr_pages,
2270 cifs_uncached_writev_complete);
2276 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
/* copy user data for this chunk into the wdata pages */
2283 for (i = 0; i < nr_pages; i++) {
2284 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2285 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2288 iov_iter_advance(&it, copied);
2290 cur_len = save_len - cur_len;
2292 wdata->sync_mode = WB_SYNC_ALL;
2293 wdata->nr_pages = nr_pages;
2294 wdata->offset = (__u64)offset;
2295 wdata->cfile = cifsFileInfo_get(open_file);
2297 wdata->bytes = cur_len;
2298 wdata->marshal_iov = cifs_uncached_marshal_iov;
2299 rc = cifs_uncached_retry_writev(wdata);
2301 kref_put(&wdata->refcount, cifs_writedata_release);
2305 list_add_tail(&wdata->list, &wdata_list);
2311 * If at least one write was successfully sent, then discard any rc
2312 * value from the later writes. If the other write succeeds, then
2313 * we'll end up returning whatever was written. If it fails, then
2314 * we'll get a new rc value from that.
2316 if (!list_empty(&wdata_list))
2320 * Wait for and collect replies for any successful sends in order of
2321 * increasing offset. Once an error is hit or we get a fatal signal
2322 * while waiting, then return without waiting for any more replies.
2325 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2327 /* FIXME: freezable too? */
2328 rc = wait_for_completion_killable(&wdata->done);
2331 else if (wdata->result)
2334 total_written += wdata->bytes;
2336 /* resend call if it's a retryable error */
2337 if (rc == -EAGAIN) {
2338 rc = cifs_uncached_retry_writev(wdata);
2342 list_del_init(&wdata->list);
2343 kref_put(&wdata->refcount, cifs_writedata_release);
2346 if (total_written > 0)
2347 *poffset += total_written;
2349 cifs_stats_bytes_written(tcon, total_written);
2350 return total_written ? total_written : (ssize_t)rc;
/*
 * aio write entry point for uncached I/O: delegate to cifs_iovec_write
 * and mark the cached mapping invalid after a successful write so stale
 * cached pages are not served later.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2353 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2354 unsigned long nr_segs, loff_t pos)
2357 struct inode *inode;
2359 inode = iocb->ki_filp->f_path.dentry->d_inode;
2362 * BB - optimize the way when signing is disabled. We can drop this
2363 * extra memory-to-memory copying and use iovec buffers for constructing
2367 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2369 CIFS_I(inode)->invalid_mapping = true;
/*
 * aio write for strict-cache mounts: use the generic cached path only
 * when we hold an exclusive oplock (clientCanCacheAll); otherwise go
 * straight to the server via cifs_user_writev.
 */
2376 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2377 unsigned long nr_segs, loff_t pos)
2379 struct inode *inode;
2381 inode = iocb->ki_filp->f_path.dentry->d_inode;
2383 if (CIFS_I(inode)->clientCanCacheAll)
2384 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2387 * In strict cache mode we need to write the data to the server exactly
2388 * from the pos to pos+len-1 rather than flush all affected pages
2389 * because it may cause a error with mandatory locks on these pages but
2390 * not on the region from pos to ppos+len-1.
2393 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * Allocate a cifs_readdata with room for nr_vecs kvecs, initializing its
 * refcount, list head, completion and work item (bound to the given
 * completion handler).  Returns NULL on allocation failure.
 */
2396 static struct cifs_readdata *
2397 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2399 struct cifs_readdata *rdata;
/* kvec array is allocated inline, immediately after the struct */
2401 rdata = kzalloc(sizeof(*rdata) +
2402 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2403 if (rdata != NULL) {
2404 kref_init(&rdata->refcount);
2405 INIT_LIST_HEAD(&rdata->list);
2406 init_completion(&rdata->done);
2407 INIT_WORK(&rdata->work, complete);
2408 INIT_LIST_HEAD(&rdata->pages);
/*
 * kref release callback for a cifs_readdata: drop the file-handle
 * reference and free the structure.
 * NOTE(review): the kfree line is elided in this excerpt.
 */
2414 cifs_readdata_release(struct kref *refcount)
2416 struct cifs_readdata *rdata = container_of(refcount,
2417 struct cifs_readdata, refcount);
2420 cifsFileInfo_put(rdata->cfile);
/*
 * Allocate npages pages onto the given list.  On failure, unwind by
 * freeing any pages already added.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2426 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2429 struct page *page, *tpage;
2432 for (i = 0; i < npages; i++) {
2433 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2438 list_add(&page->lru, list);
/* error path: release everything allocated so far */
2442 list_for_each_entry_safe(page, tpage, list, lru) {
2443 list_del(&page->lru);
/*
 * kref release for an uncached readdata: free the pages still on the
 * rdata list, then do the common release (handle ref + struct).
 */
2451 cifs_uncached_readdata_release(struct kref *refcount)
2453 struct page *page, *tpage;
2454 struct cifs_readdata *rdata = container_of(refcount,
2455 struct cifs_readdata, refcount);
2457 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2458 list_del(&page->lru);
2461 cifs_readdata_release(refcount);
/*
 * Send an async read, retrying on -EAGAIN and reopening the file handle
 * first when the server has invalidated it.
 */
2465 cifs_retry_async_readv(struct cifs_readdata *rdata)
2470 if (rdata->cfile->invalidHandle) {
2471 rc = cifs_reopen_file(rdata->cfile, true);
2475 rc = cifs_async_readv(rdata);
2476 } while (rc == -EAGAIN);
2482 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2483 * @rdata: the readdata response with list of pages holding data
2484 * @iov: vector in which we should copy the data
2485 * @nr_segs: number of segments in vector
2486 * @offset: offset into file of the first iovec
2487 * @copied: used to return the amount of data copied to the iov
2489 * This function copies data from a list of pages in a readdata response into
2490 * an array of iovecs. It will first calculate where the data should go
2491 * based on the info in the readdata and then copy the data into that spot.
2494 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2495 unsigned long nr_segs, loff_t offset, ssize_t *copied)
/* pos = where this rdata's bytes land relative to the iov start */
2499 size_t pos = rdata->offset - offset;
2500 struct page *page, *tpage;
2501 ssize_t remaining = rdata->bytes;
2502 unsigned char *pdata;
2504 /* set up iov_iter and advance to the correct offset */
2505 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2506 iov_iter_advance(&ii, pos);
2509 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2512 /* copy a whole page or whatever's left */
2513 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2515 /* ...but limit it to whatever space is left in the iov */
2516 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2518 /* go while there's data to be copied and no errors */
2521 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2527 iov_iter_advance(&ii, copy);
/* page contents consumed - take it off the list */
2531 list_del(&page->lru);
/*
 * Work-queue completion for an uncached async read: kunmap the pages on
 * success, signal the waiter, and drop this completion's reference.
 */
2539 cifs_uncached_readv_complete(struct work_struct *work)
2541 struct cifs_readdata *rdata = container_of(work,
2542 struct cifs_readdata, work);
2544 /* if the result is non-zero then the pages weren't kmapped */
2545 if (rdata->result == 0) {
2548 list_for_each_entry(page, &rdata->pages, lru)
/* wake up cifs_iovec_read() waiting on this request */
2552 complete(&rdata->done);
2553 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Build the rdata->iov array from the pages on the rdata list, given how
 * many bytes of response data remain: full pages first, then a partial
 * page (zero-filled to the end), then release any surplus pages.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2557 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2558 unsigned int remaining)
2561 struct page *page, *tpage;
2564 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2565 if (remaining >= PAGE_SIZE) {
2566 /* enough data to fill the page */
2567 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2568 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2569 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2570 rdata->nr_iov, page->index,
2571 rdata->iov[rdata->nr_iov].iov_base,
2572 rdata->iov[rdata->nr_iov].iov_len);
2575 remaining -= PAGE_SIZE;
2576 } else if (remaining > 0) {
2577 /* enough for partial page, fill and zero the rest */
2578 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2579 rdata->iov[rdata->nr_iov].iov_len = remaining;
2580 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2581 rdata->nr_iov, page->index,
2582 rdata->iov[rdata->nr_iov].iov_base,
2583 rdata->iov[rdata->nr_iov].iov_len);
2584 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2585 '\0', PAGE_SIZE - remaining);
2590 /* no need to hold page hostage */
2591 list_del(&page->lru);
/*
 * Uncached vectored read: issue one async read per rsize-sized chunk,
 * then wait for each completion in offset order, copying the returned
 * pages into the user iovecs and resending on -EAGAIN.  Returns bytes
 * read, or an error if nothing was read.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2600 cifs_iovec_read(struct file *file, const struct iovec *iov,
2601 unsigned long nr_segs, loff_t *poffset)
2604 size_t len, cur_len;
2605 ssize_t total_read = 0;
2606 loff_t offset = *poffset;
2607 unsigned int npages;
2608 struct cifs_sb_info *cifs_sb;
2609 struct cifs_tcon *tcon;
2610 struct cifsFileInfo *open_file;
2611 struct cifs_readdata *rdata, *tmp;
2612 struct list_head rdata_list;
2618 len = iov_length(iov, nr_segs);
2622 INIT_LIST_HEAD(&rdata_list);
2623 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2624 open_file = file->private_data;
2625 tcon = tlink_tcon(open_file->tlink);
2627 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2628 pid = open_file->pid;
2630 pid = current->tgid;
2632 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2633 cFYI(1, "attempting read on write only file instance");
/* issue loop: one rdata per rsize chunk */
2636 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2637 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2639 /* allocate a readdata struct */
2640 rdata = cifs_readdata_alloc(npages,
2641 cifs_uncached_readv_complete);
2647 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2651 rdata->cfile = cifsFileInfo_get(open_file);
2652 rdata->offset = offset;
2653 rdata->bytes = cur_len;
2655 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2657 rc = cifs_retry_async_readv(rdata);
2660 kref_put(&rdata->refcount,
2661 cifs_uncached_readdata_release);
2665 list_add_tail(&rdata->list, &rdata_list);
2670 /* if at least one read request send succeeded, then reset rc */
2671 if (!list_empty(&rdata_list))
2674 /* the loop below should proceed in the order of increasing offsets */
2676 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2680 /* FIXME: freezable sleep too? */
2681 rc = wait_for_completion_killable(&rdata->done);
2684 else if (rdata->result)
2687 rc = cifs_readdata_to_iov(rdata, iov,
2690 total_read += copied;
2693 /* resend call if it's a retryable error */
2694 if (rc == -EAGAIN) {
2695 rc = cifs_retry_async_readv(rdata);
2699 list_del_init(&rdata->list);
2700 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2703 cifs_stats_bytes_read(tcon, total_read);
2704 *poffset += total_read;
2706 return total_read ? total_read : rc;
/* aio read entry point for uncached I/O: delegate to cifs_iovec_read */
2709 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2710 unsigned long nr_segs, loff_t pos)
2714 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * aio read for strict-cache mounts: use the generic cached path only
 * when we hold at least a read oplock (clientCanCacheRead); otherwise
 * read directly from the server via cifs_user_readv.
 */
2721 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2722 unsigned long nr_segs, loff_t pos)
2724 struct inode *inode;
2726 inode = iocb->ki_filp->f_path.dentry->d_inode;
2728 if (CIFS_I(inode)->clientCanCacheRead)
2729 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2732 * In strict cache mode we need to read from the server all the time
2733 * if we don't have level II oplock because the server can delay mtime
2734 * change - so we can't make a decision about inode invalidating.
2735 * And we can also fail with pagereading if there are mandatory locks
2736 * on pages affected by this read but not on the region from pos to
2740 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * Synchronous read of read_size bytes at *poffset into read_data.
 * Loops over rsize-sized chunks, retrying on -EAGAIN (reopening an
 * invalidated handle first) and capping requests for servers without
 * CAP_LARGE_FILES.  Advances *poffset by the bytes read.
 * NOTE(review): several source lines are elided in this excerpt.
 */
2743 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2747 unsigned int bytes_read = 0;
2748 unsigned int total_read;
2749 unsigned int current_read_size;
2751 struct cifs_sb_info *cifs_sb;
2752 struct cifs_tcon *pTcon;
2754 char *current_offset;
2755 struct cifsFileInfo *open_file;
2756 struct cifs_io_parms io_parms;
2757 int buf_type = CIFS_NO_BUFFER;
2761 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2763 /* FIXME: set up handlers for larger reads and/or convert to async */
2764 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2766 if (file->private_data == NULL) {
2771 open_file = file->private_data;
2772 pTcon = tlink_tcon(open_file->tlink);
2774 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2775 pid = open_file->pid;
2777 pid = current->tgid;
2779 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2780 cFYI(1, "attempting read on write only file instance");
2782 for (total_read = 0, current_offset = read_data;
2783 read_size > total_read;
2784 total_read += bytes_read, current_offset += bytes_read) {
2785 current_read_size = min_t(uint, read_size - total_read, rsize);
2787 /* For windows me and 9x we do not want to request more
2788 than it negotiated since it will refuse the read then */
2790 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
2791 current_read_size = min_t(uint, current_read_size,
2795 while (rc == -EAGAIN) {
2796 if (open_file->invalidHandle) {
2797 rc = cifs_reopen_file(open_file, true);
2801 io_parms.netfid = open_file->netfid;
2803 io_parms.tcon = pTcon;
2804 io_parms.offset = *poffset;
2805 io_parms.length = current_read_size;
/* NOTE(review): "¤t_offset" below looks like mis-encoded
 * "&current_offset" (HTML-entity mojibake) - confirm against the
 * upstream source before building */
2806 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2807 ¤t_offset, &buf_type);
2809 if (rc || (bytes_read == 0)) {
2817 cifs_stats_bytes_read(pTcon, total_read);
2818 *poffset += bytes_read;
/*
 * cifs_page_mkwrite - vm_ops->page_mkwrite handler.
 * Keeps an mmap'ed page stable while it is being written back by
 * returning it locked (VM_FAULT_LOCKED) to the fault handler.
 * NOTE(review): body lines between the two statements are elided here.
 */
2826 * If the page is mmap'ed into a process' page tables, then we need to make
2827 * sure that it doesn't change while being written back.
2830 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2832 struct page *page = vmf->page;
2835 return VM_FAULT_LOCKED;
/*
 * VM operations used for CIFS mmaps: generic page-cache fault handling
 * plus the CIFS-specific page_mkwrite above.
 * NOTE(review): could be declared const; left as-is (doc-only pass).
 */
2838 static struct vm_operations_struct cifs_file_vm_ops = {
2839 .fault = filemap_fault,
2840 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap entry point for strict cache mounts.
 *
 * Without a read oplock the page cache may be stale, so invalidate the
 * mapping before handing off to generic_file_mmap(); on success install
 * the CIFS vm_ops so page_mkwrite is intercepted.
 */
2843 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2846 struct inode *inode = file->f_path.dentry->d_inode;
/* No read oplock: cached pages can't be trusted, drop them first. */
2850 if (!CIFS_I(inode)->clientCanCacheRead) {
2851 rc = cifs_invalidate_mapping(inode);
2856 rc = generic_file_mmap(file, vma);
2858 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - default mmap entry point.
 *
 * Revalidates the file against the server first (failure is only
 * logged), then sets up a generic mapping with CIFS vm_ops.
 */
2863 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2868 rc = cifs_revalidate_file(file);
2870 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2874 rc = generic_file_mmap(file, vma);
2876 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - work item run when an async readpages request
 * finishes.
 *
 * Walks the pages attached to the readdata: puts each back on the LRU,
 * marks it uptodate (and pushes it to fscache) when the read succeeded,
 * then drops the page reference.  Finally drops the readdata ref taken
 * for the work item.
 */
2882 cifs_readv_complete(struct work_struct *work)
2884 struct cifs_readdata *rdata = container_of(work,
2885 struct cifs_readdata, work);
2886 struct page *page, *tpage;
2888 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2889 list_del(&page->lru);
2890 lru_cache_add_file(page);
/* Successful read: the page now holds valid data. */
2892 if (rdata->result == 0) {
2894 flush_dcache_page(page);
2895 SetPageUptodate(page);
/* Mirror good pages into the local fscache as well. */
2900 if (rdata->result == 0)
2901 cifs_readpage_to_fscache(rdata->mapping->host, page);
2903 page_cache_release(page);
/* Drop the reference held on behalf of this completion work. */
2905 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpages_marshal_iov - build the receive iovec for an async
 * readpages reply.
 *
 * "remaining" is the number of data bytes the server is sending.  For
 * each queued page: map a full page into the iovec while a whole page
 * of data remains; for a final partial page, map it and zero the tail;
 * pages beyond the server's EOF are zero-filled and marked uptodate;
 * any pages left over are simply released.
 *
 * NOTE(review): lossy extraction — nr_iov increments, len declaration
 * and several braces are elided from this view.
 */
2909 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2912 struct page *page, *tpage;
2916 /* determine the eof that the server (probably) has */
2917 eof = CIFS_I(rdata->mapping->host)->server_eof;
2918 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2919 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2922 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2923 if (remaining >= PAGE_CACHE_SIZE) {
2924 /* enough data to fill the page */
2925 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2926 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2927 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2928 rdata->nr_iov, page->index,
2929 rdata->iov[rdata->nr_iov].iov_base,
2930 rdata->iov[rdata->nr_iov].iov_len);
2932 len += PAGE_CACHE_SIZE;
2933 remaining -= PAGE_CACHE_SIZE;
2934 } else if (remaining > 0) {
2935 /* enough for partial page, fill and zero the rest */
2936 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2937 rdata->iov[rdata->nr_iov].iov_len = remaining;
2938 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2939 rdata->nr_iov, page->index,
2940 rdata->iov[rdata->nr_iov].iov_base,
2941 rdata->iov[rdata->nr_iov].iov_len);
/*
 * Zero the tail of the partial page past the valid "remaining"
 * bytes so no stale data is exposed.
 * NOTE(review): offsetting iov_base by "remaining" only makes
 * sense if iov_base is this page's kmap address (set above) —
 * confirm against the unelided original.
 */
2942 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2943 '\0', PAGE_CACHE_SIZE - remaining);
2947 } else if (page->index > eof_index) {
2949 * The VFS will not try to do readahead past the
2950 * i_size, but it's possible that we have outstanding
2951 * writes with gaps in the middle and the i_size hasn't
2952 * caught up yet. Populate those with zeroed out pages
2953 * to prevent the VFS from repeatedly attempting to
2954 * fill them until the writes are flushed.
2956 zero_user(page, 0, PAGE_CACHE_SIZE);
2957 list_del(&page->lru);
2958 lru_cache_add_file(page);
2959 flush_dcache_page(page);
2960 SetPageUptodate(page);
2962 page_cache_release(page);
2964 /* no need to hold page hostage */
2965 list_del(&page->lru);
2966 lru_cache_add_file(page);
2968 page_cache_release(page);
/*
 * cifs_readpages - address_space_operations.readpages implementation.
 *
 * Tries fscache first; otherwise batches contiguous pages from the
 * VFS-supplied page_list (up to rsize bytes per batch) into the page
 * cache and a private tmplist, allocates a cifs_readdata for the batch
 * and issues an async read.  On any per-batch failure the pages are
 * unwound back onto the LRU and released.
 *
 * NOTE(review): lossy extraction — rc/pid declarations, break
 * statements, nr_pages accounting and several braces are elided.
 */
2975 static int cifs_readpages(struct file *file, struct address_space *mapping,
2976 struct list_head *page_list, unsigned num_pages)
2979 struct list_head tmplist;
2980 struct cifsFileInfo *open_file = file->private_data;
2981 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2982 unsigned int rsize = cifs_sb->rsize;
2986 * Give up immediately if rsize is too small to read an entire page.
2987 * The VFS will fall back to readpage. We should never reach this
2988 * point however since we set ra_pages to 0 when the rsize is smaller
2989 * than a cache page.
2991 if (unlikely(rsize < PAGE_CACHE_SIZE))
2995 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2996 * immediately if the cookie is negative
2998 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
/* As in cifs_read(): optionally forward the opener's pid. */
3003 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3004 pid = open_file->pid;
3006 pid = current->tgid;
3009 INIT_LIST_HEAD(&tmplist);
3011 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3012 mapping, num_pages);
3015 * Start with the page at end of list and move it to private
3016 * list. Do the same with any following pages until we hit
3017 * the rsize limit, hit an index discontinuity, or run out of
3018 * pages. Issue the async read and then start the loop again
3019 * until the list is empty.
3021 * Note that list order is important. The page_list is in
3022 * the order of declining indexes. When we put the pages in
3023 * the rdata->pages, then we want them in increasing order.
3025 while (!list_empty(page_list)) {
3026 unsigned int bytes = PAGE_CACHE_SIZE;
3027 unsigned int expected_index;
3028 unsigned int nr_pages = 1;
3030 struct page *page, *tpage;
3031 struct cifs_readdata *rdata;
/* page_list is in descending index order; take the lowest index. */
3033 page = list_entry(page_list->prev, struct page, lru);
3036 * Lock the page and put it in the cache. Since no one else
3037 * should have access to this page, we're safe to simply set
3038 * PG_locked without checking it first.
3040 __set_page_locked(page);
3041 rc = add_to_page_cache_locked(page, mapping,
3042 page->index, GFP_KERNEL);
3044 /* give up if we can't stick it in the cache */
3046 __clear_page_locked(page);
3050 /* move first page to the tmplist */
3051 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3052 list_move_tail(&page->lru, &tmplist);
3054 /* now try and add more pages onto the request */
3055 expected_index = page->index + 1;
3056 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3057 /* discontinuity ? */
3058 if (page->index != expected_index)
3061 /* would this page push the read over the rsize? */
3062 if (bytes + PAGE_CACHE_SIZE > rsize)
3065 __set_page_locked(page);
3066 if (add_to_page_cache_locked(page, mapping,
3067 page->index, GFP_KERNEL)) {
3068 __clear_page_locked(page);
3071 list_move_tail(&page->lru, &tmplist);
3072 bytes += PAGE_CACHE_SIZE;
/* Batch assembled: allocate the async read descriptor for it. */
3077 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3079 /* best to give up if we're out of mem */
3080 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3081 list_del(&page->lru);
3082 lru_cache_add_file(page);
3084 page_cache_release(page);
/*
 * NOTE(review): lock/unlock of cifs_file_list_lock below appear
 * adjacent because the protected statements were elided — check
 * the original before assuming an empty critical section.
 */
3090 spin_lock(&cifs_file_list_lock);
3091 spin_unlock(&cifs_file_list_lock);
3092 rdata->cfile = cifsFileInfo_get(open_file);
3093 rdata->mapping = mapping;
3094 rdata->offset = offset;
3095 rdata->bytes = bytes;
3097 rdata->marshal_iov = cifs_readpages_marshal_iov;
3098 list_splice_init(&tmplist, &rdata->pages);
3100 rc = cifs_retry_async_readv(rdata);
/* Send failed: unwind the batch's pages back to the LRU. */
3102 list_for_each_entry_safe(page, tpage, &rdata->pages,
3104 list_del(&page->lru);
3105 lru_cache_add_file(page);
3107 page_cache_release(page);
3109 kref_put(&rdata->refcount, cifs_readdata_release);
3113 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpage_worker - fill one page cache page synchronously.
 *
 * Checks fscache first; on a miss, kmaps the page, reads up to a page
 * of data via cifs_read(), zero-fills the tail on a short read, marks
 * the page uptodate, pushes it to fscache, and updates atime.
 */
3119 static int cifs_readpage_worker(struct file *file, struct page *page,
3125 /* Is the page cached? */
3126 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3130 page_cache_get(page);
3131 read_data = kmap(page);
3132 /* for reads over a certain size could initiate async read ahead */
3134 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3139 cFYI(1, "Bytes read %d", rc);
3141 file->f_path.dentry->d_inode->i_atime =
3142 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* Short read: zero the rest so no stale page contents leak. */
3144 if (PAGE_CACHE_SIZE > rc)
3145 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3147 flush_dcache_page(page);
3148 SetPageUptodate(page);
3150 /* send this page to the cache */
3151 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3157 page_cache_release(page);
/*
 * cifs_readpage - address_space_operations.readpage implementation.
 * Computes the file offset for the page and delegates to
 * cifs_readpage_worker(); bails out when there is no open file handle.
 */
3163 static int cifs_readpage(struct file *file, struct page *page)
3165 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3171 if (file->private_data == NULL) {
3177 cFYI(1, "readpage %p at offset %d 0x%x\n",
3178 page, (int)offset, (int)offset);
3180 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - true if any open handle on the inode has
 * FMODE_WRITE.  Walks openFileList under cifs_file_list_lock; returns
 * as soon as a writable handle is found.
 */
3188 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3190 struct cifsFileInfo *open_file;
3192 spin_lock(&cifs_file_list_lock);
3193 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3194 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3195 spin_unlock(&cifs_file_list_lock);
3199 spin_unlock(&cifs_file_list_lock);
3203 /* We do not want to update the file size from server for inodes
3204 open for write - to avoid races with writepage extending
3205 the file - in the future we could consider allowing
3206 refreshing the inode only on increases in the file size
3207 but this is tricky to do without racing with writebehind
3208 page caching in the current Linux kernel design */
/*
 * is_size_safe_to_change - may the cached i_size be replaced with the
 * server-reported end_of_file?  Safe when no writer is open, when the
 * mount is direct I/O (no page cache to corrupt), or — per the visible
 * comparison — relative to the current i_size.
 */
3209 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3214 if (is_inode_writable(cifsInode)) {
3215 /* This inode is open for write at least once */
3216 struct cifs_sb_info *cifs_sb;
3218 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3219 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3220 /* since no page cache to corrupt on directio
3221 we can change size safely */
3225 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space_operations.write_begin.
 *
 * Grabs (or creates) the target page and decides whether a read from
 * the server is needed before the copy-in: not for already-uptodate
 * pages, full-page writes, or (with a read oplock) writes entirely
 * beyond/covering EOF.  Otherwise pre-reads the page when the handle
 * allows it; cifs_write_end handles the !uptodate case with a sync
 * write.
 */
3233 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3234 loff_t pos, unsigned len, unsigned flags,
3235 struct page **pagep, void **fsdata)
3237 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3238 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3239 loff_t page_start = pos & PAGE_MASK;
3244 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3246 page = grab_cache_page_write_begin(mapping, index, flags);
3252 if (PageUptodate(page))
3256 * If we write a full page it will be up to date, no need to read from
3257 * the server. If the write is short, we'll end up doing a sync write
3260 if (len == PAGE_CACHE_SIZE)
3264 * optimize away the read when we have an oplock, and we're not
3265 * expecting to use any of the data we'd be reading in. That
3266 * is, when the page lies beyond the EOF, or straddles the EOF
3267 * and the write will cover all of the existing data.
3269 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3270 i_size = i_size_read(mapping->host);
3271 if (page_start >= i_size ||
3272 (offset == 0 && (pos + len) >= i_size)) {
/* Zero the parts of the page the write won't cover. */
3273 zero_user_segments(page, 0, offset,
3277 * PageChecked means that the parts of the page
3278 * to which we're not writing are considered up
3279 * to date. Once the data is copied to the
3280 * page, it can be set uptodate.
3282 SetPageChecked(page);
/* Handle opened with read access: pre-read the page contents. */
3287 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3289 * might as well read a page, it is fast enough. If we get
3290 * an error, we don't need to return it. cifs_write_end will
3291 * do a sync write instead since PG_uptodate isn't set.
3293 cifs_readpage_worker(file, page, &page_start);
3295 /* we could try using another file handle if there is one -
3296 but how would we lock it to prevent close of that handle
3297 racing with this read? In any case
3298 this will be written out by write_end so is fine */
/*
 * cifs_release_page - releasepage hook: refuse for pages with private
 * data, otherwise let fscache decide whether the page may be freed.
 */
3305 static int cifs_release_page(struct page *page, gfp_t gfp)
3307 if (PagePrivate(page))
3310 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - invalidatepage hook: drop the page from
 * fscache for this inode.  (The visible call does not check "offset";
 * the guarding condition, if any, is elided from this extraction.)
 */
3313 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3315 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3318 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - launder_page hook: synchronously write back a
 * dirty page (WB_SYNC_ALL over exactly this page's byte range) before
 * it is invalidated, then drop it from fscache.
 */
3321 static int cifs_launder_page(struct page *page)
3324 loff_t range_start = page_offset(page);
3325 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3326 struct writeback_control wbc = {
3327 .sync_mode = WB_SYNC_ALL,
3329 .range_start = range_start,
3330 .range_end = range_end,
3333 cFYI(1, "Launder page: %p", page);
3335 if (clear_page_dirty_for_io(page))
3336 rc = cifs_writepage_locked(page, &wbc);
3338 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work handler run when the server breaks our
 * oplock.
 *
 * Propagates the break to the local lease layer, flushes (and, when
 * read caching is lost, waits for and invalidates) the inode's dirty
 * pages, pushes any cached byte-range locks to the server, then
 * acknowledges the break with an OPLOCK_RELEASE lock request unless
 * the break was cancelled (e.g. stale handle after reconnect).
 */
3342 void cifs_oplock_break(struct work_struct *work)
3344 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3346 struct inode *inode = cfile->dentry->d_inode;
3347 struct cifsInodeInfo *cinode = CIFS_I(inode);
3350 if (inode && S_ISREG(inode->i_mode)) {
/* Break the local lease to the level we are being demoted to. */
3351 if (cinode->clientCanCacheRead)
3352 break_lease(inode, O_RDONLY);
3354 break_lease(inode, O_WRONLY);
3355 rc = filemap_fdatawrite(inode->i_mapping);
3356 if (cinode->clientCanCacheRead == 0) {
/* Losing read caching too: wait for writeback, then toss pages. */
3357 rc = filemap_fdatawait(inode->i_mapping);
3358 mapping_set_error(inode->i_mapping, rc);
3359 invalidate_remote_inode(inode);
3361 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3364 rc = cifs_push_locks(cfile);
3366 cERROR(1, "Push locks rc = %d", rc);
3369 * releasing stale oplock after recent reconnect of smb session using
3370 * a now incorrect file handle is not a data integrity issue but do
3371 * not bother sending an oplock release if session to server still is
3372 * disconnected since oplock already released by the server
3374 if (!cfile->oplock_break_cancelled) {
3375 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
3376 current->tgid, 0, 0, 0, 0,
3377 LOCKING_ANDX_OPLOCK_RELEASE, false,
3378 cinode->clientCanCacheRead ? 1 : 0);
3379 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Address-space operations used when the negotiated buffer size is
 * large enough for full-page reads (includes .readpages); contrast
 * with cifs_addr_ops_smallbuf below.
 */
3383 const struct address_space_operations cifs_addr_ops = {
3384 .readpage = cifs_readpage,
3385 .readpages = cifs_readpages,
3386 .writepage = cifs_writepage,
3387 .writepages = cifs_writepages,
3388 .write_begin = cifs_write_begin,
3389 .write_end = cifs_write_end,
3390 .set_page_dirty = __set_page_dirty_nobuffers,
3391 .releasepage = cifs_release_page,
3392 .invalidatepage = cifs_invalidate_page,
3393 .launder_page = cifs_launder_page,
3397 * cifs_readpages requires the server to support a buffer large enough to
3398 * contain the header plus one complete page of data. Otherwise, we need
3399 * to leave cifs_readpages out of the address space operations.
/* Identical to cifs_addr_ops except .readpages is intentionally absent. */
3401 const struct address_space_operations cifs_addr_ops_smallbuf = {
3402 .readpage = cifs_readpage,
3403 .writepage = cifs_writepage,
3404 .writepages = cifs_writepages,
3405 .write_begin = cifs_write_begin,
3406 .write_end = cifs_write_end,
3407 .set_page_dirty = __set_page_dirty_nobuffers,
3408 .releasepage = cifs_release_page,
3409 .invalidatepage = cifs_invalidate_page,
3410 .launder_page = cifs_launder_page,