4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map the access mode of POSIX open flags (flags & O_ACCMODE) to the access
 * mask that the NT-style open path in this file passes to the server as
 * desiredAccess.
 *
 * O_RDWR deliberately requests GENERIC_READ | GENERIC_WRITE instead of
 * GENERIC_ALL. An access mode matching none of the three tests reaches the
 * final return, which builds an explicit mask of control, attribute, EA and
 * data rights.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate POSIX open flags into the SMB_O_* flag word that
 * cifs_posix_open() hands to the POSIX create call.
 *
 * The access mode selects one of SMB_O_RDONLY, SMB_O_WRONLY or SMB_O_RDWR.
 * Further SMB_O_* bits (CREAT, EXCL, TRUNC, SYNC, DIRECTORY, NOFOLLOW,
 * DIRECT) are then OR-ed into the result. As the inline note says, O_DSYNC
 * is treated as implying the stronger SMB_O_SYNC.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * Choose the CIFS create disposition for a set of POSIX open flags.
 *
 * Order matters: the combined O_CREAT | O_EXCL and O_CREAT | O_TRUNC cases
 * are tested before the single-flag O_CREAT and O_TRUNC cases, so the most
 * specific combination wins. The complete flag-to-disposition mapping is
 * tabulated in the comment inside cifs_nt_open().
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * Open (or create) full_path through the POSIX create call.
 *
 * @full_path: path handed to CIFSPOSIXCreate()
 * @pinode:    inode slot; when *pinode is NULL a new inode is obtained with
 *             cifs_iget() from the returned attributes, otherwise the
 *             attributes are applied to the existing inode. One caller in
 *             this file (cifs_reopen_file) passes NULL for the slot itself.
 * @sb:        superblock used to reach the cifs_sb_info and tree connection
 * @mode:      creation mode; the current umask is cleared from it first
 * @f_flags:   POSIX open flags, converted by cifs_posix_convert_flags()
 * @poplock:   oplock value passed through to CIFSPOSIXCreate()
 * @pnetfid:   file handle slot passed through to CIFSPOSIXCreate()
 * @xid:       transaction id passed through to CIFSPOSIXCreate()
 *
 * The response buffer is allocated zeroed here. The tlink reference taken
 * for the call is dropped right after CIFSPOSIXCreate() returns. A response
 * Type of -1 means the open worked but no file info was returned, so the
 * caller has to query the path itself.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open full_path with the NT-style (non-POSIX) open calls and refresh the
 * inode from the result.
 *
 * The desired access comes from cifs_convert_flags() and the disposition
 * from cifs_get_disposition(). create_options starts as CREATE_NOT_DIR and
 * gains CREATE_OPEN_BACKUP_INTENT when backup_cred() is true for the mount.
 * Servers advertising CAP_NT_SMBS are opened with CIFSSMBOpen(); the other
 * visible call is SMBLegacyOpen(). A FILE_ALL_INFO buffer is allocated for
 * the response and later fed to cifs_get_inode_info(); the unix variant
 * cifs_get_inode_info_unix() is the alternative refresh call.
 *
 * NOTE(review): the SMBLegacyOpen() call passes the constant CREATE_NOT_DIR
 * instead of create_options, so the CREATE_OPEN_BACKUP_INTENT bit set above
 * is not forwarded on that path - confirm whether this is intended.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
178 int create_options = CREATE_NOT_DIR;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialise the per-open cifsFileInfo for @file and publish
 * it.
 *
 * @fileHandle: server file handle stored as netfid
 * @file:       VFS file being opened; its dentry, f_flags and f_mode are read
 * @tlink:      tcon link; an extra reference is taken with cifs_get_tlink()
 * @oplock:     oplock level applied to the inode with cifs_set_oplock_level()
 *
 * The structure is allocated zeroed and starts with a reference count of 1.
 * It records the current tgid and fsuid and takes its own dentry reference
 * with dget(). Under cifs_file_list_lock it is added to the tcon open-file
 * list and to the inode open-file list; readable instances are inserted at
 * the head of the inode list and the list_add_tail() call covers the other
 * case. After that can_cache_brlcks is copied from clientCanCacheAll and the
 * structure is stored in file->private_data.
 */
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
281 file->private_data = pCifsFile;
285 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
288 * Release a reference on the file private data. This may involve closing
289 * the filehandle out on the server. Must be called without holding
290 * cifs_file_list_lock.
/*
 * Drop one reference to @cifs_file.
 *
 * The count is decremented under cifs_file_list_lock; while it stays above
 * zero the lock is simply released again. On the last reference the
 * structure is unlinked from the inode and tcon lists. If that empties the
 * inode open-file list, the oplock level is reset to 0 and, on mounts with
 * CIFS_MOUNT_STRICT_IO, the page cache mapping is flagged invalid. After
 * the spinlock is dropped, pending oplock-break work is cancelled
 * synchronously and the handle is closed on the server unless the tcon
 * needs a reconnect or the handle is already invalid. Cached byte-range
 * lock records are then unlinked under the inode lock_mutex with their
 * waiters released, and the tlink and dentry references are put.
 */
292 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
294 struct inode *inode = cifs_file->dentry->d_inode;
295 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
296 struct cifsInodeInfo *cifsi = CIFS_I(inode);
297 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
298 struct cifsLockInfo *li, *tmp;
300 spin_lock(&cifs_file_list_lock);
301 if (--cifs_file->count > 0) {
302 spin_unlock(&cifs_file_list_lock);
306 /* remove it from the lists */
307 list_del(&cifs_file->flist);
308 list_del(&cifs_file->tlist);
310 if (list_empty(&cifsi->openFileList)) {
311 cFYI(1, "closing last open instance for inode %p",
312 cifs_file->dentry->d_inode);
314 /* in strict cache mode we need invalidate mapping on the last
315 close because it may cause a error when we open this file
316 again and get at least level II oplock */
317 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
318 CIFS_I(inode)->invalid_mapping = true;
320 cifs_set_oplock_level(cifsi, 0);
322 spin_unlock(&cifs_file_list_lock);
324 cancel_work_sync(&cifs_file->oplock_break);
326 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
330 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
334 /* Delete any outstanding lock records. We'll lose them when the file
337 mutex_lock(&cifsi->lock_mutex);
338 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
339 list_del(&li->llist);
340 cifs_del_lock_waiters(li);
343 mutex_unlock(&cifsi->lock_mutex);
345 cifs_put_tlink(cifs_file->tlink);
346 dput(cifs_file->dentry);
/*
 * VFS open entry point for regular files.
 *
 * Resolves the tree connection and the full path for @file, then tries a
 * POSIX open when posix open is not marked broken on the tcon, the tcon uses
 * unix extensions, the session has CAP_UNIX and the server advertises
 * CIFS_UNIX_POSIX_PATH_OPS_CAP. An -EINVAL or -EOPNOTSUPP result from that
 * attempt marks posix open as broken on the tcon, logging an error when the
 * server NOS string is known. If the posix open did not succeed the open
 * goes through cifs_nt_open(). A cifsFileInfo is then attached with
 * cifs_new_fileinfo(); if that returns NULL the freshly opened handle is
 * closed on the server again. When the oplock word carries
 * CIFS_CREATE_ACTION, the posix path was not used and the tcon has unix
 * extensions, the inode mode is pushed to the server afterwards with
 * CIFSSMBUnixSetFileInfo(). The tlink reference is dropped before leaving.
 *
 * NOTE(review): the (rc != -EOPNOTSUPP) test in the third branch of the
 * posix-open result chain looks redundant, because the preceding else-if
 * already catches -EOPNOTSUPP - confirm.
 */
350 int cifs_open(struct inode *inode, struct file *file)
355 struct cifs_sb_info *cifs_sb;
356 struct cifs_tcon *tcon;
357 struct tcon_link *tlink;
358 struct cifsFileInfo *pCifsFile = NULL;
359 char *full_path = NULL;
360 bool posix_open_ok = false;
365 cifs_sb = CIFS_SB(inode->i_sb);
366 tlink = cifs_sb_tlink(cifs_sb);
369 return PTR_ERR(tlink);
371 tcon = tlink_tcon(tlink);
373 full_path = build_path_from_dentry(file->f_path.dentry);
374 if (full_path == NULL) {
379 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
380 inode, file->f_flags, full_path);
382 if (tcon->ses->server->oplocks)
387 if (!tcon->broken_posix_open && tcon->unix_ext &&
388 (tcon->ses->capabilities & CAP_UNIX) &&
389 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
390 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
391 /* can not refresh inode info since size could be stale */
392 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
393 cifs_sb->mnt_file_mode /* ignored */,
394 file->f_flags, &oplock, &netfid, xid);
396 cFYI(1, "posix open succeeded");
397 posix_open_ok = true;
398 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
399 if (tcon->ses->serverNOS)
400 cERROR(1, "server %s of type %s returned"
401 " unexpected error on SMB posix open"
402 ", disabling posix open support."
403 " Check if server update available.",
404 tcon->ses->serverName,
405 tcon->ses->serverNOS);
406 tcon->broken_posix_open = true;
407 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
408 (rc != -EOPNOTSUPP)) /* path not found or net err */
410 /* else fallthrough to retry open the old way on network i/o
414 if (!posix_open_ok) {
415 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
416 file->f_flags, &oplock, &netfid, xid);
421 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
422 if (pCifsFile == NULL) {
423 CIFSSMBClose(xid, tcon, netfid);
428 cifs_fscache_set_inode_cookie(inode, file);
430 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
431 /* time to set mode which we can not set earlier due to
432 problems creating new read-only files */
433 struct cifs_unix_set_info_args args = {
434 .mode = inode->i_mode,
437 .ctime = NO_CHANGE_64,
438 .atime = NO_CHANGE_64,
439 .mtime = NO_CHANGE_64,
442 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
449 cifs_put_tlink(tlink);
453 /* Try to reacquire byte range locks that were released when session */
454 /* to server was lost */
/*
 * NOTE(review): the only body content visible for this function is the
 * placeholder comment below; no relocking logic appears here. The one
 * caller in this file, cifs_reopen_file(), ignores the return value.
 */
455 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
459 /* BB list all locks open on this file and relock */
/*
 * Re-establish the server handle of an already open file.
 *
 * @pCifsFile: open file whose handle may have been invalidated
 * @can_flush: presumably whether it is safe to write back dirty pages and
 *             refresh inode info here; the trailing comment in the body
 *             explains that a writeback caller could deadlock - TODO confirm
 *
 * Runs under pCifsFile->fh_mutex until the new handle is stored. A handle
 * that is not flagged invalid takes the early unlock path. Otherwise the
 * path is rebuilt from the dentry (without the rename semaphore, see the
 * comment in the body). On tcons with unix extensions, CAP_UNIX and
 * CIFS_UNIX_POSIX_PATH_OPS_CAP a POSIX reopen is tried first with O_CREAT,
 * O_EXCL and O_TRUNC masked off, since those already took effect on the
 * original open. The other visible open call is CIFSSMBOpen() with
 * disposition FILE_OPEN and no file-info buffer, because server-side size
 * can be stale while write-behind data is pending. On success the new
 * netfid is stored, invalidHandle is cleared and the mutex is released;
 * the visible follow-up work is a mapping writeback, an inode info refresh,
 * setting the oplock level and calling cifs_relock_file().
 */
464 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
469 struct cifs_sb_info *cifs_sb;
470 struct cifs_tcon *tcon;
471 struct cifsInodeInfo *pCifsInode;
473 char *full_path = NULL;
475 int disposition = FILE_OPEN;
476 int create_options = CREATE_NOT_DIR;
480 mutex_lock(&pCifsFile->fh_mutex);
481 if (!pCifsFile->invalidHandle) {
482 mutex_unlock(&pCifsFile->fh_mutex);
488 inode = pCifsFile->dentry->d_inode;
489 cifs_sb = CIFS_SB(inode->i_sb);
490 tcon = tlink_tcon(pCifsFile->tlink);
492 /* can not grab rename sem here because various ops, including
493 those that already have the rename sem can end up causing writepage
494 to get called and if the server was down that means we end up here,
495 and we can never tell if the caller already has the rename_sem */
496 full_path = build_path_from_dentry(pCifsFile->dentry);
497 if (full_path == NULL) {
499 mutex_unlock(&pCifsFile->fh_mutex);
504 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
505 inode, pCifsFile->f_flags, full_path);
507 if (tcon->ses->server->oplocks)
512 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
513 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
514 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
517 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
518 * original open. Must mask them off for a reopen.
520 unsigned int oflags = pCifsFile->f_flags &
521 ~(O_CREAT | O_EXCL | O_TRUNC);
523 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
524 cifs_sb->mnt_file_mode /* ignored */,
525 oflags, &oplock, &netfid, xid);
527 cFYI(1, "posix reopen succeeded");
530 /* fallthrough to retry open the old way on errors, especially
531 in the reconnect path it is important to retry hard */
534 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
536 if (backup_cred(cifs_sb))
537 create_options |= CREATE_OPEN_BACKUP_INTENT;
539 /* Can not refresh inode by passing in file_info buf to be returned
540 by SMBOpen and then calling get_inode_info with returned buf
541 since file might have write behind data that needs to be flushed
542 and server version of file size can be stale. If we knew for sure
543 that inode was not dirty locally we could do this */
545 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
546 create_options, &netfid, &oplock, NULL,
547 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
548 CIFS_MOUNT_MAP_SPECIAL_CHR);
550 mutex_unlock(&pCifsFile->fh_mutex);
551 cFYI(1, "cifs_open returned 0x%x", rc);
552 cFYI(1, "oplock: %d", oplock);
553 goto reopen_error_exit;
557 pCifsFile->netfid = netfid;
558 pCifsFile->invalidHandle = false;
559 mutex_unlock(&pCifsFile->fh_mutex);
560 pCifsInode = CIFS_I(inode);
563 rc = filemap_write_and_wait(inode->i_mapping);
564 mapping_set_error(inode->i_mapping, rc);
567 rc = cifs_get_inode_info_unix(&inode,
568 full_path, inode->i_sb, xid);
570 rc = cifs_get_inode_info(&inode,
571 full_path, NULL, inode->i_sb,
573 } /* else we are writing out data to server already
574 and could deadlock if we tried to flush data, and
575 since we do not know if we have data that would
576 invalidate the current end of file on the server
577 we can not go to the server to get the new inod
580 cifs_set_oplock_level(pCifsInode, oplock);
582 cifs_relock_file(pCifsFile);
/*
 * VFS release handler for regular files. If the file still carries private
 * data, its cifsFileInfo reference is dropped with cifsFileInfo_put() and
 * the pointer is cleared so it cannot be used again through this file.
 */
590 int cifs_close(struct inode *inode, struct file *file)
592 if (file->private_data != NULL) {
593 cifsFileInfo_put(file->private_data);
594 file->private_data = NULL;
597 /* return code from the ->release op is always ignored */
/*
 * Release the per-open state of a directory.
 *
 * When the search has not reached its end and the handle is still valid,
 * the handle is marked invalid under cifs_file_list_lock, the lock is
 * dropped and the search is closed on the server with CIFSFindClose(); the
 * result is only logged. The other branch just releases the spinlock. Any
 * network buffer still referenced from srch_inf is detached and released
 * with cifs_small_buf_release() when srch_inf.smallBuf is set and
 * cifs_buf_release() otherwise. Finally the tlink reference is put and the
 * private data is freed and cleared.
 */
601 int cifs_closedir(struct inode *inode, struct file *file)
605 struct cifsFileInfo *pCFileStruct = file->private_data;
608 cFYI(1, "Closedir inode = 0x%p", inode);
613 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
615 cFYI(1, "Freeing private data in close dir");
616 spin_lock(&cifs_file_list_lock);
617 if (!pCFileStruct->srch_inf.endOfSearch &&
618 !pCFileStruct->invalidHandle) {
619 pCFileStruct->invalidHandle = true;
620 spin_unlock(&cifs_file_list_lock);
621 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
622 cFYI(1, "Closing uncompleted readdir with rc %d",
624 /* not much we can do if it fails anyway, ignore rc */
627 spin_unlock(&cifs_file_list_lock);
628 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
630 cFYI(1, "closedir free smb buf in srch struct");
631 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
632 if (pCFileStruct->srch_inf.smallBuf)
633 cifs_small_buf_release(ptmp);
635 cifs_buf_release(ptmp);
637 cifs_put_tlink(pCFileStruct->tlink);
638 kfree(file->private_data);
639 file->private_data = NULL;
641 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate a cifsLockInfo describing a byte range owned by the current
 * thread group. Records @offset and @length, stamps the lock with
 * current->tgid and initialises the blocked-waiter list (blist) and the
 * wait queue (block_q). The memory comes from kmalloc() and is therefore
 * not zeroed; the llist linkage is not initialised in the visible lines
 * and is set when the lock is added to a file list.
 */
646 static struct cifsLockInfo *
647 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
649 struct cifsLockInfo *lock =
650 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
653 lock->offset = offset;
654 lock->length = length;
656 lock->pid = current->tgid;
657 INIT_LIST_HEAD(&lock->blist);
658 init_waitqueue_head(&lock->block_q);
663 cifs_del_lock_waiters(struct cifsLockInfo *lock)
665 struct cifsLockInfo *li, *tmp;
666 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
667 list_del_init(&li->blist);
668 wake_up(&li->block_q);
/*
 * Scan the cached locks of one open file (@cfile) against the range that
 * starts at @offset and spans @length bytes.
 *
 * The first visible test matches entries that do not overlap the range.
 * The second matches overlapping entries for a shared-type request when the
 * entry has the same type, or when it belongs to the same file handle
 * (compare_fids on @cur and @cfile) and to the current thread group.
 * NOTE(review): what happens for each match, and how @conf_lock is filled,
 * is not visible in these lines - presumably both matches are treated as
 * non-conflicting and a remaining entry is reported through *conf_lock.
 */
673 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
674 __u64 length, __u8 type, struct cifsFileInfo *cur,
675 struct cifsLockInfo **conf_lock)
677 struct cifsLockInfo *li;
678 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
680 list_for_each_entry(li, &cfile->llist, llist) {
681 if (offset + length <= li->offset ||
682 offset >= li->offset + li->length)
684 else if ((type & server->vals->shared_lock_type) &&
685 ((server->ops->compare_fids(cur, cfile) &&
686 current->tgid == li->pid) || type == li->type))
/*
 * Look for a conflicting cached lock across every open instance of the
 * inode behind @cfile. Each entry on the inode open-file list is checked
 * with cifs_find_fid_lock_conflict() while cifs_file_list_lock is held.
 */
697 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
698 __u8 type, struct cifsLockInfo **conf_lock)
701 struct cifsFileInfo *fid, *tmp;
702 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
704 spin_lock(&cifs_file_list_lock);
705 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
706 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
711 spin_unlock(&cifs_file_list_lock);
717 * Check if there is another lock that prevents us from setting the lock
718 * (mandatory style). If such a lock exists, update the flock structure with
719 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
720 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
721 * send a request to the server or 1 otherwise.
/*
 * Mandatory-style lock test; the whole check runs under cinode->lock_mutex.
 *
 * When cifs_find_lock_conflict() reports a conflicting cached lock, @flock
 * is filled from it: fl_start is its offset, fl_end is the inclusive end
 * (offset + length - 1), fl_pid is its owner, and fl_type is F_RDLCK for a
 * shared-type lock and F_WRLCK otherwise. Without a conflict the
 * can_cache_brlcks flag of the inode decides the outcome, and F_UNLCK is
 * stored in @flock on the path that does not take the !can_cache_brlcks
 * branch.
 */
724 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
725 __u8 type, struct file_lock *flock)
728 struct cifsLockInfo *conf_lock;
729 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
730 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
733 mutex_lock(&cinode->lock_mutex);
735 exist = cifs_find_lock_conflict(cfile, offset, length, type,
738 flock->fl_start = conf_lock->offset;
739 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
740 flock->fl_pid = conf_lock->pid;
741 if (conf_lock->type & server->vals->shared_lock_type)
742 flock->fl_type = F_RDLCK;
744 flock->fl_type = F_WRLCK;
745 } else if (!cinode->can_cache_brlcks)
748 flock->fl_type = F_UNLCK;
750 mutex_unlock(&cinode->lock_mutex);
/*
 * Append @lock to the tail of the cached lock list of @cfile. The list is
 * modified under the lock_mutex of the inode behind @cfile. No conflict
 * check is done here; cifs_lock_add_if() is the checking variant.
 */
755 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
757 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
758 mutex_lock(&cinode->lock_mutex);
759 list_add_tail(&lock->llist, &cfile->llist);
760 mutex_unlock(&cinode->lock_mutex);
764 * Set the byte-range lock (mandatory style). Returns:
765 * 1) 0, if we set the lock and don't need to request to the server;
766 * 2) 1, if no locks prevent us but we need to request to the server;
767 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * Try to record @lock in the local lock cache of @cfile.
 *
 * Under cinode->lock_mutex the cached locks are checked for a conflict with
 * the range and type of @lock. With no conflict and brlock caching allowed,
 * the lock is appended to the list of @cfile and the mutex is released. On
 * the waiting path the lock is queued on the blist of the conflicting lock,
 * the mutex is dropped and the caller sleeps interruptibly until its own
 * blist entry points back at itself, which is the state
 * cifs_del_lock_waiters() leaves behind. After the wait the mutex is taken
 * again and the entry is removed from the blist with list_del_init().
 */
770 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
773 struct cifsLockInfo *conf_lock;
774 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
780 mutex_lock(&cinode->lock_mutex);
782 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
783 lock->type, &conf_lock);
784 if (!exist && cinode->can_cache_brlcks) {
785 list_add_tail(&lock->llist, &cfile->llist);
786 mutex_unlock(&cinode->lock_mutex);
795 list_add_tail(&lock->blist, &conf_lock->blist);
796 mutex_unlock(&cinode->lock_mutex);
797 rc = wait_event_interruptible(lock->block_q,
798 (lock->blist.prev == &lock->blist) &&
799 (lock->blist.next == &lock->blist));
802 mutex_lock(&cinode->lock_mutex);
803 list_del_init(&lock->blist);
806 mutex_unlock(&cinode->lock_mutex);
811 * Check if there is another lock that prevents us from setting the lock
812 * (posix style). If such a lock exists, update the flock structure with
813 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
814 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
815 * send a request to the server or 1 otherwise.
/*
 * POSIX-style lock test against the local VFS lock state.
 *
 * Requests without FL_POSIX are filtered out by the first test. Under
 * cinode->lock_mutex, posix_test_lock() updates @flock. If it reports
 * F_UNLCK while brlocks cannot be cached, the lock type saved on entry is
 * restored in @flock, because the local state alone is then not
 * authoritative.
 */
818 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
821 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
822 unsigned char saved_type = flock->fl_type;
824 if ((flock->fl_flags & FL_POSIX) == 0)
827 mutex_lock(&cinode->lock_mutex);
828 posix_test_lock(file, flock);
830 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
831 flock->fl_type = saved_type;
835 mutex_unlock(&cinode->lock_mutex);
840 * Set the byte-range lock (posix style). Returns:
841 * 1) 0, if we set the lock and don't need to request to the server;
842 * 2) 1, if we need to request to the server;
843 * 3) <0, if the error occurs while setting the lock.
/*
 * POSIX-style lock set against the local VFS lock state.
 *
 * Requests without FL_POSIX are filtered out by the first test. When
 * brlocks cannot be cached the mutex is released without touching the
 * local lock state. Otherwise posix_lock_file() is called under
 * cinode->lock_mutex. A FILE_LOCK_DEFERRED result makes the caller wait
 * interruptibly, outside the mutex, until flock->fl_next is cleared;
 * locks_delete_block() appears on a path after that wait.
 */
846 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
848 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
851 if ((flock->fl_flags & FL_POSIX) == 0)
855 mutex_lock(&cinode->lock_mutex);
856 if (!cinode->can_cache_brlcks) {
857 mutex_unlock(&cinode->lock_mutex);
861 rc = posix_lock_file(file, flock, NULL);
862 mutex_unlock(&cinode->lock_mutex);
863 if (rc == FILE_LOCK_DEFERRED) {
864 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
867 locks_delete_block(flock);
/*
 * Send the cached mandatory byte-range locks of @cfile to the server and
 * stop caching them.
 *
 * Everything runs under cinode->lock_mutex. Nothing is pushed when
 * can_cache_brlcks is already false. The server maxBuf value is read once
 * into max_buf because it can change during a reconnect; max_num is the
 * number of LOCKING_ANDX_RANGE entries that fit after the SMB header, and a
 * zeroed buffer of that many entries is allocated. The lock list is walked
 * twice, once per entry of types[] (LOCKING_ANDX_LARGE_FILES alone, then
 * combined with LOCKING_ANDX_SHARED_LOCK), copying pid, length and offset
 * of matching locks into the buffer. cifs_lockv() is called whenever the
 * buffer reaches max_num entries, and again after the walk for the current
 * num. Finally can_cache_brlcks is cleared.
 *
 * NOTE(review): cur->Pid stores li->pid through cpu_to_le16(), so only 16
 * bits of the pid are carried in each range entry.
 */
873 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
875 int xid, rc = 0, stored_rc;
876 struct cifsLockInfo *li, *tmp;
877 struct cifs_tcon *tcon;
878 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 unsigned int num, max_num, max_buf;
880 LOCKING_ANDX_RANGE *buf, *cur;
881 int types[] = {LOCKING_ANDX_LARGE_FILES,
882 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
886 tcon = tlink_tcon(cfile->tlink);
888 mutex_lock(&cinode->lock_mutex);
889 if (!cinode->can_cache_brlcks) {
890 mutex_unlock(&cinode->lock_mutex);
896 * Accessing maxBuf is racy with cifs_reconnect - need to store value
897 * and check it for zero before using.
899 max_buf = tcon->ses->server->maxBuf;
901 mutex_unlock(&cinode->lock_mutex);
906 max_num = (max_buf - sizeof(struct smb_hdr)) /
907 sizeof(LOCKING_ANDX_RANGE);
908 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
910 mutex_unlock(&cinode->lock_mutex);
915 for (i = 0; i < 2; i++) {
918 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
919 if (li->type != types[i])
921 cur->Pid = cpu_to_le16(li->pid);
922 cur->LengthLow = cpu_to_le32((u32)li->length);
923 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
924 cur->OffsetLow = cpu_to_le32((u32)li->offset);
925 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
926 if (++num == max_num) {
927 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
928 (__u8)li->type, 0, num,
939 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
940 (__u8)types[i], 0, num, buf);
946 cinode->can_cache_brlcks = false;
947 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_for_each_lock - walk every lock on the i_flock chain of an inode
 * @inode: pointer expression for the inode whose lock chain is walked
 * @lockp: cursor of type struct file_lock **; inside the loop body *lockp
 *         is the current lock
 *
 * Copied from fs/locks.c with a name change. Both arguments are
 * parenthesised in the expansion so that compound expressions, for example
 * an inode pointer computed with pointer arithmetic, expand correctly.
 */
#define cifs_for_each_lock(inode, lockp) \
	for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
	     (lockp) = &(*(lockp))->fl_next)
/*
 * Snapshot of one POSIX lock that is queued for sending to the server.
 * cifs_push_posix_locks() links entries through llist and fills in the
 * pid, netfid, length and offset members before issuing the requests.
 */
959 struct lock_to_push {
960 struct list_head llist;
/*
 * Send the locally cached POSIX byte-range locks of the inode behind @cfile
 * to the server and stop caching them.
 *
 * Works under cinode->lock_mutex and does nothing further when
 * can_cache_brlcks is already false. The function makes two walks over the
 * lock chain of the inode: the first inspects FL_POSIX locks so that
 * lock_to_push structures can be allocated up front on the local
 * locks_to_send list, and the second copies pid, handle, length and start
 * offset of each FL_POSIX lock into those structures. Running out of
 * preallocated entries during the second walk is reported with cERROR.
 * Each filled entry is then sent with CIFSSMBPosixLock() and unlinked from
 * the list. can_cache_brlcks is cleared before the mutex is released. The
 * last loop unlinks whatever entries remain on locks_to_send.
 */
969 cifs_push_posix_locks(struct cifsFileInfo *cfile)
971 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
972 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
973 struct file_lock *flock, **before;
974 unsigned int count = 0, i = 0;
975 int rc = 0, xid, type;
976 struct list_head locks_to_send, *el;
977 struct lock_to_push *lck, *tmp;
982 mutex_lock(&cinode->lock_mutex);
983 if (!cinode->can_cache_brlcks) {
984 mutex_unlock(&cinode->lock_mutex);
990 cifs_for_each_lock(cfile->dentry->d_inode, before) {
991 if ((*before)->fl_flags & FL_POSIX)
996 INIT_LIST_HEAD(&locks_to_send);
999 * Allocating count locks is enough because no FL_POSIX locks can be
1000 * added to the list while we are holding cinode->lock_mutex that
1001 * protects locking operations of this inode.
1003 for (; i < count; i++) {
1004 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1009 list_add_tail(&lck->llist, &locks_to_send);
1012 el = locks_to_send.next;
1014 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1016 if ((flock->fl_flags & FL_POSIX) == 0)
1018 if (el == &locks_to_send) {
1020 * The list ended. We don't have enough allocated
1021 * structures - something is really wrong.
1023 cERROR(1, "Can't push all brlocks!");
1026 length = 1 + flock->fl_end - flock->fl_start;
1027 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1031 lck = list_entry(el, struct lock_to_push, llist);
1032 lck->pid = flock->fl_pid;
1033 lck->netfid = cfile->netfid;
1034 lck->length = length;
1036 lck->offset = flock->fl_start;
1041 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1044 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1045 lck->offset, lck->length, NULL,
1049 list_del(&lck->llist);
1054 cinode->can_cache_brlcks = false;
1055 mutex_unlock(&cinode->lock_mutex);
1060 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1061 list_del(&lck->llist);
/*
 * Push cached byte-range locks of @cfile to the server using the lock
 * flavour that fits the mount. POSIX locks are pushed when the session has
 * CAP_UNIX, the server advertises CIFS_UNIX_FCNTL_CAP and the mount does
 * not set CIFS_MOUNT_NOPOSIXBRL; otherwise mandatory locks are pushed.
 * Returns the result of the chosen push routine.
 */
1068 cifs_push_locks(struct cifsFileInfo *cfile)
1070 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1071 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1073 if ((tcon->ses->capabilities & CAP_UNIX) &&
1074 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1075 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1076 return cifs_push_posix_locks(cfile);
1078 return cifs_push_mandatory_locks(cfile);
/*
 * Decode a struct file_lock into the values used by the lock paths.
 *
 * The fl_flags bits FL_POSIX, FL_FLOCK, FL_SLEEP, FL_ACCESS and FL_LEASE
 * are examined one by one, with debug messages for blocking locks and for
 * the cases noted as not implemented; any other flag bit is logged as
 * unknown. *type starts from the large_lock_type of the server and gets the
 * exclusive, unlock or shared type bit OR-ed in according to fl_type:
 * F_WRLCK and F_EXLCK select exclusive, F_UNLCK selects unlock, F_RDLCK and
 * F_SHLCK select shared. Any other fl_type is logged as unknown.
 *
 * NOTE(review): the assignments to *lock, *unlock and *wait_flag are not
 * visible in these lines - presumably they are set in the matching
 * branches; confirm before relying on them.
 */
1082 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1083 bool *wait_flag, struct TCP_Server_Info *server)
1085 if (flock->fl_flags & FL_POSIX)
1087 if (flock->fl_flags & FL_FLOCK)
1089 if (flock->fl_flags & FL_SLEEP) {
1090 cFYI(1, "Blocking lock");
1093 if (flock->fl_flags & FL_ACCESS)
1094 cFYI(1, "Process suspended by mandatory locking - "
1095 "not implemented yet");
1096 if (flock->fl_flags & FL_LEASE)
1097 cFYI(1, "Lease on file - not implemented yet");
1098 if (flock->fl_flags &
1099 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1100 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1102 *type = server->vals->large_lock_type;
1103 if (flock->fl_type == F_WRLCK) {
1104 cFYI(1, "F_WRLCK ");
1105 *type |= server->vals->exclusive_lock_type;
1107 } else if (flock->fl_type == F_UNLCK) {
1109 *type |= server->vals->unlock_lock_type;
1111 /* Check if unlock includes more than one lock range */
1112 } else if (flock->fl_type == F_RDLCK) {
1114 *type |= server->vals->shared_lock_type;
1116 } else if (flock->fl_type == F_EXLCK) {
1118 *type |= server->vals->exclusive_lock_type;
1120 } else if (flock->fl_type == F_SHLCK) {
1122 *type |= server->vals->shared_lock_type;
1125 cFYI(1, "Unknown type of lock");
/*
 * Thin wrapper that issues one mandatory lock or unlock request for the
 * handle of @cfile on behalf of the current thread group.
 *
 * Note the argument order of the underlying CIFSSMBLock() call: @length is
 * passed before @offset and @unlock before @lock. @type is narrowed to
 * __u8 and the final argument is a constant 0. Returns whatever
 * CIFSSMBLock() returns.
 */
1129 cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset,
1130 __u64 length, __u32 type, int lock, int unlock, bool wait)
1132 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1133 current->tgid, length, offset, unlock, lock,
1134 (__u8)type, wait, 0);
/*
 * Handle a lock query for the range described by @flock.
 *
 * The byte count is 1 + fl_end - fl_start because fl_end is inclusive. On
 * the POSIX branch the local state is checked with cifs_posix_lock_test()
 * and the visible server call is CIFSSMBPosixLock() with CIFS_RDLCK for a
 * shared-type request and CIFS_WRLCK otherwise, passing @flock so the
 * answer can be written back.
 *
 * On the mandatory branch the cached locks are checked with
 * cifs_lock_test(). The visible server sequence probes by actually locking
 * the range: a probe lock of the requested type is followed by an unlock
 * and reports F_UNLCK; for a shared-type request F_WRLCK is reported
 * instead; a further probe with the shared lock type OR-ed in is followed
 * by an unlock and reports F_RDLCK, and the last visible assignment reports
 * F_WRLCK. A failing unlock of a probe lock is logged with cERROR.
 */
1138 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1139 bool wait_flag, bool posix_lck, int xid)
1142 __u64 length = 1 + flock->fl_end - flock->fl_start;
1143 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1144 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1145 struct TCP_Server_Info *server = tcon->ses->server;
1146 __u16 netfid = cfile->netfid;
1149 int posix_lock_type;
1151 rc = cifs_posix_lock_test(file, flock);
1155 if (type & server->vals->shared_lock_type)
1156 posix_lock_type = CIFS_RDLCK;
1158 posix_lock_type = CIFS_WRLCK;
1159 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1160 flock->fl_start, length, flock,
1161 posix_lock_type, wait_flag);
1165 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1169 /* BB we could chain these into one lock request BB */
1170 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1173 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1175 flock->fl_type = F_UNLCK;
1177 cERROR(1, "Error unlocking previously locked "
1178 "range %d during test of lock", rc);
1182 if (type & server->vals->shared_lock_type) {
1183 flock->fl_type = F_WRLCK;
1187 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1188 type | server->vals->shared_lock_type, 1, 0,
1191 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1192 type | server->vals->shared_lock_type,
1194 flock->fl_type = F_RDLCK;
1196 cERROR(1, "Error unlocking previously locked "
1197 "range %d during test of lock", rc);
1199 flock->fl_type = F_WRLCK;
/*
 * Move every entry of the list @source onto the list @dest. list_move()
 * inserts at the head of @dest, so the entries end up in reverse order
 * relative to their order on @source. @source is empty afterwards.
 */
1205 cifs_move_llist(struct list_head *source, struct list_head *dest)
1207 struct list_head *li, *tmp;
1208 list_for_each_safe(li, tmp, source)
1209 list_move(li, dest);
/*
 * Tear down a list of cifsLockInfo entries linked through their llist
 * member: for each entry the requests blocked behind it are released with
 * cifs_del_lock_waiters() and the entry is unlinked from @llist.
 */
1213 cifs_free_llist(struct list_head *llist)
1215 struct cifsLockInfo *li, *tmp;
1216 list_for_each_entry_safe(li, tmp, llist, llist) {
1217 cifs_del_lock_waiters(li);
1218 list_del(&li->llist);
/*
 * Remove the cached mandatory locks of @cfile that fall inside the range
 * described by @flock, unlocking them on the server when they are not
 * merely cached.
 *
 * The server maxBuf value is read once into max_buf because it can change
 * during a reconnect; max_num is the number of LOCKING_ANDX_RANGE entries
 * that fit after the SMB header. Under cinode->lock_mutex the lock list is
 * walked twice, once per entry of types[]. Three tests filter the entries:
 * the range test matches locks that start before fl_start or end beyond
 * the end of the unlock range, the pid test matches locks of another
 * thread group, and the type test matches locks of the other type. While
 * brlocks can be cached a selected lock is simply unlinked and its waiters
 * released. Otherwise its pid, length and offset are copied into the
 * request buffer and the lock is parked on tmp_llist so it can be restored
 * if the server request fails. cifs_lockv() is issued whenever the buffer
 * reaches max_num entries and again after the walk; on failure the parked
 * locks are moved back to the list of the file, on success they are freed.
 *
 * NOTE(review): cur->Pid stores li->pid through cpu_to_le16(), so only 16
 * bits of the pid are carried in each range entry.
 */
1224 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1226 int rc = 0, stored_rc;
1227 int types[] = {LOCKING_ANDX_LARGE_FILES,
1228 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1230 unsigned int max_num, num, max_buf;
1231 LOCKING_ANDX_RANGE *buf, *cur;
1232 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1233 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1234 struct cifsLockInfo *li, *tmp;
1235 __u64 length = 1 + flock->fl_end - flock->fl_start;
1236 struct list_head tmp_llist;
1238 INIT_LIST_HEAD(&tmp_llist);
1241 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1242 * and check it for zero before using.
1244 max_buf = tcon->ses->server->maxBuf;
1248 max_num = (max_buf - sizeof(struct smb_hdr)) /
1249 sizeof(LOCKING_ANDX_RANGE);
1250 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1254 mutex_lock(&cinode->lock_mutex);
1255 for (i = 0; i < 2; i++) {
1258 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
1259 if (flock->fl_start > li->offset ||
1260 (flock->fl_start + length) <
1261 (li->offset + li->length))
1263 if (current->tgid != li->pid)
1265 if (types[i] != li->type)
1267 if (cinode->can_cache_brlcks) {
1269 * We can cache brlock requests - simply remove
1270 * a lock from the file's list.
1272 list_del(&li->llist);
1273 cifs_del_lock_waiters(li);
1277 cur->Pid = cpu_to_le16(li->pid);
1278 cur->LengthLow = cpu_to_le32((u32)li->length);
1279 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1280 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1281 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1283 * We need to save a lock here to let us add it again to
1284 * the file's list if the unlock range request fails on
1287 list_move(&li->llist, &tmp_llist);
1288 if (++num == max_num) {
1289 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1290 li->type, num, 0, buf);
1293 * We failed on the unlock range
1294 * request - add all locks from the tmp
1295 * list to the head of the file's list.
1297 cifs_move_llist(&tmp_llist,
1302 * The unlock range request succeed -
1303 * free the tmp list.
1305 cifs_free_llist(&tmp_llist);
1312 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1313 types[i], num, 0, buf);
1315 cifs_move_llist(&tmp_llist, &cfile->llist);
1318 cifs_free_llist(&tmp_llist);
1322 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_setlk - apply or release a byte-range lock on an open file.
 *
 * length is derived from the inclusive fl_start..fl_end range. Two paths
 * are visible: one calls cifs_posix_lock_set() and then CIFSSMBPosixLock()
 * with a CIFS_RDLCK, CIFS_WRLCK or CIFS_UNLCK type; the other builds a
 * cifsLockInfo with cifs_lock_init(), passes it to cifs_lock_add_if(),
 * sends it with cifs_mandatory_lock() and records it with cifs_lock_add(),
 * or calls cifs_unlock_range(). For FL_POSIX requests
 * posix_lock_file_wait() is called as well.
 *
 * NOTE(review): the conditions selecting between these paths (presumably
 * posix_lck, lock and unlock) and the return statements are not visible in
 * this listing - confirm against the full source.
 */
1328 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1329 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
1332 __u64 length = 1 + flock->fl_end - flock->fl_start;
1333 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1334 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1335 struct TCP_Server_Info *server = tcon->ses->server;
1336 __u16 netfid = cfile->netfid;
1339 int posix_lock_type;
1341 rc = cifs_posix_lock_set(file, flock);
1345 if (type & server->vals->shared_lock_type)
1346 posix_lock_type = CIFS_RDLCK;
1348 posix_lock_type = CIFS_WRLCK;
1351 posix_lock_type = CIFS_UNLCK;
1353 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1354 flock->fl_start, length, NULL,
1355 posix_lock_type, wait_flag);
1360 struct cifsLockInfo *lock;
1362 lock = cifs_lock_init(flock->fl_start, length, type);
1366 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1372 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1373 type, 1, 0, wait_flag);
1379 cifs_lock_add(cfile, lock);
1381 rc = cifs_unlock_range(cfile, flock, xid);
1384 if (flock->fl_flags & FL_POSIX)
1385 posix_lock_file_wait(file, flock);
/*
 * cifs_lock - handle a byte-range lock request on a CIFS file.
 *
 * Logs the request, decodes flock into type, lock, unlock and wait_flag
 * with cifs_read_flock(), then tests whether the session advertises
 * CAP_UNIX with CIFS_UNIX_FCNTL_CAP and the mount does not set
 * CIFS_MOUNT_NOPOSIXBRL. IS_GETLK() commands are passed to cifs_getlk();
 * cifs_setlk() is called for the remaining requests.
 *
 * NOTE(review): the statement guarded by the capability test (presumably
 * setting posix_lck) and the handling of requests that are neither a lock
 * nor an unlock are not visible in this listing - confirm.
 */
1389 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1392 int lock = 0, unlock = 0;
1393 bool wait_flag = false;
1394 bool posix_lck = false;
1395 struct cifs_sb_info *cifs_sb;
1396 struct cifs_tcon *tcon;
1397 struct cifsInodeInfo *cinode;
1398 struct cifsFileInfo *cfile;
1405 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1406 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1407 flock->fl_start, flock->fl_end);
1409 cfile = (struct cifsFileInfo *)file->private_data;
1410 tcon = tlink_tcon(cfile->tlink);
1412 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1415 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1416 netfid = cfile->netfid;
1417 cinode = CIFS_I(file->f_path.dentry->d_inode);
1419 if ((tcon->ses->capabilities & CAP_UNIX) &&
1420 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1421 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1424 * BB add code here to normalize offset and length to account for
1425 * negative length which we can not accept over the wire.
1427 if (IS_GETLK(cmd)) {
1428 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1433 if (!lock && !unlock) {
1435 * if no lock or unlock then nothing to do since we do not
1442 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1449 * update the file size (if needed) after a write. Should be called with
1450 * the inode->i_lock held
/*
 * cifs_update_eof - record the end of a completed write in
 * cifsi->server_eof.
 * @cifsi:         CIFS inode info whose server_eof may be advanced
 * @offset:        file offset at which the write started
 * @bytes_written: number of bytes the write covered
 */
1453 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1454 unsigned int bytes_written)
1456 loff_t end_of_write = offset + bytes_written;
/* server_eof only ever moves forward here; it is never reduced */
1458 if (end_of_write > cifsi->server_eof)
1459 cifsi->server_eof = end_of_write;
/*
 * cifs_write - synchronously write write_size bytes from write_data to the
 * server through open_file, starting at *poffset.
 *
 * The data is sent in chunks of at most cifs_sb->wsize with
 * CIFSSMBWrite2(); iov[1] carries the payload and iov[0] is reserved for
 * the SMB header. Each chunk is retried while rc is -EAGAIN, reopening an
 * invalid handle with cifs_reopen_file(open_file, false). After a chunk,
 * server_eof is updated under the inode i_lock and *poffset is advanced by
 * bytes_written. Afterwards the byte counter is updated, i_size is raised
 * when *poffset has moved past it, the inode is marked dirty and
 * total_written is returned.
 *
 * NOTE(review): the body of the error / zero-length branch and the exact
 * nesting of the final statements are not visible in this listing -
 * confirm against the full source.
 */
1462 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1463 const char *write_data, size_t write_size,
1467 unsigned int bytes_written = 0;
1468 unsigned int total_written;
1469 struct cifs_sb_info *cifs_sb;
1470 struct cifs_tcon *pTcon;
1472 struct dentry *dentry = open_file->dentry;
1473 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1474 struct cifs_io_parms io_parms;
1476 cifs_sb = CIFS_SB(dentry->d_sb);
1478 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1479 *poffset, dentry->d_name.name);
1481 pTcon = tlink_tcon(open_file->tlink);
1485 for (total_written = 0; write_size > total_written;
1486 total_written += bytes_written) {
1488 while (rc == -EAGAIN) {
1492 if (open_file->invalidHandle) {
1493 /* we could deadlock if we called
1494 filemap_fdatawait from here so tell
1495 reopen_file not to flush data to
1497 rc = cifs_reopen_file(open_file, false);
1502 len = min((size_t)cifs_sb->wsize,
1503 write_size - total_written);
1504 /* iov[0] is reserved for smb header */
1505 iov[1].iov_base = (char *)write_data + total_written;
1506 iov[1].iov_len = len;
1507 io_parms.netfid = open_file->netfid;
1509 io_parms.tcon = pTcon;
1510 io_parms.offset = *poffset;
1511 io_parms.length = len;
1512 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1515 if (rc || (bytes_written == 0)) {
1523 spin_lock(&dentry->d_inode->i_lock);
1524 cifs_update_eof(cifsi, *poffset, bytes_written);
1525 spin_unlock(&dentry->d_inode->i_lock);
1526 *poffset += bytes_written;
1530 cifs_stats_bytes_written(pTcon, total_written);
1532 if (total_written > 0) {
1533 spin_lock(&dentry->d_inode->i_lock);
1534 if (*poffset > dentry->d_inode->i_size)
1535 i_size_write(dentry->d_inode, *poffset);
1536 spin_unlock(&dentry->d_inode->i_lock);
1538 mark_inode_dirty_sync(dentry->d_inode);
1540 return total_written;
/*
 * find_readable_file - look for an open handle on the inode that can be
 * used for reading.
 *
 * Walks cifs_inode->openFileList under cifs_file_list_lock. An entry
 * qualifies when its open flags include FMODE_READ and invalidHandle is
 * not set; a reference is taken with cifsFileInfo_get() before the lock is
 * dropped. Entries whose uid differs from current_fsuid() are filtered
 * when fsuid_only is in effect, which the code restricts to
 * CIFS_MOUNT_MULTIUSER mounts. The walk stops at the first entry without
 * FMODE_READ.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the referenced entry is returned on success and NULL
 * otherwise - confirm.
 */
1543 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1546 struct cifsFileInfo *open_file = NULL;
1547 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1549 /* only filter by fsuid on multiuser mounts */
1550 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1553 spin_lock(&cifs_file_list_lock);
1554 /* we could simply get the first_list_entry since write-only entries
1555 are always at the end of the list but since the first entry might
1556 have a close pending, we go through the whole list */
1557 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1558 if (fsuid_only && open_file->uid != current_fsuid())
1560 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1561 if (!open_file->invalidHandle) {
1562 /* found a good file */
1563 /* lock it so it will not be closed on us */
1564 cifsFileInfo_get(open_file);
1565 spin_unlock(&cifs_file_list_lock);
1567 } /* else might as well continue, and look for
1568 another, or simply have the caller reopen it
1569 again rather than trying to fix this handle */
1570 } else /* write only file */
1571 break; /* write only files are last so must be done */
1573 spin_unlock(&cifs_file_list_lock);
/*
 * find_writable_file - look for an open handle on the inode that can be
 * used for writing.
 *
 * The list walk runs under cifs_file_list_lock. The first pass skips
 * handles whose pid differs from current->tgid; when nothing is found,
 * any_available is set and the walk is repeated. A valid FMODE_WRITE
 * handle gets a reference via cifsFileInfo_get(). An invalid one is
 * remembered in inv_file and handed to cifs_reopen_file(); the visible
 * code also moves inv_file to the tail of the list, drops its reference
 * and jumps back to refind_writable, with refind compared against
 * MAX_REOPEN_ATT.
 *
 * NOTE(review): the return statements and the condition under which the
 * move-to-tail and retry branch runs are not visible in this listing -
 * confirm against the full source.
 */
1577 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1580 struct cifsFileInfo *open_file, *inv_file = NULL;
1581 struct cifs_sb_info *cifs_sb;
1582 bool any_available = false;
1584 unsigned int refind = 0;
1586 /* Having a null inode here (because mapping->host was set to zero by
1587 the VFS or MM) should not happen but we had reports of an oops (due to
1588 it being zero) during stress testcases so we need to check for it */
1590 if (cifs_inode == NULL) {
1591 cERROR(1, "Null inode passed to cifs_writeable_file");
1596 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1598 /* only filter by fsuid on multiuser mounts */
1599 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1602 spin_lock(&cifs_file_list_lock);
1604 if (refind > MAX_REOPEN_ATT) {
1605 spin_unlock(&cifs_file_list_lock);
1608 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1609 if (!any_available && open_file->pid != current->tgid)
1611 if (fsuid_only && open_file->uid != current_fsuid())
1613 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1614 if (!open_file->invalidHandle) {
1615 /* found a good writable file */
1616 cifsFileInfo_get(open_file);
1617 spin_unlock(&cifs_file_list_lock);
1621 inv_file = open_file;
1625 /* couldn't find useable FH with same pid, try any available */
1626 if (!any_available) {
1627 any_available = true;
1628 goto refind_writable;
1632 any_available = false;
1633 cifsFileInfo_get(inv_file);
1636 spin_unlock(&cifs_file_list_lock);
1639 rc = cifs_reopen_file(inv_file, false);
1643 spin_lock(&cifs_file_list_lock);
1644 list_move_tail(&inv_file->flist,
1645 &cifs_inode->openFileList);
1646 spin_unlock(&cifs_file_list_lock);
1647 cifsFileInfo_put(inv_file);
1648 spin_lock(&cifs_file_list_lock);
1650 goto refind_writable;
/*
 * cifs_partialpagewrite - write part of a page-cache page to the server.
 * @page: page whose data is written
 * @from: first byte within the page
 * @to:   end of the byte range within the page (to - from bytes are sent)
 *
 * The file offset is page->index << PAGE_CACHE_SHIFT plus from. The range
 * is checked against to > PAGE_CACHE_SIZE and from > to, the write is
 * skipped when the offset lies beyond i_size (truncate race), and to is
 * clamped so that the file is not extended. Data is sent with cifs_write()
 * on a handle obtained from find_writable_file(), after which atime and
 * mtime are set to the current time.
 *
 * NOTE(review): apart from the return 0 on the truncate race, the values
 * returned on each path are not visible in this listing - confirm.
 */
1657 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1659 struct address_space *mapping = page->mapping;
1660 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1663 int bytes_written = 0;
1664 struct inode *inode;
1665 struct cifsFileInfo *open_file;
1667 if (!mapping || !mapping->host)
1670 inode = page->mapping->host;
1672 offset += (loff_t)from;
1673 write_data = kmap(page);
1676 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1681 /* racing with truncate? */
1682 if (offset > mapping->host->i_size) {
1684 return 0; /* don't care */
1687 /* check to make sure that we are not extending the file */
1688 if (mapping->host->i_size - offset < (loff_t)to)
1689 to = (unsigned)(mapping->host->i_size - offset);
1691 open_file = find_writable_file(CIFS_I(mapping->host), false);
1693 bytes_written = cifs_write(open_file, open_file->pid,
1694 write_data, to - from, &offset);
1695 cifsFileInfo_put(open_file);
1696 /* Does mm or vfs already set times? */
1697 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1698 if ((bytes_written > 0) && (offset))
1700 else if (bytes_written < 0)
1703 cFYI(1, "No writeable filehandles for inode");
1712 * Marshal up the iov array, reserving the first one for the header. Also,
/*
 * cifs_writepages_marshal_iov - fill the kvec array for a page-cache
 * writeback request.
 * @iov:   array to fill; entry 0 is not written here, page i is placed in
 *         entry i + 1
 * @wdata: write request describing the pages
 *
 * Each page is mapped with kmap(). Its length is the smaller of
 * PAGE_CACHE_SIZE and the distance from the page offset to the inode size
 * sampled once with i_size_read(), and wdata->bytes accumulates the
 * lengths.
 */
1716 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1719 struct inode *inode = wdata->cfile->dentry->d_inode;
1720 loff_t size = i_size_read(inode);
1722 /* marshal up the pages into iov array */
1724 for (i = 0; i < wdata->nr_pages; i++) {
1725 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1726 (loff_t)PAGE_CACHE_SIZE);
1727 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1728 wdata->bytes += iov[i + 1].iov_len;
/*
 * cifs_writepages - write back dirty pages of a mapping in batches.
 *
 * Outline of the visible code:
 *  - When wsize is smaller than PAGE_CACHE_SIZE the work is delegated to
 *    generic_writepages().
 *  - The starting index comes from mapping->writeback_index for
 *    range_cyclic writeback, otherwise from wbc->range_start and
 *    wbc->range_end.
 *  - Each loop iteration allocates a cifs_writedata, gathers dirty pages
 *    with repeated find_get_pages_tag() calls, then examines every page:
 *    it is locked, checked against the mapping, the end of the range and
 *    the expected next index, checked for writeback in progress, has its
 *    dirty bit cleared and is marked under writeback.
 *  - Pages that will not be used are released; an empty batch drops the
 *    wdata reference.
 *  - The batch is sent with cifs_async_writev() on a handle from
 *    find_writable_file(), repeating while sync_mode is WB_SYNC_ALL and
 *    rc is -EAGAIN.
 *  - Pages are unlocked; in the failure cleanup each page is redirtied or
 *    flagged with SetPageError(), writeback is ended, the page reference
 *    is dropped and mapping_set_error() records rc.
 *  - nr_to_write is reduced by the batch size and writeback_index is
 *    updated for cyclic or whole-range writeback.
 *
 * NOTE(review): many branch bodies (unlock and break paths, the wrap back
 * to the start of the file, the choice between redirty and error) are not
 * visible in this listing - confirm against the full source.
 */
1732 static int cifs_writepages(struct address_space *mapping,
1733 struct writeback_control *wbc)
1735 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1736 bool done = false, scanned = false, range_whole = false;
1738 struct cifs_writedata *wdata;
1743 * If wsize is smaller than the page cache size, default to writing
1744 * one page at a time via cifs_writepage
1746 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1747 return generic_writepages(mapping, wbc);
1749 if (wbc->range_cyclic) {
1750 index = mapping->writeback_index; /* Start from prev offset */
1753 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1754 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1755 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1760 while (!done && index <= end) {
1761 unsigned int i, nr_pages, found_pages;
1762 pgoff_t next = 0, tofind;
1763 struct page **pages;
1765 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1768 wdata = cifs_writedata_alloc((unsigned int)tofind,
1769 cifs_writev_complete);
1776 * find_get_pages_tag seems to return a max of 256 on each
1777 * iteration, so we must call it several times in order to
1778 * fill the array or the wsize is effectively limited to
1779 * 256 * PAGE_CACHE_SIZE.
1782 pages = wdata->pages;
1784 nr_pages = find_get_pages_tag(mapping, &index,
1785 PAGECACHE_TAG_DIRTY,
1787 found_pages += nr_pages;
1790 } while (nr_pages && tofind && index <= end);
1792 if (found_pages == 0) {
1793 kref_put(&wdata->refcount, cifs_writedata_release);
1798 for (i = 0; i < found_pages; i++) {
1799 page = wdata->pages[i];
1801 * At this point we hold neither mapping->tree_lock nor
1802 * lock on the page itself: the page may be truncated or
1803 * invalidated (changing page->mapping to NULL), or even
1804 * swizzled back from swapper_space to tmpfs file
1810 else if (!trylock_page(page))
1813 if (unlikely(page->mapping != mapping)) {
1818 if (!wbc->range_cyclic && page->index > end) {
1824 if (next && (page->index != next)) {
1825 /* Not next consecutive page */
1830 if (wbc->sync_mode != WB_SYNC_NONE)
1831 wait_on_page_writeback(page);
1833 if (PageWriteback(page) ||
1834 !clear_page_dirty_for_io(page)) {
1840 * This actually clears the dirty bit in the radix tree.
1841 * See cifs_writepage() for more commentary.
1843 set_page_writeback(page);
1845 if (page_offset(page) >= mapping->host->i_size) {
1848 end_page_writeback(page);
1852 wdata->pages[i] = page;
1853 next = page->index + 1;
1857 /* reset index to refind any pages skipped */
1859 index = wdata->pages[0]->index + 1;
1861 /* put any pages we aren't going to use */
1862 for (i = nr_pages; i < found_pages; i++) {
1863 page_cache_release(wdata->pages[i]);
1864 wdata->pages[i] = NULL;
1867 /* nothing to write? */
1868 if (nr_pages == 0) {
1869 kref_put(&wdata->refcount, cifs_writedata_release);
1873 wdata->sync_mode = wbc->sync_mode;
1874 wdata->nr_pages = nr_pages;
1875 wdata->offset = page_offset(wdata->pages[0]);
1876 wdata->marshal_iov = cifs_writepages_marshal_iov;
1879 if (wdata->cfile != NULL)
1880 cifsFileInfo_put(wdata->cfile);
1881 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1883 if (!wdata->cfile) {
1884 cERROR(1, "No writable handles for inode");
1888 wdata->pid = wdata->cfile->pid;
1889 rc = cifs_async_writev(wdata);
1890 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1892 for (i = 0; i < nr_pages; ++i)
1893 unlock_page(wdata->pages[i]);
1895 /* send failure -- clean up the mess */
1897 for (i = 0; i < nr_pages; ++i) {
1899 redirty_page_for_writepage(wbc,
1902 SetPageError(wdata->pages[i]);
1903 end_page_writeback(wdata->pages[i]);
1904 page_cache_release(wdata->pages[i]);
1907 mapping_set_error(mapping, rc);
1909 kref_put(&wdata->refcount, cifs_writedata_release);
1911 wbc->nr_to_write -= nr_pages;
1912 if (wbc->nr_to_write <= 0)
1918 if (!scanned && !done) {
1920 * We hit the last page and there is more work to be done: wrap
1921 * back to the start of the file
1928 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1929 mapping->writeback_index = index;
/*
 * cifs_writepage_locked - write a single page back to the server.
 *
 * Takes an extra page reference, marks the page under writeback and writes
 * the range 0..PAGE_CACHE_SIZE with cifs_partialpagewrite(). An -EAGAIN
 * result is handled separately for WB_SYNC_ALL; for other sync modes the
 * page is redirtied with redirty_page_for_writepage(). Writeback is then
 * ended and the extra reference dropped.
 *
 * NOTE(review): the action taken for -EAGAIN under WB_SYNC_ALL (presumably
 * a retry) and the condition guarding SetPageUptodate() are not visible in
 * this listing - confirm.
 */
1935 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1941 /* BB add check for wbc flags */
1942 page_cache_get(page);
1943 if (!PageUptodate(page))
1944 cFYI(1, "ppw - page not up to date");
1947 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1949 * A writepage() implementation always needs to do either this,
1950 * or re-dirty the page with "redirty_page_for_writepage()" in
1951 * the case of a failure.
1953 * Just unlocking the page will cause the radix tree tag-bits
1954 * to fail to update with the state of the page correctly.
1956 set_page_writeback(page);
1958 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1959 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1961 else if (rc == -EAGAIN)
1962 redirty_page_for_writepage(wbc, page);
1966 SetPageUptodate(page);
1967 end_page_writeback(page);
1968 page_cache_release(page);
/*
 * cifs_writepage - single-page writeback entry point; the work is done by
 * cifs_writepage_locked().
 *
 * NOTE(review): the statements following the call (presumably unlocking
 * the page and returning rc) are not visible in this listing - confirm.
 */
1973 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1975 int rc = cifs_writepage_locked(page, wbc);
/*
 * cifs_write_end - finish a buffered write of copied bytes ending up at
 * pos within page.
 *
 * A page flagged PageChecked is marked up to date and the flag cleared; a
 * page that was fully overwritten (copied == PAGE_CACHE_SIZE) is marked up
 * to date as well. When the page is still not up to date the copied bytes
 * are written synchronously with cifs_write() on this file handle, using
 * the in-page offset pos & (PAGE_CACHE_SIZE - 1). The visible code also
 * calls set_page_dirty(), extends i_size under i_lock when pos has moved
 * past it, and releases the page reference.
 *
 * NOTE(review): the else branch that presumably holds set_page_dirty(),
 * the pid chosen under CIFS_MOUNT_RWPIDFORWARD, the guard before the
 * i_size update, and the unlock and return statements are not visible in
 * this listing - confirm.
 */
1980 static int cifs_write_end(struct file *file, struct address_space *mapping,
1981 loff_t pos, unsigned len, unsigned copied,
1982 struct page *page, void *fsdata)
1985 struct inode *inode = mapping->host;
1986 struct cifsFileInfo *cfile = file->private_data;
1987 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1990 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1993 pid = current->tgid;
1995 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1998 if (PageChecked(page)) {
2000 SetPageUptodate(page);
2001 ClearPageChecked(page);
2002 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2003 SetPageUptodate(page);
2005 if (!PageUptodate(page)) {
2007 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2011 /* this is probably better than directly calling
2012 partialpage_write since in this function the file handle is
2013 known which we might as well leverage */
2014 /* BB check if anything else missing out of ppw
2015 such as updating last write time */
2016 page_data = kmap(page);
2017 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2018 /* if (rc < 0) should we set writebehind rc? */
2025 set_page_dirty(page);
2029 spin_lock(&inode->i_lock);
2030 if (pos > inode->i_size)
2031 i_size_write(inode, pos);
2032 spin_unlock(&inode->i_lock);
2036 page_cache_release(page);
/*
 * cifs_strict_fsync - fsync variant that may also invalidate the page
 * cache.
 *
 * Writes out and waits on the start..end range of the mapping, then, under
 * i_mutex, invalidates the mapping when clientCanCacheRead is not set (a
 * failure there is logged and rc reset to 0) and asks the server to flush
 * the handle with CIFSSMBFlush() unless the mount sets
 * CIFS_MOUNT_NOSSYNC.
 *
 * NOTE(review): the early return after a failed range write and the final
 * return are not visible in this listing - confirm.
 */
2041 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2046 struct cifs_tcon *tcon;
2047 struct cifsFileInfo *smbfile = file->private_data;
2048 struct inode *inode = file->f_path.dentry->d_inode;
2049 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2051 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2054 mutex_lock(&inode->i_mutex);
2058 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2059 file->f_path.dentry->d_name.name, datasync);
2061 if (!CIFS_I(inode)->clientCanCacheRead) {
2062 rc = cifs_invalidate_mapping(inode);
2064 cFYI(1, "rc: %d during invalidate phase", rc);
2065 rc = 0; /* don't care about it in fsync */
2069 tcon = tlink_tcon(smbfile->tlink);
2070 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2071 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2074 mutex_unlock(&inode->i_mutex);
/*
 * cifs_fsync - flush a file to the server.
 *
 * Writes out and waits on the start..end range of the mapping, then, under
 * i_mutex, asks the server to flush the handle with CIFSSMBFlush() unless
 * the mount sets CIFS_MOUNT_NOSSYNC. No page-cache invalidation appears in
 * the visible code of this function.
 *
 * NOTE(review): the early return after a failed range write and the final
 * return are not visible in this listing - confirm.
 */
2078 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2082 struct cifs_tcon *tcon;
2083 struct cifsFileInfo *smbfile = file->private_data;
2084 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2085 struct inode *inode = file->f_mapping->host;
2087 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2090 mutex_lock(&inode->i_mutex);
2094 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2095 file->f_path.dentry->d_name.name, datasync);
2097 tcon = tlink_tcon(smbfile->tlink);
2098 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2099 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2102 mutex_unlock(&inode->i_mutex);
2107 * As file closes, flush all cached write data for this inode checking
2108 * for write behind errors.
/*
 * cifs_flush - write out cached data for the inode.
 * @file: file being flushed
 * @id:   owner token; not referenced in the visible code
 *
 * Only files opened with FMODE_WRITE trigger filemap_write_and_wait(); the
 * resulting rc is logged.
 * NOTE(review): the rc declaration and the return statement are not
 * visible in this listing - confirm.
 */
2110 int cifs_flush(struct file *file, fl_owner_t id)
2112 struct inode *inode = file->f_path.dentry->d_inode;
2115 if (file->f_mode & FMODE_WRITE)
2116 rc = filemap_write_and_wait(inode->i_mapping);
2118 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_write_allocate_pages - fill pages[0..num_pages) with freshly
 * allocated pages (GFP_KERNEL | __GFP_HIGHMEM).
 *
 * NOTE(review): the allocation-failure check and the body of the second
 * loop are not visible in this listing; going by the in-code comment, the
 * loop presumably releases the pages allocated so far before an ENOMEM
 * return - confirm.
 */
2124 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2129 for (i = 0; i < num_pages; i++) {
2130 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2133 * save number of pages we have already allocated and
2134 * return with ENOMEM error
2143 for (i = 0; i < num_pages; i++)
/*
 * get_numpages - number of pages needed for the next write chunk.
 * @wsize:   maximum chunk size
 * @len:     bytes still to be written
 * @cur_len: output parameter; not written in the visible lines
 *           (presumably receives the chunk length - confirm)
 */
2150 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
/* the chunk is limited to wsize, then rounded up to whole pages */
2155 clen = min_t(const size_t, len, wsize);
2156 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * cifs_uncached_marshal_iov - fill the kvec array for an uncached write.
 * @iov:   array to fill; entry 0 is not written here, page i is placed in
 *         entry i + 1
 * @wdata: write request; wdata->bytes is the total payload length
 *
 * Every page is mapped with kmap() and given at most PAGE_SIZE bytes, with
 * the running remainder in bytes decreasing so the last page carries only
 * what is left.
 */
2165 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2168 size_t bytes = wdata->bytes;
2170 /* marshal up the pages into iov array */
2171 for (i = 0; i < wdata->nr_pages; i++) {
2172 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2173 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2174 bytes -= iov[i + 1].iov_len;
/*
 * cifs_uncached_writev_complete - work handler run when an uncached write
 * request finishes.
 *
 * Under i_lock it advances server_eof via cifs_update_eof() and raises
 * i_size when server_eof has passed it. It then signals wdata->done,
 * releases the data pages unless the result is -EAGAIN (that result is
 * resent by the code that waits on wdata->done), and drops one reference
 * on wdata.
 */
2179 cifs_uncached_writev_complete(struct work_struct *work)
2182 struct cifs_writedata *wdata = container_of(work,
2183 struct cifs_writedata, work);
2184 struct inode *inode = wdata->cfile->dentry->d_inode;
2185 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2187 spin_lock(&inode->i_lock);
2188 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2189 if (cifsi->server_eof > inode->i_size)
2190 i_size_write(inode, cifsi->server_eof);
2191 spin_unlock(&inode->i_lock);
2193 complete(&wdata->done);
2195 if (wdata->result != -EAGAIN) {
2196 for (i = 0; i < wdata->nr_pages; i++)
2197 put_page(wdata->pages[i]);
2200 kref_put(&wdata->refcount, cifs_writedata_release);
2203 /* attempt to send write to server, retry on any -EAGAIN errors */
/*
 * cifs_uncached_retry_writev - issue an async write, repeating while the
 * result is -EAGAIN.
 *
 * An invalid handle is reopened with cifs_reopen_file(cfile, false) before
 * cifs_async_writev() is called.
 * NOTE(review): the handling of a failed reopen and the return statement
 * are not visible in this listing - confirm.
 */
2205 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2210 if (wdata->cfile->invalidHandle) {
2211 rc = cifs_reopen_file(wdata->cfile, false);
2215 rc = cifs_async_writev(wdata);
2216 } while (rc == -EAGAIN);
/*
 * cifs_iovec_write - uncached write of a user iovec starting at *poffset.
 *
 * Send phase: the total length comes from iov_length() and is validated by
 * generic_write_checks(). For each chunk, get_numpages() sizes a
 * cifs_writedata, pages are allocated with cifs_write_allocate_pages(),
 * user data is copied in with iov_iter_copy_from_user(), and the request
 * is sent with cifs_uncached_retry_writev(). A request that was sent is
 * queued on wdata_list; otherwise its reference is dropped.
 *
 * Collect phase: requests are waited on in list order with
 * wait_for_completion_killable(); wdata->bytes of finished requests is
 * added to total_written, an -EAGAIN result is resent, and each request is
 * unlinked and released. *poffset is advanced by total_written, the write
 * statistics are updated, and the function returns total_written, or rc
 * when nothing was written.
 *
 * NOTE(review): loop boundaries, several error branches and the updates of
 * offset and len between chunks are not visible in this listing - confirm
 * against the full source.
 */
2222 cifs_iovec_write(struct file *file, const struct iovec *iov,
2223 unsigned long nr_segs, loff_t *poffset)
2225 unsigned long nr_pages, i;
2226 size_t copied, len, cur_len;
2227 ssize_t total_written = 0;
2230 struct cifsFileInfo *open_file;
2231 struct cifs_tcon *tcon;
2232 struct cifs_sb_info *cifs_sb;
2233 struct cifs_writedata *wdata, *tmp;
2234 struct list_head wdata_list;
2238 len = iov_length(iov, nr_segs);
2242 rc = generic_write_checks(file, poffset, &len, 0);
2246 INIT_LIST_HEAD(&wdata_list);
2247 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2248 open_file = file->private_data;
2249 tcon = tlink_tcon(open_file->tlink);
2252 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2253 pid = open_file->pid;
2255 pid = current->tgid;
2257 iov_iter_init(&it, iov, nr_segs, len, 0);
2261 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2262 wdata = cifs_writedata_alloc(nr_pages,
2263 cifs_uncached_writev_complete);
2269 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2276 for (i = 0; i < nr_pages; i++) {
2277 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2278 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2281 iov_iter_advance(&it, copied);
2283 cur_len = save_len - cur_len;
2285 wdata->sync_mode = WB_SYNC_ALL;
2286 wdata->nr_pages = nr_pages;
2287 wdata->offset = (__u64)offset;
2288 wdata->cfile = cifsFileInfo_get(open_file);
2290 wdata->bytes = cur_len;
2291 wdata->marshal_iov = cifs_uncached_marshal_iov;
2292 rc = cifs_uncached_retry_writev(wdata);
2294 kref_put(&wdata->refcount, cifs_writedata_release);
2298 list_add_tail(&wdata->list, &wdata_list);
2304 * If at least one write was successfully sent, then discard any rc
2305 * value from the later writes. If the other write succeeds, then
2306 * we'll end up returning whatever was written. If it fails, then
2307 * we'll get a new rc value from that.
2309 if (!list_empty(&wdata_list))
2313 * Wait for and collect replies for any successful sends in order of
2314 * increasing offset. Once an error is hit or we get a fatal signal
2315 * while waiting, then return without waiting for any more replies.
2318 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2320 /* FIXME: freezable too? */
2321 rc = wait_for_completion_killable(&wdata->done);
2324 else if (wdata->result)
2327 total_written += wdata->bytes;
2329 /* resend call if it's a retryable error */
2330 if (rc == -EAGAIN) {
2331 rc = cifs_uncached_retry_writev(wdata);
2335 list_del_init(&wdata->list);
2336 kref_put(&wdata->refcount, cifs_writedata_release);
2339 if (total_written > 0)
2340 *poffset += total_written;
2342 cifs_stats_bytes_written(tcon, total_written);
2343 return total_written ? total_written : (ssize_t)rc;
/*
 * cifs_user_writev - uncached write entry point for user iovecs.
 *
 * Hands the request to cifs_iovec_write() and flags the inode page cache
 * as stale by setting invalid_mapping.
 * NOTE(review): the condition guarding the invalid_mapping update
 * (presumably written > 0), the update of the file position and the
 * return statement are not visible in this listing - confirm.
 */
2346 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2347 unsigned long nr_segs, loff_t pos)
2350 struct inode *inode;
2352 inode = iocb->ki_filp->f_path.dentry->d_inode;
2355 * BB - optimize the way when signing is disabled. We can drop this
2356 * extra memory-to-memory copying and use iovec buffers for constructing
2360 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2362 CIFS_I(inode)->invalid_mapping = true;
/*
 * cifs_strict_writev - write entry point for strict cache mode.
 *
 * Chooses between the generic page-cache write path and the uncached path
 * in cifs_user_writev() based on the clientCanCacheAll flag of the inode.
 */
2369 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2370 unsigned long nr_segs, loff_t pos)
2372 struct inode *inode;
2374 inode = iocb->ki_filp->f_path.dentry->d_inode;
/* clientCanCacheAll set: the generic cached write path is used */
2376 if (CIFS_I(inode)->clientCanCacheAll)
2377 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2380 * In strict cache mode we need to write the data to the server exactly
2381 * from the pos to pos+len-1 rather than flush all affected pages
2382 * because it may cause a error with mandatory locks on these pages but
2383 * not on the region from pos to ppos+len-1.
2386 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * cifs_readdata_alloc - allocate and initialise a read request.
 * @nr_vecs:  number of struct kvec slots allocated after the structure
 * @complete: work function stored in rdata->work
 *
 * The memory is zeroed by kzalloc(). On success the refcount, both list
 * heads, the completion and the work item are initialised.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably rdata is returned, NULL on allocation failure - confirm.
 */
2389 static struct cifs_readdata *
2390 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2392 struct cifs_readdata *rdata;
/* one allocation holds the structure plus nr_vecs trailing kvecs */
2394 rdata = kzalloc(sizeof(*rdata) +
2395 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2396 if (rdata != NULL) {
2397 kref_init(&rdata->refcount);
2398 INIT_LIST_HEAD(&rdata->list);
2399 INIT_WORK(&rdata->work, complete);
2401 INIT_LIST_HEAD(&rdata->pages);
/*
 * cifs_readdata_release - kref release callback for a read request.
 *
 * Recovers the cifs_readdata from its embedded refcount and drops the
 * reference on rdata->cfile.
 * NOTE(review): any NULL guard around the put and the freeing of rdata are
 * not visible in this listing - confirm.
 */
2407 cifs_readdata_release(struct kref *refcount)
2409 struct cifs_readdata *rdata = container_of(refcount,
2410 struct cifs_readdata, refcount);
2413 cifsFileInfo_put(rdata->cfile);
/*
 * cifs_read_allocate_pages - allocate npages pages and link them onto list
 * through page->lru.
 *
 * Pages come from alloc_page(GFP_KERNEL | __GFP_HIGHMEM). The second loop
 * unlinks every page from the list again.
 * NOTE(review): the failure check, the condition under which the second
 * loop runs (presumably only on error) and the page release inside it are
 * not visible in this listing - confirm.
 */
2419 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2422 struct page *page, *tpage;
2425 for (i = 0; i < npages; i++) {
2426 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2431 list_add(&page->lru, list);
2435 list_for_each_entry_safe(page, tpage, list, lru) {
2436 list_del(&page->lru);
/*
 * cifs_uncached_readdata_release - kref release callback for uncached read
 * requests.
 *
 * Unlinks every page still on rdata->pages and then chains to
 * cifs_readdata_release() for the common teardown.
 * NOTE(review): the statement that releases each page after list_del() is
 * not visible in this listing - confirm.
 */
2444 cifs_uncached_readdata_release(struct kref *refcount)
2446 struct page *page, *tpage;
2447 struct cifs_readdata *rdata = container_of(refcount,
2448 struct cifs_readdata, refcount);
2450 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2451 list_del(&page->lru);
2454 cifs_readdata_release(refcount);
/*
 * cifs_retry_async_readv - issue an async read, repeating while the result
 * is -EAGAIN.
 *
 * An invalid handle is reopened with cifs_reopen_file(cfile, true) before
 * cifs_async_readv() is called.
 * NOTE(review): the handling of a failed reopen and the return statement
 * are not visible in this listing - confirm.
 */
2458 cifs_retry_async_readv(struct cifs_readdata *rdata)
2463 if (rdata->cfile->invalidHandle) {
2464 rc = cifs_reopen_file(rdata->cfile, true);
2468 rc = cifs_async_readv(rdata);
2469 } while (rc == -EAGAIN);
2475 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2476 * @rdata: the readdata response with list of pages holding data
2477 * @iov: vector in which we should copy the data
2478 * @nr_segs: number of segments in vector
2479 * @offset: offset into file of the first iovec
2480 * @copied: used to return the amount of data copied to the iov
2482 * This function copies data from a list of pages in a readdata response into
2483 * an array of iovecs. It will first calculate where the data should go
2484 * based on the info in the readdata and then copy the data into that spot.
/*
 * Implementation notes: an iov_iter over the whole iovec is advanced by
 * rdata->offset - offset so that copying starts at the position of this
 * response. Each page then contributes the smallest of the bytes still
 * remaining, PAGE_SIZE and the space left in the iterator, copied with
 * memcpy_toiovecend(); the page is unlinked from rdata->pages afterwards.
 *
 * NOTE(review): the mapping of each page, the bookkeeping of *copied and
 * remaining, and the release of the page are not visible in this listing -
 * confirm against the full source.
 */
2487 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2488 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2492 size_t pos = rdata->offset - offset;
2493 struct page *page, *tpage;
2494 ssize_t remaining = rdata->bytes;
2495 unsigned char *pdata;
2497 /* set up iov_iter and advance to the correct offset */
2498 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2499 iov_iter_advance(&ii, pos);
2502 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2505 /* copy a whole page or whatever's left */
2506 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2508 /* ...but limit it to whatever space is left in the iov */
2509 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2511 /* go while there's data to be copied and no errors */
2514 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2520 iov_iter_advance(&ii, copy);
2524 list_del(&page->lru);
/*
 * cifs_uncached_readv_complete - work handler run when an uncached read
 * request finishes.
 *
 * When the result is 0 the pages on rdata->pages are walked, then
 * rdata->done is signalled and one reference on rdata is dropped.
 * NOTE(review): the per-page statement inside the walk is not visible in
 * this listing; given the in-code comment it presumably unmaps each page -
 * confirm.
 */
2532 cifs_uncached_readv_complete(struct work_struct *work)
2534 struct cifs_readdata *rdata = container_of(work,
2535 struct cifs_readdata, work);
2537 /* if the result is non-zero then the pages weren't kmapped */
2538 if (rdata->result == 0) {
2541 list_for_each_entry(page, &rdata->pages, lru)
2545 complete(&rdata->done);
2546 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * cifs_uncached_read_marshal_iov - map pages into rdata->iov to receive
 * remaining bytes of read data.
 *
 * Pages are taken from rdata->pages in list order. While at least
 * PAGE_SIZE bytes remain, a page is mapped with kmap() and given a full
 * PAGE_SIZE entry. A final partial page gets an entry of the remaining
 * length and has the rest of the page zeroed. The visible code also
 * unlinks pages from the list with list_del().
 *
 * NOTE(review): the increments of rdata->nr_iov, the branch holding
 * list_del() (presumably pages for which no data remains), the release of
 * such pages and the return value are not visible in this listing -
 * confirm against the full source.
 */
2550 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2551 unsigned int remaining)
2554 struct page *page, *tpage;
2557 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2558 if (remaining >= PAGE_SIZE) {
2559 /* enough data to fill the page */
2560 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2561 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2562 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2563 rdata->nr_iov, page->index,
2564 rdata->iov[rdata->nr_iov].iov_base,
2565 rdata->iov[rdata->nr_iov].iov_len);
2568 remaining -= PAGE_SIZE;
2569 } else if (remaining > 0) {
2570 /* enough for partial page, fill and zero the rest */
2571 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2572 rdata->iov[rdata->nr_iov].iov_len = remaining;
2573 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2574 rdata->nr_iov, page->index,
2575 rdata->iov[rdata->nr_iov].iov_base,
2576 rdata->iov[rdata->nr_iov].iov_len);
2577 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2578 '\0', PAGE_SIZE - remaining);
2583 /* no need to hold page hostage */
2584 list_del(&page->lru);
/*
 * cifs_iovec_read - uncached read into a user iovec starting at *poffset.
 *
 * Send phase: the request is split into chunks of at most cifs_sb->rsize.
 * For each chunk a cifs_readdata sized for the needed pages is allocated,
 * its pages are allocated with cifs_read_allocate_pages(), the request is
 * filled in (file handle reference, offset, byte count, marshal callback)
 * and sent with cifs_retry_async_readv(). A request that was sent is
 * queued on rdata_list; otherwise its reference is dropped.
 *
 * Collect phase: requests are waited on in list order with
 * wait_for_completion_killable(); data of finished requests is copied to
 * the iovec with cifs_readdata_to_iov() and counted in total_read, an
 * -EAGAIN result is resent, and each request is unlinked and released. The
 * read statistics are updated, *poffset is advanced by total_read, and the
 * function returns total_read, or rc when nothing was read.
 *
 * NOTE(review): loop boundaries, several error branches and the updates of
 * offset and len between chunks are not visible in this listing - confirm
 * against the full source.
 */
2593 cifs_iovec_read(struct file *file, const struct iovec *iov,
2594 unsigned long nr_segs, loff_t *poffset)
2597 size_t len, cur_len;
2598 ssize_t total_read = 0;
2599 loff_t offset = *poffset;
2600 unsigned int npages;
2601 struct cifs_sb_info *cifs_sb;
2602 struct cifs_tcon *tcon;
2603 struct cifsFileInfo *open_file;
2604 struct cifs_readdata *rdata, *tmp;
2605 struct list_head rdata_list;
2611 len = iov_length(iov, nr_segs);
2615 INIT_LIST_HEAD(&rdata_list);
2616 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2617 open_file = file->private_data;
2618 tcon = tlink_tcon(open_file->tlink);
2620 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2621 pid = open_file->pid;
2623 pid = current->tgid;
2625 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2626 cFYI(1, "attempting read on write only file instance");
2629 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2630 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2632 /* allocate a readdata struct */
2633 rdata = cifs_readdata_alloc(npages,
2634 cifs_uncached_readv_complete);
2640 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2644 rdata->cfile = cifsFileInfo_get(open_file);
2645 rdata->offset = offset;
2646 rdata->bytes = cur_len;
2648 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2650 rc = cifs_retry_async_readv(rdata);
2653 kref_put(&rdata->refcount,
2654 cifs_uncached_readdata_release);
2658 list_add_tail(&rdata->list, &rdata_list);
2663 /* if at least one read request send succeeded, then reset rc */
2664 if (!list_empty(&rdata_list))
2667 /* the loop below should proceed in the order of increasing offsets */
2669 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2673 /* FIXME: freezable sleep too? */
2674 rc = wait_for_completion_killable(&rdata->done);
2677 else if (rdata->result)
2680 rc = cifs_readdata_to_iov(rdata, iov,
2683 total_read += copied;
2686 /* resend call if it's a retryable error */
2687 if (rc == -EAGAIN) {
2688 rc = cifs_retry_async_readv(rdata);
2692 list_del_init(&rdata->list);
2693 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2696 cifs_stats_bytes_read(tcon, total_read);
2697 *poffset += total_read;
2699 return total_read ? total_read : rc;
/*
 * cifs_user_readv - uncached read entry point for user iovecs; the work is
 * done by cifs_iovec_read() on the file of the kiocb.
 *
 * NOTE(review): the update of the file position and the return statement
 * are not visible in this listing - confirm.
 */
2702 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2703 unsigned long nr_segs, loff_t pos)
2707 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * cifs_strict_readv - read entry point for strict cache mode.
 *
 * Chooses between the generic page-cache read path and the uncached path
 * in cifs_user_readv() based on the clientCanCacheRead flag of the inode.
 */
2714 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2715 unsigned long nr_segs, loff_t pos)
2717 struct inode *inode;
2719 inode = iocb->ki_filp->f_path.dentry->d_inode;
/* clientCanCacheRead set: the generic cached read path is used */
2721 if (CIFS_I(inode)->clientCanCacheRead)
2722 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2725 * In strict cache mode we need to read from the server all the time
2726 * if we don't have level II oplock because the server can delay mtime
2727 * change - so we can't make a decision about inode invalidating.
2728 * And we can also fail with pagereading if there are mandatory locks
2729 * on pages affected by this read but not on the region from pos to
2733 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * cifs_read - synchronous read of up to read_size bytes into read_data,
 * starting at *poffset.
 *
 * Data is requested in chunks of at most rsize (cifs_sb->rsize capped at
 * CIFSMaxBufSize) with CIFSSMBRead(). Each chunk is retried while rc is
 * -EAGAIN, reopening an invalid handle with cifs_reopen_file(open_file,
 * true). current_offset tracks the destination inside read_data and is
 * passed to CIFSSMBRead() by address, alongside &bytes_read and &buf_type.
 *
 * The buffer argument of the CIFSSMBRead() call previously read
 * "¤t_offset": the leading "&curren" of "&current_offset" had been decoded
 * as an HTML character entity. The address-of expression is restored here.
 *
 * NOTE(review): the handling of a NULL private_data, the extra size limit
 * applied when CAP_LARGE_FILES is missing, the body of the error or
 * zero-length branch and the final return are not visible in this listing
 * - confirm against the full source.
 */
2736 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2740 unsigned int bytes_read = 0;
2741 unsigned int total_read;
2742 unsigned int current_read_size;
2744 struct cifs_sb_info *cifs_sb;
2745 struct cifs_tcon *pTcon;
2747 char *current_offset;
2748 struct cifsFileInfo *open_file;
2749 struct cifs_io_parms io_parms;
2750 int buf_type = CIFS_NO_BUFFER;
2754 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2756 /* FIXME: set up handlers for larger reads and/or convert to async */
2757 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2759 if (file->private_data == NULL) {
2764 open_file = file->private_data;
2765 pTcon = tlink_tcon(open_file->tlink);
2767 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2768 pid = open_file->pid;
2770 pid = current->tgid;
2772 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2773 cFYI(1, "attempting read on write only file instance");
2775 for (total_read = 0, current_offset = read_data;
2776 read_size > total_read;
2777 total_read += bytes_read, current_offset += bytes_read) {
2778 current_read_size = min_t(uint, read_size - total_read, rsize);
2780 /* For windows me and 9x we do not want to request more
2781 than it negotiated since it will refuse the read then */
2783 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
2784 current_read_size = min_t(uint, current_read_size,
2788 while (rc == -EAGAIN) {
2789 if (open_file->invalidHandle) {
2790 rc = cifs_reopen_file(open_file, true);
2794 io_parms.netfid = open_file->netfid;
2796 io_parms.tcon = pTcon;
2797 io_parms.offset = *poffset;
2798 io_parms.length = current_read_size;
2799 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2800 &current_offset, &buf_type);
2802 if (rc || (bytes_read == 0)) {
2810 cifs_stats_bytes_read(pTcon, total_read);
2811 *poffset += bytes_read;
2819 * If the page is mmap'ed into a process' page tables, then we need to make
2820 * sure that it doesn't change while being written back.
/*
 * cifs_page_mkwrite - page_mkwrite handler referenced by cifs_file_vm_ops.
 *
 * Picks up the faulting page from vmf->page and returns VM_FAULT_LOCKED.
 * NOTE(review): the statement that operates on the page is not visible in
 * this excerpt; the return code suggests the page is handed back locked -
 * confirm.
 */
2823 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2825 struct page *page = vmf->page;
2828 return VM_FAULT_LOCKED;
/*
 * VM operations assigned to vma->vm_ops by cifs_file_mmap() and
 * cifs_file_strict_mmap(): page faults use filemap_fault and the
 * page_mkwrite hook is cifs_page_mkwrite.
 */
2831 static struct vm_operations_struct cifs_file_vm_ops = {
2832 .fault = filemap_fault,
2833 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap entry point for strict cache mode.
 *
 * When the inode does not have clientCanCacheRead set, the mapping is first
 * invalidated with cifs_invalidate_mapping().  generic_file_mmap() then sets
 * up the mapping and vma->vm_ops is pointed at cifs_file_vm_ops.
 *
 * NOTE(review): the error checks around these calls and the return
 * statement are not visible in this excerpt.
 */
2836 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2839 struct inode *inode = file->f_path.dentry->d_inode;
2843 if (!CIFS_I(inode)->clientCanCacheRead) {
2844 rc = cifs_invalidate_mapping(inode);
2849 rc = generic_file_mmap(file, vma);
2851 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - mmap entry point for the non-strict case.
 *
 * Calls cifs_revalidate_file() first; the debug message below is emitted
 * with its error code (the guarding test is not visible in this excerpt).
 * generic_file_mmap() then sets up the mapping and vma->vm_ops is pointed
 * at cifs_file_vm_ops.
 */
2856 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2861 rc = cifs_revalidate_file(file);
2863 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2867 rc = generic_file_mmap(file, vma);
2869 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - work handler run when an async read has finished.
 *
 * @work is embedded in a struct cifs_readdata.  Every page queued on
 * rdata->pages is removed from that list and passed to lru_cache_add_file().
 * When rdata->result is 0 the page is flushed from the dcache, marked
 * uptodate and offered to fscache via cifs_readpage_to_fscache().  The page
 * reference is then dropped.  After the loop the reference on rdata itself
 * is dropped with kref_put().
 *
 * NOTE(review): some per-page statements between the visible lines are
 * missing from this excerpt.
 */
2875 cifs_readv_complete(struct work_struct *work)
2877 struct cifs_readdata *rdata = container_of(work,
2878 struct cifs_readdata, work);
2879 struct page *page, *tpage;
2881 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2882 list_del(&page->lru);
2883 lru_cache_add_file(page);
2885 if (rdata->result == 0) {
2887 flush_dcache_page(page);
2888 SetPageUptodate(page);
2893 if (rdata->result == 0)
2894 cifs_readpage_to_fscache(rdata->mapping->host, page);
2896 page_cache_release(page);
2898 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpages_marshal_iov - build the iovec array for a readpages reply.
 *
 * Installed as rdata->marshal_iov by cifs_readpages().  Walks rdata->pages
 * and decides per page, based on @remaining:
 *   - at least PAGE_CACHE_SIZE left: map the page and add a full-page iovec;
 *   - some data left but less than a page: map the page, add an iovec of
 *     @remaining bytes and zero the tail of the page;
 *   - nothing left and the page index lies beyond eof_index (derived from
 *     the server_eof stored in the inode): zero the page, mark it uptodate
 *     and release it;
 *   - otherwise: take the page off the list and release it untouched.
 *
 * @rdata:     read request whose pages and iov array are processed
 * @remaining: bytes still to be distributed over the pages
 *
 * NOTE(review): len accumulates the bytes covered by full-page iovecs in the
 * visible lines; the return statement and some bookkeeping lines are not
 * part of this excerpt.
 */
2902 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2905 struct page *page, *tpage;
2909 /* determine the eof that the server (probably) has */
2910 eof = CIFS_I(rdata->mapping->host)->server_eof;
2911 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2912 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2915 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2916 if (remaining >= PAGE_CACHE_SIZE) {
2917 /* enough data to fill the page */
2918 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2919 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2920 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2921 rdata->nr_iov, page->index,
2922 rdata->iov[rdata->nr_iov].iov_base,
2923 rdata->iov[rdata->nr_iov].iov_len);
2925 len += PAGE_CACHE_SIZE;
2926 remaining -= PAGE_CACHE_SIZE;
2927 } else if (remaining > 0) {
2928 /* enough for partial page, fill and zero the rest */
2929 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2930 rdata->iov[rdata->nr_iov].iov_len = remaining;
2931 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2932 rdata->nr_iov, page->index,
2933 rdata->iov[rdata->nr_iov].iov_base,
2934 rdata->iov[rdata->nr_iov].iov_len);
2935 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2936 '\0', PAGE_CACHE_SIZE - remaining);
2940 } else if (page->index > eof_index) {
2942 * The VFS will not try to do readahead past the
2943 * i_size, but it's possible that we have outstanding
2944 * writes with gaps in the middle and the i_size hasn't
2945 * caught up yet. Populate those with zeroed out pages
2946 * to prevent the VFS from repeatedly attempting to
2947 * fill them until the writes are flushed.
2949 zero_user(page, 0, PAGE_CACHE_SIZE);
2950 list_del(&page->lru);
2951 lru_cache_add_file(page);
2952 flush_dcache_page(page);
2953 SetPageUptodate(page);
2955 page_cache_release(page);
2957 /* no need to hold page hostage */
2958 list_del(&page->lru);
2959 lru_cache_add_file(page);
2961 page_cache_release(page);
/*
 * cifs_readpages - readpages address space operation (see cifs_addr_ops).
 *
 * After an early exit for an rsize below PAGE_CACHE_SIZE and an attempt to
 * satisfy the pages from fscache, the remaining pages on @page_list are
 * grouped into runs of consecutive indexes whose total size stays within
 * rsize.  Each run is moved onto a private list, attached to a freshly
 * allocated struct cifs_readdata and submitted with
 * cifs_retry_async_readv(); cifs_readv_complete() is the completion work
 * and cifs_readpages_marshal_iov() the iovec builder of every request.
 *
 * @file:      open file the read is issued on
 * @mapping:   address space the pages are inserted into
 * @page_list: pages to fill, in declining index order (see comment below)
 * @num_pages: number of pages on @page_list, only logged in the visible code
 *
 * NOTE(review): expected_index is declared unsigned int and compared with
 * page->index, which is printed with %lu elsewhere in this file.  If the
 * index type is wider than unsigned int, the discontinuity test could
 * misfire for very large indexes - confirm and consider a matching type.
 *
 * NOTE(review): no statement is visible between the spin_lock() and
 * spin_unlock() of cifs_file_list_lock ahead of the cifsFileInfo_get()
 * call; the pair looks redundant - confirm.
 *
 * NOTE(review): several lines (declarations, error tests, break/return
 * statements) are missing from this excerpt, so return values are not
 * documented here.
 */
2968 static int cifs_readpages(struct file *file, struct address_space *mapping,
2969 struct list_head *page_list, unsigned num_pages)
2972 struct list_head tmplist;
2973 struct cifsFileInfo *open_file = file->private_data;
2974 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2975 unsigned int rsize = cifs_sb->rsize;
2979 * Give up immediately if rsize is too small to read an entire page.
2980 * The VFS will fall back to readpage. We should never reach this
2981 * point however since we set ra_pages to 0 when the rsize is smaller
2982 * than a cache page.
2984 if (unlikely(rsize < PAGE_CACHE_SIZE))
2988 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2989 * immediately if the cookie is negative
2991 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
2996 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2997 pid = open_file->pid;
2999 pid = current->tgid;
3002 INIT_LIST_HEAD(&tmplist);
3004 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3005 mapping, num_pages);
3008 * Start with the page at end of list and move it to private
3009 * list. Do the same with any following pages until we hit
3010 * the rsize limit, hit an index discontinuity, or run out of
3011 * pages. Issue the async read and then start the loop again
3012 * until the list is empty.
3014 * Note that list order is important. The page_list is in
3015 * the order of declining indexes. When we put the pages in
3016 * the rdata->pages, then we want them in increasing order.
3018 while (!list_empty(page_list)) {
3019 unsigned int bytes = PAGE_CACHE_SIZE;
3020 unsigned int expected_index;
3021 unsigned int nr_pages = 1;
3023 struct page *page, *tpage;
3024 struct cifs_readdata *rdata;
3026 page = list_entry(page_list->prev, struct page, lru);
3029 * Lock the page and put it in the cache. Since no one else
3030 * should have access to this page, we're safe to simply set
3031 * PG_locked without checking it first.
3033 __set_page_locked(page);
3034 rc = add_to_page_cache_locked(page, mapping,
3035 page->index, GFP_KERNEL);
3037 /* give up if we can't stick it in the cache */
3039 __clear_page_locked(page);
3043 /* move first page to the tmplist */
3044 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3045 list_move_tail(&page->lru, &tmplist);
3047 /* now try and add more pages onto the request */
3048 expected_index = page->index + 1;
3049 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3050 /* discontinuity ? */
3051 if (page->index != expected_index)
3054 /* would this page push the read over the rsize? */
3055 if (bytes + PAGE_CACHE_SIZE > rsize)
3058 __set_page_locked(page);
3059 if (add_to_page_cache_locked(page, mapping,
3060 page->index, GFP_KERNEL)) {
3061 __clear_page_locked(page);
3064 list_move_tail(&page->lru, &tmplist);
3065 bytes += PAGE_CACHE_SIZE;
3070 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3072 /* best to give up if we're out of mem */
3073 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3074 list_del(&page->lru);
3075 lru_cache_add_file(page);
3077 page_cache_release(page);
3083 spin_lock(&cifs_file_list_lock);
3084 spin_unlock(&cifs_file_list_lock);
3085 rdata->cfile = cifsFileInfo_get(open_file);
3086 rdata->mapping = mapping;
3087 rdata->offset = offset;
3088 rdata->bytes = bytes;
3090 rdata->marshal_iov = cifs_readpages_marshal_iov;
3091 list_splice_init(&tmplist, &rdata->pages);
3093 rc = cifs_retry_async_readv(rdata);
3095 list_for_each_entry_safe(page, tpage, &rdata->pages,
3097 list_del(&page->lru);
3098 lru_cache_add_file(page);
3100 page_cache_release(page);
3102 kref_put(&rdata->refcount, cifs_readdata_release);
3106 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpage_worker - fill one page, from fscache or from the server.
 *
 * Called by cifs_readpage() and cifs_write_begin().  The page is first
 * looked up in fscache.  Otherwise a page reference is taken, the page is
 * mapped with kmap() and cifs_read() is asked for PAGE_CACHE_SIZE bytes at
 * *poffset.  After the read the inode atime is refreshed, a short read is
 * padded with zeroes up to PAGE_CACHE_SIZE, the page is flushed from the
 * dcache, marked uptodate and offered to fscache.  The page reference is
 * dropped before returning.
 *
 * NOTE(review): the tests on rc after the fscache lookup and after
 * cifs_read(), plus the exit path, are not visible in this excerpt.
 */
3112 static int cifs_readpage_worker(struct file *file, struct page *page,
3118 /* Is the page cached? */
3119 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3123 page_cache_get(page);
3124 read_data = kmap(page);
3125 /* for reads over a certain size could initiate async read ahead */
3127 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3132 cFYI(1, "Bytes read %d", rc);
3134 file->f_path.dentry->d_inode->i_atime =
3135 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* rc holds the byte count here; zero whatever the read did not cover */
3137 if (PAGE_CACHE_SIZE > rc)
3138 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3140 flush_dcache_page(page);
3141 SetPageUptodate(page);
3143 /* send this page to the cache */
3144 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3150 page_cache_release(page);
/*
 * cifs_readpage - readpage address space operation.
 *
 * Converts page->index into a byte offset, tests file->private_data against
 * NULL and delegates the actual work to cifs_readpage_worker().
 *
 * NOTE(review): offset is a loff_t but is cast to int for the debug message,
 * so offsets that do not fit in an int are logged truncated; the value
 * passed to cifs_readpage_worker() is not affected.
 */
3156 static int cifs_readpage(struct file *file, struct page *page)
3158 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3164 if (file->private_data == NULL) {
3170 cFYI(1, "readpage %p at offset %d 0x%x",
3171 page, (int)offset, (int)offset);
3173 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - check whether any open handle of the inode can write.
 *
 * Walks cifs_inode->openFileList while holding cifs_file_list_lock and
 * tests each entry for FMODE_WRITE.  The lock is dropped both on a match
 * and after the walk completes.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * is_size_safe_to_change() treats a true result as open for write.
 */
3181 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3183 struct cifsFileInfo *open_file;
3185 spin_lock(&cifs_file_list_lock);
3186 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3187 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3188 spin_unlock(&cifs_file_list_lock);
3192 spin_unlock(&cifs_file_list_lock);
3196 /* We do not want to update the file size from server for inodes
3197 open for write - to avoid races with writepage extending
3198 the file - in the future we could consider allowing
3199 refreshing the inode only on increases in the file size
3200 but this is tricky to do without racing with writebehind
3201 page caching in the current Linux kernel design */
/*
 * is_size_safe_to_change - decide whether a size reported for the inode may
 * be applied locally.
 *
 * When the inode is open for write (is_inode_writable()), the decision
 * depends on the mount: with CIFS_MOUNT_DIRECT_IO set there is a dedicated
 * branch, otherwise the current i_size is compared with @end_of_file.
 *
 * @cifsInode:   inode whose size is in question
 * @end_of_file: candidate file size
 *
 * NOTE(review): the return statements of the individual branches are not
 * visible in this excerpt.
 */
3202 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3207 if (is_inode_writable(cifsInode)) {
3208 /* This inode is open for write at least once */
3209 struct cifs_sb_info *cifs_sb;
3211 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3212 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3213 /* since no page cache to corrupt on directio
3214 we can change size safely */
3218 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - write_begin address space operation.
 *
 * Obtains the page cache page covering @pos through
 * grab_cache_page_write_begin() and then decides whether its current
 * contents must be read before the write:
 *   - an uptodate page and a write of exactly PAGE_CACHE_SIZE bytes each
 *     have their own early test;
 *   - with clientCanCacheRead set, a page that starts at or beyond i_size,
 *     or a write that starts at the page boundary and reaches i_size, gets
 *     zero_user_segments() applied and is flagged with SetPageChecked();
 *   - otherwise, unless the file was opened O_WRONLY, the page is read with
 *     cifs_readpage_worker() and a failure of that read is ignored on
 *     purpose (see the comment next to the call).
 *
 * @file:    open file being written
 * @mapping: address space of the inode
 * @pos:     byte position of the write
 * @len:     length of the write in bytes
 * @flags:   passed to grab_cache_page_write_begin()
 * @pagep:   output parameter (its assignment is not visible in this excerpt)
 * @fsdata:  not referenced in the visible lines
 *
 * NOTE(review): page_start is computed with PAGE_MASK while index and
 * offset use the PAGE_CACHE_* constants; presumably equivalent, but worth
 * making consistent - confirm.
 */
3226 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3227 loff_t pos, unsigned len, unsigned flags,
3228 struct page **pagep, void **fsdata)
3230 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3231 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3232 loff_t page_start = pos & PAGE_MASK;
3237 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3239 page = grab_cache_page_write_begin(mapping, index, flags);
3245 if (PageUptodate(page))
3249 * If we write a full page it will be up to date, no need to read from
3250 * the server. If the write is short, we'll end up doing a sync write
3253 if (len == PAGE_CACHE_SIZE)
3257 * optimize away the read when we have an oplock, and we're not
3258 * expecting to use any of the data we'd be reading in. That
3259 * is, when the page lies beyond the EOF, or straddles the EOF
3260 * and the write will cover all of the existing data.
3262 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3263 i_size = i_size_read(mapping->host);
3264 if (page_start >= i_size ||
3265 (offset == 0 && (pos + len) >= i_size)) {
3266 zero_user_segments(page, 0, offset,
3270 * PageChecked means that the parts of the page
3271 * to which we're not writing are considered up
3272 * to date. Once the data is copied to the
3273 * page, it can be set uptodate.
3275 SetPageChecked(page);
3280 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3282 * might as well read a page, it is fast enough. If we get
3283 * an error, we don't need to return it. cifs_write_end will
3284 * do a sync write instead since PG_uptodate isn't set.
3286 cifs_readpage_worker(file, page, &page_start);
3288 /* we could try using another file handle if there is one -
3289 but how would we lock it to prevent close of that handle
3290 racing with this read? In any case
3291 this will be written out by write_end so is fine */
/*
 * cifs_release_page - releasepage address space operation.
 *
 * A page with PagePrivate set takes a separate branch (its body is not
 * visible in this excerpt); every other page is passed to
 * cifs_fscache_release_page(), whose result is returned.
 */
3298 static int cifs_release_page(struct page *page, gfp_t gfp)
3300 if (PagePrivate(page))
3303 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - invalidatepage address space operation.
 *
 * Looks up the cifs inode behind page->mapping->host and calls
 * cifs_fscache_invalidate_page() for the page.
 *
 * NOTE(review): @offset is not used in the visible lines; a condition on it
 * may sit on a line missing from this excerpt - confirm.
 */
3306 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3308 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3311 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - launder_page address space operation.
 *
 * Builds a writeback_control in WB_SYNC_ALL mode whose range is exactly the
 * bytes of this page (page_offset(page) up to the last byte of the page).
 * If clear_page_dirty_for_io() reports the page was dirty, it is written
 * with cifs_writepage_locked().  The page is then invalidated in fscache.
 */
3314 static int cifs_launder_page(struct page *page)
3317 loff_t range_start = page_offset(page);
3318 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3319 struct writeback_control wbc = {
3320 .sync_mode = WB_SYNC_ALL,
3322 .range_start = range_start,
3323 .range_end = range_end,
3326 cFYI(1, "Launder page: %p", page);
3328 if (clear_page_dirty_for_io(page))
3329 rc = cifs_writepage_locked(page, &wbc);
3331 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work handler for an oplock break on an open file.
 *
 * @work is embedded in a struct cifsFileInfo.  For a regular file the
 * handler calls break_lease() (O_RDONLY when clientCanCacheRead is set, the
 * O_WRONLY call is on the alternative path), starts writeback with
 * filemap_fdatawrite() and, when clientCanCacheRead is 0, waits with
 * filemap_fdatawait(), records that result via mapping_set_error() and
 * calls invalidate_remote_inode().  cifs_push_locks() is called afterwards
 * and an error message is logged with its result.  Unless
 * oplock_break_cancelled is set, the oplock release is sent with
 * CIFSSMBLock(LOCKING_ANDX_OPLOCK_RELEASE); the existing comment explains
 * why it is skipped otherwise.
 *
 * NOTE(review): cinode is derived from inode before the test for a NULL
 * inode, and cinode->clientCanCacheRead is read again in the CIFSSMBLock()
 * call, which appears to lie outside that test.  If inode can really be
 * NULL here this is unsafe - confirm.
 *
 * NOTE(review): when clientCanCacheRead is 0, the rc of
 * filemap_fdatawrite() is overwritten by filemap_fdatawait() before it is
 * recorded.
 */
3335 void cifs_oplock_break(struct work_struct *work)
3337 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3339 struct inode *inode = cfile->dentry->d_inode;
3340 struct cifsInodeInfo *cinode = CIFS_I(inode);
3343 if (inode && S_ISREG(inode->i_mode)) {
3344 if (cinode->clientCanCacheRead)
3345 break_lease(inode, O_RDONLY);
3347 break_lease(inode, O_WRONLY);
3348 rc = filemap_fdatawrite(inode->i_mapping);
3349 if (cinode->clientCanCacheRead == 0) {
3350 rc = filemap_fdatawait(inode->i_mapping);
3351 mapping_set_error(inode->i_mapping, rc);
3352 invalidate_remote_inode(inode);
3354 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3357 rc = cifs_push_locks(cfile);
3359 cERROR(1, "Push locks rc = %d", rc);
3362 * releasing stale oplock after recent reconnect of smb session using
3363 * a now incorrect file handle is not a data integrity issue but do
3364 * not bother sending an oplock release if session to server still is
3365 * disconnected since oplock already released by the server
3367 if (!cfile->oplock_break_cancelled) {
3368 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
3369 current->tgid, 0, 0, 0, 0,
3370 LOCKING_ANDX_OPLOCK_RELEASE, false,
3371 cinode->clientCanCacheRead ? 1 : 0);
3372 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Address space operations table that includes the multi-page read path
 * (cifs_readpages) next to the single-page cifs_readpage.  Dirtying uses
 * the generic __set_page_dirty_nobuffers; all other hooks are the cifs
 * implementations from this file.
 */
3376 const struct address_space_operations cifs_addr_ops = {
3377 .readpage = cifs_readpage,
3378 .readpages = cifs_readpages,
3379 .writepage = cifs_writepage,
3380 .writepages = cifs_writepages,
3381 .write_begin = cifs_write_begin,
3382 .write_end = cifs_write_end,
3383 .set_page_dirty = __set_page_dirty_nobuffers,
3384 .releasepage = cifs_release_page,
3385 .invalidatepage = cifs_invalidate_page,
3386 .launder_page = cifs_launder_page,
3390 * cifs_readpages requires the server to support a buffer large enough to
3391 * contain the header plus one complete page of data. Otherwise, we need
3392 * to leave cifs_readpages out of the address space operations.
/*
 * Variant of cifs_addr_ops without a .readpages entry; every other hook is
 * identical.  Reads on mappings using this table go through cifs_readpage
 * one page at a time.
 */
3394 const struct address_space_operations cifs_addr_ops_smallbuf = {
3395 .readpage = cifs_readpage,
3396 .writepage = cifs_writepage,
3397 .writepages = cifs_writepages,
3398 .write_begin = cifs_write_begin,
3399 .write_end = cifs_write_end,
3400 .set_page_dirty = __set_page_dirty_nobuffers,
3401 .releasepage = cifs_release_page,
3402 .invalidatepage = cifs_invalidate_page,
3403 .launder_page = cifs_launder_page,