4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Translate the access-mode bits of a set of open flags (flags & O_ACCMODE)
 * into the access mask requested from the server on open.
 *
 * O_RDONLY, O_WRONLY and O_RDWR are each tested in turn.  For O_RDWR the
 * result is GENERIC_READ | GENERIC_WRITE; GENERIC_ALL is deliberately not
 * requested because it can cause an unnecessary access-denied on create.
 * The final return builds a mask from individual rights (READ_CONTROL,
 * FILE_WRITE_ATTRIBUTES, FILE_READ_ATTRIBUTES, FILE_WRITE_EA,
 * FILE_APPEND_DATA, FILE_WRITE_DATA, ...).
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Build the SMB_O_* flag word used for a POSIX-extensions open from the
 * caller's open flags.
 *
 * The access mode (flags & O_ACCMODE) selects SMB_O_RDONLY, SMB_O_WRONLY or
 * SMB_O_RDWR.  SMB_O_CREAT, SMB_O_EXCL, SMB_O_TRUNC, SMB_O_SYNC and
 * SMB_O_DIRECT are then OR-ed in (presumably under the matching O_* tests),
 * O_DIRECTORY adds SMB_O_DIRECTORY and O_NOFOLLOW adds SMB_O_NOFOLLOW.
 * O_DSYNC is treated conservatively as implying O_SYNC.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * Choose the create disposition for an open from the O_CREAT, O_EXCL and
 * O_TRUNC bits of flags.  The tests are ordered from most to least specific:
 * O_CREAT|O_EXCL first, then O_CREAT|O_TRUNC (FILE_OVERWRITE_IF), then
 * O_CREAT alone, then O_TRUNC alone (FILE_OVERWRITE).  The complete mapping
 * is spelled out in the "open flag mapping table" comment in cifs_nt_open().
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * Open full_path with the POSIX-extensions create call (CIFSPOSIXCreate).
 *
 * @full_path: path of the file to open
 * @pinode:    in/out inode pointer; when *pinode is NULL a new inode is
 *             obtained with cifs_iget() from the returned attributes
 * @sb:        superblock, used to find the cifs_sb_info and tcon link
 * @mode:      creation mode; the current umask is cleared from it first
 * @f_flags:   open flags, converted with cifs_posix_convert_flags()
 * @poplock:   oplock value, passed through to CIFSPOSIXCreate
 * @pnetfid:   file handle, passed through to CIFSPOSIXCreate
 * @xid:       transaction id of the calling operation
 *
 * A zeroed FILE_UNIX_BASIC_INFO buffer receives the server's reply and the
 * tcon link reference is dropped as soon as the call returns.  If the reply
 * carries Type == -1 there is no usable attribute data and the caller is
 * expected to query path info itself.  Otherwise the reply is converted to
 * a cifs_fattr and used to set up the inode.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open full_path with the non-POSIX (NT-style or legacy) SMB open call.
 *
 * The desired access comes from cifs_convert_flags(f_flags) and the
 * disposition from cifs_get_disposition(f_flags).  A FILE_ALL_INFO buffer is
 * allocated to receive the file metadata returned by the open.  When
 * backup_cred() is true for this mount, CREATE_OPEN_BACKUP_INTENT is added
 * to the create options.  Servers advertising CAP_NT_SMBS are opened with
 * CIFSSMBOpen(); the other visible call is SMBLegacyOpen().  Afterwards the
 * inode is refreshed with cifs_get_inode_info_unix() or
 * cifs_get_inode_info() (the latter is handed the returned buffer).
 *
 * The file handle and oplock are written through pnetfid and poplock by the
 * open calls.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
178 int create_options = CREATE_NOT_DIR;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
/*
 * NOTE(review): the legacy call below passes the literal CREATE_NOT_DIR,
 * not create_options, so CREATE_OPEN_BACKUP_INTENT set above is not
 * forwarded on this path - confirm that this is intended.
 */
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialise the per-open cifsFileInfo for an opened file and
 * attach it to file->private_data.
 *
 * @fileHandle: server file handle stored in ->netfid
 * @file:       the struct file being opened
 * @tlink:      tcon link; an extra reference is taken with cifs_get_tlink()
 * @oplock:     oplock level applied to the inode via cifs_set_oplock_level()
 *
 * The new structure starts with a reference count of 1, records the opener's
 * tgid and fsuid, and pins the dentry with dget().  Under
 * cifs_file_list_lock it is linked on the tcon's openFileList and on the
 * inode's openFileList; instances opened with FMODE_READ are inserted at the
 * head of the inode list, and the other visible insertion is at the tail.
 * The inode's can_cache_brlcks is set from clientCanCacheAll after the
 * oplock level has been applied.
 */
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
281 file->private_data = pCifsFile;
/* Forward declaration: defined further down, but already used by cifsFileInfo_put(). */
285 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
288 * Release a reference on the file private data. This may involve closing
289 * the filehandle out on the server. Must be called without holding
290 * cifs_file_list_lock.
/*
 * Drop one reference on cifs_file and tear it down when the count reaches
 * zero.
 *
 * The count is decremented under cifs_file_list_lock; while it is still
 * positive only the lock is released.  Otherwise the structure is unlinked
 * from the inode (flist) and tcon (tlist) lists.  If that leaves the inode
 * with no open instances, strict-IO mounts (CIFS_MOUNT_STRICT_IO) mark the
 * page cache mapping invalid and the inode's oplock level is reset to 0.
 *
 * After the spinlock is released, pending oplock-break work is cancelled
 * synchronously, the handle is closed with CIFSSMBClose() provided the tcon
 * does not need a reconnect and the handle is still valid, every byte-range
 * lock record on ->llist is unlinked under lock_mutex (waking its waiters
 * through cifs_del_lock_waiters()), and the tlink and dentry references are
 * dropped.
 */
292 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
294 struct inode *inode = cifs_file->dentry->d_inode;
295 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
296 struct cifsInodeInfo *cifsi = CIFS_I(inode);
297 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
298 struct cifsLockInfo *li, *tmp;
300 spin_lock(&cifs_file_list_lock);
301 if (--cifs_file->count > 0) {
302 spin_unlock(&cifs_file_list_lock);
306 /* remove it from the lists */
307 list_del(&cifs_file->flist);
308 list_del(&cifs_file->tlist);
310 if (list_empty(&cifsi->openFileList)) {
311 cFYI(1, "closing last open instance for inode %p",
312 cifs_file->dentry->d_inode);
314 /* in strict cache mode we need to invalidate the mapping on the last
315 close because it may cause an error when we open this file
316 again and get at least level II oplock */
317 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
318 CIFS_I(inode)->invalid_mapping = true;
320 cifs_set_oplock_level(cifsi, 0);
322 spin_unlock(&cifs_file_list_lock);
324 cancel_work_sync(&cifs_file->oplock_break);
326 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
330 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
334 /* Delete any outstanding lock records. We'll lose them when the file
337 mutex_lock(&cifsi->lock_mutex);
338 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
339 list_del(&li->llist);
340 cifs_del_lock_waiters(li);
343 mutex_unlock(&cifsi->lock_mutex);
345 cifs_put_tlink(cifs_file->tlink);
346 dput(cifs_file->dentry);
/*
 * Open handler for regular files.
 *
 * Resolves the tcon for the superblock, builds the full path from the
 * dentry and then tries, in order:
 *   1. a POSIX open via cifs_posix_open(), attempted only when the tcon has
 *      not been marked broken_posix_open, has unix_ext set, the session
 *      advertises CAP_UNIX and the server reports
 *      CIFS_UNIX_POSIX_PATH_OPS_CAP;
 *   2. cifs_nt_open() when the POSIX open was not successful.
 * -EINVAL or -EOPNOTSUPP from the POSIX open marks the tcon as
 * broken_posix_open (logging an error first when the server NOS string is
 * known).
 *
 * On success a cifsFileInfo is created with cifs_new_fileinfo(); if that
 * fails the just-opened handle is closed again with CIFSSMBClose().  The
 * fscache cookie is set for the inode, and when the open created the file
 * (CIFS_CREATE_ACTION) through the non-POSIX path on a unix_ext tcon, the
 * mode is sent separately with CIFSSMBUnixSetFileInfo() because it could
 * not be set earlier.  The tlink reference is dropped before returning.
 *
 * NOTE(review): the (rc != -EOPNOTSUPP) term in the third error test is
 * redundant, since -EOPNOTSUPP is already consumed by the preceding branch.
 */
350 int cifs_open(struct inode *inode, struct file *file)
355 struct cifs_sb_info *cifs_sb;
356 struct cifs_tcon *tcon;
357 struct tcon_link *tlink;
358 struct cifsFileInfo *pCifsFile = NULL;
359 char *full_path = NULL;
360 bool posix_open_ok = false;
365 cifs_sb = CIFS_SB(inode->i_sb);
366 tlink = cifs_sb_tlink(cifs_sb);
369 return PTR_ERR(tlink);
371 tcon = tlink_tcon(tlink);
373 full_path = build_path_from_dentry(file->f_path.dentry);
374 if (full_path == NULL) {
379 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
380 inode, file->f_flags, full_path);
382 if (tcon->ses->server->oplocks)
387 if (!tcon->broken_posix_open && tcon->unix_ext &&
388 (tcon->ses->capabilities & CAP_UNIX) &&
389 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
390 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
391 /* can not refresh inode info since size could be stale */
392 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
393 cifs_sb->mnt_file_mode /* ignored */,
394 file->f_flags, &oplock, &netfid, xid);
396 cFYI(1, "posix open succeeded");
397 posix_open_ok = true;
398 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
399 if (tcon->ses->serverNOS)
400 cERROR(1, "server %s of type %s returned"
401 " unexpected error on SMB posix open"
402 ", disabling posix open support."
403 " Check if server update available.",
404 tcon->ses->serverName,
405 tcon->ses->serverNOS);
406 tcon->broken_posix_open = true;
407 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
408 (rc != -EOPNOTSUPP)) /* path not found or net err */
410 /* else fallthrough to retry open the old way on network i/o
414 if (!posix_open_ok) {
415 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
416 file->f_flags, &oplock, &netfid, xid);
421 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
422 if (pCifsFile == NULL) {
423 CIFSSMBClose(xid, tcon, netfid);
428 cifs_fscache_set_inode_cookie(inode, file);
430 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
431 /* time to set mode which we can not set earlier due to
432 problems creating new read-only files */
433 struct cifs_unix_set_info_args args = {
434 .mode = inode->i_mode,
437 .ctime = NO_CHANGE_64,
438 .atime = NO_CHANGE_64,
439 .mtime = NO_CHANGE_64,
442 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
449 cifs_put_tlink(tlink);
/*
 * Hook called by cifs_reopen_file() after a handle has been re-established.
 * NOTE(review): the only body content visible here is a to-do comment; no
 * code that actually re-sends locks is visible in this function.
 */
453 /* Try to reacquire byte range locks that were released when session */
454 /* to server was lost */
455 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
459 /* BB list all locks open on this file and relock */
/*
 * Re-establish the server handle of an already-open file whose handle was
 * invalidated (for example after a reconnect).
 *
 * @pCifsFile: per-open state whose ->netfid is to be replaced
 * @can_flush: whether dirty pages may be written back and the inode
 *             refreshed after the reopen; callers already in the writeback
 *             path must not flush (see the comment near the end of the body)
 *
 * Everything up to the handle swap runs under pCifsFile->fh_mutex.  If the
 * handle is not marked invalid there is nothing to do.  The path is rebuilt
 * from the dentry without taking the rename semaphore.  When the server
 * supports POSIX path operations a POSIX reopen is tried first with
 * O_CREAT, O_EXCL and O_TRUNC masked off, since those already took effect
 * on the original open; otherwise, or as a fallback, CIFSSMBOpen() is
 * called with disposition FILE_OPEN and no metadata buffer, because the
 * server's idea of the file size may be stale while write-behind data is
 * pending.
 *
 * On success the new netfid is stored and invalidHandle cleared before the
 * mutex is released.  Pagecache data is then written out with
 * filemap_write_and_wait() and the inode refreshed with one of the
 * cifs_get_inode_info*() helpers, the oplock level is applied, and
 * cifs_relock_file() is called.
 */
464 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
469 struct cifs_sb_info *cifs_sb;
470 struct cifs_tcon *tcon;
471 struct cifsInodeInfo *pCifsInode;
473 char *full_path = NULL;
475 int disposition = FILE_OPEN;
476 int create_options = CREATE_NOT_DIR;
480 mutex_lock(&pCifsFile->fh_mutex);
481 if (!pCifsFile->invalidHandle) {
482 mutex_unlock(&pCifsFile->fh_mutex);
488 inode = pCifsFile->dentry->d_inode;
489 cifs_sb = CIFS_SB(inode->i_sb);
490 tcon = tlink_tcon(pCifsFile->tlink);
492 /* can not grab rename sem here because various ops, including
493 those that already have the rename sem can end up causing writepage
494 to get called and if the server was down that means we end up here,
495 and we can never tell if the caller already has the rename_sem */
496 full_path = build_path_from_dentry(pCifsFile->dentry);
497 if (full_path == NULL) {
499 mutex_unlock(&pCifsFile->fh_mutex);
504 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
505 inode, pCifsFile->f_flags, full_path);
507 if (tcon->ses->server->oplocks)
512 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
513 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
514 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
517 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
518 * original open. Must mask them off for a reopen.
520 unsigned int oflags = pCifsFile->f_flags &
521 ~(O_CREAT | O_EXCL | O_TRUNC);
523 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
524 cifs_sb->mnt_file_mode /* ignored */,
525 oflags, &oplock, &netfid, xid);
527 cFYI(1, "posix reopen succeeded");
530 /* fallthrough to retry open the old way on errors, especially
531 in the reconnect path it is important to retry hard */
534 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
536 if (backup_cred(cifs_sb))
537 create_options |= CREATE_OPEN_BACKUP_INTENT;
539 /* Can not refresh inode by passing in file_info buf to be returned
540 by SMBOpen and then calling get_inode_info with returned buf
541 since file might have write behind data that needs to be flushed
542 and server version of file size can be stale. If we knew for sure
543 that inode was not dirty locally we could do this */
545 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
546 create_options, &netfid, &oplock, NULL,
547 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
548 CIFS_MOUNT_MAP_SPECIAL_CHR);
550 mutex_unlock(&pCifsFile->fh_mutex);
551 cFYI(1, "cifs_open returned 0x%x", rc);
552 cFYI(1, "oplock: %d", oplock);
553 goto reopen_error_exit;
557 pCifsFile->netfid = netfid;
558 pCifsFile->invalidHandle = false;
559 mutex_unlock(&pCifsFile->fh_mutex);
560 pCifsInode = CIFS_I(inode);
563 rc = filemap_write_and_wait(inode->i_mapping);
564 mapping_set_error(inode->i_mapping, rc);
567 rc = cifs_get_inode_info_unix(&inode,
568 full_path, inode->i_sb, xid);
570 rc = cifs_get_inode_info(&inode,
571 full_path, NULL, inode->i_sb,
573 } /* else we are writing out data to server already
574 and could deadlock if we tried to flush data, and
575 since we do not know if we have data that would
576 invalidate the current end of file on the server
577 we can not go to the server to get the new inod
580 cifs_set_oplock_level(pCifsInode, oplock);
582 cifs_relock_file(pCifsFile);
/*
 * Release handler for regular files: drops the struct file's reference on
 * its cifsFileInfo (if any) with cifsFileInfo_put() and clears
 * file->private_data so that the pointer cannot be reused.
 */
590 int cifs_close(struct inode *inode, struct file *file)
592 if (file->private_data != NULL) {
593 cifsFileInfo_put(file->private_data);
594 file->private_data = NULL;
597 /* return code from the ->release op is always ignored */
/*
 * Release handler for directories.
 *
 * If the directory search had not reached its end and the search handle is
 * still valid, the handle is marked invalid under cifs_file_list_lock and
 * then closed on the server with CIFSFindClose(); the result is only logged.
 * Any network buffer still held in srch_inf is released with the small- or
 * regular-buffer release routine according to srch_inf.smallBuf, the tlink
 * reference is dropped, and the private data is freed and cleared.
 */
601 int cifs_closedir(struct inode *inode, struct file *file)
605 struct cifsFileInfo *pCFileStruct = file->private_data;
608 cFYI(1, "Closedir inode = 0x%p", inode);
613 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
615 cFYI(1, "Freeing private data in close dir");
616 spin_lock(&cifs_file_list_lock);
617 if (!pCFileStruct->srch_inf.endOfSearch &&
618 !pCFileStruct->invalidHandle) {
619 pCFileStruct->invalidHandle = true;
620 spin_unlock(&cifs_file_list_lock);
621 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
622 cFYI(1, "Closing uncompleted readdir with rc %d",
624 /* not much we can do if it fails anyway, ignore rc */
627 spin_unlock(&cifs_file_list_lock);
628 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
630 cFYI(1, "closedir free smb buf in srch struct");
631 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
632 if (pCFileStruct->srch_inf.smallBuf)
633 cifs_small_buf_release(ptmp);
635 cifs_buf_release(ptmp);
637 cifs_put_tlink(pCFileStruct->tlink);
638 kfree(file->private_data);
639 file->private_data = NULL;
641 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate a cifsLockInfo describing one byte-range lock.
 *
 * The record is allocated with kmalloc(GFP_KERNEL), so it is not zeroed; the
 * visible code fills in offset, length and the owning pid (current->tgid)
 * and initialises the list of blocked waiters (blist) and the wait queue
 * (block_q) used by cifs_lock_add_if() and cifs_del_lock_waiters().
 */
646 static struct cifsLockInfo *
647 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
649 struct cifsLockInfo *lock =
650 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
653 lock->offset = offset;
654 lock->length = length;
656 lock->pid = current->tgid;
657 INIT_LIST_HEAD(&lock->blist);
658 init_waitqueue_head(&lock->block_q);
/*
 * Release every lock request that is blocked on @lock: each waiter is
 * unlinked from lock->blist with list_del_init() (leaving its own blist
 * self-linked, which is the condition cifs_lock_add_if() sleeps on) and its
 * wait queue is woken.
 */
663 cifs_del_lock_waiters(struct cifsLockInfo *lock)
665 struct cifsLockInfo *li, *tmp;
666 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
667 list_del_init(&li->blist);
668 wake_up(&li->block_q);
/*
 * Look through the lock records of one open file (cfile->llist) for a lock
 * that conflicts with the range [offset, offset + length).
 *
 * @cfile:     open file whose lock list is scanned
 * @type:      lock type of the request
 * @cur:       open file the request is being made on
 * @conf_lock: presumably receives the conflicting record - TODO confirm, the
 *             assignment is not visible here
 *
 * Records whose range does not overlap the request are tested first.  For an
 * overlapping record, a shared-type request is additionally compared against
 * it: either the record belongs to the same fid (server->ops->compare_fids)
 * and the same tgid, or it has the same lock type.
 */
673 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
674 __u64 length, __u8 type, struct cifsFileInfo *cur,
675 struct cifsLockInfo **conf_lock)
677 struct cifsLockInfo *li;
678 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
680 list_for_each_entry(li, &cfile->llist, llist) {
681 if (offset + length <= li->offset ||
682 offset >= li->offset + li->length)
684 else if ((type & server->vals->shared_lock_type) &&
685 ((server->ops->compare_fids(cur, cfile) &&
686 current->tgid == li->pid) || type == li->type))
/*
 * Search every open instance of the inode behind @cfile for a byte-range
 * lock conflicting with the request, by calling
 * cifs_find_fid_lock_conflict() on each entry of the inode's openFileList.
 * The list walk is done with cifs_file_list_lock held.
 */
697 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
698 __u8 type, struct cifsLockInfo **conf_lock)
701 struct cifsFileInfo *fid, *tmp;
702 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
704 spin_lock(&cifs_file_list_lock);
705 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
706 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
711 spin_unlock(&cifs_file_list_lock);
717 * Check if there is another lock that prevents us from setting the lock
718 * (mandatory style). If such a lock exists, update the flock structure with
719 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
720 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
721 * send a request to the server or 1 otherwise.
/*
 * Local (cached) test for a conflicting mandatory lock, done with the
 * inode's lock_mutex held.
 *
 * When cifs_find_lock_conflict() reports a conflict, @flock is rewritten to
 * describe the conflicting record: start, inclusive end (offset + length - 1),
 * owner pid, and F_RDLCK or F_WRLCK depending on whether the record's type
 * has the server's shared-lock bit set.  Without a conflict the outcome
 * depends on cinode->can_cache_brlcks; the visible no-conflict assignment
 * sets fl_type to F_UNLCK.
 */
724 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
725 __u8 type, struct file_lock *flock)
728 struct cifsLockInfo *conf_lock;
729 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
730 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
733 mutex_lock(&cinode->lock_mutex);
735 exist = cifs_find_lock_conflict(cfile, offset, length, type,
738 flock->fl_start = conf_lock->offset;
739 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
740 flock->fl_pid = conf_lock->pid;
741 if (conf_lock->type & server->vals->shared_lock_type)
742 flock->fl_type = F_RDLCK;
744 flock->fl_type = F_WRLCK;
745 } else if (!cinode->can_cache_brlcks)
748 flock->fl_type = F_UNLCK;
750 mutex_unlock(&cinode->lock_mutex);
/*
 * Append @lock to the tail of @cfile's list of byte-range locks, holding the
 * inode's lock_mutex while the list is modified.
 */
755 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
757 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
758 mutex_lock(&cinode->lock_mutex);
759 list_add_tail(&lock->llist, &cfile->llist);
760 mutex_unlock(&cinode->lock_mutex);
764 * Set the byte-range lock (mandatory style). Returns:
765 * 1) 0, if we set the lock and don't need to send a request to the server;
766 * 2) 1, if no locks prevent us but we need to send a request to the server;
767 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * Try to record @lock locally, optionally sleeping until a conflicting lock
 * goes away.
 *
 * With lock_mutex held the request is checked with
 * cifs_find_lock_conflict().  If nothing conflicts and byte-range locks may
 * be cached (can_cache_brlcks), the record is appended to cfile->llist.
 * On the blocking path the request is queued on the conflicting record's
 * blist, the mutex is dropped, and the caller sleeps interruptibly until its
 * own blist entry is self-linked again - the state left by list_del_init()
 * in cifs_del_lock_waiters().  After the sleep the mutex is re-taken and the
 * request is unlinked from the blist.
 */
770 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
773 struct cifsLockInfo *conf_lock;
774 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
780 mutex_lock(&cinode->lock_mutex);
782 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
783 lock->type, &conf_lock);
784 if (!exist && cinode->can_cache_brlcks) {
785 list_add_tail(&lock->llist, &cfile->llist);
786 mutex_unlock(&cinode->lock_mutex);
795 list_add_tail(&lock->blist, &conf_lock->blist);
796 mutex_unlock(&cinode->lock_mutex);
797 rc = wait_event_interruptible(lock->block_q,
798 (lock->blist.prev == &lock->blist) &&
799 (lock->blist.next == &lock->blist));
802 mutex_lock(&cinode->lock_mutex);
803 list_del_init(&lock->blist);
806 mutex_unlock(&cinode->lock_mutex);
811 * Check if there is another lock that prevents us from setting the lock
812 * (posix style). If such a lock exists, update the flock structure with its
813 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
814 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
815 * send a request to the server or 1 otherwise.
/*
 * Local test for a conflicting POSIX lock.
 *
 * Requests without FL_POSIX are filtered out first.  Otherwise, under the
 * inode's lock_mutex, posix_test_lock() is run against the locally recorded
 * locks.  If it reports no conflict (fl_type becomes F_UNLCK) but byte-range
 * locks cannot be cached, the caller's original fl_type is restored, since
 * the local answer is not authoritative in that case.
 */
818 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
821 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
822 unsigned char saved_type = flock->fl_type;
824 if ((flock->fl_flags & FL_POSIX) == 0)
827 mutex_lock(&cinode->lock_mutex);
828 posix_test_lock(file, flock);
830 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
831 flock->fl_type = saved_type;
835 mutex_unlock(&cinode->lock_mutex);
840 * Set the byte-range lock (posix style). Returns:
841 * 1) 0, if we set the lock and don't need to send a request to the server;
842 * 2) 1, if we need to send a request to the server;
843 * 3) <0, if an error occurs while setting the lock.
/*
 * Try to set a POSIX lock locally.
 *
 * Requests without FL_POSIX are filtered out first.  Under the inode's
 * lock_mutex, if byte-range locks cannot be cached the mutex is released
 * without touching the local lock table.  Otherwise posix_lock_file()
 * records the lock; when it returns FILE_LOCK_DEFERRED the caller sleeps
 * interruptibly on flock->fl_wait until flock->fl_next is cleared, and
 * locks_delete_block() is called for the request.
 */
846 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
848 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
851 if ((flock->fl_flags & FL_POSIX) == 0)
855 mutex_lock(&cinode->lock_mutex);
856 if (!cinode->can_cache_brlcks) {
857 mutex_unlock(&cinode->lock_mutex);
861 rc = posix_lock_file(file, flock, NULL);
862 mutex_unlock(&cinode->lock_mutex);
863 if (rc == FILE_LOCK_DEFERRED) {
864 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
867 locks_delete_block(flock);
/*
 * Send the locally cached mandatory byte-range locks of @cfile to the
 * server and stop caching them.
 *
 * Runs with the inode's lock_mutex held.  If locks are not being cached
 * (can_cache_brlcks is false) the mutex is simply released.  Otherwise a
 * zeroed array of LOCKING_ANDX_RANGE entries is allocated, sized to what
 * fits in the server's maxBuf after the SMB header.  The lock list is walked
 * twice, once per entry of types[] (exclusive, then shared); matching
 * records are packed into the array and sent with cifs_lockv() whenever the
 * array fills up, with a further cifs_lockv() call per type after the walk.
 * Finally can_cache_brlcks is cleared.
 *
 * NOTE(review): max_num is computed as (maxBuf - sizeof(struct smb_hdr)) /
 * sizeof(LOCKING_ANDX_RANGE) with no visible check that maxBuf exceeds the
 * header size; the subtraction is done in an unsigned type - confirm that
 * maxBuf is validated elsewhere.
 * NOTE(review): cur->Pid is filled with cpu_to_le16(li->pid), so only the
 * low 16 bits of the stored pid go on the wire.
 */
873 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
875 int xid, rc = 0, stored_rc;
876 struct cifsLockInfo *li, *tmp;
877 struct cifs_tcon *tcon;
878 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 unsigned int num, max_num;
880 LOCKING_ANDX_RANGE *buf, *cur;
881 int types[] = {LOCKING_ANDX_LARGE_FILES,
882 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
886 tcon = tlink_tcon(cfile->tlink);
888 mutex_lock(&cinode->lock_mutex);
889 if (!cinode->can_cache_brlcks) {
890 mutex_unlock(&cinode->lock_mutex);
895 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
896 sizeof(LOCKING_ANDX_RANGE);
897 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
899 mutex_unlock(&cinode->lock_mutex);
904 for (i = 0; i < 2; i++) {
907 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
908 if (li->type != types[i])
910 cur->Pid = cpu_to_le16(li->pid);
911 cur->LengthLow = cpu_to_le32((u32)li->length);
912 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
913 cur->OffsetLow = cpu_to_le32((u32)li->offset);
914 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
915 if (++num == max_num) {
916 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
917 (__u8)li->type, 0, num,
928 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
929 (__u8)types[i], 0, num, buf);
935 cinode->can_cache_brlcks = false;
936 mutex_unlock(&cinode->lock_mutex);
/*
 * Iterate over the inode's i_flock chain.  lockp is a pointer to the link
 * field (struct file_lock **), starting at &inode->i_flock and advancing to
 * &(*lockp)->fl_next until the link it points at is NULL; the current lock
 * is *lockp.
 */
943 /* copied from fs/locks.c with a name change */
944 #define cifs_for_each_lock(inode, lockp) \
945 for (lockp = &inode->i_flock; *lockp != NULL; \
946 lockp = &(*lockp)->fl_next)
/*
 * One POSIX lock queued by cifs_push_posix_locks() for sending to the
 * server; entries are chained through llist on that function's local
 * locks_to_send list.
 */
948 struct lock_to_push {
949 struct list_head llist;
/*
 * Send the locally cached POSIX locks of the inode behind @cfile to the
 * server and stop caching them.
 *
 * Runs with the inode's lock_mutex held; if locks are not being cached the
 * mutex is simply released.  The work is done in three passes:
 *   1. walk inode->i_flock to count the FL_POSIX entries;
 *   2. allocate that many lock_to_push records up front (no FL_POSIX lock
 *      can be added while lock_mutex is held), then walk i_flock a second
 *      time copying pid, netfid, length and offset of each FL_POSIX lock
 *      into the next record; running out of records is reported with
 *      cERROR as a should-not-happen condition;
 *   3. send each record with CIFSSMBPosixLock() and unlink it from the
 *      list.
 * Afterwards can_cache_brlcks is cleared.  The trailing loop unlinks any
 * records still left on the list.
 *
 * Lock length is computed as 1 + fl_end - fl_start because fl_end is
 * inclusive.
 */
958 cifs_push_posix_locks(struct cifsFileInfo *cfile)
960 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
961 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
962 struct file_lock *flock, **before;
963 unsigned int count = 0, i = 0;
964 int rc = 0, xid, type;
965 struct list_head locks_to_send, *el;
966 struct lock_to_push *lck, *tmp;
971 mutex_lock(&cinode->lock_mutex);
972 if (!cinode->can_cache_brlcks) {
973 mutex_unlock(&cinode->lock_mutex);
979 cifs_for_each_lock(cfile->dentry->d_inode, before) {
980 if ((*before)->fl_flags & FL_POSIX)
985 INIT_LIST_HEAD(&locks_to_send);
988 * Allocating count locks is enough because no FL_POSIX locks can be
989 * added to the list while we are holding cinode->lock_mutex that
990 * protects locking operations of this inode.
992 for (; i < count; i++) {
993 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
998 list_add_tail(&lck->llist, &locks_to_send);
1001 el = locks_to_send.next;
1003 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1005 if ((flock->fl_flags & FL_POSIX) == 0)
1007 if (el == &locks_to_send) {
1009 * The list ended. We don't have enough allocated
1010 * structures - something is really wrong.
1012 cERROR(1, "Can't push all brlocks!");
1015 length = 1 + flock->fl_end - flock->fl_start;
1016 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1020 lck = list_entry(el, struct lock_to_push, llist);
1021 lck->pid = flock->fl_pid;
1022 lck->netfid = cfile->netfid;
1023 lck->length = length;
1025 lck->offset = flock->fl_start;
1030 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1031 struct file_lock tmp_lock;
1034 tmp_lock.fl_start = lck->offset;
1035 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1036 0, lck->length, &tmp_lock,
1040 list_del(&lck->llist);
1045 cinode->can_cache_brlcks = false;
1046 mutex_unlock(&cinode->lock_mutex);
1051 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1052 list_del(&lck->llist);
/*
 * Push cached byte-range locks to the server using the lock flavour in
 * effect for this mount: POSIX locks when the session has CAP_UNIX, the
 * server advertises CIFS_UNIX_FCNTL_CAP and the mount does not set
 * CIFS_MOUNT_NOPOSIXBRL; mandatory (Windows-style) locks otherwise.
 */
1059 cifs_push_locks(struct cifsFileInfo *cfile)
1061 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1062 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1064 if ((tcon->ses->capabilities & CAP_UNIX) &&
1065 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1066 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1067 return cifs_push_posix_locks(cfile);
1069 return cifs_push_mandatory_locks(cfile);
/*
 * Decode a struct file_lock into the pieces the CIFS lock paths work with.
 *
 * @flock:     lock request from the VFS
 * @type:      out: server lock type, starting from the server's
 *             large_lock_type with the exclusive, shared or unlock bit
 *             OR-ed in according to fl_type
 * @lock:      out flag for a lock request
 * @unlock:    out flag for an unlock request
 * @wait_flag: out flag for a blocking request (FL_SLEEP)
 * @server:    supplies the per-dialect lock type values (server->vals)
 *
 * The first half inspects fl_flags (FL_POSIX, FL_FLOCK, FL_SLEEP, FL_ACCESS,
 * FL_LEASE) and logs each one; FL_ACCESS and FL_LEASE are logged as not
 * implemented and any other flag bits as unknown.  The second half maps
 * fl_type: F_WRLCK and F_EXLCK select the exclusive type, F_RDLCK and
 * F_SHLCK the shared type, F_UNLCK the unlock type; anything else is logged
 * as an unknown lock type.
 */
1073 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1074 bool *wait_flag, struct TCP_Server_Info *server)
1076 if (flock->fl_flags & FL_POSIX)
1078 if (flock->fl_flags & FL_FLOCK)
1080 if (flock->fl_flags & FL_SLEEP) {
1081 cFYI(1, "Blocking lock");
1084 if (flock->fl_flags & FL_ACCESS)
1085 cFYI(1, "Process suspended by mandatory locking - "
1086 "not implemented yet");
1087 if (flock->fl_flags & FL_LEASE)
1088 cFYI(1, "Lease on file - not implemented yet");
1089 if (flock->fl_flags &
1090 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1091 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1093 *type = server->vals->large_lock_type;
1094 if (flock->fl_type == F_WRLCK) {
1095 cFYI(1, "F_WRLCK ");
1096 *type |= server->vals->exclusive_lock_type;
1098 } else if (flock->fl_type == F_UNLCK) {
1100 *type |= server->vals->unlock_lock_type;
1102 /* Check if unlock includes more than one lock range */
1103 } else if (flock->fl_type == F_RDLCK) {
1105 *type |= server->vals->shared_lock_type;
1107 } else if (flock->fl_type == F_EXLCK) {
1109 *type |= server->vals->exclusive_lock_type;
1111 } else if (flock->fl_type == F_SHLCK) {
1113 *type |= server->vals->shared_lock_type;
1116 cFYI(1, "Unknown type of lock");
/*
 * Thin wrapper around CIFSSMBLock() for a single mandatory byte-range lock
 * or unlock on @cfile's handle, issued on behalf of current->tgid.
 *
 * Note the argument order expected by CIFSSMBLock(): length precedes offset
 * and the unlock count precedes the lock count.  @type is narrowed to __u8.
 */
1120 cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset,
1121 __u64 length, __u32 type, int lock, int unlock, bool wait)
1123 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1124 current->tgid, length, offset, unlock, lock,
1125 (__u8)type, wait, 0);
/*
 * Handle a lock test request: report in @flock whether the range could be
 * locked.
 *
 * POSIX path: the local state is consulted with cifs_posix_lock_test(); the
 * server is asked with CIFSSMBPosixLock() in "get" mode, using CIFS_RDLCK
 * when @type has the server's shared-lock bit and CIFS_WRLCK otherwise.
 *
 * Mandatory path: the local state is consulted with cifs_lock_test().  The
 * server is then probed by actually taking the lock with
 * cifs_mandatory_lock(); on the success path a second cifs_mandatory_lock()
 * call undoes it and fl_type is set to F_UNLCK (a failed undo is logged).
 * For a shared-type request the visible result is F_WRLCK.  Otherwise the
 * probe is repeated with the shared-lock bit added: if that lock can be
 * taken and released, F_RDLCK is reported, else F_WRLCK.
 *
 * The range length is 1 + fl_end - fl_start because fl_end is inclusive.
 */
1129 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1130 bool wait_flag, bool posix_lck, int xid)
1133 __u64 length = 1 + flock->fl_end - flock->fl_start;
1134 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1135 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1136 struct TCP_Server_Info *server = tcon->ses->server;
1137 __u16 netfid = cfile->netfid;
1140 int posix_lock_type;
1142 rc = cifs_posix_lock_test(file, flock);
1146 if (type & server->vals->shared_lock_type)
1147 posix_lock_type = CIFS_RDLCK;
1149 posix_lock_type = CIFS_WRLCK;
1150 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1151 1 /* get */, length, flock,
1152 posix_lock_type, wait_flag);
1156 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1160 /* BB we could chain these into one lock request BB */
1161 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1164 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1166 flock->fl_type = F_UNLCK;
1168 cERROR(1, "Error unlocking previously locked "
1169 "range %d during test of lock", rc);
1173 if (type & server->vals->shared_lock_type) {
1174 flock->fl_type = F_WRLCK;
1178 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1179 type | server->vals->shared_lock_type, 1, 0,
1182 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1183 type | server->vals->shared_lock_type,
1185 flock->fl_type = F_RDLCK;
1187 cERROR(1, "Error unlocking previously locked "
1188 "range %d during test of lock", rc);
1190 flock->fl_type = F_WRLCK;
/*
 * Move every entry of @source onto @dest.  Each entry is moved with
 * list_move(), i.e. inserted at the head of @dest, so the entries end up in
 * reverse order relative to @source.
 */
1196 cifs_move_llist(struct list_head *source, struct list_head *dest)
1198 struct list_head *li, *tmp;
1199 list_for_each_safe(li, tmp, source)
1200 list_move(li, dest);
/*
 * Dispose of every cifsLockInfo on @llist: requests blocked on each lock are
 * woken with cifs_del_lock_waiters() and the record is unlinked from the
 * list.
 */
1204 cifs_free_llist(struct list_head *llist)
1206 struct cifsLockInfo *li, *tmp;
1207 list_for_each_entry_safe(li, tmp, llist, llist) {
1208 cifs_del_lock_waiters(li);
1209 list_del(&li->llist);
/*
 * Remove all of the caller's mandatory locks on @cfile that lie entirely
 * inside the range described by @flock.
 *
 * A zeroed LOCKING_ANDX_RANGE array sized to the server's maxBuf is
 * allocated, then, under the inode's lock_mutex, the lock list is walked
 * once per lock type (exclusive, then shared).  A record is selected only if
 * it is fully contained in the unlock range, is owned by current->tgid and
 * has the type of the current pass.
 *
 * When locks are not cached locally (can_cache_brlcks is false) the record
 * is packed into the array and parked on a temporary list; each time the
 * array fills up, and again after the walk, the batch is sent with
 * cifs_lockv().  If the server rejects the batch the parked records are
 * moved back onto the file's list, otherwise they are freed.  When locks are
 * cached the record is just unlinked locally and its waiters woken.
 *
 * NOTE(review): max_num is computed as (maxBuf - sizeof(struct smb_hdr)) /
 * sizeof(LOCKING_ANDX_RANGE) with no visible check that maxBuf exceeds the
 * header size - confirm that maxBuf is validated elsewhere.
 */
1215 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1217 int rc = 0, stored_rc;
1218 int types[] = {LOCKING_ANDX_LARGE_FILES,
1219 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1221 unsigned int max_num, num;
1222 LOCKING_ANDX_RANGE *buf, *cur;
1223 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1224 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1225 struct cifsLockInfo *li, *tmp;
1226 __u64 length = 1 + flock->fl_end - flock->fl_start;
1227 struct list_head tmp_llist;
1229 INIT_LIST_HEAD(&tmp_llist);
1231 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
1232 sizeof(LOCKING_ANDX_RANGE);
1233 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1237 mutex_lock(&cinode->lock_mutex);
1238 for (i = 0; i < 2; i++) {
1241 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
1242 if (flock->fl_start > li->offset ||
1243 (flock->fl_start + length) <
1244 (li->offset + li->length))
1246 if (current->tgid != li->pid)
1248 if (types[i] != li->type)
1250 if (!cinode->can_cache_brlcks) {
1251 cur->Pid = cpu_to_le16(li->pid);
1252 cur->LengthLow = cpu_to_le32((u32)li->length);
1254 cpu_to_le32((u32)(li->length>>32));
1255 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1257 cpu_to_le32((u32)(li->offset>>32));
1259 * We need to save a lock here to let us add
1260 * it again to the file's list if the unlock
1261 * range request fails on the server.
1263 list_move(&li->llist, &tmp_llist);
1264 if (++num == max_num) {
1265 stored_rc = cifs_lockv(xid, tcon,
1271 * We failed on the unlock range
1272 * request - add all locks from
1273 * the tmp list to the head of
1276 cifs_move_llist(&tmp_llist,
1281 * The unlock range request
1282 * succeed - free the tmp list.
1284 cifs_free_llist(&tmp_llist);
1291 * We can cache brlock requests - simply remove
1292 * a lock from the file's list.
1294 list_del(&li->llist);
1295 cifs_del_lock_waiters(li);
1300 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1301 types[i], num, 0, buf);
1303 cifs_move_llist(&tmp_llist, &cfile->llist);
1306 cifs_free_llist(&tmp_llist);
1310 mutex_unlock(&cinode->lock_mutex);
/*
 * Handle a request to set or clear a byte-range lock.
 *
 * POSIX path: cifs_posix_lock_set() is tried first; the request is sent to
 * the server with CIFSSMBPosixLock() in "set" mode using CIFS_RDLCK,
 * CIFS_WRLCK or CIFS_UNLCK.
 *
 * Mandatory path, lock: a cifsLockInfo is created with cifs_lock_init() and
 * offered to cifs_lock_add_if(); the lock is requested from the server with
 * cifs_mandatory_lock() and recorded locally with cifs_lock_add().
 * Mandatory path, unlock: cifs_unlock_range() is called.
 *
 * For FL_POSIX requests the lock is finally mirrored into the local VFS
 * lock table with posix_lock_file_wait().
 *
 * The range length is 1 + fl_end - fl_start because fl_end is inclusive.
 */
1316 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1317 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
1320 __u64 length = 1 + flock->fl_end - flock->fl_start;
1321 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1322 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1323 struct TCP_Server_Info *server = tcon->ses->server;
1324 __u16 netfid = cfile->netfid;
1327 int posix_lock_type;
1329 rc = cifs_posix_lock_set(file, flock);
1333 if (type & server->vals->shared_lock_type)
1334 posix_lock_type = CIFS_RDLCK;
1336 posix_lock_type = CIFS_WRLCK;
1339 posix_lock_type = CIFS_UNLCK;
1341 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1342 0 /* set */, length, flock,
1343 posix_lock_type, wait_flag);
1348 struct cifsLockInfo *lock;
1350 lock = cifs_lock_init(flock->fl_start, length, type);
1354 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1360 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1361 type, 1, 0, wait_flag);
1367 cifs_lock_add(cfile, lock);
1369 rc = cifs_unlock_range(cfile, flock, xid);
1372 if (flock->fl_flags & FL_POSIX)
1373 posix_lock_file_wait(file, flock);
/*
 * cifs_lock - byte-range lock entry point taking a file, a command and a
 * struct file_lock.
 *
 * Logs the request, decodes @flock through cifs_read_flock() into a lock
 * type plus lock/unlock/wait flags, and then dispatches: IS_GETLK(cmd)
 * requests go to cifs_getlk(), and requests that ask for a lock or an
 * unlock go to cifs_setlk(). The CAP_UNIX / CIFS_UNIX_FCNTL_CAP /
 * CIFS_MOUNT_NOPOSIXBRL test gates POSIX-style handling
 * (presumably by setting posix_lck - TODO confirm).
 */
1377 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1380 int lock = 0, unlock = 0;
1381 bool wait_flag = false;
1382 bool posix_lck = false;
1383 struct cifs_sb_info *cifs_sb;
1384 struct cifs_tcon *tcon;
1385 struct cifsInodeInfo *cinode;
1386 struct cifsFileInfo *cfile;
1393 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1394 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1395 flock->fl_start, flock->fl_end);
1397 cfile = (struct cifsFileInfo *)file->private_data;
1398 tcon = tlink_tcon(cfile->tlink);
1400 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1403 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1404 netfid = cfile->netfid;
1405 cinode = CIFS_I(file->f_path.dentry->d_inode);
1407 if ((tcon->ses->capabilities & CAP_UNIX) &&
1408 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1409 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1412 * BB add code here to normalize offset and length to account for
1413 * negative length which we can not accept over the wire.
1415 if (IS_GETLK(cmd)) {
1416 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1421 if (!lock && !unlock) {
1423 * if no lock or unlock then nothing to do since we do not
1430 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
/*
 * cifs_update_eof - raise the cached server end-of-file after a write.
 * @cifsi:         CIFS inode whose server_eof is maintained
 * @offset:        file offset at which the write started
 * @bytes_written: number of bytes the write covered
 *
 * server_eof only grows here: it is replaced when offset + bytes_written
 * lies beyond the current value.
 */
1437 * update the file size (if needed) after a write. Should be called with
1438 * the inode->i_lock held
1441 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1442 unsigned int bytes_written)
1444 loff_t end_of_write = offset + bytes_written;
1446 if (end_of_write > cifsi->server_eof)
1447 cifsi->server_eof = end_of_write;
/*
 * cifs_write - synchronously write a kernel buffer through an open handle.
 *
 * The data is sent in chunks of at most cifs_sb->wsize bytes with
 * CIFSSMBWrite2(); iov[0] is left for the SMB header and iov[1] points at
 * the current chunk. A send is repeated while it returns -EAGAIN, and an
 * invalidated handle is reopened first with cifs_reopen_file(open_file,
 * false) so that the reopen does not flush data. After each chunk the
 * cached server EOF is updated under i_lock and *poffset advances by the
 * bytes written. At the end the byte statistics are updated, i_size is
 * extended under i_lock if *poffset passed it, and mark_inode_dirty_sync()
 * is called. The final return statement yields total_written.
 */
1450 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1451 const char *write_data, size_t write_size,
1455 unsigned int bytes_written = 0;
1456 unsigned int total_written;
1457 struct cifs_sb_info *cifs_sb;
1458 struct cifs_tcon *pTcon;
1460 struct dentry *dentry = open_file->dentry;
1461 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1462 struct cifs_io_parms io_parms;
1464 cifs_sb = CIFS_SB(dentry->d_sb);
1466 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1467 *poffset, dentry->d_name.name);
1469 pTcon = tlink_tcon(open_file->tlink);
1473 for (total_written = 0; write_size > total_written;
1474 total_written += bytes_written) {
1476 while (rc == -EAGAIN) {
1480 if (open_file->invalidHandle) {
1481 /* we could deadlock if we called
1482 filemap_fdatawait from here so tell
1483 reopen_file not to flush data to
1485 rc = cifs_reopen_file(open_file, false);
1490 len = min((size_t)cifs_sb->wsize,
1491 write_size - total_written);
1492 /* iov[0] is reserved for smb header */
1493 iov[1].iov_base = (char *)write_data + total_written;
1494 iov[1].iov_len = len;
1495 io_parms.netfid = open_file->netfid;
1497 io_parms.tcon = pTcon;
1498 io_parms.offset = *poffset;
1499 io_parms.length = len;
1500 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1503 if (rc || (bytes_written == 0)) {
1511 spin_lock(&dentry->d_inode->i_lock);
1512 cifs_update_eof(cifsi, *poffset, bytes_written);
1513 spin_unlock(&dentry->d_inode->i_lock);
1514 *poffset += bytes_written;
1518 cifs_stats_bytes_written(pTcon, total_written);
1520 if (total_written > 0) {
1521 spin_lock(&dentry->d_inode->i_lock);
1522 if (*poffset > dentry->d_inode->i_size)
1523 i_size_write(dentry->d_inode, *poffset);
1524 spin_unlock(&dentry->d_inode->i_lock);
1526 mark_inode_dirty_sync(dentry->d_inode);
1528 return total_written;
/*
 * find_readable_file - look up an open handle on @cifs_inode usable for reads.
 *
 * Walks cifs_inode->openFileList under cifs_file_list_lock. When fsuid
 * filtering is in effect, entries whose uid differs from current_fsuid()
 * are passed over; the CIFS_MOUNT_MULTIUSER test restricts that filtering
 * to multiuser mounts. The first entry opened with FMODE_READ and a valid
 * handle gets a reference via cifsFileInfo_get() before the lock is
 * dropped. The walk stops at the first entry without FMODE_READ because
 * write-only entries are kept at the end of the list.
 */
1531 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1534 struct cifsFileInfo *open_file = NULL;
1535 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1537 /* only filter by fsuid on multiuser mounts */
1538 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1541 spin_lock(&cifs_file_list_lock);
1542 /* we could simply get the first_list_entry since write-only entries
1543 are always at the end of the list but since the first entry might
1544 have a close pending, we go through the whole list */
1545 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1546 if (fsuid_only && open_file->uid != current_fsuid())
1548 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1549 if (!open_file->invalidHandle) {
1550 /* found a good file */
1551 /* lock it so it will not be closed on us */
1552 cifsFileInfo_get(open_file);
1553 spin_unlock(&cifs_file_list_lock);
1555 } /* else might as well continue, and look for
1556 another, or simply have the caller reopen it
1557 again rather than trying to fix this handle */
1558 } else /* write only file */
1559 break; /* write only files are last so must be done */
1561 spin_unlock(&cifs_file_list_lock);
/*
 * find_writable_file - look up an open handle on @cifs_inode usable for writes.
 *
 * A NULL @cifs_inode is reported with cERROR(). Otherwise the open file
 * list is scanned under cifs_file_list_lock, first restricted to handles
 * whose pid equals current->tgid and then, if that finds nothing, across
 * all handles (any_available). A valid FMODE_WRITE handle has a reference
 * taken and the lock released. An invalidated writable handle is remembered
 * in inv_file, given a reference, and reopened with
 * cifs_reopen_file(inv_file, false) outside the list lock. On the path that
 * moves it to the list tail and drops that reference, the search restarts
 * at refind_writable. refind is compared against MAX_REOPEN_ATT to bound
 * the number of such restarts.
 */
1565 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1568 struct cifsFileInfo *open_file, *inv_file = NULL;
1569 struct cifs_sb_info *cifs_sb;
1570 bool any_available = false;
1572 unsigned int refind = 0;
1574 /* Having a null inode here (because mapping->host was set to zero by
1575 the VFS or MM) should not happen but we had reports of an oops (due to
1576 it being zero) during stress testcases so we need to check for it */
1578 if (cifs_inode == NULL) {
1579 cERROR(1, "Null inode passed to cifs_writeable_file");
1584 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1586 /* only filter by fsuid on multiuser mounts */
1587 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1590 spin_lock(&cifs_file_list_lock);
1592 if (refind > MAX_REOPEN_ATT) {
1593 spin_unlock(&cifs_file_list_lock);
1596 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1597 if (!any_available && open_file->pid != current->tgid)
1599 if (fsuid_only && open_file->uid != current_fsuid())
1601 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1602 if (!open_file->invalidHandle) {
1603 /* found a good writable file */
1604 cifsFileInfo_get(open_file);
1605 spin_unlock(&cifs_file_list_lock);
1609 inv_file = open_file;
1613 /* couldn't find useable FH with same pid, try any available */
1614 if (!any_available) {
1615 any_available = true;
1616 goto refind_writable;
1620 any_available = false;
1621 cifsFileInfo_get(inv_file);
1624 spin_unlock(&cifs_file_list_lock);
1627 rc = cifs_reopen_file(inv_file, false);
1631 spin_lock(&cifs_file_list_lock);
1632 list_move_tail(&inv_file->flist,
1633 &cifs_inode->openFileList);
1634 spin_unlock(&cifs_file_list_lock);
1635 cifsFileInfo_put(inv_file);
1636 spin_lock(&cifs_file_list_lock);
1638 goto refind_writable;
/*
 * cifs_partialpagewrite - write bytes [from, to) of a page cache page to the
 * server.
 *
 * The file offset is page->index << PAGE_CACHE_SHIFT plus @from. A missing
 * mapping or host, and a range with @to > PAGE_CACHE_SIZE or @from > @to,
 * each take their own early branch (presumably errors - TODO confirm).
 * The write is skipped with a return of 0 when the offset already lies
 * beyond i_size (truncate race), and @to is clamped so the write does not
 * extend the file. The data goes out through cifs_write() on a handle from
 * find_writable_file(), using that handle's pid, after which the handle
 * reference is dropped and atime/mtime are set to the current fs time. If
 * no writable handle exists this is logged with cFYI().
 */
1645 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1647 struct address_space *mapping = page->mapping;
1648 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1651 int bytes_written = 0;
1652 struct inode *inode;
1653 struct cifsFileInfo *open_file;
1655 if (!mapping || !mapping->host)
1658 inode = page->mapping->host;
1660 offset += (loff_t)from;
1661 write_data = kmap(page);
1664 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1669 /* racing with truncate? */
1670 if (offset > mapping->host->i_size) {
1672 return 0; /* don't care */
1675 /* check to make sure that we are not extending the file */
1676 if (mapping->host->i_size - offset < (loff_t)to)
1677 to = (unsigned)(mapping->host->i_size - offset);
1679 open_file = find_writable_file(CIFS_I(mapping->host), false);
1681 bytes_written = cifs_write(open_file, open_file->pid,
1682 write_data, to - from, &offset);
1683 cifsFileInfo_put(open_file);
1684 /* Does mm or vfs already set times? */
1685 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1686 if ((bytes_written > 0) && (offset))
1688 else if (bytes_written < 0)
1691 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages_marshal_iov - build the kvec array for a page cache write.
 *
 * iov[0] is reserved for the header, so page i is mapped with kmap() into
 * iov[i + 1]. Each length is the smaller of PAGE_CACHE_SIZE and the distance
 * from the page's offset to i_size, and wdata->bytes accumulates the total.
 */
1700 * Marshal up the iov array, reserving the first one for the header. Also,
1704 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1707 struct inode *inode = wdata->cfile->dentry->d_inode;
1708 loff_t size = i_size_read(inode);
1710 /* marshal up the pages into iov array */
1712 for (i = 0; i < wdata->nr_pages; i++) {
1713 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1714 (loff_t)PAGE_CACHE_SIZE);
1715 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1716 wdata->bytes += iov[i + 1].iov_len;
/*
 * cifs_writepages - write back dirty pages of @mapping in batched requests.
 *
 * Falls back to generic_writepages() when wsize is smaller than a page.
 * Otherwise each pass allocates a cifs_writedata and gathers dirty pages
 * with find_get_pages_tag(), called repeatedly to fill the array. Each
 * gathered page is then locked and checked: same mapping, inside the
 * requested range, consecutive index, not under writeback, still dirty,
 * below i_size. Pages that will not be used are released, and an empty
 * batch drops the wdata reference. The batch is sent with
 * cifs_async_writev() on a handle from find_writable_file(), and the send
 * is repeated while it returns -EAGAIN under WB_SYNC_ALL. The pages are
 * unlocked after the send. On a send failure the pages are redirtied or
 * flagged with SetPageError(), writeback is ended, the page references are
 * released and the error is recorded on the mapping. wbc->nr_to_write is
 * reduced by the number of pages sent, and mapping->writeback_index is
 * updated for cyclic or whole-range writeback.
 */
1720 static int cifs_writepages(struct address_space *mapping,
1721 struct writeback_control *wbc)
1723 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1724 bool done = false, scanned = false, range_whole = false;
1726 struct cifs_writedata *wdata;
1731 * If wsize is smaller than the page cache size, default to writing
1732 * one page at a time via cifs_writepage
1734 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1735 return generic_writepages(mapping, wbc);
1737 if (wbc->range_cyclic) {
1738 index = mapping->writeback_index; /* Start from prev offset */
1741 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1742 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1743 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1748 while (!done && index <= end) {
1749 unsigned int i, nr_pages, found_pages;
1750 pgoff_t next = 0, tofind;
1751 struct page **pages;
1753 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1756 wdata = cifs_writedata_alloc((unsigned int)tofind,
1757 cifs_writev_complete);
1764 * find_get_pages_tag seems to return a max of 256 on each
1765 * iteration, so we must call it several times in order to
1766 * fill the array or the wsize is effectively limited to
1767 * 256 * PAGE_CACHE_SIZE.
1770 pages = wdata->pages;
1772 nr_pages = find_get_pages_tag(mapping, &index,
1773 PAGECACHE_TAG_DIRTY,
1775 found_pages += nr_pages;
1778 } while (nr_pages && tofind && index <= end);
1780 if (found_pages == 0) {
1781 kref_put(&wdata->refcount, cifs_writedata_release);
1786 for (i = 0; i < found_pages; i++) {
1787 page = wdata->pages[i];
1789 * At this point we hold neither mapping->tree_lock nor
1790 * lock on the page itself: the page may be truncated or
1791 * invalidated (changing page->mapping to NULL), or even
1792 * swizzled back from swapper_space to tmpfs file
1798 else if (!trylock_page(page))
1801 if (unlikely(page->mapping != mapping)) {
1806 if (!wbc->range_cyclic && page->index > end) {
1812 if (next && (page->index != next)) {
1813 /* Not next consecutive page */
1818 if (wbc->sync_mode != WB_SYNC_NONE)
1819 wait_on_page_writeback(page);
1821 if (PageWriteback(page) ||
1822 !clear_page_dirty_for_io(page)) {
1828 * This actually clears the dirty bit in the radix tree.
1829 * See cifs_writepage() for more commentary.
1831 set_page_writeback(page);
1833 if (page_offset(page) >= mapping->host->i_size) {
1836 end_page_writeback(page);
1840 wdata->pages[i] = page;
1841 next = page->index + 1;
1845 /* reset index to refind any pages skipped */
1847 index = wdata->pages[0]->index + 1;
1849 /* put any pages we aren't going to use */
1850 for (i = nr_pages; i < found_pages; i++) {
1851 page_cache_release(wdata->pages[i]);
1852 wdata->pages[i] = NULL;
1855 /* nothing to write? */
1856 if (nr_pages == 0) {
1857 kref_put(&wdata->refcount, cifs_writedata_release);
1861 wdata->sync_mode = wbc->sync_mode;
1862 wdata->nr_pages = nr_pages;
1863 wdata->offset = page_offset(wdata->pages[0]);
1864 wdata->marshal_iov = cifs_writepages_marshal_iov;
1867 if (wdata->cfile != NULL)
1868 cifsFileInfo_put(wdata->cfile);
1869 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1871 if (!wdata->cfile) {
1872 cERROR(1, "No writable handles for inode");
1876 wdata->pid = wdata->cfile->pid;
1877 rc = cifs_async_writev(wdata);
1878 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1880 for (i = 0; i < nr_pages; ++i)
1881 unlock_page(wdata->pages[i]);
1883 /* send failure -- clean up the mess */
1885 for (i = 0; i < nr_pages; ++i) {
1887 redirty_page_for_writepage(wbc,
1890 SetPageError(wdata->pages[i]);
1891 end_page_writeback(wdata->pages[i]);
1892 page_cache_release(wdata->pages[i]);
1895 mapping_set_error(mapping, rc);
1897 kref_put(&wdata->refcount, cifs_writedata_release);
1899 wbc->nr_to_write -= nr_pages;
1900 if (wbc->nr_to_write <= 0)
1906 if (!scanned && !done) {
1908 * We hit the last page and there is more work to be done: wrap
1909 * back to the start of the file
1916 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1917 mapping->writeback_index = index;
/*
 * cifs_writepage_locked - write one page to the server.
 *
 * Takes an extra page reference, logs if the page is not up to date, sets
 * the writeback flag and writes the whole page with
 * cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE). -EAGAIN is handled
 * according to wbc->sync_mode: WB_SYNC_ALL takes its own branch (presumably
 * a retry - TODO confirm), any other mode redirties the page. Writeback is
 * ended and the extra reference dropped at the end.
 * NOTE(review): the name suggests the caller holds the page lock - confirm.
 */
1923 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1929 /* BB add check for wbc flags */
1930 page_cache_get(page);
1931 if (!PageUptodate(page))
1932 cFYI(1, "ppw - page not up to date");
1935 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1937 * A writepage() implementation always needs to do either this,
1938 * or re-dirty the page with "redirty_page_for_writepage()" in
1939 * the case of a failure.
1941 * Just unlocking the page will cause the radix tree tag-bits
1942 * to fail to update with the state of the page correctly.
1944 set_page_writeback(page);
1946 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1947 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1949 else if (rc == -EAGAIN)
1950 redirty_page_for_writepage(wbc, page);
1954 SetPageUptodate(page);
1955 end_page_writeback(page);
1956 page_cache_release(page);
/*
 * cifs_writepage - calls cifs_writepage_locked() for @page and keeps its
 * result in rc.
 */
1961 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1963 int rc = cifs_writepage_locked(page, wbc);
/*
 * cifs_write_end - finish a buffered write of @copied bytes at @pos.
 *
 * The pid used for a direct write depends on CIFS_MOUNT_RWPIDFORWARD
 * (current->tgid on the visible branch). A PageChecked page has that flag
 * cleared and can be marked up to date; a page that was not up to date is
 * marked so when a full PAGE_CACHE_SIZE was copied. If the page is still
 * not up to date, the copied bytes are written straight to the server with
 * cifs_write() from the kmap()ed page at the in-page offset of @pos; the
 * other visible path marks the page dirty. i_size is raised under i_lock
 * when @pos ends up beyond it, and the page reference is released.
 */
1968 static int cifs_write_end(struct file *file, struct address_space *mapping,
1969 loff_t pos, unsigned len, unsigned copied,
1970 struct page *page, void *fsdata)
1973 struct inode *inode = mapping->host;
1974 struct cifsFileInfo *cfile = file->private_data;
1975 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1978 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1981 pid = current->tgid;
1983 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1986 if (PageChecked(page)) {
1988 SetPageUptodate(page);
1989 ClearPageChecked(page);
1990 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1991 SetPageUptodate(page);
1993 if (!PageUptodate(page)) {
1995 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1999 /* this is probably better than directly calling
2000 partialpage_write since in this function the file handle is
2001 known which we might as well leverage */
2002 /* BB check if anything else missing out of ppw
2003 such as updating last write time */
2004 page_data = kmap(page);
2005 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2006 /* if (rc < 0) should we set writebehind rc? */
2013 set_page_dirty(page);
2017 spin_lock(&inode->i_lock);
2018 if (pos > inode->i_size)
2019 i_size_write(inode, pos);
2020 spin_unlock(&inode->i_lock);
2024 page_cache_release(page);
/*
 * cifs_strict_fsync - fsync variant that can also invalidate the page cache.
 *
 * Writes back and waits on [start, end] with
 * filemap_write_and_wait_range(). Then, under i_mutex, it invalidates the
 * mapping when the client cannot cache reads (the result of that step is
 * logged and reset to 0) and sends CIFSSMBFlush() unless the mount has
 * CIFS_MOUNT_NOSSYNC set.
 */
2029 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2034 struct cifs_tcon *tcon;
2035 struct cifsFileInfo *smbfile = file->private_data;
2036 struct inode *inode = file->f_path.dentry->d_inode;
2037 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2039 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2042 mutex_lock(&inode->i_mutex);
2046 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2047 file->f_path.dentry->d_name.name, datasync);
2049 if (!CIFS_I(inode)->clientCanCacheRead) {
2050 rc = cifs_invalidate_mapping(inode);
2052 cFYI(1, "rc: %d during invalidate phase", rc);
2053 rc = 0; /* don't care about it in fsync */
2057 tcon = tlink_tcon(smbfile->tlink);
2058 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2059 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2062 mutex_unlock(&inode->i_mutex);
/*
 * cifs_fsync - flush a file's dirty data to the server.
 *
 * Writes back and waits on [start, end] with
 * filemap_write_and_wait_range(), then, under i_mutex, sends
 * CIFSSMBFlush() for the handle unless the mount has CIFS_MOUNT_NOSSYNC set.
 */
2066 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2070 struct cifs_tcon *tcon;
2071 struct cifsFileInfo *smbfile = file->private_data;
2072 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2073 struct inode *inode = file->f_mapping->host;
2075 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2078 mutex_lock(&inode->i_mutex);
2082 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2083 file->f_path.dentry->d_name.name, datasync);
2085 tcon = tlink_tcon(smbfile->tlink);
2086 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2087 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2090 mutex_unlock(&inode->i_mutex);
/*
 * cifs_flush - for files opened with FMODE_WRITE, write back the inode's
 * mapping with filemap_write_and_wait() and log the resulting rc.
 */
2095 * As file closes, flush all cached write data for this inode checking
2096 * for write behind errors.
2098 int cifs_flush(struct file *file, fl_owner_t id)
2100 struct inode *inode = file->f_path.dentry->d_inode;
2103 if (file->f_mode & FMODE_WRITE)
2104 rc = filemap_write_and_wait(inode->i_mapping);
2106 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_write_allocate_pages - fill @pages with @num_pages pages allocated
 * with GFP_KERNEL | __GFP_HIGHMEM.
 *
 * NOTE(review): the second loop is presumably the allocation-failure
 * cleanup, running over only the pages obtained so far - confirm.
 */
2112 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2117 for (i = 0; i < num_pages; i++) {
2118 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2121 * save number of pages we have already allocated and
2122 * return with ENOMEM error
2131 for (i = 0; i < num_pages; i++)
/*
 * get_numpages - number of pages needed for the next write chunk.
 *
 * The chunk length is min(len, wsize), rounded up to whole pages with
 * DIV_ROUND_UP(). NOTE(review): @cur_len presumably receives the chunk
 * length - confirm.
 */
2138 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2143 clen = min_t(const size_t, len, wsize);
2144 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * cifs_uncached_marshal_iov - build the kvec array for an uncached write.
 *
 * Page i is kmap()ed into iov[i + 1]; each entry covers up to PAGE_SIZE
 * bytes of the part of wdata->bytes not yet assigned to an earlier entry.
 */
2153 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2156 size_t bytes = wdata->bytes;
2158 /* marshal up the pages into iov array */
2159 for (i = 0; i < wdata->nr_pages; i++) {
2160 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2161 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2162 bytes -= iov[i + 1].iov_len;
/*
 * cifs_uncached_writev_complete - work handler run when an uncached write
 * request finishes.
 *
 * Under i_lock, advances the cached server EOF past the written range and
 * grows i_size to match if needed, then signals wdata->done. The data pages
 * are released unless the result is -EAGAIN (presumably so the request can
 * be resent - TODO confirm), and one reference on the wdata is dropped.
 */
2167 cifs_uncached_writev_complete(struct work_struct *work)
2170 struct cifs_writedata *wdata = container_of(work,
2171 struct cifs_writedata, work);
2172 struct inode *inode = wdata->cfile->dentry->d_inode;
2173 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2175 spin_lock(&inode->i_lock);
2176 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2177 if (cifsi->server_eof > inode->i_size)
2178 i_size_write(inode, cifsi->server_eof);
2179 spin_unlock(&inode->i_lock);
2181 complete(&wdata->done);
2183 if (wdata->result != -EAGAIN) {
2184 for (i = 0; i < wdata->nr_pages; i++)
2185 put_page(wdata->pages[i]);
2188 kref_put(&wdata->refcount, cifs_writedata_release);
/*
 * cifs_uncached_retry_writev - send @wdata with cifs_async_writev(),
 * looping while the result is -EAGAIN. An invalidated handle is reopened
 * with cifs_reopen_file(wdata->cfile, false) before the send.
 */
2191 /* attempt to send write to server, retry on any -EAGAIN errors */
2193 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2198 if (wdata->cfile->invalidHandle) {
2199 rc = cifs_reopen_file(wdata->cfile, false);
2203 rc = cifs_async_writev(wdata);
2204 } while (rc == -EAGAIN);
/*
 * cifs_iovec_write - uncached write of a user iovec starting at *poffset.
 *
 * After generic_write_checks(), the data is cut into chunks sized by
 * get_numpages(). For each chunk a cifs_writedata and its pages are
 * allocated, the user data is copied in page by page with
 * iov_iter_copy_from_user(), and the request is sent with
 * cifs_uncached_retry_writev(). Requests that were sent are queued on
 * wdata_list. The replies are then collected in list order with
 * wait_for_completion_killable(): successful ones add wdata->bytes to the
 * total, and -EAGAIN results are resent. *poffset advances by the bytes
 * written. The function returns that count, or rc when nothing was written.
 */
2210 cifs_iovec_write(struct file *file, const struct iovec *iov,
2211 unsigned long nr_segs, loff_t *poffset)
2213 unsigned long nr_pages, i;
2214 size_t copied, len, cur_len;
2215 ssize_t total_written = 0;
2218 struct cifsFileInfo *open_file;
2219 struct cifs_tcon *tcon;
2220 struct cifs_sb_info *cifs_sb;
2221 struct cifs_writedata *wdata, *tmp;
2222 struct list_head wdata_list;
2226 len = iov_length(iov, nr_segs);
2230 rc = generic_write_checks(file, poffset, &len, 0);
2234 INIT_LIST_HEAD(&wdata_list);
2235 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2236 open_file = file->private_data;
2237 tcon = tlink_tcon(open_file->tlink);
2240 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2241 pid = open_file->pid;
2243 pid = current->tgid;
2245 iov_iter_init(&it, iov, nr_segs, len, 0);
2249 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2250 wdata = cifs_writedata_alloc(nr_pages,
2251 cifs_uncached_writev_complete);
2257 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2264 for (i = 0; i < nr_pages; i++) {
2265 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2266 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2269 iov_iter_advance(&it, copied);
2271 cur_len = save_len - cur_len;
2273 wdata->sync_mode = WB_SYNC_ALL;
2274 wdata->nr_pages = nr_pages;
2275 wdata->offset = (__u64)offset;
2276 wdata->cfile = cifsFileInfo_get(open_file);
2278 wdata->bytes = cur_len;
2279 wdata->marshal_iov = cifs_uncached_marshal_iov;
2280 rc = cifs_uncached_retry_writev(wdata);
2282 kref_put(&wdata->refcount, cifs_writedata_release);
2286 list_add_tail(&wdata->list, &wdata_list);
2292 * If at least one write was successfully sent, then discard any rc
2293 * value from the later writes. If the other write succeeds, then
2294 * we'll end up returning whatever was written. If it fails, then
2295 * we'll get a new rc value from that.
2297 if (!list_empty(&wdata_list))
2301 * Wait for and collect replies for any successful sends in order of
2302 * increasing offset. Once an error is hit or we get a fatal signal
2303 * while waiting, then return without waiting for any more replies.
2306 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2308 /* FIXME: freezable too? */
2309 rc = wait_for_completion_killable(&wdata->done);
2312 else if (wdata->result)
2315 total_written += wdata->bytes;
2317 /* resend call if it's a retryable error */
2318 if (rc == -EAGAIN) {
2319 rc = cifs_uncached_retry_writev(wdata);
2323 list_del_init(&wdata->list);
2324 kref_put(&wdata->refcount, cifs_writedata_release);
2327 if (total_written > 0)
2328 *poffset += total_written;
2330 cifs_stats_bytes_written(tcon, total_written);
2331 return total_written ? total_written : (ssize_t)rc;
/*
 * cifs_user_writev - write via cifs_iovec_write() and flag the inode's
 * cached mapping as invalid (CIFS_I(inode)->invalid_mapping = true).
 * NOTE(review): the flag is presumably set only when bytes were written -
 * confirm.
 */
2334 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2335 unsigned long nr_segs, loff_t pos)
2338 struct inode *inode;
2340 inode = iocb->ki_filp->f_path.dentry->d_inode;
2343 * BB - optimize the way when signing is disabled. We can drop this
2344 * extra memory-to-memory copying and use iovec buffers for constructing
2348 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2350 CIFS_I(inode)->invalid_mapping = true;
/*
 * cifs_strict_writev - write entry point that goes through the page cache
 * (generic_file_aio_write()) when clientCanCacheAll is set and otherwise
 * writes directly to the server with cifs_user_writev().
 */
2357 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2358 unsigned long nr_segs, loff_t pos)
2360 struct inode *inode;
2362 inode = iocb->ki_filp->f_path.dentry->d_inode;
2364 if (CIFS_I(inode)->clientCanCacheAll)
2365 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2368 * In strict cache mode we need to write the data to the server exactly
2369 * from the pos to pos+len-1 rather than flush all affected pages
2370 * because it may cause a error with mandatory locks on these pages but
2371 * not on the region from pos to ppos+len-1.
2374 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * cifs_readdata_alloc - allocate a zeroed cifs_readdata plus space for
 * @nr_vecs kvecs in the same kzalloc() allocation.
 *
 * On success the refcount, both list heads, the completion and the work
 * item (with @complete as its handler) are initialised.
 */
2377 static struct cifs_readdata *
2378 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2380 struct cifs_readdata *rdata;
2382 rdata = kzalloc(sizeof(*rdata) +
2383 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2384 if (rdata != NULL) {
2385 kref_init(&rdata->refcount);
2386 INIT_LIST_HEAD(&rdata->list);
2387 init_completion(&rdata->done);
2388 INIT_WORK(&rdata->work, complete);
2389 INIT_LIST_HEAD(&rdata->pages);
/*
 * cifs_readdata_release - kref release callback for a cifs_readdata; drops
 * the reference held on rdata->cfile.
 */
2395 cifs_readdata_release(struct kref *refcount)
2397 struct cifs_readdata *rdata = container_of(refcount,
2398 struct cifs_readdata, refcount);
2401 cifsFileInfo_put(rdata->cfile);
/*
 * cifs_read_allocate_pages - allocate @npages pages (GFP_KERNEL |
 * __GFP_HIGHMEM) and add each to @list through page->lru. The trailing loop
 * unlinks every page from @list again; NOTE(review): presumably only on
 * allocation failure - confirm.
 */
2407 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2410 struct page *page, *tpage;
2413 for (i = 0; i < npages; i++) {
2414 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2419 list_add(&page->lru, list);
2423 list_for_each_entry_safe(page, tpage, list, lru) {
2424 list_del(&page->lru);
/*
 * cifs_uncached_readdata_release - kref release callback for uncached
 * reads: unlinks every page left on rdata->pages, then chains to
 * cifs_readdata_release().
 */
2432 cifs_uncached_readdata_release(struct kref *refcount)
2434 struct page *page, *tpage;
2435 struct cifs_readdata *rdata = container_of(refcount,
2436 struct cifs_readdata, refcount);
2438 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2439 list_del(&page->lru);
2442 cifs_readdata_release(refcount);
/*
 * cifs_retry_async_readv - issue @rdata with cifs_async_readv(), looping
 * while the result is -EAGAIN. An invalidated handle is reopened with
 * cifs_reopen_file(rdata->cfile, true) first.
 */
2446 cifs_retry_async_readv(struct cifs_readdata *rdata)
2451 if (rdata->cfile->invalidHandle) {
2452 rc = cifs_reopen_file(rdata->cfile, true);
2456 rc = cifs_async_readv(rdata);
2457 } while (rc == -EAGAIN);
/*
 * cifs_readdata_to_iov, copy loop in brief: the iov_iter is advanced to
 * rdata->offset - offset, then each page on rdata->pages contributes
 * min(remaining, PAGE_SIZE, space left in the iov) bytes through
 * memcpy_toiovecend() and is unlinked from the list.
 */
2463 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2464 * @rdata: the readdata response with list of pages holding data
2465 * @iov: vector in which we should copy the data
2466 * @nr_segs: number of segments in vector
2467 * @offset: offset into file of the first iovec
2468 * @copied: used to return the amount of data copied to the iov
2470 * This function copies data from a list of pages in a readdata response into
2471 * an array of iovecs. It will first calculate where the data should go
2472 * based on the info in the readdata and then copy the data into that spot.
2475 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2476 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2480 size_t pos = rdata->offset - offset;
2481 struct page *page, *tpage;
2482 ssize_t remaining = rdata->bytes;
2483 unsigned char *pdata;
2485 /* set up iov_iter and advance to the correct offset */
2486 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2487 iov_iter_advance(&ii, pos);
2490 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2493 /* copy a whole page or whatever's left */
2494 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2496 /* ...but limit it to whatever space is left in the iov */
2497 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2499 /* go while there's data to be copied and no errors */
2502 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2508 iov_iter_advance(&ii, copy);
2512 list_del(&page->lru);
/*
 * cifs_uncached_readv_complete - work handler for a finished uncached read.
 *
 * When the result is 0 the pages on rdata->pages are visited (the pages
 * were kmapped in that case, per the comment below). rdata->done is then
 * completed and one reference on the rdata is dropped.
 */
2520 cifs_uncached_readv_complete(struct work_struct *work)
2522 struct cifs_readdata *rdata = container_of(work,
2523 struct cifs_readdata, work);
2525 /* if the result is non-zero then the pages weren't kmapped */
2526 if (rdata->result == 0) {
2529 list_for_each_entry(page, &rdata->pages, lru)
2533 complete(&rdata->done);
2534 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * cifs_uncached_read_marshal_iov - map pages to receive @remaining bytes.
 *
 * Pages on rdata->pages are taken in order. A full page gets an iov entry
 * of PAGE_SIZE; a final partial page gets an entry of @remaining bytes with
 * the rest of the page zeroed; pages beyond the data are unlinked from the
 * list.
 */
2538 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2539 unsigned int remaining)
2542 struct page *page, *tpage;
2545 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2546 if (remaining >= PAGE_SIZE) {
2547 /* enough data to fill the page */
2548 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2549 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2550 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2551 rdata->nr_iov, page->index,
2552 rdata->iov[rdata->nr_iov].iov_base,
2553 rdata->iov[rdata->nr_iov].iov_len);
2556 remaining -= PAGE_SIZE;
2557 } else if (remaining > 0) {
2558 /* enough for partial page, fill and zero the rest */
2559 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2560 rdata->iov[rdata->nr_iov].iov_len = remaining;
2561 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2562 rdata->nr_iov, page->index,
2563 rdata->iov[rdata->nr_iov].iov_base,
2564 rdata->iov[rdata->nr_iov].iov_len);
2565 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2566 '\0', PAGE_SIZE - remaining);
2571 /* no need to hold page hostage */
2572 list_del(&page->lru);
/*
 * cifs_iovec_read - uncached read into a user iovec starting at *poffset.
 *
 * The request is cut into chunks of at most cifs_sb->rsize bytes. Each
 * chunk gets a cifs_readdata plus enough pages, is issued with
 * cifs_retry_async_readv() and, once sent, is queued on rdata_list. The
 * replies are then handled in list order (increasing offset): after
 * wait_for_completion_killable() a successful reply is copied out with
 * cifs_readdata_to_iov() and counted, and an -EAGAIN result is reissued.
 * Byte statistics and *poffset are advanced by total_read, which is also
 * the return value unless it is zero, in which case rc is returned.
 */
2581 cifs_iovec_read(struct file *file, const struct iovec *iov,
2582 unsigned long nr_segs, loff_t *poffset)
2585 size_t len, cur_len;
2586 ssize_t total_read = 0;
2587 loff_t offset = *poffset;
2588 unsigned int npages;
2589 struct cifs_sb_info *cifs_sb;
2590 struct cifs_tcon *tcon;
2591 struct cifsFileInfo *open_file;
2592 struct cifs_readdata *rdata, *tmp;
2593 struct list_head rdata_list;
2599 len = iov_length(iov, nr_segs);
2603 INIT_LIST_HEAD(&rdata_list);
2604 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2605 open_file = file->private_data;
2606 tcon = tlink_tcon(open_file->tlink);
2608 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2609 pid = open_file->pid;
2611 pid = current->tgid;
2613 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2614 cFYI(1, "attempting read on write only file instance");
2617 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2618 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2620 /* allocate a readdata struct */
2621 rdata = cifs_readdata_alloc(npages,
2622 cifs_uncached_readv_complete);
2628 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2632 rdata->cfile = cifsFileInfo_get(open_file);
2633 rdata->offset = offset;
2634 rdata->bytes = cur_len;
2636 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2638 rc = cifs_retry_async_readv(rdata);
2641 kref_put(&rdata->refcount,
2642 cifs_uncached_readdata_release);
2646 list_add_tail(&rdata->list, &rdata_list);
2651 /* if at least one read request send succeeded, then reset rc */
2652 if (!list_empty(&rdata_list))
2655 /* the loop below should proceed in the order of increasing offsets */
2657 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2661 /* FIXME: freezable sleep too? */
2662 rc = wait_for_completion_killable(&rdata->done);
2665 else if (rdata->result)
2668 rc = cifs_readdata_to_iov(rdata, iov,
2671 total_read += copied;
2674 /* resend call if it's a retryable error */
2675 if (rc == -EAGAIN) {
2676 rc = cifs_retry_async_readv(rdata);
2680 list_del_init(&rdata->list);
2681 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2684 cifs_stats_bytes_read(tcon, total_read);
2685 *poffset += total_read;
2687 return total_read ? total_read : rc;
/*
 * cifs_user_readv - read entry point that forwards to cifs_iovec_read()
 * with @pos as the starting offset.
 */
2690 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2691 unsigned long nr_segs, loff_t pos)
2695 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * cifs_strict_readv - read entry point that uses the page cache
 * (generic_file_aio_read()) while clientCanCacheRead is set and otherwise
 * reads from the server with cifs_user_readv().
 */
2702 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2703 unsigned long nr_segs, loff_t pos)
2705 struct inode *inode;
2707 inode = iocb->ki_filp->f_path.dentry->d_inode;
2709 if (CIFS_I(inode)->clientCanCacheRead)
2710 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2713 * In strict cache mode we need to read from the server all the time
2714 * if we don't have level II oplock because the server can delay mtime
2715 * change - so we can't make a decision about inode invalidating.
2716 * And we can also fail with pagereading if there are mandatory locks
2717 * on pages affected by this read but not on the region from pos to
2721 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * cifs_read - synchronous read into a kernel buffer.
 *
 * Reads at most min(cifs_sb->rsize, CIFSMaxBufSize) bytes per request with
 * CIFSSMBRead(), advancing through @read_data until @read_size bytes have
 * been covered. Servers without CAP_LARGE_FILES get a further size cap (see
 * the Windows ME/9x remark below). A request is repeated while it returns
 * -EAGAIN, reopening an invalidated handle with cifs_reopen_file(open_file,
 * true) first. The pid is open_file->pid or current->tgid depending on
 * CIFS_MOUNT_RWPIDFORWARD. A NULL file->private_data takes an early branch
 * (presumably an error return - TODO confirm).
 */
2724 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2728 unsigned int bytes_read = 0;
2729 unsigned int total_read;
2730 unsigned int current_read_size;
2732 struct cifs_sb_info *cifs_sb;
2733 struct cifs_tcon *pTcon;
2735 char *current_offset;
2736 struct cifsFileInfo *open_file;
2737 struct cifs_io_parms io_parms;
2738 int buf_type = CIFS_NO_BUFFER;
2742 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2744 /* FIXME: set up handlers for larger reads and/or convert to async */
2745 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2747 if (file->private_data == NULL) {
2752 open_file = file->private_data;
2753 pTcon = tlink_tcon(open_file->tlink);
2755 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2756 pid = open_file->pid;
2758 pid = current->tgid;
2760 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2761 cFYI(1, "attempting read on write only file instance");
2763 for (total_read = 0, current_offset = read_data;
2764 read_size > total_read;
2765 total_read += bytes_read, current_offset += bytes_read) {
2766 current_read_size = min_t(uint, read_size - total_read, rsize);
2768 /* For windows me and 9x we do not want to request more
2769 than it negotiated since it will refuse the read then */
2771 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
2772 current_read_size = min_t(uint, current_read_size,
2776 while (rc == -EAGAIN) {
2777 if (open_file->invalidHandle) {
2778 rc = cifs_reopen_file(open_file, true);
2782 io_parms.netfid = open_file->netfid;
2784 io_parms.tcon = pTcon;
2785 io_parms.offset = *poffset;
2786 io_parms.length = current_read_size;
2787 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2788 ¤t_offset, &buf_type);
2790 if (rc || (bytes_read == 0)) {
2798 cifs_stats_bytes_read(pTcon, total_read);
2799 *poffset += bytes_read;
/*
 * page_mkwrite handler installed through cifs_file_vm_ops.  It takes the
 * faulting page from vmf->page and returns VM_FAULT_LOCKED.
 * NOTE(review): the statement that locks the page is not visible in this
 * listing - confirm the page is locked before VM_FAULT_LOCKED is returned.
 */
2807 * If the page is mmap'ed into a process' page tables, then we need to make
2808 * sure that it doesn't change while being written back.
2811 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2813 struct page *page = vmf->page;
2816 return VM_FAULT_LOCKED;
/*
 * VM operations installed on the vma by cifs_file_strict_mmap() and
 * cifs_file_mmap(): page faults are served by the generic filemap_fault,
 * and page_mkwrite notifications go to cifs_page_mkwrite.
 */
2819 static struct vm_operations_struct cifs_file_vm_ops = {
2820 .fault = filemap_fault,
2821 .page_mkwrite = cifs_page_mkwrite,
/*
 * mmap entry point for the "strict" variant (per the function name).
 * When clientCanCacheRead is not set for the inode, the mapping is first
 * invalidated with cifs_invalidate_mapping().  generic_file_mmap() is then
 * called and cifs_file_vm_ops is installed on the vma.
 * NOTE(review): the checks on rc and the return statement are not visible
 * in this listing.
 */
2824 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2827 struct inode *inode = file->f_path.dentry->d_inode;
2831 if (!CIFS_I(inode)->clientCanCacheRead) {
2832 rc = cifs_invalidate_mapping(inode);
2837 rc = generic_file_mmap(file, vma);
2839 vma->vm_ops = &cifs_file_vm_ops;
/*
 * mmap entry point: revalidate the file with cifs_revalidate_file() (a
 * failure is reported through cFYI together with the error code), then call
 * generic_file_mmap() and install cifs_file_vm_ops on the vma.
 * NOTE(review): the checks on rc and the return statement are not visible
 * in this listing.
 */
2844 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2849 rc = cifs_revalidate_file(file);
2851 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2855 rc = generic_file_mmap(file, vma);
2857 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Work handler passed to cifs_readdata_alloc() by cifs_readpages(); it runs
 * once per cifs_readdata.
 *
 * The cifs_readdata is recovered from the embedded work item.  Every page
 * on rdata->pages is unlinked from that list and added to the file LRU.
 * When rdata->result is 0, flush_dcache_page() and SetPageUptodate() are
 * applied and the page is handed to cifs_readpage_to_fscache().  The page
 * reference is then released.  Finally one reference on rdata is dropped,
 * with cifs_readdata_release as the release callback.
 * NOTE(review): some statements inside the loop (between the visible lines)
 * are missing from this listing.
 */
2863 cifs_readv_complete(struct work_struct *work)
2865 struct cifs_readdata *rdata = container_of(work,
2866 struct cifs_readdata, work);
2867 struct page *page, *tpage;
2869 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2870 list_del(&page->lru);
2871 lru_cache_add_file(page);
2873 if (rdata->result == 0) {
2875 flush_dcache_page(page);
2876 SetPageUptodate(page);
2881 if (rdata->result == 0)
2882 cifs_readpage_to_fscache(rdata->mapping->host, page);
2884 page_cache_release(page);
2886 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Build the iovec array in rdata->iov from the pages on rdata->pages, given
 * that "remaining" bytes of read data are expected.  cifs_readpages()
 * stores this function in rdata->marshal_iov.
 *
 * Walking the list in order, each page falls into one of four cases:
 *  - at least PAGE_CACHE_SIZE bytes remain: the page is kmapped and gets a
 *    full-page iovec; len grows and remaining shrinks by PAGE_CACHE_SIZE;
 *  - fewer bytes remain (but more than zero): the page is kmapped, gets an
 *    iovec of "remaining" bytes, and the rest of the page is zeroed;
 *  - no data remains and page->index is greater than the index computed
 *    from server_eof: the page is zero-filled, put on the file LRU, marked
 *    uptodate and released;
 *  - otherwise the page is put on the file LRU and released unfilled.
 *
 * NOTE(review): the lines that advance rdata->nr_iov and the return
 * statement are not visible in this listing - confirm against the full
 * function.
 */
2890 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2893 struct page *page, *tpage;
2897 /* determine the eof that the server (probably) has */
2898 eof = CIFS_I(rdata->mapping->host)->server_eof;
2899 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2900 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2903 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2904 if (remaining >= PAGE_CACHE_SIZE) {
2905 /* enough data to fill the page */
2906 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2907 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2908 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2909 rdata->nr_iov, page->index,
2910 rdata->iov[rdata->nr_iov].iov_base,
2911 rdata->iov[rdata->nr_iov].iov_len);
2913 len += PAGE_CACHE_SIZE;
2914 remaining -= PAGE_CACHE_SIZE;
2915 } else if (remaining > 0) {
2916 /* enough for partial page, fill and zero the rest */
2917 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2918 rdata->iov[rdata->nr_iov].iov_len = remaining;
2919 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2920 rdata->nr_iov, page->index,
2921 rdata->iov[rdata->nr_iov].iov_base,
2922 rdata->iov[rdata->nr_iov].iov_len);
2923 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2924 '\0', PAGE_CACHE_SIZE - remaining);
2928 } else if (page->index > eof_index) {
2930 * The VFS will not try to do readahead past the
2931 * i_size, but it's possible that we have outstanding
2932 * writes with gaps in the middle and the i_size hasn't
2933 * caught up yet. Populate those with zeroed out pages
2934 * to prevent the VFS from repeatedly attempting to
2935 * fill them until the writes are flushed.
2937 zero_user(page, 0, PAGE_CACHE_SIZE);
2938 list_del(&page->lru);
2939 lru_cache_add_file(page);
2940 flush_dcache_page(page);
2941 SetPageUptodate(page);
2943 page_cache_release(page);
2945 /* no need to hold page hostage */
2946 list_del(&page->lru);
2947 lru_cache_add_file(page);
2949 page_cache_release(page);
/*
 * readpages address-space operation: group the pages on page_list into
 * runs of consecutive indexes and issue one asynchronous read per run.
 *
 *  - Bails out early when rsize is smaller than PAGE_CACHE_SIZE.
 *  - cifs_readpages_from_fscache() is tried first.
 *  - For each run: the page at the tail of page_list is locked, added to
 *    the page cache and moved to a private list; following pages are added
 *    while their index is the expected next one and the run still fits in
 *    rsize.  A cifs_readdata is allocated with cifs_readv_complete as its
 *    work handler, filled in (file reference, mapping, offset, bytes,
 *    marshal_iov) and submitted with cifs_retry_async_readv().
 *  - If the allocation or the submission fails, the pages of the run are
 *    put on the file LRU and released.
 *
 * NOTE(review): the return statements, the checks on rc and the nr_pages
 * accounting are not visible in this listing - confirm against the full
 * function.
 */
2956 static int cifs_readpages(struct file *file, struct address_space *mapping,
2957 struct list_head *page_list, unsigned num_pages)
2960 struct list_head tmplist;
2961 struct cifsFileInfo *open_file = file->private_data;
2962 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2963 unsigned int rsize = cifs_sb->rsize;
2967 * Give up immediately if rsize is too small to read an entire page.
2968 * The VFS will fall back to readpage. We should never reach this
2969 * point however since we set ra_pages to 0 when the rsize is smaller
2970 * than a cache page.
2972 if (unlikely(rsize < PAGE_CACHE_SIZE))
2976 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2977 * immediately if the cookie is negative
2979 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
2984 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2985 pid = open_file->pid;
2987 pid = current->tgid;
2990 INIT_LIST_HEAD(&tmplist);
2992 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
2993 mapping, num_pages);
2996 * Start with the page at end of list and move it to private
2997 * list. Do the same with any following pages until we hit
2998 * the rsize limit, hit an index discontinuity, or run out of
2999 * pages. Issue the async read and then start the loop again
3000 * until the list is empty.
3002 * Note that list order is important. The page_list is in
3003 * the order of declining indexes. When we put the pages in
3004 * the rdata->pages, then we want them in increasing order.
3006 while (!list_empty(page_list)) {
3007 unsigned int bytes = PAGE_CACHE_SIZE;
/* same type as page->index, so the comparison below cannot be defeated
   by truncation of a large index */
3008 pgoff_t expected_index;
3009 unsigned int nr_pages = 1;
3011 struct page *page, *tpage;
3012 struct cifs_readdata *rdata;
3014 page = list_entry(page_list->prev, struct page, lru);
3017 * Lock the page and put it in the cache. Since no one else
3018 * should have access to this page, we're safe to simply set
3019 * PG_locked without checking it first.
3021 __set_page_locked(page);
3022 rc = add_to_page_cache_locked(page, mapping,
3023 page->index, GFP_KERNEL);
3025 /* give up if we can't stick it in the cache */
3027 __clear_page_locked(page);
3031 /* move first page to the tmplist */
3032 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3033 list_move_tail(&page->lru, &tmplist);
3035 /* now try and add more pages onto the request */
3036 expected_index = page->index + 1;
3037 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3038 /* discontinuity ? */
3039 if (page->index != expected_index)
3042 /* would this page push the read over the rsize? */
3043 if (bytes + PAGE_CACHE_SIZE > rsize)
3046 __set_page_locked(page);
3047 if (add_to_page_cache_locked(page, mapping,
3048 page->index, GFP_KERNEL)) {
3049 __clear_page_locked(page);
3052 list_move_tail(&page->lru, &tmplist);
3053 bytes += PAGE_CACHE_SIZE;
3058 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3060 /* best to give up if we're out of mem */
3061 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3062 list_del(&page->lru);
3063 lru_cache_add_file(page);
3065 page_cache_release(page);
/* NOTE(review): the lock is taken and dropped back to back (the two lines
   are consecutively numbered, so nothing sits between them); it protects
   nothing visible here - confirm whether it is a leftover */
3071 spin_lock(&cifs_file_list_lock);
3072 spin_unlock(&cifs_file_list_lock);
3073 rdata->cfile = cifsFileInfo_get(open_file);
3074 rdata->mapping = mapping;
3075 rdata->offset = offset;
3076 rdata->bytes = bytes;
3078 rdata->marshal_iov = cifs_readpages_marshal_iov;
3079 list_splice_init(&tmplist, &rdata->pages);
3081 rc = cifs_retry_async_readv(rdata);
/* submission failed: give the pages of this run back */
3083 list_for_each_entry_safe(page, tpage, &rdata->pages,
3085 list_del(&page->lru);
3086 lru_cache_add_file(page);
3088 page_cache_release(page);
3090 kref_put(&rdata->refcount, cifs_readdata_release);
3094 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Fill one page with file data; called by cifs_readpage() and
 * cifs_write_begin().
 *
 * cifs_readpage_from_fscache() is consulted first.  The page is then pinned
 * with page_cache_get() and kmapped, and cifs_read() is asked for
 * PAGE_CACHE_SIZE bytes at *poffset.  The inode atime is set to the current
 * filesystem time, the part of the page beyond the bytes read is zeroed,
 * the page is flushed and marked uptodate, and a copy is pushed to fscache.
 * The reference taken on the page is dropped at the end.
 * NOTE(review): the branches taken on rc (cache hit, read error), the
 * matching kunmap and the return statement are not visible in this listing.
 */
3100 static int cifs_readpage_worker(struct file *file, struct page *page,
3106 /* Is the page cached? */
3107 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3111 page_cache_get(page);
3112 read_data = kmap(page);
3113 /* for reads over a certain size could initiate async read ahead */
3115 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3120 cFYI(1, "Bytes read %d", rc);
3122 file->f_path.dentry->d_inode->i_atime =
3123 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* short read: clear the tail so no stale bytes are exposed */
3125 if (PAGE_CACHE_SIZE > rc)
3126 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3128 flush_dcache_page(page);
3129 SetPageUptodate(page);
3131 /* send this page to the cache */
3132 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3138 page_cache_release(page);
/*
 * readpage address-space operation: compute the byte offset of the page
 * from its index and let cifs_readpage_worker() fill it.
 * NOTE(review): the handling of a NULL private_data, the page unlock and
 * the return statement are not visible in this listing.
 */
3144 static int cifs_readpage(struct file *file, struct page *page)
3146 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3152 if (file->private_data == NULL) {
/* print the full 64-bit offset; a cast to int would truncate offsets
   above 2 GiB.  No trailing newline, like the other cFYI calls here. */
3158 cFYI(1, "readpage %p at offset %lld 0x%llx",
3159 page, (long long)offset, (unsigned long long)offset);
3161 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Scan the open-file list of the inode, under cifs_file_list_lock, for an
 * open instance whose f_flags map (through OPEN_FMODE) to FMODE_WRITE.
 * The lock is dropped both when a match is found and after an unsuccessful
 * scan.
 * NOTE(review): the return statements are not visible in this listing;
 * is_size_safe_to_change() treats a nonzero result as "open for write".
 */
3169 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3171 struct cifsFileInfo *open_file;
3173 spin_lock(&cifs_file_list_lock);
3174 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3175 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3176 spin_unlock(&cifs_file_list_lock);
3180 spin_unlock(&cifs_file_list_lock);
/*
 * Decide whether the cached size of the inode may be replaced by
 * end_of_file (a size reported by the server).
 *
 * When the inode is open for write (is_inode_writable), two cases are
 * examined: a mount with CIFS_MOUNT_DIRECT_IO, and otherwise a comparison
 * of the current i_size with end_of_file.
 * NOTE(review): the return statements are not visible in this listing, so
 * the result of each case has to be confirmed against the full function.
 */
3184 /* We do not want to update the file size from server for inodes
3185 open for write - to avoid races with writepage extending
3186 the file - in the future we could consider allowing
3187 refreshing the inode only on increases in the file size
3188 but this is tricky to do without racing with writebehind
3189 page caching in the current Linux kernel design */
3190 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3195 if (is_inode_writable(cifsInode)) {
3196 /* This inode is open for write at least once */
3197 struct cifs_sb_info *cifs_sb;
3199 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3200 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3201 /* since no page cache to corrupt on directio
3202 we can change size safely */
3206 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * write_begin address-space operation: obtain the page cache page that
 * covers pos and decide whether its current contents must be read from the
 * server before "len" bytes are copied in.
 *
 * No read is issued when the page is already uptodate, when the write
 * covers a whole page, or - with clientCanCacheRead set - when the page
 * lies at or beyond i_size, or starts at the page boundary and reaches
 * i_size.  In the latter case the parts of the page that are not written
 * are zeroed and PageChecked is set.  Otherwise, unless the file was opened
 * write-only, the page is filled through cifs_readpage_worker(); its result
 * is deliberately ignored.
 *
 * NOTE(review): the return statements, the NULL check on the page and the
 * store to *pagep are not visible in this listing.
 */
3214 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3215 loff_t pos, unsigned len, unsigned flags,
3216 struct page **pagep, void **fsdata)
3218 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3219 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
/* derive the page start from index with an explicit 64-bit cast, as
   cifs_readpage() does; masking pos with PAGE_MASK would drop the upper
   bits of pos wherever that mask is narrower than loff_t (presumably
   32-bit builds - TODO confirm PAGE_MASK type for the target arch) */
3220 loff_t page_start = (loff_t)index << PAGE_CACHE_SHIFT;
3225 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3227 page = grab_cache_page_write_begin(mapping, index, flags);
3233 if (PageUptodate(page))
3237 * If we write a full page it will be up to date, no need to read from
3238 * the server. If the write is short, we'll end up doing a sync write
3241 if (len == PAGE_CACHE_SIZE)
3245 * optimize away the read when we have an oplock, and we're not
3246 * expecting to use any of the data we'd be reading in. That
3247 * is, when the page lies beyond the EOF, or straddles the EOF
3248 * and the write will cover all of the existing data.
3250 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3251 i_size = i_size_read(mapping->host);
3252 if (page_start >= i_size ||
3253 (offset == 0 && (pos + len) >= i_size)) {
3254 zero_user_segments(page, 0, offset,
3258 * PageChecked means that the parts of the page
3259 * to which we're not writing are considered up
3260 * to date. Once the data is copied to the
3261 * page, it can be set uptodate.
3263 SetPageChecked(page);
3268 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3270 * might as well read a page, it is fast enough. If we get
3271 * an error, we don't need to return it. cifs_write_end will
3272 * do a sync write instead since PG_uptodate isn't set.
3274 cifs_readpage_worker(file, page, &page_start);
3276 /* we could try using another file handle if there is one -
3277 but how would we lock it to prevent close of that handle
3278 racing with this read? In any case
3279 this will be written out by write_end so is fine */
/*
 * releasepage address-space operation.  PagePrivate(page) is tested first
 * (the statement executed in that case is not visible in this listing);
 * otherwise the decision is delegated to cifs_fscache_release_page().
 */
3286 static int cifs_release_page(struct page *page, gfp_t gfp)
3288 if (PagePrivate(page))
3291 return cifs_fscache_release_page(page, gfp);
/*
 * invalidatepage address-space operation: forwards the page, together with
 * the inode that owns its mapping, to cifs_fscache_invalidate_page().
 * NOTE(review): a condition on offset may guard the call; it is not visible
 * in this listing.
 */
3294 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3296 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3299 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * launder_page address-space operation.  If the page is still dirty
 * (clear_page_dirty_for_io succeeds) it is written back through
 * cifs_writepage_locked() with a WB_SYNC_ALL writeback control limited to
 * the byte range of this page.  The fscache copy of the page is then
 * invalidated.
 * NOTE(review): the declaration of rc and the return statement are not
 * visible in this listing.
 */
3302 static int cifs_launder_page(struct page *page)
3305 loff_t range_start = page_offset(page);
3306 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3307 struct writeback_control wbc = {
3308 .sync_mode = WB_SYNC_ALL,
3310 .range_start = range_start,
3311 .range_end = range_end,
3314 cFYI(1, "Launder page: %p", page);
3316 if (clear_page_dirty_for_io(page))
3317 rc = cifs_writepage_locked(page, &wbc);
3319 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Work handler for an oplock break on an open file (cfile is recovered from
 * the embedded work item).
 *
 * For a regular file: local leases are broken (O_RDONLY when
 * clientCanCacheRead is set, O_WRONLY on the other visible branch), dirty
 * pages are started on writeback, and when read caching is no longer
 * allowed the writeback is waited for, its result recorded on the mapping
 * and the cached pages of the inode invalidated.  Byte-range locks are then
 * pushed with cifs_push_locks().  Unless the break was cancelled, an oplock
 * release is sent with CIFSSMBLock(); its last argument is 1 when
 * clientCanCacheRead is set and 0 otherwise.
 *
 * NOTE(review): cinode is computed from inode before the NULL test on
 * inode and is used again after that block - confirm inode cannot be NULL
 * here.  Several short lines (else, closing braces, checks on rc) are
 * missing from this listing.
 */
3323 void cifs_oplock_break(struct work_struct *work)
3325 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3327 struct inode *inode = cfile->dentry->d_inode;
3328 struct cifsInodeInfo *cinode = CIFS_I(inode);
3331 if (inode && S_ISREG(inode->i_mode)) {
3332 if (cinode->clientCanCacheRead)
3333 break_lease(inode, O_RDONLY);
3335 break_lease(inode, O_WRONLY);
3336 rc = filemap_fdatawrite(inode->i_mapping);
3337 if (cinode->clientCanCacheRead == 0) {
3338 rc = filemap_fdatawait(inode->i_mapping);
3339 mapping_set_error(inode->i_mapping, rc);
3340 invalidate_remote_inode(inode);
3342 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3345 rc = cifs_push_locks(cfile);
3347 cERROR(1, "Push locks rc = %d", rc);
3350 * releasing stale oplock after recent reconnect of smb session using
3351 * a now incorrect file handle is not a data integrity issue but do
3352 * not bother sending an oplock release if session to server still is
3353 * disconnected since oplock already released by the server
3355 if (!cfile->oplock_break_cancelled) {
3356 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
3357 current->tgid, 0, 0, 0, 0,
3358 LOCKING_ANDX_OPLOCK_RELEASE, false,
3359 cinode->clientCanCacheRead ? 1 : 0);
3360 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Address-space operations table that includes the multi-page read path
 * (cifs_readpages) next to the single-page one (cifs_readpage).  Dirtying
 * uses the generic __set_page_dirty_nobuffers helper; every other entry
 * points at a cifs_* function.
 */
3364 const struct address_space_operations cifs_addr_ops = {
3365 .readpage = cifs_readpage,
3366 .readpages = cifs_readpages,
3367 .writepage = cifs_writepage,
3368 .writepages = cifs_writepages,
3369 .write_begin = cifs_write_begin,
3370 .write_end = cifs_write_end,
3371 .set_page_dirty = __set_page_dirty_nobuffers,
3372 .releasepage = cifs_release_page,
3373 .invalidatepage = cifs_invalidate_page,
3374 .launder_page = cifs_launder_page,
/*
 * Variant of cifs_addr_ops without a .readpages entry; all other entries
 * are the same functions as in cifs_addr_ops.
 */
3378 * cifs_readpages requires the server to support a buffer large enough to
3379 * contain the header plus one complete page of data. Otherwise, we need
3380 * to leave cifs_readpages out of the address space operations.
3382 const struct address_space_operations cifs_addr_ops_smallbuf = {
3383 .readpage = cifs_readpage,
3384 .writepage = cifs_writepage,
3385 .writepages = cifs_writepages,
3386 .write_begin = cifs_write_begin,
3387 .write_end = cifs_write_end,
3388 .set_page_dirty = __set_page_dirty_nobuffers,
3389 .releasepage = cifs_release_page,
3390 .invalidatepage = cifs_invalidate_page,
3391 .launder_page = cifs_launder_page,