int idle;
};
+ static int __btrfs_start_workers(struct btrfs_workers *workers);
+
/*
* btrfs_start_workers uses kthread_run, which can block waiting for memory
* for a very long time. It will actually throttle on page writeback,
{
struct worker_start *start;
start = container_of(work, struct worker_start, work);
- btrfs_start_workers(start->queue, 1);
+ __btrfs_start_workers(start->queue);
kfree(start);
}
- static int start_new_worker(struct btrfs_workers *queue)
- {
- struct worker_start *start;
- int ret;
-
- start = kzalloc(sizeof(*start), GFP_NOFS);
- if (!start)
- return -ENOMEM;
-
- start->work.func = start_new_worker_func;
- start->queue = queue;
- ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
- if (ret)
- kfree(start);
- return ret;
- }
-
/*
* helper function to move a thread onto the idle list after it
* has finished some requests.
static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
struct btrfs_workers *workers = worker->workers;
+ struct worker_start *start; /* request later queued on workers->atomic_worker_start */
unsigned long flags;
rmb();
if (!workers->atomic_start_pending)
return;
+ start = kzalloc(sizeof(*start), GFP_NOFS); /* set up before workers->lock is taken */
+ if (!start)
+ return; /* no start request is queued if the allocation fails */
+
+ start->work.func = start_new_worker_func;
+ start->queue = workers;
+
spin_lock_irqsave(&workers->lock, flags);
if (!workers->atomic_start_pending)
goto out;
workers->num_workers_starting += 1;
spin_unlock_irqrestore(&workers->lock, flags);
- start_new_worker(workers);
+ btrfs_queue_worker(workers->atomic_worker_start, &start->work); /* start_new_worker_func() kfree()s start */
return;
out:
+ kfree(start); /* pending flag was cleared before we got the lock; request unused */
spin_unlock_irqrestore(&workers->lock, flags);
}
run_ordered_completions(worker->workers, work);
check_pending_worker_creates(worker);
-
+ cond_resched();
}
spin_lock_irq(&worker->lock);
* starts new worker threads. This does not enforce the max worker
* count in case you need to temporarily go past it.
*/
- static int __btrfs_start_workers(struct btrfs_workers *workers,
- int num_workers)
+ static int __btrfs_start_workers(struct btrfs_workers *workers) /* starts one worker per call */
{
struct btrfs_worker_thread *worker;
int ret = 0;
- int i;
- for (i = 0; i < num_workers; i++) {
- worker = kzalloc(sizeof(*worker), GFP_NOFS);
- if (!worker) {
- ret = -ENOMEM;
- goto fail;
- }
+ worker = kzalloc(sizeof(*worker), GFP_NOFS);
+ if (!worker) {
+ ret = -ENOMEM;
+ goto fail;
+ }
- INIT_LIST_HEAD(&worker->pending);
- INIT_LIST_HEAD(&worker->prio_pending);
- INIT_LIST_HEAD(&worker->worker_list);
- spin_lock_init(&worker->lock);
-
- atomic_set(&worker->num_pending, 0);
- atomic_set(&worker->refs, 1);
- worker->workers = workers;
- worker->task = kthread_run(worker_loop, worker,
- "btrfs-%s-%d", workers->name,
- workers->num_workers + i);
- if (IS_ERR(worker->task)) {
- ret = PTR_ERR(worker->task);
- kfree(worker);
- goto fail;
- }
- spin_lock_irq(&workers->lock);
- list_add_tail(&worker->worker_list, &workers->idle_list);
- worker->idle = 1;
- workers->num_workers++;
- workers->num_workers_starting--;
- WARN_ON(workers->num_workers_starting < 0);
- spin_unlock_irq(&workers->lock);
+ INIT_LIST_HEAD(&worker->pending);
+ INIT_LIST_HEAD(&worker->prio_pending);
+ INIT_LIST_HEAD(&worker->worker_list);
+ spin_lock_init(&worker->lock);
+
+ atomic_set(&worker->num_pending, 0);
+ atomic_set(&worker->refs, 1);
+ worker->workers = workers;
+ worker->task = kthread_run(worker_loop, worker,
+ "btrfs-%s-%d", workers->name,
+ workers->num_workers + 1); /* thread-name suffix; num_workers is read without workers->lock here */
+ if (IS_ERR(worker->task)) {
+ ret = PTR_ERR(worker->task);
+ kfree(worker);
+ goto fail;
}
+ spin_lock_irq(&workers->lock);
+ list_add_tail(&worker->worker_list, &workers->idle_list); /* new workers begin on the idle list */
+ worker->idle = 1;
+ workers->num_workers++;
+ workers->num_workers_starting--; /* this worker is no longer "starting" */
+ WARN_ON(workers->num_workers_starting < 0);
+ spin_unlock_irq(&workers->lock);
+
return 0;
fail:
- btrfs_stop_workers(workers);
+ spin_lock_irq(&workers->lock);
+ workers->num_workers_starting--; /* failed start: drop the in-flight count the caller added */
+ spin_unlock_irq(&workers->lock);
return ret;
}
- int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+ int btrfs_start_workers(struct btrfs_workers *workers) /* starts a single worker thread */
{
spin_lock_irq(&workers->lock);
- workers->num_workers_starting += num_workers;
+ workers->num_workers_starting++; /* in-flight count; __btrfs_start_workers() decrements it on success and failure */
spin_unlock_irq(&workers->lock);
- return __btrfs_start_workers(workers, num_workers);
+ return __btrfs_start_workers(workers); /* 0 on success, -ENOMEM or the kthread_run() error otherwise */
}
/*
struct btrfs_worker_thread *worker;
unsigned long flags;
struct list_head *fallback;
+ int ret;
again:
spin_lock_irqsave(&workers->lock, flags);
workers->num_workers_starting++;
spin_unlock_irqrestore(&workers->lock, flags);
/* we're below the limit, start another worker */
- __btrfs_start_workers(workers, 1);
+ ret = __btrfs_start_workers(workers);
+ if (ret)
+ goto fallback;
goto again;
}
}
/*
* places a struct btrfs_work into the pending queue of one of the kthreads
*/
- int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+ void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
struct btrfs_worker_thread *worker;
unsigned long flags;
/* don't requeue something already on a list */
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
- goto out;
+ return;
worker = find_worker(workers);
if (workers->ordered) {
if (wake)
wake_up_process(worker->task);
spin_unlock_irqrestore(&worker->lock, flags);
-
- out:
- return 0;
}
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+ #include <linux/mount.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- BUG_ON(ret);
+ BUG_ON(ret && ret != -EEXIST);
}
/* insert an orphan item to track subvolume contains orphan files */
if (ret && ret != -ESTALE)
goto out;
+ if (ret == -ESTALE && root == root->fs_info->tree_root) {
+ struct btrfs_root *dead_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ int is_dead_root = 0;
+
+ /*
+ * this is an orphan in the tree root. Currently these
+ * could come from 2 sources:
+ * a) a snapshot deletion in progress
+ * b) a free space cache inode
+ * We need to distinguish those two, as the snapshot
+ * orphan must not get deleted.
+ * find_dead_roots already ran before us, so if this
+ * is a snapshot deletion, we should find the root
+ * in the dead_roots list
+ */
+ spin_lock(&fs_info->trans_lock);
+ list_for_each_entry(dead_root, &fs_info->dead_roots,
+ root_list) {
+ if (dead_root->root_key.objectid ==
+ found_key.objectid) {
+ is_dead_root = 1;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->trans_lock);
+ if (is_dead_root) {
+ /* prevent this orphan from being found again */
+ key.offset = found_key.objectid - 1;
+ continue;
+ }
+ }
/*
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
continue;
}
nr_truncate++;
+ /*
+ * Need to hold the i_mutex for reservation purposes. Not a
+ * huge deal here, but there is a WARN_ON in
+ * btrfs_delalloc_reserve_space to catch offenders.
+ */
+ mutex_lock(&inode->i_mutex);
ret = btrfs_truncate(inode);
+ mutex_unlock(&inode->i_mutex);
} else {
nr_unlink++;
}
u64 hint_byte = 0;
hole_size = last_byte - cur_offset;
- trans = btrfs_start_transaction(root, 2);
+ trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
break;
cur_offset + hole_size,
&hint_byte, 1);
if (err) {
+ btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
break;
}
0, hole_size, 0, hole_size,
0, 0, 0);
if (err) {
+ btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
break;
}
btrfs_drop_extent_cache(inode, hole_start,
last_byte - 1, 0);
+ btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
}
free_extent_map(em);
static int btrfs_setsize(struct inode *inode, loff_t newsize)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode);
int ret;
return 0;
if (newsize > oldsize) {
- i_size_write(inode, newsize);
- btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
truncate_pagecache(inode, oldsize, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize);
- if (ret) {
- btrfs_setsize(inode, oldsize);
+ if (ret)
return ret;
- }
- ret = btrfs_dirty_inode(inode);
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ i_size_write(inode, newsize);
+ btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+ ret = btrfs_update_inode(trans, root, inode);
-
+ btrfs_end_transaction_throttle(trans, root);
} else {
/*
if (attr->ia_valid) {
setattr_copy(inode, attr);
- mark_inode_dirty(inode);
+ err = btrfs_dirty_inode(inode);
- if (attr->ia_valid & ATTR_MODE)
+ if (!err && attr->ia_valid & ATTR_MODE)
err = btrfs_acl_chmod(inode);
}
* FIXME, needs more benchmarking...there are no reasons other than performance
* to keep or drop this code.
*/
- void btrfs_dirty_inode(struct inode *inode, int flags)
+ int btrfs_dirty_inode(struct inode *inode) /* returns 0 or the error from joining/starting the transaction or updating the inode */
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret;
if (BTRFS_I(inode)->dummy_inode)
- return;
+ return 0; /* dummy inodes are skipped and reported as success */
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans); /* previously a BUG_ON(); the caller now sees the error */
ret = btrfs_update_inode(trans, root, inode);
if (ret && ret == -ENOSPC) {
/* whoops, lets try again with the full transaction */
btrfs_end_transaction(trans, root);
trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- printk_ratelimited(KERN_ERR "btrfs: fail to "
- "dirty inode %llu error %ld\n",
- (unsigned long long)btrfs_ino(inode),
- PTR_ERR(trans));
- return;
- }
+ if (IS_ERR(trans))
+ return PTR_ERR(trans); /* the retry could not start a transaction; no logging here any more */
ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- printk_ratelimited(KERN_ERR "btrfs: fail to "
- "dirty inode %llu error %d\n",
- (unsigned long long)btrfs_ino(inode),
- ret);
- }
}
btrfs_end_transaction(trans, root);
if (BTRFS_I(inode)->delayed_node)
btrfs_balance_delayed_items(root);
+
+ return ret; /* result of the last btrfs_update_inode() call */
+ }
+
+ /*
+ * This is a copy of file_update_time. We need this so we can return error on
+ * ENOSPC for updating the inode in the case of file write and mmap writes.
+ *
+ * Returns 0 when no timestamp/version update is needed, when the inode is
+ * marked NOCMTIME, or when mnt_want_write_file() refuses write access;
+ * otherwise returns the result of btrfs_dirty_inode().
+ *
+ * Note that i_mtime/i_ctime/i_version are modified in memory before
+ * btrfs_dirty_inode() is called, so they stay changed even when an error
+ * is returned.
+ */
+ int btrfs_update_time(struct file *file)
+ {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct timespec now;
+ int ret;
+ enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; /* bitmask of fields that need updating */
+
+ /* First try to exhaust all avenues to not sync */
+ if (IS_NOCMTIME(inode))
+ return 0;
+
+ now = current_fs_time(inode->i_sb);
+ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = S_MTIME;
+
+ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it |= S_CTIME;
+
+ if (IS_I_VERSION(inode))
+ sync_it |= S_VERSION;
+
+ if (!sync_it)
+ return 0;
+
+ /* Finally allowed to write? Takes lock. */
+ if (mnt_want_write_file(file))
+ return 0; /* failing to get write access is deliberately not reported as an error */
+
+ /* Only change inode inside the lock region */
+ if (sync_it & S_VERSION)
+ inode_inc_iversion(inode);
+ if (sync_it & S_CTIME)
+ inode->i_ctime = now;
+ if (sync_it & S_MTIME)
+ inode->i_mtime = now;
+ ret = btrfs_dirty_inode(inode);
+ if (!ret)
+ mark_inode_dirty_sync(inode); /* only reached when btrfs_dirty_inode() succeeded */
+ mnt_drop_write(file->f_path.mnt); /* pairs with the mnt_want_write_file() call above */
+ return ret;
}
/*
goto out_unlock;
}
+ /*
+ * If the active LSM wants to access the inode during
+ * d_instantiate it needs these. Smack checks to see
+ * if the filesystem supports xattrs by looking at the
+ * ops vector.
+ */
+
+ inode->i_op = &btrfs_special_inode_operations;
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
- inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, rdev);
btrfs_update_inode(trans, root, inode);
}
goto out_unlock;
}
+ /*
+ * If the active LSM wants to access the inode during
+ * d_instantiate it needs these. Smack checks to see
+ * if the filesystem supports xattrs by looking at the
+ * ops vector.
+ */
+ inode->i_fop = &btrfs_file_operations;
+ inode->i_op = &btrfs_file_inode_operations;
+
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
out_unlock:
u64 page_start;
u64 page_end;
+ /* Need this to keep space reservations serialized */
+ mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+ mutex_unlock(&inode->i_mutex);
+ if (!ret)
+ ret = btrfs_update_time(vma->vm_file);
if (ret) {
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
/* Just need the 1 for updating the inode */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out;
+ ret = err = PTR_ERR(trans);
+ trans = NULL;
+ break;
}
}
goto out_unlock;
}
+ /*
+ * If the active LSM wants to access the inode during
+ * d_instantiate it needs these. Smack checks to see
+ * if the filesystem supports xattrs by looking at the
+ * ops vector.
+ */
+ inode->i_fop = &btrfs_file_operations;
+ inode->i_op = &btrfs_file_inode_operations;
+
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
if (drop_inode)
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.getattr = btrfs_getattr,
+ .setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,
#include <linux/slab.h>
#include <linux/cleancache.h>
#include <linux/mnt_namespace.h>
+ #include <linux/ratelimit.h>
#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
u64 avail_space;
u64 used_space;
u64 min_stripe_size;
- int min_stripes = 1;
+ int min_stripes = 1, num_stripes = 1;
int i = 0, nr_devices;
int ret;
/* calc min stripe number for data space alloction */
type = btrfs_get_alloc_profile(root, 1);
- if (type & BTRFS_BLOCK_GROUP_RAID0)
+ if (type & BTRFS_BLOCK_GROUP_RAID0) {
min_stripes = 2;
- else if (type & BTRFS_BLOCK_GROUP_RAID1)
+ num_stripes = nr_devices;
+ } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
min_stripes = 2;
- else if (type & BTRFS_BLOCK_GROUP_RAID10)
+ num_stripes = 2;
+ } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
min_stripes = 4;
+ num_stripes = 4;
+ }
if (type & BTRFS_BLOCK_GROUP_DUP)
min_stripe_size = 2 * BTRFS_STRIPE_LEN;
i = nr_devices - 1;
avail_space = 0;
while (nr_devices >= min_stripes) {
+ if (num_stripes > nr_devices)
+ num_stripes = nr_devices;
+
if (devices_info[i].max_avail >= min_stripe_size) {
int j;
u64 alloc_size;
- avail_space += devices_info[i].max_avail * min_stripes;
+ avail_space += devices_info[i].max_avail * num_stripes;
alloc_size = devices_info[i].max_avail;
- for (j = i + 1 - min_stripes; j <= i; j++)
+ for (j = i + 1 - num_stripes; j <= i; j++)
devices_info[j].max_avail -= alloc_size;
}
i--;
return 0;
}
+ /*
+  * ->dirty_inode callback installed in btrfs_super_ops.  The hook returns
+  * void, so an error from btrfs_dirty_inode() cannot be propagated and is
+  * only reported through a rate-limited log message.
+  *
+  * @inode: inode whose in-memory state changed
+  * @flags: dirty flags passed by the VFS; not used here
+  */
+ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
+ {
+ 	int ret;
+
+ 	ret = btrfs_dirty_inode(inode);
+ 	if (ret)
+ 		/* u64 is printed as %llu with an explicit cast, not "%Lu" */
+ 		printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %llu "
+ 				   "error %d\n",
+ 				   (unsigned long long)btrfs_ino(inode), ret);
+ }
+
static const struct super_operations btrfs_super_ops = {
.drop_inode = btrfs_drop_inode,
.evict_inode = btrfs_evict_inode,
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.write_inode = btrfs_write_inode,
- .dirty_inode = btrfs_dirty_inode,
+ .dirty_inode = btrfs_fs_dirty_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,