#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include <trace/events/f2fs.h>
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
{
struct address_space *mapping = sbi->meta_inode->i_mapping;
struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct blk_plug plug;
struct page *page;
pgoff_t index;
int i;
+ blk_start_plug(&plug);
+
for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
if (nid >= nm_i->max_nid)
nid = 0;
f2fs_put_page(page, 0);
}
+ blk_finish_plug(&plug);
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
+ if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD)
return 0;
write_lock(&nm_i->nat_tree_lock);
f2fs_put_page(dn->node_page, 1);
dn->node_page = NULL;
+ trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}
static int truncate_dnode(struct dnode_of_data *dn)
if (dn->nid == 0)
return NIDS_PER_BLOCK + 1;
+ trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
+
page = get_node_page(sbi, dn->nid);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
return PTR_ERR(page);
+ }
rn = (struct f2fs_node *)page_address(page);
if (depth < 3) {
} else {
f2fs_put_page(page, 1);
}
+ trace_f2fs_truncate_nodes_exit(dn->inode, freed);
return freed;
out_err:
f2fs_put_page(page, 1);
+ trace_f2fs_truncate_nodes_exit(dn->inode, ret);
return ret;
}
fail:
for (i = depth - 3; i >= 0; i--)
f2fs_put_page(pages[i], 1);
+
+ trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
+
return err;
}
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct address_space *node_mapping = sbi->node_inode->i_mapping;
int err = 0, cont = 1;
int level, offset[4], noffset[4];
unsigned int nofs = 0;
struct dnode_of_data dn;
struct page *page;
- level = get_node_path(from, offset, noffset);
+ trace_f2fs_truncate_inode_blocks_enter(inode, from);
+ level = get_node_path(from, offset, noffset);
+restart:
page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
return PTR_ERR(page);
+ }
set_new_dnode(&dn, inode, page, NULL, 0);
unlock_page(page);
if (offset[1] == 0 &&
rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
+ if (page->mapping != node_mapping) {
+ f2fs_put_page(page, 1);
+ goto restart;
+ }
wait_on_page_writeback(page);
rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
}
fail:
f2fs_put_page(page, 0);
+ trace_f2fs_truncate_inode_blocks_exit(inode, err);
return err > 0 ? 0 : err;
}
struct address_space *mapping = sbi->node_inode->i_mapping;
struct page *page;
int err;
-
+repeat:
page = grab_cache_page(mapping, nid);
if (!page)
return ERR_PTR(-ENOMEM);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
+ if (page->mapping != mapping) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
got_it:
BUG_ON(nid != nid_of_node(page));
mark_page_accessed(page);
{
struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
struct address_space *mapping = sbi->node_inode->i_mapping;
+ struct blk_plug plug;
struct page *page;
int err, i, end;
nid_t nid;
nid = get_nid(parent, start, false);
if (!nid)
return ERR_PTR(-ENOENT);
-
+repeat:
page = grab_cache_page(mapping, nid);
if (!page)
return ERR_PTR(-ENOMEM);
else if (err == LOCKED_PAGE)
goto page_hit;
+ blk_start_plug(&plug);
+
/* Then, try readahead for siblings of the desired node */
end = start + MAX_RA_NODE;
end = min(end, NIDS_PER_BLOCK);
ra_node_page(sbi, nid);
}
- lock_page(page);
+ blk_finish_plug(&plug);
+ lock_page(page);
+ if (page->mapping != mapping) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
page_hit:
if (!PageUptodate(page)) {
f2fs_put_page(page, 1);
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- struct block_device *bdev = sbi->sb->s_bdev;
long nr_to_write = wbc->nr_to_write;
/* First check balancing cached NAT entries */
return 0;
/* if mounting is failed, skip writing node pages */
- wbc->nr_to_write = bio_get_nr_vecs(bdev);
+ wbc->nr_to_write = max_hw_blocks(sbi);
sync_node_pages(sbi, 0, wbc);
- wbc->nr_to_write = nr_to_write -
- (bio_get_nr_vecs(bdev) - wbc->nr_to_write);
+ wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write);
return 0;
}
kmem_cache_free(free_nid_slab, i);
}
-static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
{
struct free_nid *i;
+ struct nat_entry *ne;
+ bool allocated = false;
if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
+ return -1;
+
+ /* 0 nid should not be used */
+ if (nid == 0)
+ return 0;
+
+ if (!build)
+ goto retry;
+
+ /* do not add allocated nids */
+ read_lock(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
+ allocated = true;
+ read_unlock(&nm_i->nat_tree_lock);
+ if (allocated)
return 0;
retry:
i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
spin_unlock(&nm_i->free_nid_list_lock);
}
/*
 * scan_nat_page() - feed the free nids found in one NAT block to the
 * free nid list.
 *
 * Scanning starts at slot (start_nid % NAT_ENTRY_PER_BLOCK) of nat_page and
 * runs to the end of the block, stopping early once start_nid reaches
 * nm_i->max_nid.  Every entry whose block_addr equals NULL_ADDR is handed to
 * add_free_nid(); an entry whose block_addr equals NEW_ADDR trips BUG_ON().
 *
 * This span is in diff form.  The '-' lines are the previous version, which
 * bumped a start_nid of 0 to 1 and returned the sum of the add_free_nid()
 * results.  The '+' lines make the function void, drop the nid 0 special
 * case here, and abandon the scan as soon as add_free_nid() returns a
 * negative value.
 */
-static int scan_nat_page(struct f2fs_nm_info *nm_i,
+static void scan_nat_page(struct f2fs_nm_info *nm_i,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nat_block *nat_blk = page_address(nat_page);
block_t blk_addr;
- int fcnt = 0;
int i;
- /* 0 nid should not be used */
- if (start_nid == 0)
- ++start_nid;
-
i = start_nid % NAT_ENTRY_PER_BLOCK; /* slot of start_nid inside this NAT block */
for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
+
if (start_nid >= nm_i->max_nid) /* never look past the last valid nid */
break;
- blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
+
+ blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
BUG_ON(blk_addr == NEW_ADDR);
- if (blk_addr == NULL_ADDR)
- fcnt += add_free_nid(nm_i, start_nid);
+ if (blk_addr == NULL_ADDR) {
+ if (add_free_nid(nm_i, start_nid, true) < 0) /* negative return: stop scanning this block */
+ break;
+ }
}
- return fcnt;
}
static void build_free_nids(struct f2fs_sb_info *sbi)
{
- struct free_nid *fnid, *next_fnid;
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- nid_t nid = 0;
- bool is_cycled = false;
- int fcnt = 0;
- int i;
+ int i = 0;
+ nid_t nid = nm_i->next_scan_nid;
- nid = nm_i->next_scan_nid;
- nm_i->init_scan_nid = nid;
+ /* Enough entries */
+ if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
+ return;
+ /* readahead nat pages to be scanned */
ra_nat_pages(sbi, nid);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
- fcnt += scan_nat_page(nm_i, page, nid);
+ scan_nat_page(nm_i, page, nid);
f2fs_put_page(page, 1);
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
-
- if (nid >= nm_i->max_nid) {
+ if (nid >= nm_i->max_nid)
nid = 0;
- is_cycled = true;
- }
- if (fcnt > MAX_FREE_NIDS)
- break;
- if (is_cycled && nm_i->init_scan_nid <= nid)
+
+ if (i++ == FREE_NID_PAGES)
break;
}
- /* go to the next nat page in order to reuse free nids first */
- nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK;
+ /* go to the next free nat pages to find free nids abundantly */
+ nm_i->next_scan_nid = nid;
/* find free nids from current sum_pages */
mutex_lock(&curseg->curseg_mutex);
block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
nid = le32_to_cpu(nid_in_journal(sum, i));
if (addr == NULL_ADDR)
- add_free_nid(nm_i, nid);
+ add_free_nid(nm_i, nid, true);
else
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
-
- /* remove the free nids from current allocated nids */
- list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
- struct nat_entry *ne;
-
- read_lock(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, fnid->nid);
- if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
- remove_free_nid(nm_i, fnid->nid);
- read_unlock(&nm_i->nat_tree_lock);
- }
}
/*
struct free_nid *i = NULL;
struct list_head *this;
retry:
- mutex_lock(&nm_i->build_lock);
- if (!nm_i->fcnt) {
- /* scan NAT in order to build free nid list */
- build_free_nids(sbi);
- if (!nm_i->fcnt) {
- mutex_unlock(&nm_i->build_lock);
- return false;
- }
- }
- mutex_unlock(&nm_i->build_lock);
+ if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
+ return false;
- /*
- * We check fcnt again since previous check is racy as
- * we didn't hold free_nid_list_lock. So other thread
- * could consume all of free nids.
- */
spin_lock(&nm_i->free_nid_list_lock);
- if (!nm_i->fcnt) {
- spin_unlock(&nm_i->free_nid_list_lock);
- goto retry;
- }
- BUG_ON(list_empty(&nm_i->free_nid_list));
- list_for_each(this, &nm_i->free_nid_list) {
- i = list_entry(this, struct free_nid, list);
- if (i->state == NID_NEW)
- break;
- }
+ /* We should not use stale free nids created by build_free_nids */
+ if (nm_i->fcnt && !sbi->on_build_free_nids) {
+ BUG_ON(list_empty(&nm_i->free_nid_list));
+ list_for_each(this, &nm_i->free_nid_list) {
+ i = list_entry(this, struct free_nid, list);
+ if (i->state == NID_NEW)
+ break;
+ }
- BUG_ON(i->state != NID_NEW);
- *nid = i->nid;
- i->state = NID_ALLOC;
- nm_i->fcnt--;
+ BUG_ON(i->state != NID_NEW);
+ *nid = i->nid;
+ i->state = NID_ALLOC;
+ nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ return true;
+ }
spin_unlock(&nm_i->free_nid_list_lock);
- return true;
+
+ /* Let's scan nat pages and its caches to get free nids */
+ mutex_lock(&nm_i->build_lock);
+ sbi->on_build_free_nids = 1;
+ build_free_nids(sbi);
+ sbi->on_build_free_nids = 0;
+ mutex_unlock(&nm_i->build_lock);
+ goto retry;
}
/*
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
BUG_ON(!i || i->state != NID_ALLOC);
- i->state = NID_NEW;
- nm_i->fcnt++;
+ if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
+ __del_from_free_nid_list(i);
+ } else {
+ i->state = NID_NEW;
+ nm_i->fcnt++;
+ }
spin_unlock(&nm_i->free_nid_list_lock);
}
}
if (nat_get_blkaddr(ne) == NULL_ADDR &&
- !add_free_nid(NM_I(sbi), nid)) {
+ add_free_nid(NM_I(sbi), nid, false) <= 0) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
rwlock_init(&nm_i->nat_tree_lock);
- nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);