1 /*
2  * fs/f2fs/node.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/mpage.h>
14 #include <linux/backing-dev.h>
15 #include <linux/blkdev.h>
16 #include <linux/pagevec.h>
17 #include <linux/swap.h>
18
19 #include "f2fs.h"
20 #include "node.h"
21 #include "segment.h"
22 #include "trace.h"
23 #include <trace/events/f2fs.h>
24
25 #define on_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock)
26
27 static struct kmem_cache *nat_entry_slab;
28 static struct kmem_cache *free_nid_slab;
29 static struct kmem_cache *nat_entry_set_slab;
30
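/*
 * Rough per-cache memory budget, derived from the checks below: each cache's
 * footprint is estimated in page units and compared against a slice of low
 * memory scaled by nm_i->ram_thresh.  For example, with ram_thresh = 10, the
 * free nid and nat entry caches may each grow to (avail_ram * 10 / 100) >> 2
 * pages (2.5% of low memory), while dirty dentry pages and ino entries get
 * twice that budget.
 */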
31 bool available_free_memory(struct f2fs_sb_info *sbi, int type)
32 {
33         struct f2fs_nm_info *nm_i = NM_I(sbi);
34         struct sysinfo val;
35         unsigned long avail_ram;
36         unsigned long mem_size = 0;
37         bool res = false;
38
39         si_meminfo(&val);
40
41         /* only uses low memory */
42         avail_ram = val.totalram - val.totalhigh;
43
44         /* give 25%, 25%, 50%, 50% of memory to each component, respectively */
45         if (type == FREE_NIDS) {
46                 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
47                                                         PAGE_CACHE_SHIFT;
48                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
49         } else if (type == NAT_ENTRIES) {
50                 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
51                                                         PAGE_CACHE_SHIFT;
52                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
53         } else if (type == DIRTY_DENTS) {
54                 if (sbi->sb->s_bdi->dirty_exceeded)
55                         return false;
56                 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
57                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
58         } else if (type == INO_ENTRIES) {
59                 int i;
60
61                 for (i = 0; i <= UPDATE_INO; i++)
62                         mem_size += (sbi->im[i].ino_num *
63                                 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
64                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
65         } else {
66                 if (sbi->sb->s_bdi->dirty_exceeded)
67                         return false;
68         }
69         return res;
70 }
71
72 static void clear_node_page_dirty(struct page *page)
73 {
74         struct address_space *mapping = page->mapping;
75         unsigned long flags;
76
77         if (PageDirty(page)) {
78                 spin_lock_irqsave(&mapping->tree_lock, flags);
79                 radix_tree_tag_clear(&mapping->page_tree,
80                                 page_index(page),
81                                 PAGECACHE_TAG_DIRTY);
82                 spin_unlock_irqrestore(&mapping->tree_lock, flags);
83
84                 clear_page_dirty_for_io(page);
85                 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
86         }
87         ClearPageUptodate(page);
88 }
89
90 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
91 {
92         pgoff_t index = current_nat_addr(sbi, nid);
93         return get_meta_page(sbi, index);
94 }
95
96 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
97 {
98         struct page *src_page;
99         struct page *dst_page;
100         pgoff_t src_off;
101         pgoff_t dst_off;
102         void *src_addr;
103         void *dst_addr;
104         struct f2fs_nm_info *nm_i = NM_I(sbi);
105
106         src_off = current_nat_addr(sbi, nid);
107         dst_off = next_nat_addr(sbi, src_off);
108
109         /* get current nat block page with lock */
110         src_page = get_meta_page(sbi, src_off);
111         dst_page = grab_meta_page(sbi, dst_off);
112         f2fs_bug_on(sbi, PageDirty(src_page));
113
114         src_addr = page_address(src_page);
115         dst_addr = page_address(dst_page);
116         memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
117         set_page_dirty(dst_page);
118         f2fs_put_page(src_page, 1);
119
120         set_to_next_nat(nm_i, nid);
121
122         return dst_page;
123 }
124
125 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
126 {
127         return radix_tree_lookup(&nm_i->nat_root, n);
128 }
129
130 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
131                 nid_t start, unsigned int nr, struct nat_entry **ep)
132 {
133         return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
134 }
135
136 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
137 {
138         list_del(&e->list);
139         radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
140         nm_i->nat_cnt--;
141         kmem_cache_free(nat_entry_slab, e);
142 }
143
144 static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
145                                                 struct nat_entry *ne)
146 {
147         nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
148         struct nat_entry_set *head;
149
150         if (get_nat_flag(ne, IS_DIRTY))
151                 return;
152
153         head = radix_tree_lookup(&nm_i->nat_set_root, set);
154         if (!head) {
155                 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
156
157                 INIT_LIST_HEAD(&head->entry_list);
158                 INIT_LIST_HEAD(&head->set_list);
159                 head->set = set;
160                 head->entry_cnt = 0;
161                 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
162         }
163         list_move_tail(&ne->list, &head->entry_list);
164         nm_i->dirty_nat_cnt++;
165         head->entry_cnt++;
166         set_nat_flag(ne, IS_DIRTY, true);
167 }
168
169 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
170                                                 struct nat_entry *ne)
171 {
172         nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
173         struct nat_entry_set *head;
174
175         head = radix_tree_lookup(&nm_i->nat_set_root, set);
176         if (head) {
177                 list_move_tail(&ne->list, &nm_i->nat_entries);
178                 set_nat_flag(ne, IS_DIRTY, false);
179                 head->entry_cnt--;
180                 nm_i->dirty_nat_cnt--;
181         }
182 }
183
184 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
185                 nid_t start, unsigned int nr, struct nat_entry_set **ep)
186 {
187         return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
188                                                         start, nr);
189 }
190
191 bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
192 {
193         struct f2fs_nm_info *nm_i = NM_I(sbi);
194         struct nat_entry *e;
195         bool is_cp = true;
196
197         down_read(&nm_i->nat_tree_lock);
198         e = __lookup_nat_cache(nm_i, nid);
199         if (e && !get_nat_flag(e, IS_CHECKPOINTED))
200                 is_cp = false;
201         up_read(&nm_i->nat_tree_lock);
202         return is_cp;
203 }
204
205 bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
206 {
207         struct f2fs_nm_info *nm_i = NM_I(sbi);
208         struct nat_entry *e;
209         bool fsynced = false;
210
211         down_read(&nm_i->nat_tree_lock);
212         e = __lookup_nat_cache(nm_i, ino);
213         if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
214                 fsynced = true;
215         up_read(&nm_i->nat_tree_lock);
216         return fsynced;
217 }
218
219 bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
220 {
221         struct f2fs_nm_info *nm_i = NM_I(sbi);
222         struct nat_entry *e;
223         bool need_update = true;
224
225         down_read(&nm_i->nat_tree_lock);
226         e = __lookup_nat_cache(nm_i, ino);
227         if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
228                         (get_nat_flag(e, IS_CHECKPOINTED) ||
229                          get_nat_flag(e, HAS_FSYNCED_INODE)))
230                 need_update = false;
231         up_read(&nm_i->nat_tree_lock);
232         return need_update;
233 }
234
235 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
236 {
237         struct nat_entry *new;
238
239         new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
240         f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
241         memset(new, 0, sizeof(struct nat_entry));
242         nat_set_nid(new, nid);
243         nat_reset_flag(new);
244         list_add_tail(&new->list, &nm_i->nat_entries);
245         nm_i->nat_cnt++;
246         return new;
247 }
248
249 static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
250                                                 struct f2fs_nat_entry *ne)
251 {
252         struct nat_entry *e;
253
254         down_write(&nm_i->nat_tree_lock);
255         e = __lookup_nat_cache(nm_i, nid);
256         if (!e) {
257                 e = grab_nat_entry(nm_i, nid);
258                 node_info_from_raw_nat(&e->ni, ne);
259         }
260         up_write(&nm_i->nat_tree_lock);
261 }
262
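/*
 * Update the cached nat entry for @ni with its new block address and mark it
 * dirty in its per-block nat set.  The node version is bumped when the node
 * is removed (new address NULL_ADDR), IS_CHECKPOINTED is cleared when the new
 * address is NEW_ADDR or NULL_ADDR, and the owning inode's entry records
 * whether the latest write came from fsync.
 */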
263 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
264                         block_t new_blkaddr, bool fsync_done)
265 {
266         struct f2fs_nm_info *nm_i = NM_I(sbi);
267         struct nat_entry *e;
268
269         down_write(&nm_i->nat_tree_lock);
270         e = __lookup_nat_cache(nm_i, ni->nid);
271         if (!e) {
272                 e = grab_nat_entry(nm_i, ni->nid);
273                 copy_node_info(&e->ni, ni);
274                 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
275         } else if (new_blkaddr == NEW_ADDR) {
276                 /*
277                  * when a nid is reallocated,
278                  * the previous nat entry may remain in the nat cache.
279                  * So, reinitialize it with new information.
280                  */
281                 copy_node_info(&e->ni, ni);
282                 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
283         }
284
285         /* sanity check */
286         f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
287         f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
288                         new_blkaddr == NULL_ADDR);
289         f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
290                         new_blkaddr == NEW_ADDR);
291         f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
292                         nat_get_blkaddr(e) != NULL_ADDR &&
293                         new_blkaddr == NEW_ADDR);
294
295         /* increment version no as node is removed */
296         if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
297                 unsigned char version = nat_get_version(e);
298                 nat_set_version(e, inc_node_version(version));
299         }
300
301         /* change address */
302         nat_set_blkaddr(e, new_blkaddr);
303         if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
304                 set_nat_flag(e, IS_CHECKPOINTED, false);
305         __set_nat_cache_dirty(nm_i, e);
306
307         /* update fsync_mark if its inode nat entry is still alive */
308         e = __lookup_nat_cache(nm_i, ni->ino);
309         if (e) {
310                 if (fsync_done && ni->nid == ni->ino)
311                         set_nat_flag(e, HAS_FSYNCED_INODE, true);
312                 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
313         }
314         up_write(&nm_i->nat_tree_lock);
315 }
316
317 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
318 {
319         struct f2fs_nm_info *nm_i = NM_I(sbi);
320
321         if (available_free_memory(sbi, NAT_ENTRIES))
322                 return 0;
323
324         down_write(&nm_i->nat_tree_lock);
325         while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
326                 struct nat_entry *ne;
327                 ne = list_first_entry(&nm_i->nat_entries,
328                                         struct nat_entry, list);
329                 __del_from_nat_cache(nm_i, ne);
330                 nr_shrink--;
331         }
332         up_write(&nm_i->nat_tree_lock);
333         return nr_shrink;
334 }
335
336 /*
337  * This function always returns success
338  */
339 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
340 {
341         struct f2fs_nm_info *nm_i = NM_I(sbi);
342         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
343         struct f2fs_summary_block *sum = curseg->sum_blk;
344         nid_t start_nid = START_NID(nid);
345         struct f2fs_nat_block *nat_blk;
346         struct page *page = NULL;
347         struct f2fs_nat_entry ne;
348         struct nat_entry *e;
349         int i;
350
351         ni->nid = nid;
352
353         /* Check nat cache */
354         down_read(&nm_i->nat_tree_lock);
355         e = __lookup_nat_cache(nm_i, nid);
356         if (e) {
357                 ni->ino = nat_get_ino(e);
358                 ni->blk_addr = nat_get_blkaddr(e);
359                 ni->version = nat_get_version(e);
360         }
361         up_read(&nm_i->nat_tree_lock);
362         if (e)
363                 return;
364
365         memset(&ne, 0, sizeof(struct f2fs_nat_entry));
366
367         /* Check current segment summary */
368         mutex_lock(&curseg->curseg_mutex);
369         i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
370         if (i >= 0) {
371                 ne = nat_in_journal(sum, i);
372                 node_info_from_raw_nat(ni, &ne);
373         }
374         mutex_unlock(&curseg->curseg_mutex);
375         if (i >= 0)
376                 goto cache;
377
378         /* Fill node_info from nat page */
379         page = get_current_nat_page(sbi, start_nid);
380         nat_blk = (struct f2fs_nat_block *)page_address(page);
381         ne = nat_blk->entries[nid - start_nid];
382         node_info_from_raw_nat(ni, &ne);
383         f2fs_put_page(page, 1);
384 cache:
385         /* cache nat entry */
386         cache_nat_entry(NM_I(sbi), nid, &ne);
387 }
388
389 /*
390  * The maximum depth is four.
391  * Offset[0] will have raw inode offset.
392  */
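/*
 * offset[] holds the index to follow at each level (the last entry being the
 * slot inside the direct node), and noffset[] holds the logical node offset
 * of each node on the path, with the inode itself counted as node 0.  As an
 * illustration of the branches below: a block index that falls just past the
 * inode's direct pointers resolves to level = 1 with
 * offset = { NODE_DIR1_BLOCK, block - direct_index } and noffset = { 0, 1 }.
 */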
393 static int get_node_path(struct f2fs_inode_info *fi, long block,
394                                 int offset[4], unsigned int noffset[4])
395 {
396         const long direct_index = ADDRS_PER_INODE(fi);
397         const long direct_blks = ADDRS_PER_BLOCK;
398         const long dptrs_per_blk = NIDS_PER_BLOCK;
399         const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
400         const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
401         int n = 0;
402         int level = 0;
403
404         noffset[0] = 0;
405
406         if (block < direct_index) {
407                 offset[n] = block;
408                 goto got;
409         }
410         block -= direct_index;
411         if (block < direct_blks) {
412                 offset[n++] = NODE_DIR1_BLOCK;
413                 noffset[n] = 1;
414                 offset[n] = block;
415                 level = 1;
416                 goto got;
417         }
418         block -= direct_blks;
419         if (block < direct_blks) {
420                 offset[n++] = NODE_DIR2_BLOCK;
421                 noffset[n] = 2;
422                 offset[n] = block;
423                 level = 1;
424                 goto got;
425         }
426         block -= direct_blks;
427         if (block < indirect_blks) {
428                 offset[n++] = NODE_IND1_BLOCK;
429                 noffset[n] = 3;
430                 offset[n++] = block / direct_blks;
431                 noffset[n] = 4 + offset[n - 1];
432                 offset[n] = block % direct_blks;
433                 level = 2;
434                 goto got;
435         }
436         block -= indirect_blks;
437         if (block < indirect_blks) {
438                 offset[n++] = NODE_IND2_BLOCK;
439                 noffset[n] = 4 + dptrs_per_blk;
440                 offset[n++] = block / direct_blks;
441                 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
442                 offset[n] = block % direct_blks;
443                 level = 2;
444                 goto got;
445         }
446         block -= indirect_blks;
447         if (block < dindirect_blks) {
448                 offset[n++] = NODE_DIND_BLOCK;
449                 noffset[n] = 5 + (dptrs_per_blk * 2);
450                 offset[n++] = block / indirect_blks;
451                 noffset[n] = 6 + (dptrs_per_blk * 2) +
452                               offset[n - 1] * (dptrs_per_blk + 1);
453                 offset[n++] = (block / direct_blks) % dptrs_per_blk;
454                 noffset[n] = 7 + (dptrs_per_blk * 2) +
455                               offset[n - 2] * (dptrs_per_blk + 1) +
456                               offset[n - 1];
457                 offset[n] = block % direct_blks;
458                 level = 3;
459                 goto got;
460         } else {
461                 BUG();
462         }
463 got:
464         return level;
465 }
466
467 /*
468  * Caller should call f2fs_put_dnode(dn).
469  * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
470  * f2fs_unlock_op() only if mode is not set to RDONLY_NODE.
471  * In the case of RDONLY_NODE, we don't need to care about the mutex.
472  */
473 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
474 {
475         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
476         struct page *npage[4];
477         struct page *parent = NULL;
478         int offset[4];
479         unsigned int noffset[4];
480         nid_t nids[4];
481         int level, i;
482         int err = 0;
483
484         level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
485
486         nids[0] = dn->inode->i_ino;
487         npage[0] = dn->inode_page;
488
489         if (!npage[0]) {
490                 npage[0] = get_node_page(sbi, nids[0]);
491                 if (IS_ERR(npage[0]))
492                         return PTR_ERR(npage[0]);
493         }
494
495         /* if inline_data is set, should not report any block indices */
496         if (f2fs_has_inline_data(dn->inode) && index) {
497                 err = -EINVAL;
498                 f2fs_put_page(npage[0], 1);
499                 goto release_out;
500         }
501
502         parent = npage[0];
503         if (level != 0)
504                 nids[1] = get_nid(parent, offset[0], true);
505         dn->inode_page = npage[0];
506         dn->inode_page_locked = true;
507
508         /* get indirect or direct nodes */
509         for (i = 1; i <= level; i++) {
510                 bool done = false;
511
512                 if (!nids[i] && mode == ALLOC_NODE) {
513                         /* alloc new node */
514                         if (!alloc_nid(sbi, &(nids[i]))) {
515                                 err = -ENOSPC;
516                                 goto release_pages;
517                         }
518
519                         dn->nid = nids[i];
520                         npage[i] = new_node_page(dn, noffset[i], NULL);
521                         if (IS_ERR(npage[i])) {
522                                 alloc_nid_failed(sbi, nids[i]);
523                                 err = PTR_ERR(npage[i]);
524                                 goto release_pages;
525                         }
526
527                         set_nid(parent, offset[i - 1], nids[i], i == 1);
528                         alloc_nid_done(sbi, nids[i]);
529                         done = true;
530                 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
531                         npage[i] = get_node_page_ra(parent, offset[i - 1]);
532                         if (IS_ERR(npage[i])) {
533                                 err = PTR_ERR(npage[i]);
534                                 goto release_pages;
535                         }
536                         done = true;
537                 }
538                 if (i == 1) {
539                         dn->inode_page_locked = false;
540                         unlock_page(parent);
541                 } else {
542                         f2fs_put_page(parent, 1);
543                 }
544
545                 if (!done) {
546                         npage[i] = get_node_page(sbi, nids[i]);
547                         if (IS_ERR(npage[i])) {
548                                 err = PTR_ERR(npage[i]);
549                                 f2fs_put_page(npage[0], 0);
550                                 goto release_out;
551                         }
552                 }
553                 if (i < level) {
554                         parent = npage[i];
555                         nids[i + 1] = get_nid(parent, offset[i], false);
556                 }
557         }
558         dn->nid = nids[level];
559         dn->ofs_in_node = offset[level];
560         dn->node_page = npage[level];
561         dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
562         return 0;
563
564 release_pages:
565         f2fs_put_page(parent, 1);
566         if (i > 1)
567                 f2fs_put_page(npage[0], 0);
568 release_out:
569         dn->inode_page = NULL;
570         dn->node_page = NULL;
571         return err;
572 }
573
574 static void truncate_node(struct dnode_of_data *dn)
575 {
576         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
577         struct node_info ni;
578
579         get_node_info(sbi, dn->nid, &ni);
580         if (dn->inode->i_blocks == 0) {
581                 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
582                 goto invalidate;
583         }
584         f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
585
586         /* Deallocate node address */
587         invalidate_blocks(sbi, ni.blk_addr);
588         dec_valid_node_count(sbi, dn->inode);
589         set_node_addr(sbi, &ni, NULL_ADDR, false);
590
591         if (dn->nid == dn->inode->i_ino) {
592                 remove_orphan_inode(sbi, dn->nid);
593                 dec_valid_inode_count(sbi);
594         } else {
595                 sync_inode_page(dn);
596         }
597 invalidate:
598         clear_node_page_dirty(dn->node_page);
599         set_sbi_flag(sbi, SBI_IS_DIRTY);
600
601         f2fs_put_page(dn->node_page, 1);
602
603         invalidate_mapping_pages(NODE_MAPPING(sbi),
604                         dn->node_page->index, dn->node_page->index);
605
606         dn->node_page = NULL;
607         trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
608 }
609
610 static int truncate_dnode(struct dnode_of_data *dn)
611 {
612         struct page *page;
613
614         if (dn->nid == 0)
615                 return 1;
616
617         /* get direct node */
618         page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
619         if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
620                 return 1;
621         else if (IS_ERR(page))
622                 return PTR_ERR(page);
623
624         /* Make dnode_of_data for parameter */
625         dn->node_page = page;
626         dn->ofs_in_node = 0;
627         truncate_data_blocks(dn);
628         truncate_node(dn);
629         return 1;
630 }
631
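/*
 * Recursively free the node blocks below an indirect or double indirect node.
 * On success the return value is the number of node offsets the caller should
 * advance (truncate_inode_blocks() adds it to nofs); on failure a negative
 * errno is returned.
 */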
632 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
633                                                 int ofs, int depth)
634 {
635         struct dnode_of_data rdn = *dn;
636         struct page *page;
637         struct f2fs_node *rn;
638         nid_t child_nid;
639         unsigned int child_nofs;
640         int freed = 0;
641         int i, ret;
642
643         if (dn->nid == 0)
644                 return NIDS_PER_BLOCK + 1;
645
646         trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
647
648         page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
649         if (IS_ERR(page)) {
650                 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
651                 return PTR_ERR(page);
652         }
653
654         rn = F2FS_NODE(page);
655         if (depth < 3) {
656                 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
657                         child_nid = le32_to_cpu(rn->in.nid[i]);
658                         if (child_nid == 0)
659                                 continue;
660                         rdn.nid = child_nid;
661                         ret = truncate_dnode(&rdn);
662                         if (ret < 0)
663                                 goto out_err;
664                         set_nid(page, i, 0, false);
665                 }
666         } else {
667                 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
668                 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
669                         child_nid = le32_to_cpu(rn->in.nid[i]);
670                         if (child_nid == 0) {
671                                 child_nofs += NIDS_PER_BLOCK + 1;
672                                 continue;
673                         }
674                         rdn.nid = child_nid;
675                         ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
676                         if (ret == (NIDS_PER_BLOCK + 1)) {
677                                 set_nid(page, i, 0, false);
678                                 child_nofs += ret;
679                         } else if (ret < 0 && ret != -ENOENT) {
680                                 goto out_err;
681                         }
682                 }
683                 freed = child_nofs;
684         }
685
686         if (!ofs) {
687                 /* remove current indirect node */
688                 dn->node_page = page;
689                 truncate_node(dn);
690                 freed++;
691         } else {
692                 f2fs_put_page(page, 1);
693         }
694         trace_f2fs_truncate_nodes_exit(dn->inode, freed);
695         return freed;
696
697 out_err:
698         f2fs_put_page(page, 1);
699         trace_f2fs_truncate_nodes_exit(dn->inode, ret);
700         return ret;
701 }
702
703 static int truncate_partial_nodes(struct dnode_of_data *dn,
704                         struct f2fs_inode *ri, int *offset, int depth)
705 {
706         struct page *pages[2];
707         nid_t nid[3];
708         nid_t child_nid;
709         int err = 0;
710         int i;
711         int idx = depth - 2;
712
713         nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
714         if (!nid[0])
715                 return 0;
716
717         /* get indirect nodes in the path */
718         for (i = 0; i < idx + 1; i++) {
719                 /* the reference count will be increased */
720                 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
721                 if (IS_ERR(pages[i])) {
722                         err = PTR_ERR(pages[i]);
723                         idx = i - 1;
724                         goto fail;
725                 }
726                 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
727         }
728
729         /* free direct nodes linked to a partial indirect node */
730         for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
731                 child_nid = get_nid(pages[idx], i, false);
732                 if (!child_nid)
733                         continue;
734                 dn->nid = child_nid;
735                 err = truncate_dnode(dn);
736                 if (err < 0)
737                         goto fail;
738                 set_nid(pages[idx], i, 0, false);
739         }
740
741         if (offset[idx + 1] == 0) {
742                 dn->node_page = pages[idx];
743                 dn->nid = nid[idx];
744                 truncate_node(dn);
745         } else {
746                 f2fs_put_page(pages[idx], 1);
747         }
748         offset[idx]++;
749         offset[idx + 1] = 0;
750         idx--;
751 fail:
752         for (i = idx; i >= 0; i--)
753                 f2fs_put_page(pages[i], 1);
754
755         trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
756
757         return err;
758 }
759
760 /*
761  * All the block addresses of data and nodes should be nullified.
762  */
763 int truncate_inode_blocks(struct inode *inode, pgoff_t from)
764 {
765         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
766         int err = 0, cont = 1;
767         int level, offset[4], noffset[4];
768         unsigned int nofs = 0;
769         struct f2fs_inode *ri;
770         struct dnode_of_data dn;
771         struct page *page;
772
773         trace_f2fs_truncate_inode_blocks_enter(inode, from);
774
775         level = get_node_path(F2FS_I(inode), from, offset, noffset);
776 restart:
777         page = get_node_page(sbi, inode->i_ino);
778         if (IS_ERR(page)) {
779                 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
780                 return PTR_ERR(page);
781         }
782
783         set_new_dnode(&dn, inode, page, NULL, 0);
784         unlock_page(page);
785
786         ri = F2FS_INODE(page);
787         switch (level) {
788         case 0:
789         case 1:
790                 nofs = noffset[1];
791                 break;
792         case 2:
793                 nofs = noffset[1];
794                 if (!offset[level - 1])
795                         goto skip_partial;
796                 err = truncate_partial_nodes(&dn, ri, offset, level);
797                 if (err < 0 && err != -ENOENT)
798                         goto fail;
799                 nofs += 1 + NIDS_PER_BLOCK;
800                 break;
801         case 3:
802                 nofs = 5 + 2 * NIDS_PER_BLOCK;
803                 if (!offset[level - 1])
804                         goto skip_partial;
805                 err = truncate_partial_nodes(&dn, ri, offset, level);
806                 if (err < 0 && err != -ENOENT)
807                         goto fail;
808                 break;
809         default:
810                 BUG();
811         }
812
813 skip_partial:
814         while (cont) {
815                 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
816                 switch (offset[0]) {
817                 case NODE_DIR1_BLOCK:
818                 case NODE_DIR2_BLOCK:
819                         err = truncate_dnode(&dn);
820                         break;
821
822                 case NODE_IND1_BLOCK:
823                 case NODE_IND2_BLOCK:
824                         err = truncate_nodes(&dn, nofs, offset[1], 2);
825                         break;
826
827                 case NODE_DIND_BLOCK:
828                         err = truncate_nodes(&dn, nofs, offset[1], 3);
829                         cont = 0;
830                         break;
831
832                 default:
833                         BUG();
834                 }
835                 if (err < 0 && err != -ENOENT)
836                         goto fail;
837                 if (offset[1] == 0 &&
838                                 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
839                         lock_page(page);
840                         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
841                                 f2fs_put_page(page, 1);
842                                 goto restart;
843                         }
844                         f2fs_wait_on_page_writeback(page, NODE);
845                         ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
846                         set_page_dirty(page);
847                         unlock_page(page);
848                 }
849                 offset[1] = 0;
850                 offset[0]++;
851                 nofs += err;
852         }
853 fail:
854         f2fs_put_page(page, 0);
855         trace_f2fs_truncate_inode_blocks_exit(inode, err);
856         return err > 0 ? 0 : err;
857 }
858
859 int truncate_xattr_node(struct inode *inode, struct page *page)
860 {
861         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
862         nid_t nid = F2FS_I(inode)->i_xattr_nid;
863         struct dnode_of_data dn;
864         struct page *npage;
865
866         if (!nid)
867                 return 0;
868
869         npage = get_node_page(sbi, nid);
870         if (IS_ERR(npage))
871                 return PTR_ERR(npage);
872
873         F2FS_I(inode)->i_xattr_nid = 0;
874
875         /* need to do checkpoint during fsync */
876         F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
877
878         set_new_dnode(&dn, inode, page, npage, nid);
879
880         if (page)
881                 dn.inode_page_locked = true;
882         truncate_node(&dn);
883         return 0;
884 }
885
886 /*
887  * Caller should grab and release a rwsem by calling f2fs_lock_op() and
888  * f2fs_unlock_op().
889  */
890 void remove_inode_page(struct inode *inode)
891 {
892         struct dnode_of_data dn;
893
894         set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
895         if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
896                 return;
897
898         if (truncate_xattr_node(inode, dn.inode_page)) {
899                 f2fs_put_dnode(&dn);
900                 return;
901         }
902
903         /* remove potential inline_data blocks */
904         if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
905                                 S_ISLNK(inode->i_mode))
906                 truncate_data_blocks_range(&dn, 1);
907
908         /* 0 is possible, after f2fs_new_inode() has failed */
909         f2fs_bug_on(F2FS_I_SB(inode),
910                         inode->i_blocks != 0 && inode->i_blocks != 1);
911
912         /* will put inode & node pages */
913         truncate_node(&dn);
914 }
915
916 struct page *new_inode_page(struct inode *inode)
917 {
918         struct dnode_of_data dn;
919
920         /* allocate inode page for new inode */
921         set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
922
923         /* caller should f2fs_put_page(page, 1); */
924         return new_node_page(&dn, 0, NULL);
925 }
926
927 struct page *new_node_page(struct dnode_of_data *dn,
928                                 unsigned int ofs, struct page *ipage)
929 {
930         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
931         struct node_info old_ni, new_ni;
932         struct page *page;
933         int err;
934
935         if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
936                 return ERR_PTR(-EPERM);
937
938         page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
939         if (!page)
940                 return ERR_PTR(-ENOMEM);
941
942         if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
943                 err = -ENOSPC;
944                 goto fail;
945         }
946
947         get_node_info(sbi, dn->nid, &old_ni);
948
949         /* Reinitialize old_ni with new node page */
950         f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
951         new_ni = old_ni;
952         new_ni.ino = dn->inode->i_ino;
953         set_node_addr(sbi, &new_ni, NEW_ADDR, false);
954
955         f2fs_wait_on_page_writeback(page, NODE);
956         fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
957         set_cold_node(dn->inode, page);
958         SetPageUptodate(page);
959         set_page_dirty(page);
960
961         if (f2fs_has_xattr_block(ofs))
962                 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
963
964         dn->node_page = page;
965         if (ipage)
966                 update_inode(dn->inode, ipage);
967         else
968                 sync_inode_page(dn);
969         if (ofs == 0)
970                 inc_valid_inode_count(sbi);
971
972         return page;
973
974 fail:
975         clear_node_page_dirty(page);
976         f2fs_put_page(page, 1);
977         return ERR_PTR(err);
978 }
979
980 /*
981  * The caller should do the following according to the return value:
982  * 0: f2fs_put_page(page, 0)
983  * LOCKED_PAGE: f2fs_put_page(page, 1)
984  * error: nothing
985  */
986 static int read_node_page(struct page *page, int rw)
987 {
988         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
989         struct node_info ni;
990         struct f2fs_io_info fio = {
991                 .type = NODE,
992                 .rw = rw,
993         };
994
995         get_node_info(sbi, page->index, &ni);
996
997         if (unlikely(ni.blk_addr == NULL_ADDR)) {
998                 f2fs_put_page(page, 1);
999                 return -ENOENT;
1000         }
1001
1002         if (PageUptodate(page))
1003                 return LOCKED_PAGE;
1004
1005         fio.blk_addr = ni.blk_addr;
1006         return f2fs_submit_page_bio(sbi, page, &fio);
1007 }
1008
1009 /*
1010  * Readahead a node page
1011  */
1012 void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1013 {
1014         struct page *apage;
1015         int err;
1016
1017         apage = find_get_page(NODE_MAPPING(sbi), nid);
1018         if (apage && PageUptodate(apage)) {
1019                 f2fs_put_page(apage, 0);
1020                 return;
1021         }
1022         f2fs_put_page(apage, 0);
1023
1024         apage = grab_cache_page(NODE_MAPPING(sbi), nid);
1025         if (!apage)
1026                 return;
1027
1028         err = read_node_page(apage, READA);
1029         if (err == 0)
1030                 f2fs_put_page(apage, 0);
1031         else if (err == LOCKED_PAGE)
1032                 f2fs_put_page(apage, 1);
1033 }
1034
1035 struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
1036 {
1037         struct page *page;
1038         int err;
1039 repeat:
1040         page = grab_cache_page(NODE_MAPPING(sbi), nid);
1041         if (!page)
1042                 return ERR_PTR(-ENOMEM);
1043
1044         err = read_node_page(page, READ_SYNC);
1045         if (err < 0)
1046                 return ERR_PTR(err);
1047         else if (err != LOCKED_PAGE)
1048                 lock_page(page);
1049
1050         if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
1051                 ClearPageUptodate(page);
1052                 f2fs_put_page(page, 1);
1053                 return ERR_PTR(-EIO);
1054         }
1055         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1056                 f2fs_put_page(page, 1);
1057                 goto repeat;
1058         }
1059         return page;
1060 }
1061
1062 /*
1063  * Return a locked page for the desired node page.
1064  * And, readahead MAX_RA_NODE number of node pages.
1065  */
1066 struct page *get_node_page_ra(struct page *parent, int start)
1067 {
1068         struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1069         struct blk_plug plug;
1070         struct page *page;
1071         int err, i, end;
1072         nid_t nid;
1073
1074         /* First, try getting the desired direct node. */
1075         nid = get_nid(parent, start, false);
1076         if (!nid)
1077                 return ERR_PTR(-ENOENT);
1078 repeat:
1079         page = grab_cache_page(NODE_MAPPING(sbi), nid);
1080         if (!page)
1081                 return ERR_PTR(-ENOMEM);
1082
1083         err = read_node_page(page, READ_SYNC);
1084         if (err < 0)
1085                 return ERR_PTR(err);
1086         else if (err == LOCKED_PAGE)
1087                 goto page_hit;
1088
1089         blk_start_plug(&plug);
1090
1091         /* Then, try readahead for siblings of the desired node */
1092         end = start + MAX_RA_NODE;
1093         end = min(end, NIDS_PER_BLOCK);
1094         for (i = start + 1; i < end; i++) {
1095                 nid = get_nid(parent, i, false);
1096                 if (!nid)
1097                         continue;
1098                 ra_node_page(sbi, nid);
1099         }
1100
1101         blk_finish_plug(&plug);
1102
1103         lock_page(page);
1104         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1105                 f2fs_put_page(page, 1);
1106                 goto repeat;
1107         }
1108 page_hit:
1109         if (unlikely(!PageUptodate(page))) {
1110                 f2fs_put_page(page, 1);
1111                 return ERR_PTR(-EIO);
1112         }
1113         return page;
1114 }
1115
1116 void sync_inode_page(struct dnode_of_data *dn)
1117 {
1118         if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
1119                 update_inode(dn->inode, dn->node_page);
1120         } else if (dn->inode_page) {
1121                 if (!dn->inode_page_locked)
1122                         lock_page(dn->inode_page);
1123                 update_inode(dn->inode, dn->inode_page);
1124                 if (!dn->inode_page_locked)
1125                         unlock_page(dn->inode_page);
1126         } else {
1127                 update_inode_page(dn->inode);
1128         }
1129 }
1130
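/*
 * With a zero @ino, flush dirty node pages in three passes: indirect nodes,
 * then dentry dnodes, then file dnodes (see the step comment below).  A
 * non-zero @ino means the fsync path: only that inode's dnodes are written,
 * fsync/dentry marks are set on them, and the return value counts the dnode
 * pages submitted for @ino.
 */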
1131 int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
1132                                         struct writeback_control *wbc)
1133 {
1134         pgoff_t index, end;
1135         struct pagevec pvec;
1136         int step = ino ? 2 : 0;
1137         int nwritten = 0, wrote = 0;
1138
1139         pagevec_init(&pvec, 0);
1140
1141 next_step:
1142         index = 0;
1143         end = LONG_MAX;
1144
1145         while (index <= end) {
1146                 int i, nr_pages;
1147                 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1148                                 PAGECACHE_TAG_DIRTY,
1149                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1150                 if (nr_pages == 0)
1151                         break;
1152
1153                 for (i = 0; i < nr_pages; i++) {
1154                         struct page *page = pvec.pages[i];
1155
1156                         /*
1157                          * flushing sequence with step:
1158                          * 0. indirect nodes
1159                          * 1. dentry dnodes
1160                          * 2. file dnodes
1161                          */
1162                         if (step == 0 && IS_DNODE(page))
1163                                 continue;
1164                         if (step == 1 && (!IS_DNODE(page) ||
1165                                                 is_cold_node(page)))
1166                                 continue;
1167                         if (step == 2 && (!IS_DNODE(page) ||
1168                                                 !is_cold_node(page)))
1169                                 continue;
1170
1171                         /*
1172                          * In fsync mode, we should not skip
1173                          * writing node pages.
1174                          */
1175                         if (ino && ino_of_node(page) == ino)
1176                                 lock_page(page);
1177                         else if (!trylock_page(page))
1178                                 continue;
1179
1180                         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1181 continue_unlock:
1182                                 unlock_page(page);
1183                                 continue;
1184                         }
1185                         if (ino && ino_of_node(page) != ino)
1186                                 goto continue_unlock;
1187
1188                         if (!PageDirty(page)) {
1189                                 /* someone wrote it for us */
1190                                 goto continue_unlock;
1191                         }
1192
1193                         if (!clear_page_dirty_for_io(page))
1194                                 goto continue_unlock;
1195
1196                         /* called by fsync() */
1197                         if (ino && IS_DNODE(page)) {
1198                                 set_fsync_mark(page, 1);
1199                                 if (IS_INODE(page)) {
1200                                         if (!is_checkpointed_node(sbi, ino) &&
1201                                                 !has_fsynced_inode(sbi, ino))
1202                                                 set_dentry_mark(page, 1);
1203                                         else
1204                                                 set_dentry_mark(page, 0);
1205                                 }
1206                                 nwritten++;
1207                         } else {
1208                                 set_fsync_mark(page, 0);
1209                                 set_dentry_mark(page, 0);
1210                         }
1211
1212                         if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
1213                                 unlock_page(page);
1214                         else
1215                                 wrote++;
1216
1217                         if (--wbc->nr_to_write == 0)
1218                                 break;
1219                 }
1220                 pagevec_release(&pvec);
1221                 cond_resched();
1222
1223                 if (wbc->nr_to_write == 0) {
1224                         step = 2;
1225                         break;
1226                 }
1227         }
1228
1229         if (step < 2) {
1230                 step++;
1231                 goto next_step;
1232         }
1233
1234         if (wrote)
1235                 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1236         return nwritten;
1237 }
1238
1239 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1240 {
1241         pgoff_t index = 0, end = LONG_MAX;
1242         struct pagevec pvec;
1243         int ret2 = 0, ret = 0;
1244
1245         pagevec_init(&pvec, 0);
1246
1247         while (index <= end) {
1248                 int i, nr_pages;
1249                 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1250                                 PAGECACHE_TAG_WRITEBACK,
1251                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1252                 if (nr_pages == 0)
1253                         break;
1254
1255                 for (i = 0; i < nr_pages; i++) {
1256                         struct page *page = pvec.pages[i];
1257
1258                         /* until radix tree lookup accepts end_index */
1259                         if (unlikely(page->index > end))
1260                                 continue;
1261
1262                         if (ino && ino_of_node(page) == ino) {
1263                                 f2fs_wait_on_page_writeback(page, NODE);
1264                                 if (TestClearPageError(page))
1265                                         ret = -EIO;
1266                         }
1267                 }
1268                 pagevec_release(&pvec);
1269                 cond_resched();
1270         }
1271
1272         if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
1273                 ret2 = -ENOSPC;
1274         if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
1275                 ret2 = -EIO;
1276         if (!ret)
1277                 ret = ret2;
1278         return ret;
1279 }
1280
1281 static int f2fs_write_node_page(struct page *page,
1282                                 struct writeback_control *wbc)
1283 {
1284         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1285         nid_t nid;
1286         struct node_info ni;
1287         struct f2fs_io_info fio = {
1288                 .type = NODE,
1289                 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1290         };
1291
1292         trace_f2fs_writepage(page, NODE);
1293
1294         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1295                 goto redirty_out;
1296         if (unlikely(f2fs_cp_error(sbi)))
1297                 goto redirty_out;
1298
1299         f2fs_wait_on_page_writeback(page, NODE);
1300
1301         /* get old block addr of this node page */
1302         nid = nid_of_node(page);
1303         f2fs_bug_on(sbi, page->index != nid);
1304
1305         get_node_info(sbi, nid, &ni);
1306
1307         /* This page is already truncated */
1308         if (unlikely(ni.blk_addr == NULL_ADDR)) {
1309                 dec_page_count(sbi, F2FS_DIRTY_NODES);
1310                 unlock_page(page);
1311                 return 0;
1312         }
1313
1314         if (wbc->for_reclaim) {
1315                 if (!down_read_trylock(&sbi->node_write))
1316                         goto redirty_out;
1317         } else {
1318                 down_read(&sbi->node_write);
1319         }
1320
1321         set_page_writeback(page);
1322         fio.blk_addr = ni.blk_addr;
1323         write_node_page(sbi, page, nid, &fio);
1324         set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
1325         dec_page_count(sbi, F2FS_DIRTY_NODES);
1326         up_read(&sbi->node_write);
1327         unlock_page(page);
1328
1329         if (wbc->for_reclaim)
1330                 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1331
1332         return 0;
1333
1334 redirty_out:
1335         redirty_page_for_writepage(wbc, page);
1336         return AOP_WRITEPAGE_ACTIVATE;
1337 }
1338
1339 static int f2fs_write_node_pages(struct address_space *mapping,
1340                             struct writeback_control *wbc)
1341 {
1342         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
1343         long diff;
1344
1345         trace_f2fs_writepages(mapping->host, wbc, NODE);
1346
1347         /* balancing f2fs's metadata in background */
1348         f2fs_balance_fs_bg(sbi);
1349
1350         /* collect a number of dirty node pages and write together */
1351         if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1352                 goto skip_write;
1353
1354         diff = nr_pages_to_write(sbi, NODE, wbc);
1355         wbc->sync_mode = WB_SYNC_NONE;
1356         sync_node_pages(sbi, 0, wbc);
1357         wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1358         return 0;
1359
1360 skip_write:
1361         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1362         return 0;
1363 }
1364
1365 static int f2fs_set_node_page_dirty(struct page *page)
1366 {
1367         trace_f2fs_set_page_dirty(page, NODE);
1368
1369         SetPageUptodate(page);
1370         if (!PageDirty(page)) {
1371                 __set_page_dirty_nobuffers(page);
1372                 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
1373                 SetPagePrivate(page);
1374                 f2fs_trace_pid(page);
1375                 return 1;
1376         }
1377         return 0;
1378 }
1379
1380 /*
1381  * Structure of the f2fs node operations
1382  */
1383 const struct address_space_operations f2fs_node_aops = {
1384         .writepage      = f2fs_write_node_page,
1385         .writepages     = f2fs_write_node_pages,
1386         .set_page_dirty = f2fs_set_node_page_dirty,
1387         .invalidatepage = f2fs_invalidate_page,
1388         .releasepage    = f2fs_release_page,
1389 };
1390
1391 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1392                                                 nid_t n)
1393 {
1394         return radix_tree_lookup(&nm_i->free_nid_root, n);
1395 }
1396
1397 static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1398                                                 struct free_nid *i)
1399 {
1400         list_del(&i->list);
1401         radix_tree_delete(&nm_i->free_nid_root, i->nid);
1402 }
1403
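/*
 * Return -1 when the free nid cache is over its memory budget, 1 when the nid
 * was newly added, and 0 otherwise (nid 0, an already-allocated nid, a
 * duplicate entry, or a failed radix tree preload).
 */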
1404 static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1405 {
1406         struct f2fs_nm_info *nm_i = NM_I(sbi);
1407         struct free_nid *i;
1408         struct nat_entry *ne;
1409         bool allocated = false;
1410
1411         if (!available_free_memory(sbi, FREE_NIDS))
1412                 return -1;
1413
1414         /* 0 nid should not be used */
1415         if (unlikely(nid == 0))
1416                 return 0;
1417
1418         if (build) {
1419                 /* do not add allocated nids */
1420                 down_read(&nm_i->nat_tree_lock);
1421                 ne = __lookup_nat_cache(nm_i, nid);
1422                 if (ne &&
1423                         (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1424                                 nat_get_blkaddr(ne) != NULL_ADDR))
1425                         allocated = true;
1426                 up_read(&nm_i->nat_tree_lock);
1427                 if (allocated)
1428                         return 0;
1429         }
1430
1431         i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1432         i->nid = nid;
1433         i->state = NID_NEW;
1434
1435         if (radix_tree_preload(GFP_NOFS)) {
1436                 kmem_cache_free(free_nid_slab, i);
1437                 return 0;
1438         }
1439
1440         spin_lock(&nm_i->free_nid_list_lock);
1441         if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1442                 spin_unlock(&nm_i->free_nid_list_lock);
1443                 radix_tree_preload_end();
1444                 kmem_cache_free(free_nid_slab, i);
1445                 return 0;
1446         }
1447         list_add_tail(&i->list, &nm_i->free_nid_list);
1448         nm_i->fcnt++;
1449         spin_unlock(&nm_i->free_nid_list_lock);
1450         radix_tree_preload_end();
1451         return 1;
1452 }
1453
1454 static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1455 {
1456         struct free_nid *i;
1457         bool need_free = false;
1458
1459         spin_lock(&nm_i->free_nid_list_lock);
1460         i = __lookup_free_nid_list(nm_i, nid);
1461         if (i && i->state == NID_NEW) {
1462                 __del_from_free_nid_list(nm_i, i);
1463                 nm_i->fcnt--;
1464                 need_free = true;
1465         }
1466         spin_unlock(&nm_i->free_nid_list_lock);
1467
1468         if (need_free)
1469                 kmem_cache_free(free_nid_slab, i);
1470 }
1471
1472 static void scan_nat_page(struct f2fs_sb_info *sbi,
1473                         struct page *nat_page, nid_t start_nid)
1474 {
1475         struct f2fs_nm_info *nm_i = NM_I(sbi);
1476         struct f2fs_nat_block *nat_blk = page_address(nat_page);
1477         block_t blk_addr;
1478         int i;
1479
1480         i = start_nid % NAT_ENTRY_PER_BLOCK;
1481
1482         for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1483
1484                 if (unlikely(start_nid >= nm_i->max_nid))
1485                         break;
1486
1487                 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1488                 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1489                 if (blk_addr == NULL_ADDR) {
1490                         if (add_free_nid(sbi, start_nid, true) < 0)
1491                                 break;
1492                 }
1493         }
1494 }
1495
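/*
 * Refill the free nid list: skip the scan entirely when more than
 * NAT_ENTRY_PER_BLOCK free nids are already cached; otherwise scan nat pages
 * starting from nm_i->next_scan_nid (with FREE_NID_PAGES of readahead) and
 * then walk the nat journal in the hot data summary, adding nids whose block
 * address is NULL_ADDR and dropping those that are in use.
 */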
1496 static void build_free_nids(struct f2fs_sb_info *sbi)
1497 {
1498         struct f2fs_nm_info *nm_i = NM_I(sbi);
1499         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1500         struct f2fs_summary_block *sum = curseg->sum_blk;
1501         int i = 0;
1502         nid_t nid = nm_i->next_scan_nid;
1503
1504         /* Enough entries */
1505         if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
1506                 return;
1507
1508         /* readahead nat pages to be scanned */
1509         ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1510
1511         while (1) {
1512                 struct page *page = get_current_nat_page(sbi, nid);
1513
1514                 scan_nat_page(sbi, page, nid);
1515                 f2fs_put_page(page, 1);
1516
1517                 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1518                 if (unlikely(nid >= nm_i->max_nid))
1519                         nid = 0;
1520
1521                 if (i++ == FREE_NID_PAGES)
1522                         break;
1523         }
1524
1525         /* remember where to continue scanning for free nids next time */
1526         nm_i->next_scan_nid = nid;
1527
1528         /* find free nids from current sum_pages */
1529         mutex_lock(&curseg->curseg_mutex);
1530         for (i = 0; i < nats_in_cursum(sum); i++) {
1531                 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1532                 nid = le32_to_cpu(nid_in_journal(sum, i));
1533                 if (addr == NULL_ADDR)
1534                         add_free_nid(sbi, nid, true);
1535                 else
1536                         remove_free_nid(nm_i, nid);
1537         }
1538         mutex_unlock(&curseg->curseg_mutex);
1539 }
1540
1541 /*
1542  * If this function returns success, the caller can obtain a new nid
1543  * from the second parameter of this function.
1544  * The returned nid can be used as an ino as well as a nid when an inode is created.
1545  */
1546 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1547 {
1548         struct f2fs_nm_info *nm_i = NM_I(sbi);
1549         struct free_nid *i = NULL;
1550 retry:
1551         if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
1552                 return false;
1553
1554         spin_lock(&nm_i->free_nid_list_lock);
1555
1556         /* We should not use stale free nids created by build_free_nids */
1557         if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1558                 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
1559                 list_for_each_entry(i, &nm_i->free_nid_list, list)
1560                         if (i->state == NID_NEW)
1561                                 break;
1562
1563                 f2fs_bug_on(sbi, i->state != NID_NEW);
1564                 *nid = i->nid;
1565                 i->state = NID_ALLOC;
1566                 nm_i->fcnt--;
1567                 spin_unlock(&nm_i->free_nid_list_lock);
1568                 return true;
1569         }
1570         spin_unlock(&nm_i->free_nid_list_lock);
1571
1572         /* Let's scan the nat pages and the journaled nat entries to get free nids */
1573         mutex_lock(&nm_i->build_lock);
1574         build_free_nids(sbi);
1575         mutex_unlock(&nm_i->build_lock);
1576         goto retry;
1577 }
1578
1579 /*
1580  * alloc_nid() should be called prior to this function.
1581  */
1582 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1583 {
1584         struct f2fs_nm_info *nm_i = NM_I(sbi);
1585         struct free_nid *i;
1586
1587         spin_lock(&nm_i->free_nid_list_lock);
1588         i = __lookup_free_nid_list(nm_i, nid);
1589         f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1590         __del_from_free_nid_list(nm_i, i);
1591         spin_unlock(&nm_i->free_nid_list_lock);
1592
1593         kmem_cache_free(free_nid_slab, i);
1594 }
1595
1596 /*
1597  * alloc_nid() should be called prior to this function.
1598  */
1599 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1600 {
1601         struct f2fs_nm_info *nm_i = NM_I(sbi);
1602         struct free_nid *i;
1603         bool need_free = false;
1604
1605         if (!nid)
1606                 return;
1607
1608         spin_lock(&nm_i->free_nid_list_lock);
1609         i = __lookup_free_nid_list(nm_i, nid);
1610         f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1611         if (!available_free_memory(sbi, FREE_NIDS)) {
1612                 __del_from_free_nid_list(nm_i, i);
1613                 need_free = true;
1614         } else {
1615                 i->state = NID_NEW;
1616                 nm_i->fcnt++;
1617         }
1618         spin_unlock(&nm_i->free_nid_list_lock);
1619
1620         if (need_free)
1621                 kmem_cache_free(free_nid_slab, i);
1622 }
1623
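     /*
      * During roll-forward recovery, copy the inline xattr area from the
      * recovered node page into the inode's node page; if the recovered
      * inode no longer carries inline xattrs, just clear the flag.
      */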
1624 void recover_inline_xattr(struct inode *inode, struct page *page)
1625 {
1626         void *src_addr, *dst_addr;
1627         size_t inline_size;
1628         struct page *ipage;
1629         struct f2fs_inode *ri;
1630
1631         ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
1632         f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
1633
1634         ri = F2FS_INODE(page);
1635         if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
1636                 clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
1637                 goto update_inode;
1638         }
1639
1640         dst_addr = inline_xattr_addr(ipage);
1641         src_addr = inline_xattr_addr(page);
1642         inline_size = inline_xattr_size(inode);
1643
1644         f2fs_wait_on_page_writeback(ipage, NODE);
1645         memcpy(dst_addr, src_addr, inline_size);
1646 update_inode:
1647         update_inode(inode, ipage);
1648         f2fs_put_page(ipage, 1);
1649 }
1650
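     /*
      * Rebuild the xattr node of @inode during recovery: invalidate the
      * previous xattr nid (if any), reserve the nid found in the recovered
      * page, and finally point it at @blkaddr.
      */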
1651 void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1652 {
1653         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1654         nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1655         nid_t new_xnid = nid_of_node(page);
1656         struct node_info ni;
1657
1658         /* 1: invalidate the previous xattr nid */
1659         if (!prev_xnid)
1660                 goto recover_xnid;
1661
1662         /* Deallocate node address */
1663         get_node_info(sbi, prev_xnid, &ni);
1664         f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
1665         invalidate_blocks(sbi, ni.blk_addr);
1666         dec_valid_node_count(sbi, inode);
1667         set_node_addr(sbi, &ni, NULL_ADDR, false);
1668
1669 recover_xnid:
1670         /* 2: allocate new xattr nid */
1671         if (unlikely(!inc_valid_node_count(sbi, inode)))
1672                 f2fs_bug_on(sbi, 1);
1673
1674         remove_free_nid(NM_I(sbi), new_xnid);
1675         get_node_info(sbi, new_xnid, &ni);
1676         ni.ino = inode->i_ino;
1677         set_node_addr(sbi, &ni, NEW_ADDR, false);
1678         F2FS_I(inode)->i_xattr_nid = new_xnid;
1679
1680         /* 3: update xattr blkaddr */
1681         refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1682         set_node_addr(sbi, &ni, blkaddr, false);
1683
1684         update_inode_page(inode);
1685 }
1686
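     /*
      * Recreate the on-disk inode's node page from a recovered inode page:
      * copy the static part of the raw inode, reset size/blocks/links, and
      * register the page as a newly allocated node.
      */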
1687 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1688 {
1689         struct f2fs_inode *src, *dst;
1690         nid_t ino = ino_of_node(page);
1691         struct node_info old_ni, new_ni;
1692         struct page *ipage;
1693
1694         get_node_info(sbi, ino, &old_ni);
1695
1696         if (unlikely(old_ni.blk_addr != NULL_ADDR))
1697                 return -EINVAL;
1698
1699         ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
1700         if (!ipage)
1701                 return -ENOMEM;
1702
1703         /* This ino must not be handed out again from the free nid list */
1704         remove_free_nid(NM_I(sbi), ino);
1705
1706         SetPageUptodate(ipage);
1707         fill_node_footer(ipage, ino, ino, 0, true);
1708
1709         src = F2FS_INODE(page);
1710         dst = F2FS_INODE(ipage);
1711
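             /* copy every raw inode field that precedes i_ext */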
1712         memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
1713         dst->i_size = 0;
1714         dst->i_blocks = cpu_to_le64(1);
1715         dst->i_links = cpu_to_le32(1);
1716         dst->i_xattr_nid = 0;
1717         dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
1718
1719         new_ni = old_ni;
1720         new_ni.ino = ino;
1721
1722         if (unlikely(!inc_valid_node_count(sbi, NULL)))
1723                 WARN_ON(1);
1724         set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1725         inc_valid_inode_count(sbi);
1726         set_page_dirty(ipage);
1727         f2fs_put_page(ipage, 1);
1728         return 0;
1729 }
1730
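     /*
      * Rebuild the node summary block of segment @segno by reading every
      * node block in the segment and recording its nid; versions and
      * offsets are reset to zero.
      */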
1731 int restore_node_summary(struct f2fs_sb_info *sbi,
1732                         unsigned int segno, struct f2fs_summary_block *sum)
1733 {
1734         struct f2fs_node *rn;
1735         struct f2fs_summary *sum_entry;
1736         block_t addr;
1737         int bio_blocks = MAX_BIO_BLOCKS(sbi);
1738         int i, idx, last_offset, nrpages;
1739
1740         /* scan the node segment */
1741         last_offset = sbi->blocks_per_seg;
1742         addr = START_BLOCK(sbi, segno);
1743         sum_entry = &sum->entries[0];
1744
1745         for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
1746                 nrpages = min(last_offset - i, bio_blocks);
1747
1748                 /* readahead node pages */
1749                 ra_meta_pages(sbi, addr, nrpages, META_POR);
1750
1751                 for (idx = addr; idx < addr + nrpages; idx++) {
1752                         struct page *page = get_meta_page(sbi, idx);
1753
1754                         rn = F2FS_NODE(page);
1755                         sum_entry->nid = rn->footer.nid;
1756                         sum_entry->version = 0;
1757                         sum_entry->ofs_in_node = 0;
1758                         sum_entry++;
1759                         f2fs_put_page(page, 1);
1760                 }
1761
1762                 invalidate_mapping_pages(META_MAPPING(sbi), addr,
1763                                                         addr + nrpages);
1764         }
1765         return 0;
1766 }
1767
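     /*
      * Move every nat entry journaled in the current hot data summary into
      * the nat cache, mark it dirty, and then empty the journal.
      */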
1768 static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1769 {
1770         struct f2fs_nm_info *nm_i = NM_I(sbi);
1771         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1772         struct f2fs_summary_block *sum = curseg->sum_blk;
1773         int i;
1774
1775         mutex_lock(&curseg->curseg_mutex);
1776         for (i = 0; i < nats_in_cursum(sum); i++) {
1777                 struct nat_entry *ne;
1778                 struct f2fs_nat_entry raw_ne;
1779                 nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1780
1781                 raw_ne = nat_in_journal(sum, i);
1782
1783                 down_write(&nm_i->nat_tree_lock);
1784                 ne = __lookup_nat_cache(nm_i, nid);
1785                 if (!ne) {
1786                         ne = grab_nat_entry(nm_i, nid);
1787                         node_info_from_raw_nat(&ne->ni, &raw_ne);
1788                 }
1789                 __set_nat_cache_dirty(nm_i, ne);
1790                 up_write(&nm_i->nat_tree_lock);
1791         }
1792         update_nats_in_cursum(sum, -i);
1793         mutex_unlock(&curseg->curseg_mutex);
1794 }
1795
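     /*
      * Insert @nes into @head in ascending order of dirty entry count so
      * that small sets, which may still fit into the journal, are flushed
      * first; sets holding at least @max entries go straight to the tail.
      */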
1796 static void __adjust_nat_entry_set(struct nat_entry_set *nes,
1797                                                 struct list_head *head, int max)
1798 {
1799         struct nat_entry_set *cur;
1800
1801         if (nes->entry_cnt >= max)
1802                 goto add_out;
1803
1804         list_for_each_entry(cur, head, set_list) {
1805                 if (cur->entry_cnt >= nes->entry_cnt) {
1806                         list_add(&nes->set_list, cur->set_list.prev);
1807                         return;
1808                 }
1809         }
1810 add_out:
1811         list_add_tail(&nes->set_list, head);
1812 }
1813
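     /*
      * Flush one dirty nat entry set, either into the nat journal of the
      * current hot data summary (when it has room) or into its nat block;
      * nids whose block address became NULL_ADDR are returned to the free
      * nid list.
      */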
1814 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1815                                         struct nat_entry_set *set)
1816 {
1817         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1818         struct f2fs_summary_block *sum = curseg->sum_blk;
1819         nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
1820         bool to_journal = true;
1821         struct f2fs_nat_block *nat_blk;
1822         struct nat_entry *ne, *cur;
1823         struct page *page = NULL;
1824
1825         /*
1826          * There are two ways to flush nat entries:
1827          * #1, flush them to the journal in the current hot data summary block.
1828          * #2, flush them to the nat page.
1829          */
1830         if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
1831                 to_journal = false;
1832
1833         if (to_journal) {
1834                 mutex_lock(&curseg->curseg_mutex);
1835         } else {
1836                 page = get_next_nat_page(sbi, start_nid);
1837                 nat_blk = page_address(page);
1838                 f2fs_bug_on(sbi, !nat_blk);
1839         }
1840
1841         /* flush dirty nats in nat entry set */
1842         list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
1843                 struct f2fs_nat_entry *raw_ne;
1844                 nid_t nid = nat_get_nid(ne);
1845                 int offset;
1846
1847                 if (nat_get_blkaddr(ne) == NEW_ADDR)
1848                         continue;
1849
1850                 if (to_journal) {
1851                         offset = lookup_journal_in_cursum(sum,
1852                                                         NAT_JOURNAL, nid, 1);
1853                         f2fs_bug_on(sbi, offset < 0);
1854                         raw_ne = &nat_in_journal(sum, offset);
1855                         nid_in_journal(sum, offset) = cpu_to_le32(nid);
1856                 } else {
1857                         raw_ne = &nat_blk->entries[nid - start_nid];
1858                 }
1859                 raw_nat_from_node_info(raw_ne, &ne->ni);
1860
1861                 down_write(&NM_I(sbi)->nat_tree_lock);
1862                 nat_reset_flag(ne);
1863                 __clear_nat_cache_dirty(NM_I(sbi), ne);
1864                 up_write(&NM_I(sbi)->nat_tree_lock);
1865
1866                 if (nat_get_blkaddr(ne) == NULL_ADDR)
1867                         add_free_nid(sbi, nid, false);
1868         }
1869
1870         if (to_journal)
1871                 mutex_unlock(&curseg->curseg_mutex);
1872         else
1873                 f2fs_put_page(page, 1);
1874
1875         f2fs_bug_on(sbi, set->entry_cnt);
1876
1877         radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1878         kmem_cache_free(nat_entry_set_slab, set);
1879 }
1880
1881 /*
1882  * This function is called during the checkpointing process.
1883  */
1884 void flush_nat_entries(struct f2fs_sb_info *sbi)
1885 {
1886         struct f2fs_nm_info *nm_i = NM_I(sbi);
1887         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1888         struct f2fs_summary_block *sum = curseg->sum_blk;
1889         struct nat_entry_set *setvec[SETVEC_SIZE];
1890         struct nat_entry_set *set, *tmp;
1891         unsigned int found;
1892         nid_t set_idx = 0;
1893         LIST_HEAD(sets);
1894
1895         if (!nm_i->dirty_nat_cnt)
1896                 return;
1897         /*
1898          * if there is not enough space in the journal to store the dirty
1899          * nat entries, remove all entries from the journal and merge them
1900          * into the nat entry sets.
1901          */
1902         if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1903                 remove_nats_in_journal(sbi);
1904
1905         while ((found = __gang_lookup_nat_set(nm_i,
1906                                         set_idx, SETVEC_SIZE, setvec))) {
1907                 unsigned idx;
1908                 set_idx = setvec[found - 1]->set + 1;
1909                 for (idx = 0; idx < found; idx++)
1910                         __adjust_nat_entry_set(setvec[idx], &sets,
1911                                                         MAX_NAT_JENTRIES(sum));
1912         }
1913
1914         /* flush the dirty nat entries, one set at a time */
1915         list_for_each_entry_safe(set, tmp, &sets, set_list)
1916                 __flush_nat_entry_set(sbi, set);
1917
1918         f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
1919 }
1920
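     /*
      * Initialize the in-memory node manager from the raw superblock: NAT
      * geometry, counters, radix trees, lists, locks, and a private copy
      * of the NAT version bitmap.
      */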
1921 static int init_node_manager(struct f2fs_sb_info *sbi)
1922 {
1923         struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1924         struct f2fs_nm_info *nm_i = NM_I(sbi);
1925         unsigned char *version_bitmap;
1926         unsigned int nat_segs, nat_blocks;
1927
1928         nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1929
1930         /* segment_count_nat includes the pair segment, so divide by 2. */
1931         nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1932         nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1933
1934         nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1935
1936         /* unused nids: 0, node, meta (and root is counted as a valid node) */
1937         nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
1938         nm_i->fcnt = 0;
1939         nm_i->nat_cnt = 0;
1940         nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1941
1942         INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1943         INIT_LIST_HEAD(&nm_i->free_nid_list);
1944         INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
1945         INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
1946         INIT_LIST_HEAD(&nm_i->nat_entries);
1947
1948         mutex_init(&nm_i->build_lock);
1949         spin_lock_init(&nm_i->free_nid_list_lock);
1950         init_rwsem(&nm_i->nat_tree_lock);
1951
1952         nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1953         nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1954         version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1955         if (!version_bitmap)
1956                 return -EFAULT;
1957
1958         nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
1959                                         GFP_KERNEL);
1960         if (!nm_i->nat_bitmap)
1961                 return -ENOMEM;
1962         return 0;
1963 }
1964
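     /* Allocate and initialize the node manager, then prefill the free nid list. */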
1965 int build_node_manager(struct f2fs_sb_info *sbi)
1966 {
1967         int err;
1968
1969         sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1970         if (!sbi->nm_info)
1971                 return -ENOMEM;
1972
1973         err = init_node_manager(sbi);
1974         if (err)
1975                 return err;
1976
1977         build_free_nids(sbi);
1978         return 0;
1979 }
1980
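     /*
      * Tear down the node manager: drop every cached free nid, nat entry
      * and nat entry set, and release the NAT bitmap.
      */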
1981 void destroy_node_manager(struct f2fs_sb_info *sbi)
1982 {
1983         struct f2fs_nm_info *nm_i = NM_I(sbi);
1984         struct free_nid *i, *next_i;
1985         struct nat_entry *natvec[NATVEC_SIZE];
1986         struct nat_entry_set *setvec[SETVEC_SIZE];
1987         nid_t nid = 0;
1988         unsigned int found;
1989
1990         if (!nm_i)
1991                 return;
1992
1993         /* destroy free nid list */
1994         spin_lock(&nm_i->free_nid_list_lock);
1995         list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1996                 f2fs_bug_on(sbi, i->state == NID_ALLOC);
1997                 __del_from_free_nid_list(nm_i, i);
1998                 nm_i->fcnt--;
1999                 spin_unlock(&nm_i->free_nid_list_lock);
2000                 kmem_cache_free(free_nid_slab, i);
2001                 spin_lock(&nm_i->free_nid_list_lock);
2002         }
2003         f2fs_bug_on(sbi, nm_i->fcnt);
2004         spin_unlock(&nm_i->free_nid_list_lock);
2005
2006         /* destroy nat cache */
2007         down_write(&nm_i->nat_tree_lock);
2008         while ((found = __gang_lookup_nat_cache(nm_i,
2009                                         nid, NATVEC_SIZE, natvec))) {
2010                 unsigned idx;
2011
2012                 nid = nat_get_nid(natvec[found - 1]) + 1;
2013                 for (idx = 0; idx < found; idx++)
2014                         __del_from_nat_cache(nm_i, natvec[idx]);
2015         }
2016         f2fs_bug_on(sbi, nm_i->nat_cnt);
2017
2018         /* destroy nat set cache */
2019         nid = 0;
2020         while ((found = __gang_lookup_nat_set(nm_i,
2021                                         nid, SETVEC_SIZE, setvec))) {
2022                 unsigned idx;
2023
2024                 nid = setvec[found - 1]->set + 1;
2025                 for (idx = 0; idx < found; idx++) {
2026                         /* entry_cnt is not zero when a cp_error has occurred */
2027                         f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
2028                         radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
2029                         kmem_cache_free(nat_entry_set_slab, setvec[idx]);
2030                 }
2031         }
2032         up_write(&nm_i->nat_tree_lock);
2033
2034         kfree(nm_i->nat_bitmap);
2035         sbi->nm_info = NULL;
2036         kfree(nm_i);
2037 }
2038
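     /*
      * Create the slab caches used by the node manager; on failure, unwind
      * whatever was already created.
      */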
2039 int __init create_node_manager_caches(void)
2040 {
2041         nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
2042                         sizeof(struct nat_entry));
2043         if (!nat_entry_slab)
2044                 goto fail;
2045
2046         free_nid_slab = f2fs_kmem_cache_create("free_nid",
2047                         sizeof(struct free_nid));
2048         if (!free_nid_slab)
2049                 goto destroy_nat_entry;
2050
2051         nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2052                         sizeof(struct nat_entry_set));
2053         if (!nat_entry_set_slab)
2054                 goto destroy_free_nid;
2055         return 0;
2056
2057 destroy_free_nid:
2058         kmem_cache_destroy(free_nid_slab);
2059 destroy_nat_entry:
2060         kmem_cache_destroy(nat_entry_slab);
2061 fail:
2062         return -ENOMEM;
2063 }
2064
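     /* Release the slab caches created by create_node_manager_caches(). */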
2065 void destroy_node_manager_caches(void)
2066 {
2067         kmem_cache_destroy(nat_entry_set_slab);
2068         kmem_cache_destroy(free_nid_slab);
2069         kmem_cache_destroy(nat_entry_slab);
2070 }