9a33e34d26ce9dcd31f0af918a3df510d6d47c52
[firefly-linux-kernel-4.4.55.git] / fs / f2fs / segment.c
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/vmalloc.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include <trace/events/f2fs.h>
24
/*
 * Find-first-zero counterpart of __reverse_ffs(): invert the word and look
 * for the first set bit in the same reversed-in-byte bit order.
 */
#define __reverse_ffz(x) __reverse_ffs(~(x))

/* slab cache backing the struct discard_entry objects on the discard list */
static struct kmem_cache *discard_entry_slab;
/* NOTE(review): not used in this part of the file; presumably for SIT entry sets */
static struct kmem_cache *sit_entry_set_slab;
/* slab cache backing the struct inmem_pages objects of registered pages */
static struct kmem_cache *inmem_entry_slab;
30
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 *
 * Bytes are scanned from the least significant one upwards, but inside the
 * first non-zero byte the most significant bit is treated as the first bit.
 * The result is the position of the first set bit in that ordering.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
        int pos = 0;

        /* skip leading all-zero halves/bytes, lowest byte first */
#if BITS_PER_LONG == 64
        if (!(word & 0xffffffff)) {
                word >>= 32;
                pos += 32;
        }
#endif
        if (!(word & 0xffff)) {
                word >>= 16;
                pos += 16;
        }
        if (!(word & 0xff)) {
                word >>= 8;
                pos += 8;
        }

        /* inside the byte the high bits come first */
        if (word & 0xf0)
                word >>= 4;
        else
                pos += 4;

        if (word & 0xc)
                word >>= 2;
        else
                pos += 2;

        if (!(word & 0x2))
                pos += 1;

        return pos;
}
65
66 /*
67  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
68  * f2fs_set_bit makes MSB and LSB reversed in a byte.
69  * Example:
70  *                             LSB <--> MSB
71  *   f2fs_set_bit(0, bitmap) => 0000 0001
72  *   f2fs_set_bit(7, bitmap) => 1000 0000
73  */
74 static unsigned long __find_rev_next_bit(const unsigned long *addr,
75                         unsigned long size, unsigned long offset)
76 {
77         const unsigned long *p = addr + BIT_WORD(offset);
78         unsigned long result = offset & ~(BITS_PER_LONG - 1);
79         unsigned long tmp;
80         unsigned long mask, submask;
81         unsigned long quot, rest;
82
83         if (offset >= size)
84                 return size;
85
86         size -= result;
87         offset %= BITS_PER_LONG;
88         if (!offset)
89                 goto aligned;
90
91         tmp = *(p++);
92         quot = (offset >> 3) << 3;
93         rest = offset & 0x7;
94         mask = ~0UL << quot;
95         submask = (unsigned char)(0xff << rest) >> rest;
96         submask <<= quot;
97         mask &= submask;
98         tmp &= mask;
99         if (size < BITS_PER_LONG)
100                 goto found_first;
101         if (tmp)
102                 goto found_middle;
103
104         size -= BITS_PER_LONG;
105         result += BITS_PER_LONG;
106 aligned:
107         while (size & ~(BITS_PER_LONG-1)) {
108                 tmp = *(p++);
109                 if (tmp)
110                         goto found_middle;
111                 result += BITS_PER_LONG;
112                 size -= BITS_PER_LONG;
113         }
114         if (!size)
115                 return result;
116         tmp = *p;
117 found_first:
118         tmp &= (~0UL >> (BITS_PER_LONG - size));
119         if (tmp == 0UL)         /* Are any bits set? */
120                 return result + size;   /* Nope. */
121 found_middle:
122         return result + __reverse_ffs(tmp);
123 }
124
125 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
126                         unsigned long size, unsigned long offset)
127 {
128         const unsigned long *p = addr + BIT_WORD(offset);
129         unsigned long result = offset & ~(BITS_PER_LONG - 1);
130         unsigned long tmp;
131         unsigned long mask, submask;
132         unsigned long quot, rest;
133
134         if (offset >= size)
135                 return size;
136
137         size -= result;
138         offset %= BITS_PER_LONG;
139         if (!offset)
140                 goto aligned;
141
142         tmp = *(p++);
143         quot = (offset >> 3) << 3;
144         rest = offset & 0x7;
145         mask = ~(~0UL << quot);
146         submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
147         submask <<= quot;
148         mask += submask;
149         tmp |= mask;
150         if (size < BITS_PER_LONG)
151                 goto found_first;
152         if (~tmp)
153                 goto found_middle;
154
155         size -= BITS_PER_LONG;
156         result += BITS_PER_LONG;
157 aligned:
158         while (size & ~(BITS_PER_LONG - 1)) {
159                 tmp = *(p++);
160                 if (~tmp)
161                         goto found_middle;
162                 result += BITS_PER_LONG;
163                 size -= BITS_PER_LONG;
164         }
165         if (!size)
166                 return result;
167         tmp = *p;
168
169 found_first:
170         tmp |= ~0UL << size;
171         if (tmp == ~0UL)        /* Are any bits zero? */
172                 return result + size;   /* Nope. */
173 found_middle:
174         return result + __reverse_ffz(tmp);
175 }
176
177 void register_inmem_page(struct inode *inode, struct page *page)
178 {
179         struct f2fs_inode_info *fi = F2FS_I(inode);
180         struct inmem_pages *new;
181         int err;
182 retry:
183         new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
184
185         /* add atomic page indices to the list */
186         new->page = page;
187         INIT_LIST_HEAD(&new->list);
188
189         /* increase reference count with clean state */
190         mutex_lock(&fi->inmem_lock);
191         err = radix_tree_insert(&fi->inmem_root, page->index, new);
192         if (err == -EEXIST) {
193                 mutex_unlock(&fi->inmem_lock);
194                 kmem_cache_free(inmem_entry_slab, new);
195                 return;
196         } else if (err) {
197                 mutex_unlock(&fi->inmem_lock);
198                 kmem_cache_free(inmem_entry_slab, new);
199                 goto retry;
200         }
201         get_page(page);
202         list_add_tail(&new->list, &fi->inmem_pages);
203         mutex_unlock(&fi->inmem_lock);
204 }
205
206 void invalidate_inmem_page(struct inode *inode, struct page *page)
207 {
208         struct f2fs_inode_info *fi = F2FS_I(inode);
209         struct inmem_pages *cur;
210
211         mutex_lock(&fi->inmem_lock);
212         cur = radix_tree_lookup(&fi->inmem_root, page->index);
213         if (cur) {
214                 radix_tree_delete(&fi->inmem_root, cur->page->index);
215                 f2fs_put_page(cur->page, 0);
216                 list_del(&cur->list);
217                 kmem_cache_free(inmem_entry_slab, cur);
218         }
219         mutex_unlock(&fi->inmem_lock);
220 }
221
222 void commit_inmem_pages(struct inode *inode, bool abort)
223 {
224         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
225         struct f2fs_inode_info *fi = F2FS_I(inode);
226         struct inmem_pages *cur, *tmp;
227         bool submit_bio = false;
228         struct f2fs_io_info fio = {
229                 .type = DATA,
230                 .rw = WRITE_SYNC,
231         };
232
233         /*
234          * The abort is true only when f2fs_evict_inode is called.
235          * Basically, the f2fs_evict_inode doesn't produce any data writes, so
236          * that we don't need to call f2fs_balance_fs.
237          * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
238          * inode becomes free by iget_locked in f2fs_iget.
239          */
240         if (!abort)
241                 f2fs_balance_fs(sbi);
242
243         f2fs_lock_op(sbi);
244
245         mutex_lock(&fi->inmem_lock);
246         list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
247                 lock_page(cur->page);
248                 if (!abort && cur->page->mapping == inode->i_mapping) {
249                         f2fs_wait_on_page_writeback(cur->page, DATA);
250                         if (clear_page_dirty_for_io(cur->page))
251                                 inode_dec_dirty_pages(inode);
252                         do_write_data_page(cur->page, &fio);
253                         submit_bio = true;
254                 }
255                 radix_tree_delete(&fi->inmem_root, cur->page->index);
256                 f2fs_put_page(cur->page, 1);
257                 list_del(&cur->list);
258                 kmem_cache_free(inmem_entry_slab, cur);
259         }
260         if (submit_bio)
261                 f2fs_submit_merged_bio(sbi, DATA, WRITE);
262         mutex_unlock(&fi->inmem_lock);
263
264         filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
265         f2fs_unlock_op(sbi);
266 }
267
268 /*
269  * This function balances dirty node and dentry pages.
270  * In addition, it controls garbage collection.
271  */
272 void f2fs_balance_fs(struct f2fs_sb_info *sbi)
273 {
274         /*
275          * We should do GC or end up with checkpoint, if there are so many dirty
276          * dir/node pages without enough free segments.
277          */
278         if (has_not_enough_free_secs(sbi, 0)) {
279                 mutex_lock(&sbi->gc_mutex);
280                 f2fs_gc(sbi);
281         }
282 }
283
284 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
285 {
286         /* check the # of cached NAT entries and prefree segments */
287         if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
288                         excess_prefree_segs(sbi) ||
289                         available_free_memory(sbi, INO_ENTRIES))
290                 f2fs_sync_fs(sbi->sb, true);
291 }
292
293 static int issue_flush_thread(void *data)
294 {
295         struct f2fs_sb_info *sbi = data;
296         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
297         wait_queue_head_t *q = &fcc->flush_wait_queue;
298 repeat:
299         if (kthread_should_stop())
300                 return 0;
301
302         if (!llist_empty(&fcc->issue_list)) {
303                 struct bio *bio = bio_alloc(GFP_NOIO, 0);
304                 struct flush_cmd *cmd, *next;
305                 int ret;
306
307                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
308                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
309
310                 bio->bi_bdev = sbi->sb->s_bdev;
311                 ret = submit_bio_wait(WRITE_FLUSH, bio);
312
313                 llist_for_each_entry_safe(cmd, next,
314                                           fcc->dispatch_list, llnode) {
315                         cmd->ret = ret;
316                         complete(&cmd->wait);
317                 }
318                 bio_put(bio);
319                 fcc->dispatch_list = NULL;
320         }
321
322         wait_event_interruptible(*q,
323                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
324         goto repeat;
325 }
326
327 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
328 {
329         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
330         struct flush_cmd cmd;
331
332         trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
333                                         test_opt(sbi, FLUSH_MERGE));
334
335         if (test_opt(sbi, NOBARRIER))
336                 return 0;
337
338         if (!test_opt(sbi, FLUSH_MERGE))
339                 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
340
341         init_completion(&cmd.wait);
342
343         llist_add(&cmd.llnode, &fcc->issue_list);
344
345         if (!fcc->dispatch_list)
346                 wake_up(&fcc->flush_wait_queue);
347
348         wait_for_completion(&cmd.wait);
349
350         return cmd.ret;
351 }
352
353 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
354 {
355         dev_t dev = sbi->sb->s_bdev->bd_dev;
356         struct flush_cmd_control *fcc;
357         int err = 0;
358
359         fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
360         if (!fcc)
361                 return -ENOMEM;
362         init_waitqueue_head(&fcc->flush_wait_queue);
363         init_llist_head(&fcc->issue_list);
364         SM_I(sbi)->cmd_control_info = fcc;
365         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
366                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
367         if (IS_ERR(fcc->f2fs_issue_flush)) {
368                 err = PTR_ERR(fcc->f2fs_issue_flush);
369                 kfree(fcc);
370                 SM_I(sbi)->cmd_control_info = NULL;
371                 return err;
372         }
373
374         return err;
375 }
376
377 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
378 {
379         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
380
381         if (fcc && fcc->f2fs_issue_flush)
382                 kthread_stop(fcc->f2fs_issue_flush);
383         kfree(fcc);
384         SM_I(sbi)->cmd_control_info = NULL;
385 }
386
387 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
388                 enum dirty_type dirty_type)
389 {
390         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
391
392         /* need not be added */
393         if (IS_CURSEG(sbi, segno))
394                 return;
395
396         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
397                 dirty_i->nr_dirty[dirty_type]++;
398
399         if (dirty_type == DIRTY) {
400                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
401                 enum dirty_type t = sentry->type;
402
403                 if (unlikely(t >= DIRTY)) {
404                         f2fs_bug_on(sbi, 1);
405                         return;
406                 }
407                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
408                         dirty_i->nr_dirty[t]++;
409         }
410 }
411
412 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
413                 enum dirty_type dirty_type)
414 {
415         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
416
417         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
418                 dirty_i->nr_dirty[dirty_type]--;
419
420         if (dirty_type == DIRTY) {
421                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
422                 enum dirty_type t = sentry->type;
423
424                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
425                         dirty_i->nr_dirty[t]--;
426
427                 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
428                         clear_bit(GET_SECNO(sbi, segno),
429                                                 dirty_i->victim_secmap);
430         }
431 }
432
433 /*
434  * Should not occur error such as -ENOMEM.
435  * Adding dirty entry into seglist is not critical operation.
436  * If a given segment is one of current working segments, it won't be added.
437  */
438 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
439 {
440         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
441         unsigned short valid_blocks;
442
443         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
444                 return;
445
446         mutex_lock(&dirty_i->seglist_lock);
447
448         valid_blocks = get_valid_blocks(sbi, segno, 0);
449
450         if (valid_blocks == 0) {
451                 __locate_dirty_segment(sbi, segno, PRE);
452                 __remove_dirty_segment(sbi, segno, DIRTY);
453         } else if (valid_blocks < sbi->blocks_per_seg) {
454                 __locate_dirty_segment(sbi, segno, DIRTY);
455         } else {
456                 /* Recovery routine with SSR needs this */
457                 __remove_dirty_segment(sbi, segno, DIRTY);
458         }
459
460         mutex_unlock(&dirty_i->seglist_lock);
461 }
462
463 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
464                                 block_t blkstart, block_t blklen)
465 {
466         sector_t start = SECTOR_FROM_BLOCK(blkstart);
467         sector_t len = SECTOR_FROM_BLOCK(blklen);
468         trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
469         return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
470 }
471
472 void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
473 {
474         if (f2fs_issue_discard(sbi, blkaddr, 1)) {
475                 struct page *page = grab_meta_page(sbi, blkaddr);
476                 /* zero-filled page */
477                 set_page_dirty(page);
478                 f2fs_put_page(page, 1);
479         }
480 }
481
482 static void __add_discard_entry(struct f2fs_sb_info *sbi,
483                 struct cp_control *cpc, unsigned int start, unsigned int end)
484 {
485         struct list_head *head = &SM_I(sbi)->discard_list;
486         struct discard_entry *new, *last;
487
488         if (!list_empty(head)) {
489                 last = list_last_entry(head, struct discard_entry, list);
490                 if (START_BLOCK(sbi, cpc->trim_start) + start ==
491                                                 last->blkaddr + last->len) {
492                         last->len += end - start;
493                         goto done;
494                 }
495         }
496
497         new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
498         INIT_LIST_HEAD(&new->list);
499         new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
500         new->len = end - start;
501         list_add_tail(&new->list, head);
502 done:
503         SM_I(sbi)->nr_discards += end - start;
504         cpc->trimmed += end - start;
505 }
506
507 static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
508 {
509         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
510         int max_blocks = sbi->blocks_per_seg;
511         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
512         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
513         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
514         unsigned long dmap[entries];
515         unsigned int start = 0, end = -1;
516         bool force = (cpc->reason == CP_DISCARD);
517         int i;
518
519         if (!force && !test_opt(sbi, DISCARD))
520                 return;
521
522         if (force && !se->valid_blocks) {
523                 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
524                 /*
525                  * if this segment is registered in the prefree list, then
526                  * we should skip adding a discard candidate, and let the
527                  * checkpoint do that later.
528                  */
529                 mutex_lock(&dirty_i->seglist_lock);
530                 if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
531                         mutex_unlock(&dirty_i->seglist_lock);
532                         cpc->trimmed += sbi->blocks_per_seg;
533                         return;
534                 }
535                 mutex_unlock(&dirty_i->seglist_lock);
536
537                 __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
538                 return;
539         }
540
541         /* zero block will be discarded through the prefree list */
542         if (!se->valid_blocks || se->valid_blocks == max_blocks)
543                 return;
544
545         /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
546         for (i = 0; i < entries; i++)
547                 dmap[i] = ~(cur_map[i] | ckpt_map[i]);
548
549         while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
550                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
551                 if (start >= max_blocks)
552                         break;
553
554                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
555
556                 if (end - start < cpc->trim_minlen)
557                         continue;
558
559                 __add_discard_entry(sbi, cpc, start, end);
560         }
561 }
562
563 void release_discard_addrs(struct f2fs_sb_info *sbi)
564 {
565         struct list_head *head = &(SM_I(sbi)->discard_list);
566         struct discard_entry *entry, *this;
567
568         /* drop caches */
569         list_for_each_entry_safe(entry, this, head, list) {
570                 list_del(&entry->list);
571                 kmem_cache_free(discard_entry_slab, entry);
572         }
573 }
574
575 /*
576  * Should call clear_prefree_segments after checkpoint is done.
577  */
578 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
579 {
580         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
581         unsigned int segno;
582
583         mutex_lock(&dirty_i->seglist_lock);
584         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
585                 __set_test_and_free(sbi, segno);
586         mutex_unlock(&dirty_i->seglist_lock);
587 }
588
589 void clear_prefree_segments(struct f2fs_sb_info *sbi)
590 {
591         struct list_head *head = &(SM_I(sbi)->discard_list);
592         struct discard_entry *entry, *this;
593         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
594         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
595         unsigned int start = 0, end = -1;
596
597         mutex_lock(&dirty_i->seglist_lock);
598
599         while (1) {
600                 int i;
601                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
602                 if (start >= MAIN_SEGS(sbi))
603                         break;
604                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
605                                                                 start + 1);
606
607                 for (i = start; i < end; i++)
608                         clear_bit(i, prefree_map);
609
610                 dirty_i->nr_dirty[PRE] -= end - start;
611
612                 if (!test_opt(sbi, DISCARD))
613                         continue;
614
615                 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
616                                 (end - start) << sbi->log_blocks_per_seg);
617         }
618         mutex_unlock(&dirty_i->seglist_lock);
619
620         /* send small discards */
621         list_for_each_entry_safe(entry, this, head, list) {
622                 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
623                 list_del(&entry->list);
624                 SM_I(sbi)->nr_discards -= entry->len;
625                 kmem_cache_free(discard_entry_slab, entry);
626         }
627 }
628
629 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
630 {
631         struct sit_info *sit_i = SIT_I(sbi);
632
633         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
634                 sit_i->dirty_sentries++;
635                 return false;
636         }
637
638         return true;
639 }
640
641 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
642                                         unsigned int segno, int modified)
643 {
644         struct seg_entry *se = get_seg_entry(sbi, segno);
645         se->type = type;
646         if (modified)
647                 __mark_sit_entry_dirty(sbi, segno);
648 }
649
650 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
651 {
652         struct seg_entry *se;
653         unsigned int segno, offset;
654         long int new_vblocks;
655
656         segno = GET_SEGNO(sbi, blkaddr);
657
658         se = get_seg_entry(sbi, segno);
659         new_vblocks = se->valid_blocks + del;
660         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
661
662         f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
663                                 (new_vblocks > sbi->blocks_per_seg)));
664
665         se->valid_blocks = new_vblocks;
666         se->mtime = get_mtime(sbi);
667         SIT_I(sbi)->max_mtime = se->mtime;
668
669         /* Update valid block bitmap */
670         if (del > 0) {
671                 if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
672                         f2fs_bug_on(sbi, 1);
673         } else {
674                 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
675                         f2fs_bug_on(sbi, 1);
676         }
677         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
678                 se->ckpt_valid_blocks += del;
679
680         __mark_sit_entry_dirty(sbi, segno);
681
682         /* update total number of valid blocks to be written in ckpt area */
683         SIT_I(sbi)->written_valid_blocks += del;
684
685         if (sbi->segs_per_sec > 1)
686                 get_sec_entry(sbi, segno)->valid_blocks += del;
687 }
688
689 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
690 {
691         update_sit_entry(sbi, new, 1);
692         if (GET_SEGNO(sbi, old) != NULL_SEGNO)
693                 update_sit_entry(sbi, old, -1);
694
695         locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
696         locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
697 }
698
699 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
700 {
701         unsigned int segno = GET_SEGNO(sbi, addr);
702         struct sit_info *sit_i = SIT_I(sbi);
703
704         f2fs_bug_on(sbi, addr == NULL_ADDR);
705         if (addr == NEW_ADDR)
706                 return;
707
708         /* add it into sit main buffer */
709         mutex_lock(&sit_i->sentry_lock);
710
711         update_sit_entry(sbi, addr, -1);
712
713         /* add it into dirty seglist */
714         locate_dirty_segment(sbi, segno);
715
716         mutex_unlock(&sit_i->sentry_lock);
717 }
718
719 /*
720  * This function should be resided under the curseg_mutex lock
721  */
722 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
723                                         struct f2fs_summary *sum)
724 {
725         struct curseg_info *curseg = CURSEG_I(sbi, type);
726         void *addr = curseg->sum_blk;
727         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
728         memcpy(addr, sum, sizeof(struct f2fs_summary));
729 }
730
731 /*
732  * Calculate the number of current summary pages for writing
733  */
734 int npages_for_summary_flush(struct f2fs_sb_info *sbi)
735 {
736         int valid_sum_count = 0;
737         int i, sum_in_page;
738
739         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
740                 if (sbi->ckpt->alloc_type[i] == SSR)
741                         valid_sum_count += sbi->blocks_per_seg;
742                 else
743                         valid_sum_count += curseg_blkoff(sbi, i);
744         }
745
746         sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
747                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
748         if (valid_sum_count <= sum_in_page)
749                 return 1;
750         else if ((valid_sum_count - sum_in_page) <=
751                 (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
752                 return 2;
753         return 3;
754 }
755
/*
 * Return the meta page holding the summary block of @segno.
 * The caller is responsible for putting the returned page.
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
        struct page *sum_page;

        sum_page = get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
        return sum_page;
}
763
764 static void write_sum_page(struct f2fs_sb_info *sbi,
765                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
766 {
767         struct page *page = grab_meta_page(sbi, blk_addr);
768         void *kaddr = page_address(page);
769         memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
770         set_page_dirty(page);
771         f2fs_put_page(page, 1);
772 }
773
774 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
775 {
776         struct curseg_info *curseg = CURSEG_I(sbi, type);
777         unsigned int segno = curseg->segno + 1;
778         struct free_segmap_info *free_i = FREE_I(sbi);
779
780         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
781                 return !test_bit(segno, free_i->free_segmap);
782         return 0;
783 }
784
/*
 * Find a new segment in the free segment bitmap, honouring the allocation
 * direction.  This function must succeed; otherwise it is a BUG.
 *
 * @newseg:  in: segment the search starts from; out: the chosen segment
 * @new_sec: when false, the next segment of the current section is tried
 *           before looking for another section
 * @dir:     ALLOC_RIGHT wraps around to section 0 when nothing is free to
 *           the right of the hint; any other value searches to the left
 *
 * The chosen segment is marked in use before segmap_lock is released.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
                        unsigned int *newseg, bool new_sec, int dir)
{
        struct free_segmap_info *free_i = FREE_I(sbi);
        unsigned int segno, secno, zoneno;
        unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
        unsigned int hint = *newseg / sbi->segs_per_sec;
        unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
        unsigned int left_start = hint;
        bool init = true;
        int go_left = 0;
        int i;

        write_lock(&free_i->segmap_lock);

        /*
         * Stay in the current section if allowed: *newseg must not be the
         * last segment of its section, and the next free segment has to
         * fall inside that same section.
         */
        if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
                segno = find_next_zero_bit(free_i->free_segmap,
                                        MAIN_SEGS(sbi), *newseg + 1);
                if (segno - *newseg < sbi->segs_per_sec -
                                        (*newseg % sbi->segs_per_sec))
                        goto got_it;
        }
find_other_zone:
        /* look for a free section at or to the right of the hint */
        secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
        if (secno >= MAIN_SECS(sbi)) {
                if (dir == ALLOC_RIGHT) {
                        /* wrap around and search from the first section */
                        secno = find_next_zero_bit(free_i->free_secmap,
                                                        MAIN_SECS(sbi), 0);
                        f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
                } else {
                        /* nothing on the right: walk left from the hint */
                        go_left = 1;
                        left_start = hint - 1;
                }
        }
        if (go_left == 0)
                goto skip_left;

        /*
         * Step left over in-use sections; if section 0 is reached and is in
         * use as well, fall back to a search from the start of the map.
         */
        while (test_bit(left_start, free_i->free_secmap)) {
                if (left_start > 0) {
                        left_start--;
                        continue;
                }
                left_start = find_next_zero_bit(free_i->free_secmap,
                                                        MAIN_SECS(sbi), 0);
                f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
                break;
        }
        secno = left_start;
skip_left:
        /* candidate: first segment of the section that was found */
        hint = secno;
        segno = secno * sbi->segs_per_sec;
        zoneno = secno / sbi->secs_per_zone;

        /* give up on finding another zone */
        if (!init)
                goto got_it;
        if (sbi->secs_per_zone == 1)
                goto got_it;
        if (zoneno == old_zoneno)
                goto got_it;
        if (dir == ALLOC_LEFT) {
                if (!go_left && zoneno + 1 >= total_zones)
                        goto got_it;
                if (go_left && zoneno == 0)
                        goto got_it;
        }
        /* is any current segment already working in this zone? */
        for (i = 0; i < NR_CURSEG_TYPE; i++)
                if (CURSEG_I(sbi, i)->zone == zoneno)
                        break;

        if (i < NR_CURSEG_TYPE) {
                /* zone is in use, try another */
                if (go_left)
                        hint = zoneno * sbi->secs_per_zone - 1;
                else if (zoneno + 1 >= total_zones)
                        hint = 0;
                else
                        hint = (zoneno + 1) * sbi->secs_per_zone;
                /* retry only once; the next candidate is taken as it is */
                init = false;
                goto find_other_zone;
        }
got_it:
        /* set it as dirty segment in free segmap */
        f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
        __set_inuse(sbi, segno);
        *newseg = segno;
        write_unlock(&free_i->segmap_lock);
}
877
878 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
879 {
880         struct curseg_info *curseg = CURSEG_I(sbi, type);
881         struct summary_footer *sum_footer;
882
883         curseg->segno = curseg->next_segno;
884         curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
885         curseg->next_blkoff = 0;
886         curseg->next_segno = NULL_SEGNO;
887
888         sum_footer = &(curseg->sum_blk->footer);
889         memset(sum_footer, 0, sizeof(struct summary_footer));
890         if (IS_DATASEG(type))
891                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
892         if (IS_NODESEG(type))
893                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
894         __set_sit_entry_type(sbi, type, curseg->segno, modified);
895 }
896
897 /*
898  * Allocate a current working segment.
899  * This function always allocates a free segment in LFS manner.
900  */
901 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
902 {
903         struct curseg_info *curseg = CURSEG_I(sbi, type);
904         unsigned int segno = curseg->segno;
905         int dir = ALLOC_LEFT;
906
907         write_sum_page(sbi, curseg->sum_blk,
908                                 GET_SUM_BLOCK(sbi, segno));
909         if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
910                 dir = ALLOC_RIGHT;
911
912         if (test_opt(sbi, NOHEAP))
913                 dir = ALLOC_RIGHT;
914
915         get_new_segment(sbi, &segno, new_sec, dir);
916         curseg->next_segno = segno;
917         reset_curseg(sbi, type, 1);
918         curseg->alloc_type = LFS;
919 }
920
921 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
922                         struct curseg_info *seg, block_t start)
923 {
924         struct seg_entry *se = get_seg_entry(sbi, seg->segno);
925         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
926         unsigned long target_map[entries];
927         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
928         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
929         int i, pos;
930
931         for (i = 0; i < entries; i++)
932                 target_map[i] = ckpt_map[i] | cur_map[i];
933
934         pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
935
936         seg->next_blkoff = pos;
937 }
938
939 /*
940  * If a segment is written by LFS manner, next block offset is just obtained
941  * by increasing the current block offset. However, if a segment is written by
942  * SSR manner, next block offset obtained by calling __next_free_blkoff
943  */
944 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
945                                 struct curseg_info *seg)
946 {
947         if (seg->alloc_type == SSR)
948                 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
949         else
950                 seg->next_blkoff++;
951 }
952
953 /*
954  * This function always allocates a used segment(from dirty seglist) by SSR
955  * manner, so it should recover the existing segment information of valid blocks
956  */
957 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
958 {
959         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
960         struct curseg_info *curseg = CURSEG_I(sbi, type);
961         unsigned int new_segno = curseg->next_segno;
962         struct f2fs_summary_block *sum_node;
963         struct page *sum_page;
964
965         write_sum_page(sbi, curseg->sum_blk,
966                                 GET_SUM_BLOCK(sbi, curseg->segno));
967         __set_test_and_inuse(sbi, new_segno);
968
969         mutex_lock(&dirty_i->seglist_lock);
970         __remove_dirty_segment(sbi, new_segno, PRE);
971         __remove_dirty_segment(sbi, new_segno, DIRTY);
972         mutex_unlock(&dirty_i->seglist_lock);
973
974         reset_curseg(sbi, type, 1);
975         curseg->alloc_type = SSR;
976         __next_free_blkoff(sbi, curseg, 0);
977
978         if (reuse) {
979                 sum_page = get_sum_page(sbi, new_segno);
980                 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
981                 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
982                 f2fs_put_page(sum_page, 1);
983         }
984 }
985
986 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
987 {
988         struct curseg_info *curseg = CURSEG_I(sbi, type);
989         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
990
991         if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
992                 return v_ops->get_victim(sbi,
993                                 &(curseg)->next_segno, BG_GC, type, SSR);
994
995         /* For data segments, let's do SSR more intensively */
996         for (; type >= CURSEG_HOT_DATA; type--)
997                 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
998                                                 BG_GC, type, SSR))
999                         return 1;
1000         return 0;
1001 }
1002
1003 /*
1004  * flush out current segment and replace it with new segment
1005  * This function should be returned with success, otherwise BUG
1006  */
1007 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1008                                                 int type, bool force)
1009 {
1010         struct curseg_info *curseg = CURSEG_I(sbi, type);
1011
1012         if (force)
1013                 new_curseg(sbi, type, true);
1014         else if (type == CURSEG_WARM_NODE)
1015                 new_curseg(sbi, type, false);
1016         else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1017                 new_curseg(sbi, type, false);
1018         else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1019                 change_curseg(sbi, type, true);
1020         else
1021                 new_curseg(sbi, type, false);
1022
1023         stat_inc_seg_type(sbi, curseg);
1024 }
1025
1026 void allocate_new_segments(struct f2fs_sb_info *sbi)
1027 {
1028         struct curseg_info *curseg;
1029         unsigned int old_curseg;
1030         int i;
1031
1032         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1033                 curseg = CURSEG_I(sbi, i);
1034                 old_curseg = curseg->segno;
1035                 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1036                 locate_dirty_segment(sbi, old_curseg);
1037         }
1038 }
1039
1040 static const struct segment_allocation default_salloc_ops = {
1041         .allocate_segment = allocate_segment_by_default,
1042 };
1043
1044 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1045 {
1046         __u64 start = range->start >> sbi->log_blocksize;
1047         __u64 end = start + (range->len >> sbi->log_blocksize) - 1;
1048         unsigned int start_segno, end_segno;
1049         struct cp_control cpc;
1050
1051         if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1052                                                 range->len < sbi->blocksize)
1053                 return -EINVAL;
1054
1055         cpc.trimmed = 0;
1056         if (end <= MAIN_BLKADDR(sbi))
1057                 goto out;
1058
1059         /* start/end segment number in main_area */
1060         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1061         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1062                                                 GET_SEGNO(sbi, end);
1063         cpc.reason = CP_DISCARD;
1064         cpc.trim_start = start_segno;
1065         cpc.trim_end = end_segno;
1066         cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1067
1068         /* do checkpoint to issue discard commands safely */
1069         mutex_lock(&sbi->gc_mutex);
1070         write_checkpoint(sbi, &cpc);
1071         mutex_unlock(&sbi->gc_mutex);
1072 out:
1073         range->len = cpc.trimmed << sbi->log_blocksize;
1074         return 0;
1075 }
1076
1077 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1078 {
1079         struct curseg_info *curseg = CURSEG_I(sbi, type);
1080         if (curseg->next_blkoff < sbi->blocks_per_seg)
1081                 return true;
1082         return false;
1083 }
1084
1085 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1086 {
1087         if (p_type == DATA)
1088                 return CURSEG_HOT_DATA;
1089         else
1090                 return CURSEG_HOT_NODE;
1091 }
1092
1093 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1094 {
1095         if (p_type == DATA) {
1096                 struct inode *inode = page->mapping->host;
1097
1098                 if (S_ISDIR(inode->i_mode))
1099                         return CURSEG_HOT_DATA;
1100                 else
1101                         return CURSEG_COLD_DATA;
1102         } else {
1103                 if (IS_DNODE(page) && is_cold_node(page))
1104                         return CURSEG_WARM_NODE;
1105                 else
1106                         return CURSEG_COLD_NODE;
1107         }
1108 }
1109
1110 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1111 {
1112         if (p_type == DATA) {
1113                 struct inode *inode = page->mapping->host;
1114
1115                 if (S_ISDIR(inode->i_mode))
1116                         return CURSEG_HOT_DATA;
1117                 else if (is_cold_data(page) || file_is_cold(inode))
1118                         return CURSEG_COLD_DATA;
1119                 else
1120                         return CURSEG_WARM_DATA;
1121         } else {
1122                 if (IS_DNODE(page))
1123                         return is_cold_node(page) ? CURSEG_WARM_NODE :
1124                                                 CURSEG_HOT_NODE;
1125                 else
1126                         return CURSEG_COLD_NODE;
1127         }
1128 }
1129
1130 static int __get_segment_type(struct page *page, enum page_type p_type)
1131 {
1132         switch (F2FS_P_SB(page)->active_logs) {
1133         case 2:
1134                 return __get_segment_type_2(page, p_type);
1135         case 4:
1136                 return __get_segment_type_4(page, p_type);
1137         }
1138         /* NR_CURSEG_TYPE(6) logs by default */
1139         f2fs_bug_on(F2FS_P_SB(page),
1140                 F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1141         return __get_segment_type_6(page, p_type);
1142 }
1143
1144 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1145                 block_t old_blkaddr, block_t *new_blkaddr,
1146                 struct f2fs_summary *sum, int type)
1147 {
1148         struct sit_info *sit_i = SIT_I(sbi);
1149         struct curseg_info *curseg;
1150
1151         curseg = CURSEG_I(sbi, type);
1152
1153         mutex_lock(&curseg->curseg_mutex);
1154
1155         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1156
1157         /*
1158          * __add_sum_entry should be resided under the curseg_mutex
1159          * because, this function updates a summary entry in the
1160          * current summary block.
1161          */
1162         __add_sum_entry(sbi, type, sum);
1163
1164         mutex_lock(&sit_i->sentry_lock);
1165         __refresh_next_blkoff(sbi, curseg);
1166
1167         stat_inc_block_count(sbi, curseg);
1168
1169         if (!__has_curseg_space(sbi, type))
1170                 sit_i->s_ops->allocate_segment(sbi, type, false);
1171         /*
1172          * SIT information should be updated before segment allocation,
1173          * since SSR needs latest valid block information.
1174          */
1175         refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1176
1177         mutex_unlock(&sit_i->sentry_lock);
1178
1179         if (page && IS_NODESEG(type))
1180                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1181
1182         mutex_unlock(&curseg->curseg_mutex);
1183 }
1184
1185 static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1186                         block_t old_blkaddr, block_t *new_blkaddr,
1187                         struct f2fs_summary *sum, struct f2fs_io_info *fio)
1188 {
1189         int type = __get_segment_type(page, fio->type);
1190
1191         allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
1192
1193         /* writeout dirty page into bdev */
1194         f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
1195 }
1196
1197 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1198 {
1199         struct f2fs_io_info fio = {
1200                 .type = META,
1201                 .rw = WRITE_SYNC | REQ_META | REQ_PRIO
1202         };
1203
1204         set_page_writeback(page);
1205         f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1206 }
1207
1208 void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1209                 struct f2fs_io_info *fio,
1210                 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
1211 {
1212         struct f2fs_summary sum;
1213         set_summary(&sum, nid, 0, 0);
1214         do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
1215 }
1216
1217 void write_data_page(struct page *page, struct dnode_of_data *dn,
1218                 block_t *new_blkaddr, struct f2fs_io_info *fio)
1219 {
1220         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1221         struct f2fs_summary sum;
1222         struct node_info ni;
1223
1224         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1225         get_node_info(sbi, dn->nid, &ni);
1226         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1227
1228         do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
1229 }
1230
1231 void rewrite_data_page(struct page *page, block_t old_blkaddr,
1232                                         struct f2fs_io_info *fio)
1233 {
1234         f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
1235 }
1236
1237 void recover_data_page(struct f2fs_sb_info *sbi,
1238                         struct page *page, struct f2fs_summary *sum,
1239                         block_t old_blkaddr, block_t new_blkaddr)
1240 {
1241         struct sit_info *sit_i = SIT_I(sbi);
1242         struct curseg_info *curseg;
1243         unsigned int segno, old_cursegno;
1244         struct seg_entry *se;
1245         int type;
1246
1247         segno = GET_SEGNO(sbi, new_blkaddr);
1248         se = get_seg_entry(sbi, segno);
1249         type = se->type;
1250
1251         if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1252                 if (old_blkaddr == NULL_ADDR)
1253                         type = CURSEG_COLD_DATA;
1254                 else
1255                         type = CURSEG_WARM_DATA;
1256         }
1257         curseg = CURSEG_I(sbi, type);
1258
1259         mutex_lock(&curseg->curseg_mutex);
1260         mutex_lock(&sit_i->sentry_lock);
1261
1262         old_cursegno = curseg->segno;
1263
1264         /* change the current segment */
1265         if (segno != curseg->segno) {
1266                 curseg->next_segno = segno;
1267                 change_curseg(sbi, type, true);
1268         }
1269
1270         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1271         __add_sum_entry(sbi, type, sum);
1272
1273         refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1274         locate_dirty_segment(sbi, old_cursegno);
1275
1276         mutex_unlock(&sit_i->sentry_lock);
1277         mutex_unlock(&curseg->curseg_mutex);
1278 }
1279
1280 static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1281                                         struct page *page, enum page_type type)
1282 {
1283         enum page_type btype = PAGE_TYPE_OF_BIO(type);
1284         struct f2fs_bio_info *io = &sbi->write_io[btype];
1285         struct bio_vec *bvec;
1286         int i;
1287
1288         down_read(&io->io_rwsem);
1289         if (!io->bio)
1290                 goto out;
1291
1292         bio_for_each_segment_all(bvec, io->bio, i) {
1293                 if (page == bvec->bv_page) {
1294                         up_read(&io->io_rwsem);
1295                         return true;
1296                 }
1297         }
1298
1299 out:
1300         up_read(&io->io_rwsem);
1301         return false;
1302 }
1303
1304 void f2fs_wait_on_page_writeback(struct page *page,
1305                                 enum page_type type)
1306 {
1307         if (PageWriteback(page)) {
1308                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1309
1310                 if (is_merged_page(sbi, page, type))
1311                         f2fs_submit_merged_bio(sbi, type, WRITE);
1312                 wait_on_page_writeback(page);
1313         }
1314 }
1315
1316 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1317 {
1318         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1319         struct curseg_info *seg_i;
1320         unsigned char *kaddr;
1321         struct page *page;
1322         block_t start;
1323         int i, j, offset;
1324
1325         start = start_sum_block(sbi);
1326
1327         page = get_meta_page(sbi, start++);
1328         kaddr = (unsigned char *)page_address(page);
1329
1330         /* Step 1: restore nat cache */
1331         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1332         memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1333
1334         /* Step 2: restore sit cache */
1335         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1336         memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1337                                                 SUM_JOURNAL_SIZE);
1338         offset = 2 * SUM_JOURNAL_SIZE;
1339
1340         /* Step 3: restore summary entries */
1341         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1342                 unsigned short blk_off;
1343                 unsigned int segno;
1344
1345                 seg_i = CURSEG_I(sbi, i);
1346                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1347                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1348                 seg_i->next_segno = segno;
1349                 reset_curseg(sbi, i, 0);
1350                 seg_i->alloc_type = ckpt->alloc_type[i];
1351                 seg_i->next_blkoff = blk_off;
1352
1353                 if (seg_i->alloc_type == SSR)
1354                         blk_off = sbi->blocks_per_seg;
1355
1356                 for (j = 0; j < blk_off; j++) {
1357                         struct f2fs_summary *s;
1358                         s = (struct f2fs_summary *)(kaddr + offset);
1359                         seg_i->sum_blk->entries[j] = *s;
1360                         offset += SUMMARY_SIZE;
1361                         if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1362                                                 SUM_FOOTER_SIZE)
1363                                 continue;
1364
1365                         f2fs_put_page(page, 1);
1366                         page = NULL;
1367
1368                         page = get_meta_page(sbi, start++);
1369                         kaddr = (unsigned char *)page_address(page);
1370                         offset = 0;
1371                 }
1372         }
1373         f2fs_put_page(page, 1);
1374         return 0;
1375 }
1376
1377 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1378 {
1379         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1380         struct f2fs_summary_block *sum;
1381         struct curseg_info *curseg;
1382         struct page *new;
1383         unsigned short blk_off;
1384         unsigned int segno = 0;
1385         block_t blk_addr = 0;
1386
1387         /* get segment number and block addr */
1388         if (IS_DATASEG(type)) {
1389                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1390                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1391                                                         CURSEG_HOT_DATA]);
1392                 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1393                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1394                 else
1395                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1396         } else {
1397                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1398                                                         CURSEG_HOT_NODE]);
1399                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1400                                                         CURSEG_HOT_NODE]);
1401                 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1402                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1403                                                         type - CURSEG_HOT_NODE);
1404                 else
1405                         blk_addr = GET_SUM_BLOCK(sbi, segno);
1406         }
1407
1408         new = get_meta_page(sbi, blk_addr);
1409         sum = (struct f2fs_summary_block *)page_address(new);
1410
1411         if (IS_NODESEG(type)) {
1412                 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1413                         struct f2fs_summary *ns = &sum->entries[0];
1414                         int i;
1415                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1416                                 ns->version = 0;
1417                                 ns->ofs_in_node = 0;
1418                         }
1419                 } else {
1420                         int err;
1421
1422                         err = restore_node_summary(sbi, segno, sum);
1423                         if (err) {
1424                                 f2fs_put_page(new, 1);
1425                                 return err;
1426                         }
1427                 }
1428         }
1429
1430         /* set uncompleted segment to curseg */
1431         curseg = CURSEG_I(sbi, type);
1432         mutex_lock(&curseg->curseg_mutex);
1433         memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1434         curseg->next_segno = segno;
1435         reset_curseg(sbi, type, 0);
1436         curseg->alloc_type = ckpt->alloc_type[type];
1437         curseg->next_blkoff = blk_off;
1438         mutex_unlock(&curseg->curseg_mutex);
1439         f2fs_put_page(new, 1);
1440         return 0;
1441 }
1442
1443 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1444 {
1445         int type = CURSEG_HOT_DATA;
1446         int err;
1447
1448         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1449                 /* restore for compacted data summary */
1450                 if (read_compacted_summaries(sbi))
1451                         return -EINVAL;
1452                 type = CURSEG_HOT_NODE;
1453         }
1454
1455         for (; type <= CURSEG_COLD_NODE; type++) {
1456                 err = read_normal_summaries(sbi, type);
1457                 if (err)
1458                         return err;
1459         }
1460
1461         return 0;
1462 }
1463
1464 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1465 {
1466         struct page *page;
1467         unsigned char *kaddr;
1468         struct f2fs_summary *summary;
1469         struct curseg_info *seg_i;
1470         int written_size = 0;
1471         int i, j;
1472
1473         page = grab_meta_page(sbi, blkaddr++);
1474         kaddr = (unsigned char *)page_address(page);
1475
1476         /* Step 1: write nat cache */
1477         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1478         memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1479         written_size += SUM_JOURNAL_SIZE;
1480
1481         /* Step 2: write sit cache */
1482         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1483         memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1484                                                 SUM_JOURNAL_SIZE);
1485         written_size += SUM_JOURNAL_SIZE;
1486
1487         /* Step 3: write summary entries */
1488         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1489                 unsigned short blkoff;
1490                 seg_i = CURSEG_I(sbi, i);
1491                 if (sbi->ckpt->alloc_type[i] == SSR)
1492                         blkoff = sbi->blocks_per_seg;
1493                 else
1494                         blkoff = curseg_blkoff(sbi, i);
1495
1496                 for (j = 0; j < blkoff; j++) {
1497                         if (!page) {
1498                                 page = grab_meta_page(sbi, blkaddr++);
1499                                 kaddr = (unsigned char *)page_address(page);
1500                                 written_size = 0;
1501                         }
1502                         summary = (struct f2fs_summary *)(kaddr + written_size);
1503                         *summary = seg_i->sum_blk->entries[j];
1504                         written_size += SUMMARY_SIZE;
1505
1506                         if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1507                                                         SUM_FOOTER_SIZE)
1508                                 continue;
1509
1510                         set_page_dirty(page);
1511                         f2fs_put_page(page, 1);
1512                         page = NULL;
1513                 }
1514         }
1515         if (page) {
1516                 set_page_dirty(page);
1517                 f2fs_put_page(page, 1);
1518         }
1519 }
1520
1521 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1522                                         block_t blkaddr, int type)
1523 {
1524         int i, end;
1525         if (IS_DATASEG(type))
1526                 end = type + NR_CURSEG_DATA_TYPE;
1527         else
1528                 end = type + NR_CURSEG_NODE_TYPE;
1529
1530         for (i = type; i < end; i++) {
1531                 struct curseg_info *sum = CURSEG_I(sbi, i);
1532                 mutex_lock(&sum->curseg_mutex);
1533                 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1534                 mutex_unlock(&sum->curseg_mutex);
1535         }
1536 }
1537
1538 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1539 {
1540         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1541                 write_compacted_summaries(sbi, start_blk);
1542         else
1543                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1544 }
1545
1546 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1547 {
1548         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1549                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1550 }
1551
1552 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1553                                         unsigned int val, int alloc)
1554 {
1555         int i;
1556
1557         if (type == NAT_JOURNAL) {
1558                 for (i = 0; i < nats_in_cursum(sum); i++) {
1559                         if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1560                                 return i;
1561                 }
1562                 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1563                         return update_nats_in_cursum(sum, 1);
1564         } else if (type == SIT_JOURNAL) {
1565                 for (i = 0; i < sits_in_cursum(sum); i++)
1566                         if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1567                                 return i;
1568                 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1569                         return update_sits_in_cursum(sum, 1);
1570         }
1571         return -1;
1572 }
1573
1574 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1575                                         unsigned int segno)
1576 {
1577         return get_meta_page(sbi, current_sit_addr(sbi, segno));
1578 }
1579
1580 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1581                                         unsigned int start)
1582 {
1583         struct sit_info *sit_i = SIT_I(sbi);
1584         struct page *src_page, *dst_page;
1585         pgoff_t src_off, dst_off;
1586         void *src_addr, *dst_addr;
1587
1588         src_off = current_sit_addr(sbi, start);
1589         dst_off = next_sit_addr(sbi, src_off);
1590
1591         /* get current sit block page without lock */
1592         src_page = get_meta_page(sbi, src_off);
1593         dst_page = grab_meta_page(sbi, dst_off);
1594         f2fs_bug_on(sbi, PageDirty(src_page));
1595
1596         src_addr = page_address(src_page);
1597         dst_addr = page_address(dst_page);
1598         memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1599
1600         set_page_dirty(dst_page);
1601         f2fs_put_page(src_page, 1);
1602
1603         set_to_next_sit(sit_i, start);
1604
1605         return dst_page;
1606 }
1607
1608 static struct sit_entry_set *grab_sit_entry_set(void)
1609 {
1610         struct sit_entry_set *ses =
1611                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1612
1613         ses->entry_cnt = 0;
1614         INIT_LIST_HEAD(&ses->set_list);
1615         return ses;
1616 }
1617
1618 static void release_sit_entry_set(struct sit_entry_set *ses)
1619 {
1620         list_del(&ses->set_list);
1621         kmem_cache_free(sit_entry_set_slab, ses);
1622 }
1623
1624 static void adjust_sit_entry_set(struct sit_entry_set *ses,
1625                                                 struct list_head *head)
1626 {
1627         struct sit_entry_set *next = ses;
1628
1629         if (list_is_last(&ses->set_list, head))
1630                 return;
1631
1632         list_for_each_entry_continue(next, head, set_list)
1633                 if (ses->entry_cnt <= next->entry_cnt)
1634                         break;
1635
1636         list_move_tail(&ses->set_list, &next->set_list);
1637 }
1638
1639 static void add_sit_entry(unsigned int segno, struct list_head *head)
1640 {
1641         struct sit_entry_set *ses;
1642         unsigned int start_segno = START_SEGNO(segno);
1643
1644         list_for_each_entry(ses, head, set_list) {
1645                 if (ses->start_segno == start_segno) {
1646                         ses->entry_cnt++;
1647                         adjust_sit_entry_set(ses, head);
1648                         return;
1649                 }
1650         }
1651
1652         ses = grab_sit_entry_set();
1653
1654         ses->start_segno = start_segno;
1655         ses->entry_cnt++;
1656         list_add(&ses->set_list, head);
1657 }
1658
1659 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1660 {
1661         struct f2fs_sm_info *sm_info = SM_I(sbi);
1662         struct list_head *set_list = &sm_info->sit_entry_set;
1663         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1664         unsigned int segno;
1665
1666         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1667                 add_sit_entry(segno, set_list);
1668 }
1669
1670 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1671 {
1672         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1673         struct f2fs_summary_block *sum = curseg->sum_blk;
1674         int i;
1675
1676         for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1677                 unsigned int segno;
1678                 bool dirtied;
1679
1680                 segno = le32_to_cpu(segno_in_journal(sum, i));
1681                 dirtied = __mark_sit_entry_dirty(sbi, segno);
1682
1683                 if (!dirtied)
1684                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1685         }
1686         update_sits_in_cursum(sum, -sits_in_cursum(sum));
1687 }
1688
1689 /*
1690  * CP calls this function, which flushes SIT entries including sit_journal,
1691  * and moves prefree segs to free segs.
1692  */
1693 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1694 {
1695         struct sit_info *sit_i = SIT_I(sbi);
1696         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1697         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1698         struct f2fs_summary_block *sum = curseg->sum_blk;
1699         struct sit_entry_set *ses, *tmp;
1700         struct list_head *head = &SM_I(sbi)->sit_entry_set;
1701         bool to_journal = true;
1702         struct seg_entry *se;
1703
1704         mutex_lock(&curseg->curseg_mutex);
1705         mutex_lock(&sit_i->sentry_lock);
1706
1707         /*
1708          * add and account sit entries of dirty bitmap in sit entry
1709          * set temporarily
1710          */
1711         add_sits_in_set(sbi);
1712
1713         /*
1714          * if there are no enough space in journal to store dirty sit
1715          * entries, remove all entries from journal and add and account
1716          * them in sit entry set.
1717          */
1718         if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1719                 remove_sits_in_journal(sbi);
1720
1721         if (!sit_i->dirty_sentries)
1722                 goto out;
1723
1724         /*
1725          * there are two steps to flush sit entries:
1726          * #1, flush sit entries to journal in current cold data summary block.
1727          * #2, flush sit entries to sit page.
1728          */
1729         list_for_each_entry_safe(ses, tmp, head, set_list) {
1730                 struct page *page = NULL;
1731                 struct f2fs_sit_block *raw_sit = NULL;
1732                 unsigned int start_segno = ses->start_segno;
1733                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1734                                                 (unsigned long)MAIN_SEGS(sbi));
1735                 unsigned int segno = start_segno;
1736
1737                 if (to_journal &&
1738                         !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1739                         to_journal = false;
1740
1741                 if (!to_journal) {
1742                         page = get_next_sit_page(sbi, start_segno);
1743                         raw_sit = page_address(page);
1744                 }
1745
1746                 /* flush dirty sit entries in region of current sit set */
1747                 for_each_set_bit_from(segno, bitmap, end) {
1748                         int offset, sit_offset;
1749
1750                         se = get_seg_entry(sbi, segno);
1751
1752                         /* add discard candidates */
1753                         if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
1754                                 cpc->trim_start = segno;
1755                                 add_discard_addrs(sbi, cpc);
1756                         }
1757
1758                         if (to_journal) {
1759                                 offset = lookup_journal_in_cursum(sum,
1760                                                         SIT_JOURNAL, segno, 1);
1761                                 f2fs_bug_on(sbi, offset < 0);
1762                                 segno_in_journal(sum, offset) =
1763                                                         cpu_to_le32(segno);
1764                                 seg_info_to_raw_sit(se,
1765                                                 &sit_in_journal(sum, offset));
1766                         } else {
1767                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1768                                 seg_info_to_raw_sit(se,
1769                                                 &raw_sit->entries[sit_offset]);
1770                         }
1771
1772                         __clear_bit(segno, bitmap);
1773                         sit_i->dirty_sentries--;
1774                         ses->entry_cnt--;
1775                 }
1776
1777                 if (!to_journal)
1778                         f2fs_put_page(page, 1);
1779
1780                 f2fs_bug_on(sbi, ses->entry_cnt);
1781                 release_sit_entry_set(ses);
1782         }
1783
1784         f2fs_bug_on(sbi, !list_empty(head));
1785         f2fs_bug_on(sbi, sit_i->dirty_sentries);
1786 out:
1787         if (cpc->reason == CP_DISCARD) {
1788                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1789                         add_discard_addrs(sbi, cpc);
1790         }
1791         mutex_unlock(&sit_i->sentry_lock);
1792         mutex_unlock(&curseg->curseg_mutex);
1793
1794         set_prefree_as_free_segments(sbi);
1795 }
1796
1797 static int build_sit_info(struct f2fs_sb_info *sbi)
1798 {
1799         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1800         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1801         struct sit_info *sit_i;
1802         unsigned int sit_segs, start;
1803         char *src_bitmap, *dst_bitmap;
1804         unsigned int bitmap_size;
1805
1806         /* allocate memory for SIT information */
1807         sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1808         if (!sit_i)
1809                 return -ENOMEM;
1810
1811         SM_I(sbi)->sit_info = sit_i;
1812
1813         sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1814         if (!sit_i->sentries)
1815                 return -ENOMEM;
1816
1817         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1818         sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1819         if (!sit_i->dirty_sentries_bitmap)
1820                 return -ENOMEM;
1821
1822         for (start = 0; start < MAIN_SEGS(sbi); start++) {
1823                 sit_i->sentries[start].cur_valid_map
1824                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1825                 sit_i->sentries[start].ckpt_valid_map
1826                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1827                 if (!sit_i->sentries[start].cur_valid_map
1828                                 || !sit_i->sentries[start].ckpt_valid_map)
1829                         return -ENOMEM;
1830         }
1831
1832         if (sbi->segs_per_sec > 1) {
1833                 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1834                                         sizeof(struct sec_entry));
1835                 if (!sit_i->sec_entries)
1836                         return -ENOMEM;
1837         }
1838
1839         /* get information related with SIT */
1840         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1841
1842         /* setup SIT bitmap from ckeckpoint pack */
1843         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1844         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1845
1846         dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1847         if (!dst_bitmap)
1848                 return -ENOMEM;
1849
1850         /* init SIT information */
1851         sit_i->s_ops = &default_salloc_ops;
1852
1853         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1854         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1855         sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1856         sit_i->sit_bitmap = dst_bitmap;
1857         sit_i->bitmap_size = bitmap_size;
1858         sit_i->dirty_sentries = 0;
1859         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1860         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1861         sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1862         mutex_init(&sit_i->sentry_lock);
1863         return 0;
1864 }
1865
1866 static int build_free_segmap(struct f2fs_sb_info *sbi)
1867 {
1868         struct free_segmap_info *free_i;
1869         unsigned int bitmap_size, sec_bitmap_size;
1870
1871         /* allocate memory for free segmap information */
1872         free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1873         if (!free_i)
1874                 return -ENOMEM;
1875
1876         SM_I(sbi)->free_info = free_i;
1877
1878         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1879         free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1880         if (!free_i->free_segmap)
1881                 return -ENOMEM;
1882
1883         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1884         free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1885         if (!free_i->free_secmap)
1886                 return -ENOMEM;
1887
1888         /* set all segments as dirty temporarily */
1889         memset(free_i->free_segmap, 0xff, bitmap_size);
1890         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1891
1892         /* init free segmap information */
1893         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1894         free_i->free_segments = 0;
1895         free_i->free_sections = 0;
1896         rwlock_init(&free_i->segmap_lock);
1897         return 0;
1898 }
1899
1900 static int build_curseg(struct f2fs_sb_info *sbi)
1901 {
1902         struct curseg_info *array;
1903         int i;
1904
1905         array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1906         if (!array)
1907                 return -ENOMEM;
1908
1909         SM_I(sbi)->curseg_array = array;
1910
1911         for (i = 0; i < NR_CURSEG_TYPE; i++) {
1912                 mutex_init(&array[i].curseg_mutex);
1913                 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1914                 if (!array[i].sum_blk)
1915                         return -ENOMEM;
1916                 array[i].segno = NULL_SEGNO;
1917                 array[i].next_blkoff = 0;
1918         }
1919         return restore_curseg_summaries(sbi);
1920 }
1921
1922 static void build_sit_entries(struct f2fs_sb_info *sbi)
1923 {
1924         struct sit_info *sit_i = SIT_I(sbi);
1925         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1926         struct f2fs_summary_block *sum = curseg->sum_blk;
1927         int sit_blk_cnt = SIT_BLK_CNT(sbi);
1928         unsigned int i, start, end;
1929         unsigned int readed, start_blk = 0;
1930         int nrpages = MAX_BIO_BLOCKS(sbi);
1931
1932         do {
1933                 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1934
1935                 start = start_blk * sit_i->sents_per_block;
1936                 end = (start_blk + readed) * sit_i->sents_per_block;
1937
1938                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
1939                         struct seg_entry *se = &sit_i->sentries[start];
1940                         struct f2fs_sit_block *sit_blk;
1941                         struct f2fs_sit_entry sit;
1942                         struct page *page;
1943
1944                         mutex_lock(&curseg->curseg_mutex);
1945                         for (i = 0; i < sits_in_cursum(sum); i++) {
1946                                 if (le32_to_cpu(segno_in_journal(sum, i))
1947                                                                 == start) {
1948                                         sit = sit_in_journal(sum, i);
1949                                         mutex_unlock(&curseg->curseg_mutex);
1950                                         goto got_it;
1951                                 }
1952                         }
1953                         mutex_unlock(&curseg->curseg_mutex);
1954
1955                         page = get_current_sit_page(sbi, start);
1956                         sit_blk = (struct f2fs_sit_block *)page_address(page);
1957                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1958                         f2fs_put_page(page, 1);
1959 got_it:
1960                         check_block_count(sbi, start, &sit);
1961                         seg_info_from_raw_sit(se, &sit);
1962                         if (sbi->segs_per_sec > 1) {
1963                                 struct sec_entry *e = get_sec_entry(sbi, start);
1964                                 e->valid_blocks += se->valid_blocks;
1965                         }
1966                 }
1967                 start_blk += readed;
1968         } while (start_blk < sit_blk_cnt);
1969 }
1970
1971 static void init_free_segmap(struct f2fs_sb_info *sbi)
1972 {
1973         unsigned int start;
1974         int type;
1975
1976         for (start = 0; start < MAIN_SEGS(sbi); start++) {
1977                 struct seg_entry *sentry = get_seg_entry(sbi, start);
1978                 if (!sentry->valid_blocks)
1979                         __set_free(sbi, start);
1980         }
1981
1982         /* set use the current segments */
1983         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1984                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1985                 __set_test_and_inuse(sbi, curseg_t->segno);
1986         }
1987 }
1988
1989 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1990 {
1991         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1992         struct free_segmap_info *free_i = FREE_I(sbi);
1993         unsigned int segno = 0, offset = 0;
1994         unsigned short valid_blocks;
1995
1996         while (1) {
1997                 /* find dirty segment based on free segmap */
1998                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
1999                 if (segno >= MAIN_SEGS(sbi))
2000                         break;
2001                 offset = segno + 1;
2002                 valid_blocks = get_valid_blocks(sbi, segno, 0);
2003                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2004                         continue;
2005                 if (valid_blocks > sbi->blocks_per_seg) {
2006                         f2fs_bug_on(sbi, 1);
2007                         continue;
2008                 }
2009                 mutex_lock(&dirty_i->seglist_lock);
2010                 __locate_dirty_segment(sbi, segno, DIRTY);
2011                 mutex_unlock(&dirty_i->seglist_lock);
2012         }
2013 }
2014
2015 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2016 {
2017         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2018         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2019
2020         dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2021         if (!dirty_i->victim_secmap)
2022                 return -ENOMEM;
2023         return 0;
2024 }
2025
2026 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2027 {
2028         struct dirty_seglist_info *dirty_i;
2029         unsigned int bitmap_size, i;
2030
2031         /* allocate memory for dirty segments list information */
2032         dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2033         if (!dirty_i)
2034                 return -ENOMEM;
2035
2036         SM_I(sbi)->dirty_info = dirty_i;
2037         mutex_init(&dirty_i->seglist_lock);
2038
2039         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2040
2041         for (i = 0; i < NR_DIRTY_TYPE; i++) {
2042                 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2043                 if (!dirty_i->dirty_segmap[i])
2044                         return -ENOMEM;
2045         }
2046
2047         init_dirty_segmap(sbi);
2048         return init_victim_secmap(sbi);
2049 }
2050
2051 /*
2052  * Update min, max modified time for cost-benefit GC algorithm
2053  */
2054 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2055 {
2056         struct sit_info *sit_i = SIT_I(sbi);
2057         unsigned int segno;
2058
2059         mutex_lock(&sit_i->sentry_lock);
2060
2061         sit_i->min_mtime = LLONG_MAX;
2062
2063         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2064                 unsigned int i;
2065                 unsigned long long mtime = 0;
2066
2067                 for (i = 0; i < sbi->segs_per_sec; i++)
2068                         mtime += get_seg_entry(sbi, segno + i)->mtime;
2069
2070                 mtime = div_u64(mtime, sbi->segs_per_sec);
2071
2072                 if (sit_i->min_mtime > mtime)
2073                         sit_i->min_mtime = mtime;
2074         }
2075         sit_i->max_mtime = get_mtime(sbi);
2076         mutex_unlock(&sit_i->sentry_lock);
2077 }
2078
2079 int build_segment_manager(struct f2fs_sb_info *sbi)
2080 {
2081         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2082         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2083         struct f2fs_sm_info *sm_info;
2084         int err;
2085
2086         sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2087         if (!sm_info)
2088                 return -ENOMEM;
2089
2090         /* init sm info */
2091         sbi->sm_info = sm_info;
2092         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2093         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2094         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2095         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2096         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2097         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2098         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2099         sm_info->rec_prefree_segments = sm_info->main_segments *
2100                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2101         sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2102         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2103         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2104
2105         INIT_LIST_HEAD(&sm_info->discard_list);
2106         sm_info->nr_discards = 0;
2107         sm_info->max_discards = 0;
2108
2109         INIT_LIST_HEAD(&sm_info->sit_entry_set);
2110
2111         if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2112                 err = create_flush_cmd_control(sbi);
2113                 if (err)
2114                         return err;
2115         }
2116
2117         err = build_sit_info(sbi);
2118         if (err)
2119                 return err;
2120         err = build_free_segmap(sbi);
2121         if (err)
2122                 return err;
2123         err = build_curseg(sbi);
2124         if (err)
2125                 return err;
2126
2127         /* reinit free segmap based on SIT */
2128         build_sit_entries(sbi);
2129
2130         init_free_segmap(sbi);
2131         err = build_dirty_segmap(sbi);
2132         if (err)
2133                 return err;
2134
2135         init_min_max_mtime(sbi);
2136         return 0;
2137 }
2138
2139 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2140                 enum dirty_type dirty_type)
2141 {
2142         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2143
2144         mutex_lock(&dirty_i->seglist_lock);
2145         kfree(dirty_i->dirty_segmap[dirty_type]);
2146         dirty_i->nr_dirty[dirty_type] = 0;
2147         mutex_unlock(&dirty_i->seglist_lock);
2148 }
2149
2150 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2151 {
2152         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2153         kfree(dirty_i->victim_secmap);
2154 }
2155
2156 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2157 {
2158         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2159         int i;
2160
2161         if (!dirty_i)
2162                 return;
2163
2164         /* discard pre-free/dirty segments list */
2165         for (i = 0; i < NR_DIRTY_TYPE; i++)
2166                 discard_dirty_segmap(sbi, i);
2167
2168         destroy_victim_secmap(sbi);
2169         SM_I(sbi)->dirty_info = NULL;
2170         kfree(dirty_i);
2171 }
2172
2173 static void destroy_curseg(struct f2fs_sb_info *sbi)
2174 {
2175         struct curseg_info *array = SM_I(sbi)->curseg_array;
2176         int i;
2177
2178         if (!array)
2179                 return;
2180         SM_I(sbi)->curseg_array = NULL;
2181         for (i = 0; i < NR_CURSEG_TYPE; i++)
2182                 kfree(array[i].sum_blk);
2183         kfree(array);
2184 }
2185
2186 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2187 {
2188         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2189         if (!free_i)
2190                 return;
2191         SM_I(sbi)->free_info = NULL;
2192         kfree(free_i->free_segmap);
2193         kfree(free_i->free_secmap);
2194         kfree(free_i);
2195 }
2196
2197 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2198 {
2199         struct sit_info *sit_i = SIT_I(sbi);
2200         unsigned int start;
2201
2202         if (!sit_i)
2203                 return;
2204
2205         if (sit_i->sentries) {
2206                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2207                         kfree(sit_i->sentries[start].cur_valid_map);
2208                         kfree(sit_i->sentries[start].ckpt_valid_map);
2209                 }
2210         }
2211         vfree(sit_i->sentries);
2212         vfree(sit_i->sec_entries);
2213         kfree(sit_i->dirty_sentries_bitmap);
2214
2215         SM_I(sbi)->sit_info = NULL;
2216         kfree(sit_i->sit_bitmap);
2217         kfree(sit_i);
2218 }
2219
2220 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2221 {
2222         struct f2fs_sm_info *sm_info = SM_I(sbi);
2223
2224         if (!sm_info)
2225                 return;
2226         destroy_flush_cmd_control(sbi);
2227         destroy_dirty_segmap(sbi);
2228         destroy_curseg(sbi);
2229         destroy_free_segmap(sbi);
2230         destroy_sit_info(sbi);
2231         sbi->sm_info = NULL;
2232         kfree(sm_info);
2233 }
2234
2235 int __init create_segment_manager_caches(void)
2236 {
2237         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2238                         sizeof(struct discard_entry));
2239         if (!discard_entry_slab)
2240                 goto fail;
2241
2242         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2243                         sizeof(struct sit_entry_set));
2244         if (!sit_entry_set_slab)
2245                 goto destory_discard_entry;
2246
2247         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2248                         sizeof(struct inmem_pages));
2249         if (!inmem_entry_slab)
2250                 goto destroy_sit_entry_set;
2251         return 0;
2252
2253 destroy_sit_entry_set:
2254         kmem_cache_destroy(sit_entry_set_slab);
2255 destory_discard_entry:
2256         kmem_cache_destroy(discard_entry_slab);
2257 fail:
2258         return -ENOMEM;
2259 }
2260
2261 void destroy_segment_manager_caches(void)
2262 {
2263         kmem_cache_destroy(sit_entry_set_slab);
2264         kmem_cache_destroy(discard_entry_slab);
2265         kmem_cache_destroy(inmem_entry_slab);
2266 }