X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=fs%2Ffs-writeback.c;h=091a36444972fa0b746118b0cc58ca2a3dcaa97f;hb=7b9748cb513a6bef4af87b79f0da3ff7e8b56cd8;hp=ae0f438c2ee68f59e4f8f4ebc17aeaae9c250ff9;hpb=65c61bc5dbbcfa1ff38e58aa834cb9a88e84a886;p=firefly-linux-kernel-4.4.55.git diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ae0f438c2ee6..091a36444972 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,8 +53,6 @@ struct wb_writeback_work { unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned int auto_free:1; /* free on completion */ - unsigned int single_wait:1; - unsigned int single_done:1; enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -178,14 +176,11 @@ static void wb_wakeup(struct bdi_writeback *wb) static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { - trace_writeback_queue(wb->bdi, work); + trace_writeback_queue(wb, work); spin_lock_bh(&wb->work_lock); - if (!test_bit(WB_registered, &wb->state)) { - if (work->single_wait) - work->single_done = 1; + if (!test_bit(WB_registered, &wb->state)) goto out_unlock; - } if (work->done) atomic_inc(&work->done->cnt); list_add_tail(&work->list, &wb->work_list); @@ -706,7 +701,7 @@ EXPORT_SYMBOL_GPL(wbc_account_io); /** * inode_congested - test whether an inode is congested - * @inode: inode to test for congestion + * @inode: inode to test for congestion (may be NULL) * @cong_bits: mask of WB_[a]sync_congested bits to test * * Tests whether @inode is congested. @cong_bits is the mask of congestion @@ -716,6 +711,9 @@ EXPORT_SYMBOL_GPL(wbc_account_io); * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg * associated with @inode is congested; otherwise, the root wb's congestion * state is used. + * + * @inode is allowed to be NULL as this function is often called on + * mapping->host which is NULL for the swapper space. */ int inode_congested(struct inode *inode, int cong_bits) { @@ -737,32 +735,6 @@ int inode_congested(struct inode *inode, int cong_bits) } EXPORT_SYMBOL_GPL(inode_congested); -/** - * wb_wait_for_single_work - wait for completion of a single bdi_writeback_work - * @bdi: bdi the work item was issued to - * @work: work item to wait for - * - * Wait for the completion of @work which was issued to one of @bdi's - * bdi_writeback's. The caller must have set @work->single_wait before - * issuing it. This wait operates independently fo - * wb_wait_for_completion() and also disables automatic freeing of @work. - */ -static void wb_wait_for_single_work(struct backing_dev_info *bdi, - struct wb_writeback_work *work) -{ - if (WARN_ON_ONCE(!work->single_wait)) - return; - - wait_event(bdi->wb_waitq, work->single_done); - - /* - * Paired with smp_wmb() in wb_do_writeback() and ensures that all - * modifications to @work prior to assertion of ->single_done is - * visible to the caller once this function returns. - */ - smp_rmb(); -} - /** * wb_split_bdi_pages - split nr_pages to write according to bandwidth * @wb: target bdi_writeback to split @nr_pages to @@ -791,38 +763,6 @@ static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages) return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw); } -/** - * wb_clone_and_queue_work - clone a wb_writeback_work and issue it to a wb - * @wb: target bdi_writeback - * @base_work: source wb_writeback_work - * - * Try to make a clone of @base_work and issue it to @wb. If cloning - * succeeds, %true is returned; otherwise, @base_work is issued directly - * and %false is returned. In the latter case, the caller is required to - * wait for @base_work's completion using wb_wait_for_single_work(). - * - * A clone is auto-freed on completion. @base_work never is. - */ -static bool wb_clone_and_queue_work(struct bdi_writeback *wb, - struct wb_writeback_work *base_work) -{ - struct wb_writeback_work *work; - - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - *work = *base_work; - work->auto_free = 1; - work->single_wait = 0; - } else { - work = base_work; - work->auto_free = 0; - work->single_wait = 1; - } - work->single_done = 0; - wb_queue_work(wb, work); - return work != base_work; -} - /** * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi * @bdi: target backing_dev_info @@ -838,15 +778,19 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, struct wb_writeback_work *base_work, bool skip_if_busy) { - long nr_pages = base_work->nr_pages; - int next_blkcg_id = 0; + int next_memcg_id = 0; struct bdi_writeback *wb; struct wb_iter iter; might_sleep(); restart: rcu_read_lock(); - bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) { + bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) { + DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done); + struct wb_writeback_work fallback_work; + struct wb_writeback_work *work; + long nr_pages; + /* SYNC_ALL writes out I_DIRTY_TIME too */ if (!wb_has_dirty_io(wb) && (base_work->sync_mode == WB_SYNC_NONE || @@ -855,13 +799,30 @@ restart: if (skip_if_busy && writeback_in_progress(wb)) continue; - base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages); - if (!wb_clone_and_queue_work(wb, base_work)) { - next_blkcg_id = wb->blkcg_css->id + 1; - rcu_read_unlock(); - wb_wait_for_single_work(bdi, base_work); - goto restart; + nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages); + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + *work = *base_work; + work->nr_pages = nr_pages; + work->auto_free = 1; + wb_queue_work(wb, work); + continue; } + + /* alloc failed, execute synchronously using on-stack fallback */ + work = &fallback_work; + *work = *base_work; + work->nr_pages = nr_pages; + work->auto_free = 0; + work->done = &fallback_work_done; + + wb_queue_work(wb, work); + + next_memcg_id = wb->memcg_css->id + 1; + rcu_read_unlock(); + wb_wait_for_completion(bdi, &fallback_work_done); + goto restart; } rcu_read_unlock(); } @@ -902,8 +863,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { base_work->auto_free = 0; - base_work->single_wait = 0; - base_work->single_done = 0; wb_queue_work(&bdi->wb, base_work); } } @@ -924,7 +883,7 @@ void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { - trace_writeback_nowork(wb->bdi); + trace_writeback_nowork(wb); wb_wakeup(wb); return; } @@ -954,7 +913,7 @@ void wb_start_background_writeback(struct bdi_writeback *wb) * We just wake up the flusher thread. It will perform background * writeback as soon as there is no other work to do. */ - trace_writeback_wake_background(wb->bdi); + trace_writeback_wake_background(wb); wb_wakeup(wb); } @@ -1421,6 +1380,10 @@ static long writeback_chunk_size(struct bdi_writeback *wb, * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. + * + * NOTE! This is called with wb->list_lock held, and will + * unlock and relock that for each inode it ends up doing + * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, @@ -1439,9 +1402,7 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long wrote = 0; /* count both pages and inodes */ - struct blk_plug plug; - blk_start_plug(&plug); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); @@ -1520,6 +1481,21 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; + + if (need_resched()) { + /* + * We're trying to balance between building up a nice + * long list of IOs to improve our merge rate, and + * getting those IOs out quickly for anyone throttling + * in balance_dirty_pages(). cond_resched() doesn't + * unplug, so get our IOs out the door before we + * give up the CPU. + */ + blk_flush_plug(current); + cond_resched(); + } + + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) @@ -1527,7 +1503,7 @@ static long writeback_sb_inodes(struct super_block *sb, requeue_inode(inode, wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); - cond_resched_lock(&wb->list_lock); + /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. @@ -1539,7 +1515,6 @@ static long writeback_sb_inodes(struct super_block *sb, break; } } - blk_finish_plug(&plug); return wrote; } @@ -1586,12 +1561,15 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .range_cyclic = 1, .reason = reason, }; + struct blk_plug plug; + blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work.nr_pages; } @@ -1619,10 +1597,12 @@ static long wb_writeback(struct bdi_writeback *wb, unsigned long oldest_jif; struct inode *inode; long progress; + struct blk_plug plug; oldest_jif = jiffies; work->older_than_this = &oldest_jif; + blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { /* @@ -1660,14 +1640,14 @@ static long wb_writeback(struct bdi_writeback *wb, } else if (work->for_background) oldest_jif = jiffies; - trace_writeback_start(wb->bdi, work); + trace_writeback_start(wb, work); if (list_empty(&wb->b_io)) queue_io(wb, work); if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else progress = __writeback_inodes_wb(wb, work); - trace_writeback_written(wb->bdi, work); + trace_writeback_written(wb, work); wb_update_bandwidth(wb, wb_start); @@ -1692,7 +1672,7 @@ static long wb_writeback(struct bdi_writeback *wb, * we'll just busyloop. */ if (!list_empty(&wb->b_more_io)) { - trace_writeback_wait(wb->bdi, work); + trace_writeback_wait(wb, work); inode = wb_inode(wb->b_more_io.prev); spin_lock(&inode->i_lock); spin_unlock(&wb->list_lock); @@ -1702,6 +1682,7 @@ static long wb_writeback(struct bdi_writeback *wb, } } spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work->nr_pages; } @@ -1797,26 +1778,14 @@ static long wb_do_writeback(struct bdi_writeback *wb) set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { struct wb_completion *done = work->done; - bool need_wake_up = false; - trace_writeback_exec(wb->bdi, work); + trace_writeback_exec(wb, work); wrote += wb_writeback(wb, work); - if (work->single_wait) { - WARN_ON_ONCE(work->auto_free); - /* paired w/ rmb in wb_wait_for_single_work() */ - smp_wmb(); - work->single_done = 1; - need_wake_up = true; - } else if (work->auto_free) { + if (work->auto_free) kfree(work); - } - if (done && atomic_dec_and_test(&done->cnt)) - need_wake_up = true; - - if (need_wake_up) wake_up_all(&wb->bdi->wb_waitq); }