#include <linux/syscalls.h>
#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
#include <linux/pagevec.h>
+#include <linux/timer.h>
#include <trace/events/writeback.h>
/*
* measured in page writeback completions.
*
*/
-static struct prop_descriptor vm_completions;
+static struct fprop_global writeout_completions;
+
+static void writeout_period(unsigned long t);
+/*
+ * Timer for aging of writeout_completions; writeout_period() is its callback.
+ * writeout_period_time holds the expiry time the timer was last armed with.
+ * The value 0 is reserved to mean that the timer is not armed (period
+ * switching is turned off), which is why it starts out as 0.
+ */
+static struct timer_list writeout_period_timer =
+ TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
+static unsigned long writeout_period_time = 0;
+
+/*
+ * Length of period for aging writeout fractions of bdis. This is an
+ * arbitrarily chosen number. The longer the period, the slower fractions will
+ * reflect changes in current writeout rate.
+ */
+#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
/*
* Work out the current dirty-memory clamping and background writeout
zone_page_state(zone, NR_WRITEBACK) <= limit;
}
-/*
- * couple the period to the dirty_ratio:
- *
- * period/2 ~ roundup_pow_of_two(dirty limit)
- */
-static int calc_period_shift(void)
-{
- unsigned long dirty_total;
-
- if (vm_dirty_bytes)
- dirty_total = vm_dirty_bytes / PAGE_SIZE;
- else
- dirty_total = (vm_dirty_ratio * global_dirtyable_memory()) /
- 100;
- return 2 + ilog2(dirty_total - 1);
-}
-
-/*
- * update the period when the dirty threshold changes.
- */
-static void update_completion_period(void)
-{
- int shift = calc_period_shift();
- prop_change_shift(&vm_completions, shift);
-
- writeback_set_ratelimit();
-}
-
int dirty_background_ratio_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
- update_completion_period();
+ writeback_set_ratelimit();
vm_dirty_bytes = 0;
}
return ret;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
- update_completion_period();
+ writeback_set_ratelimit();
vm_dirty_ratio = 0;
}
return ret;
}
+/*
+ * Return the expiry time one aging period (VM_COMPLETIONS_PERIOD_LEN) after
+ * @cur_time. The result is never 0: writeout_period_time == 0 is reserved to
+ * mean that the aging timer is switched off, so a sum that wraps around to
+ * exactly 0 is nudged to 1 instead.
+ */
+static unsigned long wp_next_time(unsigned long cur_time)
+{
+	unsigned long next = cur_time + VM_COMPLETIONS_PERIOD_LEN;
+
+	return next ? next : 1;
+}
+
/*
* Increment the BDI's writeout completion count and the global writeout
* completion count. Called from test_clear_page_writeback().
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
__inc_bdi_stat(bdi, BDI_WRITTEN);
- __prop_inc_percpu_max(&vm_completions, &bdi->completions,
- bdi->max_prop_frac);
+ __fprop_inc_percpu_max(&writeout_completions, &bdi->completions,
+ bdi->max_prop_frac);
+ /*
+ * First event after period switching was turned off? A zero
+ * writeout_period_time means the aging timer is not armed. That is
+ * the rare case, so the branch hint must wrap the negated test:
+ * !unlikely(x) would instead mark this branch as the expected one.
+ */
+ if (unlikely(!writeout_period_time)) {
+ /*
+ * We can race with other __bdi_writeout_inc calls here but
+ * it does not cause any harm since the resulting time when
+ * timer will fire and what is in writeout_period_time will be
+ * roughly the same.
+ */
+ writeout_period_time = wp_next_time(jiffies);
+ mod_timer(&writeout_period_timer, writeout_period_time);
+ }
}
void bdi_writeout_inc(struct backing_dev_info *bdi)
static void bdi_writeout_fraction(struct backing_dev_info *bdi,
long *numerator, long *denominator)
{
- prop_fraction_percpu(&vm_completions, &bdi->completions,
+ fprop_fraction_percpu(&writeout_completions, &bdi->completions,
numerator, denominator);
}
+/*
+ * Timer callback that ages writeout_completions.
+ *
+ * The timer is deferrable, so on an idle system we can run long after the
+ * scheduled expiry. Work out how many whole periods were missed and age by
+ * that many extra periods in one go. @t is unused.
+ */
+static void writeout_period(unsigned long t)
+{
+	unsigned long elapsed = jiffies - writeout_period_time;
+	int miss_periods = elapsed / VM_COMPLETIONS_PERIOD_LEN;
+
+	if (!fprop_new_period(&writeout_completions, miss_periods + 1)) {
+		/*
+		 * Aging has zeroed all fractions. Stop wasting CPU on period
+		 * updates; 0 marks the timer as switched off.
+		 */
+		writeout_period_time = 0;
+		return;
+	}
+
+	/* Re-arm one period past the last boundary that has elapsed. */
+	writeout_period_time = wp_next_time(writeout_period_time +
+			miss_periods * VM_COMPLETIONS_PERIOD_LEN);
+	mod_timer(&writeout_period_timer, writeout_period_time);
+}
+
/*
* bdi_min_ratio keeps the sum of the minimum dirty shares of all
* registered backing devices, which, for obvious reasons, can not
ret = -EINVAL;
} else {
bdi->max_ratio = max_ratio;
- bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+ bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
}
spin_unlock_bh(&bdi_lock);
*/
void __init page_writeback_init(void)
{
- int shift;
-
writeback_set_ratelimit();
register_cpu_notifier(&ratelimit_nb);
- shift = calc_period_shift();
- prop_descriptor_init(&vm_completions, shift);
+ /*
+ * fprop_global_init() reports failure through its return value.
+ * Without an initialised writeout_completions the per-bdi writeout
+ * fractions cannot work, so treat a failure at boot as fatal instead
+ * of silently ignoring it.
+ */
+ BUG_ON(fprop_global_init(&writeout_completions));
}
/**