From a42dde04152750426cc620fd277e80fffae2f65a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:36 -0700 Subject: [PATCH] mm: bdi: allow setting a maximum for the bdi dirty limit Add "max_ratio" to /sys/class/bdi. This indicates the maximum percentage of the global dirty threshold allocated to this bdi. [mszeredi@suse.cz] - fix parsing in max_ratio_store(). - export bdi_set_max_ratio() to modules - limit bdi_dirty with bdi->max_ratio - document new sysfs attribute Signed-off-by: Peter Zijlstra Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 9 ++++- include/linux/backing-dev.h | 2 ++ include/linux/proportions.h | 13 +++++++ lib/proportions.c | 38 +++++++++++++++++---- mm/backing-dev.c | 21 ++++++++++++ mm/page-writeback.c | 41 +++++++++++++++++++---- 6 files changed, 111 insertions(+), 13 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index b9e8a9368dc6..c55e811ca180 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -49,4 +49,11 @@ min_ratio (read-write) Minimal percentage of global dirty threshold allocated to this bdi. If the value written to this file would make the the sum of all min_ratio values exceed 100, then EINVAL is returned. - The default is zero + If min_ratio would become larger than the current max_ratio, + then also EINVAL is returned. The default is zero + +max_ratio (read-write) + + Maximal percentage of global dirty threshold allocated to this + bdi. If max_ratio would become smaller than the current + min_ratio, then EINVAL is returned. The default is 100 diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9a8965518d1d..ad3271d1e90a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -52,6 +52,7 @@ struct backing_dev_info { int dirty_exceeded; unsigned int min_ratio; + unsigned int max_ratio, max_prop_frac; struct device *dev; }; @@ -140,6 +141,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) } int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); /* * Flags in backing_dev_info::capability diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 2c3b3cad92be..5afc1b23346d 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -77,6 +77,19 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) local_irq_restore(flags); } +/* + * Limit the time part in order to ensure there are some bits left for the + * cycle counter and fraction multiply. + */ +#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) + +#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) +#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) + +void __prop_inc_percpu_max(struct prop_descriptor *pd, + struct prop_local_percpu *pl, long frac); + + /* * ----- SINGLE ------ */ diff --git a/lib/proportions.c b/lib/proportions.c index 9508d9a7af3e..4f387a643d72 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,12 +73,6 @@ #include #include -/* - * Limit the time part in order to ensure there are some bits left for the - * cycle counter. - */ -#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) - int prop_descriptor_init(struct prop_descriptor *pd, int shift) { int err; @@ -267,6 +261,38 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) prop_put_global(pd, pg); } +/* + * identical to __prop_inc_percpu, except that it limits this pl's fraction to + * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. + */ +void __prop_inc_percpu_max(struct prop_descriptor *pd, + struct prop_local_percpu *pl, long frac) +{ + struct prop_global *pg = prop_get_global(pd); + + prop_norm_percpu(pg, pl); + + if (unlikely(frac != PROP_FRAC_BASE)) { + unsigned long period_2 = 1UL << (pg->shift - 1); + unsigned long counter_mask = period_2 - 1; + unsigned long global_count; + long numerator, denominator; + + numerator = percpu_counter_read_positive(&pl->events); + global_count = percpu_counter_read(&pg->events); + denominator = period_2 + (global_count & counter_mask); + + if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) + goto out_put; + } + + percpu_counter_add(&pl->events, 1); + percpu_counter_add(&pg->events, 1); + +out_put: + prop_put_global(pd, pg); +} + /* * Obtain a fraction of this proportion * diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4967fb176e53..08361b6aad50 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio) +static ssize_t max_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + unsigned int ratio; + ssize_t ret = -EINVAL; + + ratio = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + ret = bdi_set_max_ratio(bdi, ratio); + if (!ret) + ret = count; + } + return ret; +} +BDI_SHOW(max_ratio, bdi->max_ratio) + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { @@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = { __ATTR_RO(dirty_kb), __ATTR_RO(bdi_dirty_kb), __ATTR_RW(min_ratio), + __ATTR_RW(max_ratio), __ATTR_NULL, }; @@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; bdi->min_ratio = 0; + bdi->max_ratio = 100; + bdi->max_prop_frac = PROP_FRAC_BASE; for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4ac077f4269c..2a9942f5387c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -164,7 +164,8 @@ int dirty_ratio_handler(struct ctl_table *table, int write, */ static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) { - __prop_inc_percpu(&vm_completions, &bdi->completions); + __prop_inc_percpu_max(&vm_completions, &bdi->completions, + bdi->max_prop_frac); } static inline void task_dirty_inc(struct task_struct *tsk) @@ -254,16 +255,42 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) unsigned long flags; spin_lock_irqsave(&bdi_lock, flags); - min_ratio -= bdi->min_ratio; - if (bdi_min_ratio + min_ratio < 100) { - bdi_min_ratio += min_ratio; - bdi->min_ratio += min_ratio; - } else + if (min_ratio > bdi->max_ratio) { ret = -EINVAL; + } else { + min_ratio -= bdi->min_ratio; + if (bdi_min_ratio + min_ratio < 100) { + bdi_min_ratio += min_ratio; + bdi->min_ratio += min_ratio; + } else { + ret = -EINVAL; + } + } + spin_unlock_irqrestore(&bdi_lock, flags); + + return ret; +} + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +{ + unsigned long flags; + int ret = 0; + + if (max_ratio > 100) + return -EINVAL; + + spin_lock_irqsave(&bdi_lock, flags); + if (bdi->min_ratio > max_ratio) { + ret = -EINVAL; + } else { + bdi->max_ratio = max_ratio; + bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; + } spin_unlock_irqrestore(&bdi_lock, flags); return ret; } +EXPORT_SYMBOL(bdi_set_max_ratio); /* * Work out the current dirty-memory clamping and background writeout @@ -365,6 +392,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, bdi_dirty *= numerator; do_div(bdi_dirty, denominator); bdi_dirty += (dirty * bdi->min_ratio) / 100; + if (bdi_dirty > (dirty * bdi->max_ratio) / 100) + bdi_dirty = dirty * bdi->max_ratio / 100; *pbdi_dirty = bdi_dirty; clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); -- 2.34.1