From eb608e3a344b3af21300360fcf868f8b4e808a8e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 May 2012 18:59:11 +0200 Subject: block: Convert BDI proportion calculations to flexible proportions Convert calculations of the proportion of writeback each bdi does to the new flexible proportion code. That allows us to use an aging period of fixed wallclock time, which gives better proportion estimates given the hugely varying throughput of different devices. Acked-by: Peter Zijlstra Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu --- include/linux/backing-dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b1038bd686ac..489de625cd25 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include @@ -89,7 +89,7 @@ struct backing_dev_info { unsigned long dirty_ratelimit; unsigned long balanced_dirty_ratelimit; - struct prop_local_percpu completions; + struct fprop_local_percpu completions; int dirty_exceeded; unsigned int min_ratio; -- cgit From 3965c9ae47d64aadf6f13b6fcd37767b83c0689a Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 31 Jul 2012 16:41:52 -0700 Subject: mm: prepare for removal of obsolete /proc/sys/vm/nr_pdflush_threads Since per-BDI flusher threads were introduced in 2.6, the pdflush mechanism is not used any more. But the old interface exported through /proc/sys/vm/nr_pdflush_threads still exists and is obviously useless. For backward compatibility, print a warning and return 2 to notify users that the interface is scheduled for removal. 
Signed-off-by: Wanpeng Li Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../ABI/obsolete/proc-sys-vm-nr_pdflush_threads | 5 +++++ Documentation/feature-removal-schedule.txt | 8 ++++++++ Documentation/sysctl/vm.txt | 11 ----------- fs/fs-writeback.c | 5 ----- include/linux/backing-dev.h | 3 +++ include/linux/writeback.h | 5 ----- kernel/sysctl.c | 8 +++----- kernel/sysctl_binary.c | 2 +- mm/backing-dev.c | 20 ++++++++++++++++++++ 9 files changed, 40 insertions(+), 27 deletions(-) create mode 100644 Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads (limited to 'include/linux/backing-dev.h') diff --git a/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads b/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads new file mode 100644 index 000000000000..b0b0eeb20fe3 --- /dev/null +++ b/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads @@ -0,0 +1,5 @@ +What: /proc/sys/vm/nr_pdflush_threads +Date: June 2012 +Contact: Wanpeng Li +Description: Since pdflush is replaced by per-BDI flusher, the interface of old pdflush + exported in /proc/sys/vm/ should be removed. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index e9237fb71950..88f2fa48bb63 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -13,6 +13,14 @@ Who: Jim Cromie , Jason Baron --------------------------- +What: /proc/sys/vm/nr_pdflush_threads +When: 2012 +Why: Since pdflush is deprecated, the interface exported in /proc/sys/vm/ + should be removed. 
+Who: Wanpeng Li + +--------------------------- + What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle When: 2012 Why: This optional sub-feature of APM is of dubious reliability, diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 84eb25cd69aa..06d662b1c5d5 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -42,7 +42,6 @@ Currently, these files are in /proc/sys/vm: - mmap_min_addr - nr_hugepages - nr_overcommit_hugepages -- nr_pdflush_threads - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks @@ -426,16 +425,6 @@ See Documentation/vm/hugetlbpage.txt ============================================================== -nr_pdflush_threads - -The current number of pdflush threads. This value is read-only. -The value changes according to the number of dirty pages in the system. - -When necessary, additional pdflush threads are created, one per second, up to -nr_pdflush_threads_max. - -============================================================== - nr_trim_pages This is available only on NOMMU kernels. diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 50d0b78130a1..be3efc4f64f4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,11 +52,6 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; -/* - * We don't actually have pdflush, but this one is exported though /proc... - */ -int nr_pdflush_threads; - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 489de625cd25..c97c6b9cd38e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,6 +17,7 @@ #include #include #include +#include struct page; struct device; @@ -304,6 +305,8 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync); void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); long wait_iff_congested(struct zone *zone, int sync, long timeout); +int pdflush_proc_obsolete(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 6d0a0fcd80e7..c66fe3332d83 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -189,9 +189,4 @@ void tag_pages_for_writeback(struct address_space *mapping, void account_page_redirty(struct page *page); -/* pdflush.c */ -extern int nr_pdflush_threads; /* Global so it can be exported to sysctl - read-only. 
*/ - - #endif /* WRITEBACK_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 97186b99b0e4..6502d35a25ba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1101,11 +1101,9 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, { - .procname = "nr_pdflush_threads", - .data = &nr_pdflush_threads, - .maxlen = sizeof nr_pdflush_threads, - .mode = 0444 /* read-only*/, - .proc_handler = proc_dointvec, + .procname = "nr_pdflush_threads", + .mode = 0444 /* read-only */, + .proc_handler = pdflush_proc_obsolete, }, { .procname = "swappiness", diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index a650694883a1..65bdcf198d4e 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -147,7 +147,7 @@ static const struct bin_table bin_vm_table[] = { { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" }, /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */ /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */ - { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" }, + /* VM_NR_PDFLUSH_THREADS "nr_pdflush_threads" no longer used */ { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" }, /* VM_PAGEBUF unused */ /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3387aea11209..6b4718e2ee34 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -886,3 +886,23 @@ out: return ret; } EXPORT_SYMBOL(wait_iff_congested); + +int pdflush_proc_obsolete(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char kbuf[] = "0\n"; + + if (*ppos) { + *lenp = 0; + return 0; + } + + if (copy_to_user(buffer, kbuf, sizeof(kbuf))) + return -EFAULT; + printk_once(KERN_WARNING "%s exported in /proc is scheduled for removal\n", + table->procname); + + *lenp = 2; + *ppos += *lenp; + return 2; +} -- cgit From f0cd2dbb6cf387c11f87265462e370bb5469299e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2012 18:11:59 +0300 Subject: vfs: kill write_super 
and sync_supers Finally we can kill the 'sync_supers' kernel thread along with the '->write_super()' superblock operation because all the users are gone. Now every file-system is supposed to manage its own superblock and its dirty state. The nice thing about killing this thread is that it improves power management. Indeed, 'sync_supers' is a source of monotonic system wake-ups - it woke up every 5 seconds no matter what - even if there were no dirty superblocks and even if there were no file-systems using this service (e.g., btrfs and journalled ext4 do not need it). So it was wasting power most of the time. And because the thread was in the core of the kernel, all systems had to have it. So I am quite happy to make it go away. Interestingly, this thread is a left-over from the pdflush kernel thread which was a self-forking kernel thread responsible for all the write-back in old Linux kernels. It was turned into per-block device BDI threads, and 'sync_supers' was a left-over. Thus, R.I.P., pdflush as well. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/super.c | 40 ---------------------------------- include/linux/backing-dev.h | 1 - include/linux/fs.h | 3 --- mm/backing-dev.c | 52 --------------------------------------------- mm/page-writeback.c | 1 - 5 files changed, 97 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/super.c b/fs/super.c index b05cf47463d0..0902cfa6a12e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -536,46 +536,6 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); -/** - * sync_supers - helper for periodic superblock writeback - * - * Call the write_super method if present on all dirty superblocks in - * the system. This is for the periodic writeback used by most older - * filesystems. For data integrity superblock writeback use - * sync_filesystems() instead. - * - * Note: check the dirty flag before waiting, so we don't - * hold up the sync while mounting a device. 
(The newly - * mounted device won't need syncing.) - */ -void sync_supers(void) -{ - struct super_block *sb, *p = NULL; - - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; - if (sb->s_op->write_super && sb->s_dirt) { - sb->s_count++; - spin_unlock(&sb_lock); - - down_read(&sb->s_umount); - if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN)) - sb->s_op->write_super(sb); - up_read(&sb->s_umount); - - spin_lock(&sb_lock); - if (p) - __put_super(p); - p = sb; - } - } - if (p) - __put_super(p); - spin_unlock(&sb_lock); -} - /** * iterate_supers - call function for all active superblocks * @f: function to call diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c97c6b9cd38e..2a9a9abc9126 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -124,7 +124,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, void bdi_start_background_writeback(struct backing_dev_info *bdi); int bdi_writeback_thread(void *data); int bdi_has_dirty_io(struct backing_dev_info *bdi); -void bdi_arm_supers_timer(void); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); diff --git a/include/linux/fs.h b/include/linux/fs.h index 38dba16c4176..aa110476a95b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1491,7 +1491,6 @@ struct sb_writers { struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned char s_dirt; unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ @@ -1861,7 +1860,6 @@ struct super_operations { int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); - void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_fs) (struct 
super_block *); int (*unfreeze_fs) (struct super_block *); @@ -2397,7 +2395,6 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); -extern void sync_supers(void); extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6b4718e2ee34..b41823cc05e6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -39,12 +39,6 @@ DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); LIST_HEAD(bdi_pending_list); -static struct task_struct *sync_supers_tsk; -static struct timer_list sync_supers_timer; - -static int bdi_sync_supers(void *); -static void sync_supers_timer_fn(unsigned long); - void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { @@ -250,12 +244,6 @@ static int __init default_bdi_init(void) { int err; - sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers"); - BUG_ON(IS_ERR(sync_supers_tsk)); - - setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0); - bdi_arm_supers_timer(); - err = bdi_init(&default_backing_dev_info); if (!err) bdi_register(&default_backing_dev_info, NULL, "default"); @@ -270,46 +258,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) return wb_has_dirty_io(&bdi->wb); } -/* - * kupdated() used to do this. We cannot do it from the bdi_forker_thread() - * or we risk deadlocking on ->s_umount. The longer term solution would be - * to implement sync_supers_bdi() or similar and simply do it from the - * bdi writeback thread individually. - */ -static int bdi_sync_supers(void *unused) -{ - set_user_nice(current, 0); - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - - /* - * Do this periodically, like kupdated() did before. 
- */ - sync_supers(); - } - - return 0; -} - -void bdi_arm_supers_timer(void) -{ - unsigned long next; - - if (!dirty_writeback_interval) - return; - - next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies; - mod_timer(&sync_supers_timer, round_jiffies_up(next)); -} - -static void sync_supers_timer_fn(unsigned long unused) -{ - wake_up_process(sync_supers_tsk); - bdi_arm_supers_timer(); -} - static void wakeup_timer_fn(unsigned long data) { struct backing_dev_info *bdi = (struct backing_dev_info *)data; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e5363f34e025..5ad5ce23c1e0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1532,7 +1532,6 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, buffer, length, ppos); - bdi_arm_supers_timer(); return 0; } -- cgit