Diffstat (limited to 'mm/backing-dev.c')
| -rw-r--r-- | mm/backing-dev.c | 80 |
1 file changed, 78 insertions, 2 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 576220acd686..4a9d4e27d0d9 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -271,6 +271,14 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
 	spin_unlock_bh(&wb->work_lock);
 }
 
+static void wb_update_bandwidth_workfn(struct work_struct *work)
+{
+	struct bdi_writeback *wb = container_of(to_delayed_work(work),
+						struct bdi_writeback, bw_dwork);
+
+	wb_update_bandwidth(wb);
+}
+
 /*
  * Initial write bandwidth: 100 MB/s
  */
@@ -293,6 +301,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	INIT_LIST_HEAD(&wb->b_dirty_time);
 	spin_lock_init(&wb->list_lock);
 
+	atomic_set(&wb->writeback_inodes, 0);
 	wb->bw_time_stamp = jiffies;
 	wb->balanced_dirty_ratelimit = INIT_BW;
 	wb->dirty_ratelimit = INIT_BW;
@@ -302,6 +311,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	spin_lock_init(&wb->work_lock);
 	INIT_LIST_HEAD(&wb->work_list);
 	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+	INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
 	wb->dirty_sleep = jiffies;
 
 	err = fprop_local_init_percpu(&wb->completions, gfp);
@@ -350,6 +360,7 @@ static void wb_shutdown(struct bdi_writeback *wb)
 	mod_delayed_work(bdi_wq, &wb->dwork, 0);
 	flush_delayed_work(&wb->dwork);
 	WARN_ON(!list_empty(&wb->work_list));
+	flush_delayed_work(&wb->bw_dwork);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
@@ -371,12 +382,16 @@ static void wb_exit(struct bdi_writeback *wb)
 #include <linux/memcontrol.h>
 
 /*
- * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, and memcg->cgwb_list.
- * bdi->cgwb_tree is also RCU protected.
+ * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, offline_cgwbs and
+ * memcg->cgwb_list.  bdi->cgwb_tree is also RCU protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
 static struct workqueue_struct *cgwb_release_wq;
 
+static LIST_HEAD(offline_cgwbs);
+static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
+static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+
 static void cgwb_release_workfn(struct work_struct *work)
 {
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -394,8 +409,14 @@ static void cgwb_release_workfn(struct work_struct *work)
 	blkcg_unpin_online(blkcg);
 
 	fprop_local_destroy_percpu(&wb->memcg_completions);
+
+	spin_lock_irq(&cgwb_lock);
+	list_del(&wb->offline_node);
+	spin_unlock_irq(&cgwb_lock);
+
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
+	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);
 }
 
@@ -413,6 +434,7 @@ static void cgwb_kill(struct bdi_writeback *wb)
 	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
 	list_del(&wb->memcg_node);
 	list_del(&wb->blkcg_node);
+	list_add(&wb->offline_node, &offline_cgwbs);
 	percpu_ref_kill(&wb->refcnt);
 }
 
@@ -472,6 +494,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
 	wb->memcg_css = memcg_css;
 	wb->blkcg_css = blkcg_css;
+	INIT_LIST_HEAD(&wb->b_attached);
 	INIT_WORK(&wb->release_work, cgwb_release_workfn);
 	set_bit(WB_registered, &wb->state);
 
@@ -633,6 +656,54 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 	mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
+/*
+ * cleanup_offline_cgwbs_workfn - try to release dying cgwbs
+ *
+ * Try to release dying cgwbs by switching attached inodes to the nearest
+ * living ancestor's writeback. Processed wbs are placed at the end
+ * of the list to guarantee the forward progress.
+ */
+static void cleanup_offline_cgwbs_workfn(struct work_struct *work)
+{
+	struct bdi_writeback *wb;
+	LIST_HEAD(processed);
+
+	spin_lock_irq(&cgwb_lock);
+
+	while (!list_empty(&offline_cgwbs)) {
+		wb = list_first_entry(&offline_cgwbs, struct bdi_writeback,
+				      offline_node);
+		list_move(&wb->offline_node, &processed);
+
+		/*
+		 * If wb is dirty, cleaning up the writeback by switching
+		 * attached inodes will result in an effective removal of any
+		 * bandwidth restrictions, which isn't the goal.  Instead,
+		 * it can be postponed until the next time, when all io
+		 * will be likely completed.  If in the meantime some inodes
+		 * will get re-dirtied, they should be eventually switched to
+		 * a new cgwb.
+		 */
+		if (wb_has_dirty_io(wb))
+			continue;
+
+		if (!wb_tryget(wb))
+			continue;
+
+		spin_unlock_irq(&cgwb_lock);
+		while (cleanup_offline_cgwb(wb))
+			cond_resched();
+		spin_lock_irq(&cgwb_lock);
+
+		wb_put(wb);
+	}
+
+	if (!list_empty(&processed))
+		list_splice_tail(&processed, &offline_cgwbs);
+
+	spin_unlock_irq(&cgwb_lock);
+}
+
 /**
  * wb_memcg_offline - kill all wb's associated with a memcg being offlined
  * @memcg: memcg being offlined
@@ -649,6 +720,8 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
 		cgwb_kill(wb);
 	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
 	spin_unlock_irq(&cgwb_lock);
+
+	queue_work(system_unbound_wq, &cleanup_offline_cgwbs_work);
 }
 
 /**
@@ -745,6 +818,7 @@ struct backing_dev_info *bdi_alloc(int node_id)
 	bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
 	bdi->ra_pages = VM_READAHEAD_PAGES;
 	bdi->io_pages = VM_READAHEAD_PAGES;
+	timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0);
 	return bdi;
 }
 EXPORT_SYMBOL(bdi_alloc);
@@ -866,6 +940,8 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
+	del_timer_sync(&bdi->laptop_mode_wb_timer);
+
 	/* make sure nobody finds us on the bdi_list anymore */
 	bdi_remove_from_list(bdi);
 	wb_shutdown(&bdi->wb);
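As a rough illustration of the deferred-cleanup pattern this patch introduces (cgwb_kill() parks a dying wb on offline_cgwbs, and cleanup_offline_cgwbs_workfn() later tries to release each entry, keeping the ones it cannot release queued for a later pass), here is a minimal userspace C sketch. The names fake_wb, fake_kill, try_release and fake_cleanup_offline are invented for the example only; the real kernel code additionally holds cgwb_lock, drops it around cleanup_offline_cgwb(), and uses wb_tryget()/wb_put() for reference counting.

```c
/*
 * Simplified, userspace-only sketch of the offline-list cleanup pattern.
 * Not kernel code: all names here are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_wb {
	int id;
	bool has_dirty_io;		/* still has writeback in flight? */
	struct fake_wb *next;
};

static struct fake_wb *offline_head;	/* stand-in for offline_cgwbs */

/* Park a dying wb on the offline list (mirrors cgwb_kill()). */
static void fake_kill(struct fake_wb *wb)
{
	wb->next = offline_head;
	offline_head = wb;
}

/* Stand-in for cleanup_offline_cgwb(): pretend the inode switch succeeded. */
static bool try_release(struct fake_wb *wb)
{
	printf("released wb %d\n", wb->id);
	return true;
}

/* Mirrors the workfn: skip dirty wbs, keep unreleased ones for a later pass. */
static void fake_cleanup_offline(void)
{
	struct fake_wb *todo = offline_head, *skipped = NULL;

	offline_head = NULL;
	while (todo) {
		struct fake_wb *wb = todo;

		todo = wb->next;
		if (wb->has_dirty_io || !try_release(wb)) {
			/* re-queue instead of spinning on a busy wb */
			wb->next = skipped;
			skipped = wb;
		}
	}
	offline_head = skipped;
}

int main(void)
{
	struct fake_wb a = { .id = 1, .has_dirty_io = false };
	struct fake_wb b = { .id = 2, .has_dirty_io = true };

	fake_kill(&a);
	fake_kill(&b);
	fake_cleanup_offline();		/* releases wb 1, keeps wb 2 queued */
	return 0;
}
```

The design point the sketch tries to capture is the forward-progress guarantee: every scan takes entries off the pending list first, so a wb that cannot be released right now (because it still has dirty IO) is simply revisited on a later run rather than blocking the rest of the list.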