From 1b4eaf3d3809a658c85911e92d9ff64086931efa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Jan 2014 15:04:18 -0800 Subject: bcache: Fix flash_dev_cache_miss() for real this time The code was using sectors to count the number of sectors it was zeroing... but then it passed it to bio_advance()... after it had been set to 0. Amusing... Signed-off-by: Kent Overstreet --- drivers/md/bcache/request.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 5d5d031cf381..fc14ba3f6d05 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1203,22 +1203,13 @@ void bch_cached_dev_request_init(struct cached_dev *dc) static int flash_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned sectors) { - struct bio_vec bv; - struct bvec_iter iter; - - /* Zero fill bio */ - - bio_for_each_segment(bv, bio, iter) { - unsigned j = min(bv.bv_len >> 9, sectors); - - void *p = kmap(bv.bv_page); - memset(p + bv.bv_offset, 0, j << 9); - kunmap(bv.bv_page); + unsigned bytes = min(sectors, bio_sectors(bio)) << 9; - sectors -= j; - } + swap(bio->bi_iter.bi_size, bytes); + zero_fill_bio(bio); + swap(bio->bi_iter.bi_size, bytes); - bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size)); + bio_advance(bio, bytes); if (!bio->bi_iter.bi_size) return MAP_DONE; -- cgit From da415a096fc06e49d1a15f7a06bcfe6ad44c5d38 Mon Sep 17 00:00:00 2001 From: Nicholas Swenson Date: Thu, 9 Jan 2014 16:03:04 -0800 Subject: bcache: Fix moving_gc deadlocking with a foreground write Deadlock happened because a foreground write slept, waiting for a bucket to be allocated. Normally the gc would mark buckets available for invalidation. But the moving_gc was stuck waiting for outstanding writes to complete. These writes used the bcache_wq, the same queue foreground writes used. 
This fix gives moving_gc its own work queue, so it can still finish moving even if foreground writes are stuck waiting for allocation. It also makes the work queue a parameter to the data_insert path, so moving_gc can use its workqueue for writes. Signed-off-by: Nicholas Swenson Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 2 ++ drivers/md/bcache/movinggc.c | 5 +++-- drivers/md/bcache/request.c | 13 +++++++------ drivers/md/bcache/request.h | 1 + drivers/md/bcache/super.c | 3 +++ 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index a4c7306ff43d..6d814f463d9e 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -628,6 +628,8 @@ struct cache_set { /* Number of moving GC bios in flight */ struct semaphore moving_in_flight; + struct workqueue_struct *moving_gc_wq; + struct btree *root; #ifdef CONFIG_BCACHE_DEBUG diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 9eb60d102de8..8c7205186d08 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -115,7 +115,7 @@ static void write_moving(struct closure *cl) closure_call(&op->cl, bch_data_insert, NULL, cl); } - continue_at(cl, write_moving_finish, system_wq); + continue_at(cl, write_moving_finish, op->wq); } static void read_moving_submit(struct closure *cl) @@ -125,7 +125,7 @@ static void read_moving_submit(struct closure *cl) bch_submit_bbio(bio, io->op.c, &io->w->key, 0); - continue_at(cl, write_moving, system_wq); + continue_at(cl, write_moving, io->op.wq); } static void read_moving(struct cache_set *c) @@ -160,6 +160,7 @@ static void read_moving(struct cache_set *c) io->w = w; io->op.inode = KEY_INODE(&w->key); io->op.c = c; + io->op.wq = c->moving_gc_wq; moving_init(io); bio = &io->bio.bio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index fc14ba3f6d05..3e880869871f 100644 --- 
a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -248,7 +248,7 @@ static void bch_data_insert_keys(struct closure *cl) atomic_dec_bug(journal_ref); if (!op->insert_data_done) - continue_at(cl, bch_data_insert_start, bcache_wq); + continue_at(cl, bch_data_insert_start, op->wq); bch_keylist_free(&op->insert_keys); closure_return(cl); @@ -297,7 +297,7 @@ static void bch_data_invalidate(struct closure *cl) op->insert_data_done = true; bio_put(bio); out: - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); } static void bch_data_insert_error(struct closure *cl) @@ -340,7 +340,7 @@ static void bch_data_insert_endio(struct bio *bio, int error) if (op->writeback) op->error = error; else if (!op->replace) - set_closure_fn(cl, bch_data_insert_error, bcache_wq); + set_closure_fn(cl, bch_data_insert_error, op->wq); else set_closure_fn(cl, NULL, NULL); } @@ -376,7 +376,7 @@ static void bch_data_insert_start(struct closure *cl) if (bch_keylist_realloc(&op->insert_keys, 3 + (op->csum ? 
1 : 0), op->c)) - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); k = op->insert_keys.top; bkey_init(k); @@ -413,7 +413,7 @@ static void bch_data_insert_start(struct closure *cl) } while (n != bio); op->insert_data_done = true; - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); err: /* bch_alloc_sectors() blocks if s->writeback = true */ BUG_ON(op->writeback); @@ -442,7 +442,7 @@ err: bio_put(bio); if (!bch_keylist_empty(&op->insert_keys)) - continue_at(cl, bch_data_insert_keys, bcache_wq); + continue_at(cl, bch_data_insert_keys, op->wq); else closure_return(cl); } @@ -824,6 +824,7 @@ static inline struct search *search_alloc(struct bio *bio, s->iop.error = 0; s->iop.flags = 0; s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; + s->iop.wq = bcache_wq; return s; } diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 39f21dbedc38..c117c4082aa2 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -7,6 +7,7 @@ struct data_insert_op { struct closure cl; struct cache_set *c; struct bio *bio; + struct workqueue_struct *wq; unsigned inode; uint16_t write_point; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fb343276beef..ddfde380b49f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1356,6 +1356,8 @@ static void cache_set_free(struct closure *cl) bch_bset_sort_state_free(&c->sort); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); + if (c->moving_gc_wq) + destroy_workqueue(c->moving_gc_wq); if (c->bio_split) bioset_free(c->bio_split); if (c->fill_iter) @@ -1522,6 +1524,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || + !(c->moving_gc_wq = 
create_workqueue("bcache_gc")) || bch_journal_alloc(c) || bch_btree_cache_alloc(c) || bch_open_buckets_alloc(c) || -- cgit From 3f5e0a34daed197aa55d0c6b466bb4cd03babb4f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Jan 2014 04:42:58 -0800 Subject: bcache: Kill dead cgroup code This hasn't been used or even enabled in ages. Signed-off-by: Kent Overstreet --- drivers/md/bcache/Kconfig | 8 --- drivers/md/bcache/btree.c | 4 -- drivers/md/bcache/request.c | 169 -------------------------------------------- drivers/md/bcache/request.h | 18 ----- drivers/md/bcache/stats.c | 3 - 5 files changed, 202 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 2638417b19aa..4d200883c505 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG Keeps all active closures in a linked list and provides a debugfs interface to list them, which makes it possible to see asynchronous operations that get stuck. - -# cgroup code needs to be updated: -# -#config CGROUP_BCACHE -# bool "Cgroup controls for bcache" -# depends on BCACHE && BLK_CGROUP -# ---help--- -# TODO diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 5f587ce57e3a..ea5a59e2d740 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -68,15 +68,11 @@ * alloc_bucket() cannot fail. This should be true but is not completely * obvious. * - * Make sure all allocations get charged to the root cgroup - * * Plugging? 
* * If data write is less than hard sector size of ssd, round up offset in open * bucket to the next whole sector * - * Also lookup by cgroup in get_open_bucket() - * * Superblock needs to be fleshed out for multiple cache devices * * Add a sysfs tunable for the number of writeback IOs in flight diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3e880869871f..15fff4f68a7c 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -12,11 +12,9 @@ #include "request.h" #include "writeback.h" -#include #include #include #include -#include "blk-cgroup.h" #include @@ -27,171 +25,13 @@ struct kmem_cache *bch_search_cache; static void bch_data_insert_start(struct closure *); -/* Cgroup interface */ - -#ifdef CONFIG_CGROUP_BCACHE -static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 }; - -static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup) -{ - struct cgroup_subsys_state *css; - return cgroup && - (css = cgroup_subsys_state(cgroup, bcache_subsys_id)) - ? container_of(css, struct bch_cgroup, css) - : &bcache_default_cgroup; -} - -struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio) -{ - struct cgroup_subsys_state *css = bio->bi_css - ? cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id) - : task_subsys_state(current, bcache_subsys_id); - - return css - ? 
container_of(css, struct bch_cgroup, css) - : &bcache_default_cgroup; -} - -static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, loff_t *ppos) -{ - char tmp[1024]; - int len = bch_snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, - cgroup_to_bcache(cgrp)->cache_mode + 1); - - if (len < 0) - return len; - - return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); -} - -static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - int v = bch_read_string_list(buf, bch_cache_modes); - if (v < 0) - return v; - - cgroup_to_bcache(cgrp)->cache_mode = v - 1; - return 0; -} - -static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft) -{ - return cgroup_to_bcache(cgrp)->verify; -} - -static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val) -{ - cgroup_to_bcache(cgrp)->verify = val; - return 0; -} - -static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_hits); -} - -static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_misses); -} - -static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp, - struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_bypass_hits); -} - -static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp, - struct cftype *cft) -{ - struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); - return atomic_read(&bcachecg->stats.cache_bypass_misses); -} - -static struct cftype bch_files[] = { - { - .name = "cache_mode", - .read = cache_mode_read, - .write_string = cache_mode_write, - }, - { - .name = "verify", - .read_u64 = bch_verify_read, - .write_u64 = bch_verify_write, - }, - { - .name = "cache_hits", - 
.read_u64 = bch_cache_hits_read, - }, - { - .name = "cache_misses", - .read_u64 = bch_cache_misses_read, - }, - { - .name = "cache_bypass_hits", - .read_u64 = bch_cache_bypass_hits_read, - }, - { - .name = "cache_bypass_misses", - .read_u64 = bch_cache_bypass_misses_read, - }, - { } /* terminate */ -}; - -static void init_bch_cgroup(struct bch_cgroup *cg) -{ - cg->cache_mode = -1; -} - -static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) -{ - struct bch_cgroup *cg; - - cg = kzalloc(sizeof(*cg), GFP_KERNEL); - if (!cg) - return ERR_PTR(-ENOMEM); - init_bch_cgroup(cg); - return &cg->css; -} - -static void bcachecg_destroy(struct cgroup *cgroup) -{ - struct bch_cgroup *cg = cgroup_to_bcache(cgroup); - kfree(cg); -} - -struct cgroup_subsys bcache_subsys = { - .create = bcachecg_create, - .destroy = bcachecg_destroy, - .subsys_id = bcache_subsys_id, - .name = "bcache", - .module = THIS_MODULE, -}; -EXPORT_SYMBOL_GPL(bcache_subsys); -#endif - static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) { -#ifdef CONFIG_CGROUP_BCACHE - int r = bch_bio_to_cgroup(bio)->cache_mode; - if (r >= 0) - return r; -#endif return BDEV_CACHE_MODE(&dc->sb); } static bool verify(struct cached_dev *dc, struct bio *bio) { -#ifdef CONFIG_CGROUP_BCACHE - if (bch_bio_to_cgroup(bio)->verify) - return true; -#endif return dc->verify; } @@ -1305,9 +1145,6 @@ void bch_flash_dev_request_init(struct bcache_device *d) void bch_request_exit(void) { -#ifdef CONFIG_CGROUP_BCACHE - cgroup_unload_subsys(&bcache_subsys); -#endif if (bch_search_cache) kmem_cache_destroy(bch_search_cache); } @@ -1318,11 +1155,5 @@ int __init bch_request_init(void) if (!bch_search_cache) return -ENOMEM; -#ifdef CONFIG_CGROUP_BCACHE - cgroup_load_subsys(&bcache_subsys); - init_bch_cgroup(&bcache_default_cgroup); - - cgroup_add_cftypes(&bcache_subsys, bch_files); -#endif return 0; } diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index c117c4082aa2..1ff36875c2b3 100644 --- 
a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -1,8 +1,6 @@ #ifndef _BCACHE_REQUEST_H_ #define _BCACHE_REQUEST_H_ -#include - struct data_insert_op { struct closure cl; struct cache_set *c; @@ -42,20 +40,4 @@ void bch_flash_dev_request_init(struct bcache_device *d); extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache; -struct bch_cgroup { -#ifdef CONFIG_CGROUP_BCACHE - struct cgroup_subsys_state css; -#endif - /* - * We subtract one from the index into bch_cache_modes[], so that - * default == -1; this makes it so the rest match up with d->cache_mode, - * and we use d->cache_mode if cgrp->cache_mode < 0 - */ - short cache_mode; - bool verify; - struct cache_stat_collector stats; -}; - -struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio); - #endif /* _BCACHE_REQUEST_H_ */ diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 84d0782f702e..0ca072c20d0d 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -201,9 +201,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, struct cached_dev *dc = container_of(d, struct cached_dev, disk); mark_cache_stats(&dc->accounting.collector, hit, bypass); mark_cache_stats(&c->accounting.collector, hit, bypass); -#ifdef CONFIG_CGROUP_BCACHE - mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass); -#endif } void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d) -- cgit