diff options
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r-- | drivers/md/persistent-data/dm-array.c | 228 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.h | 52 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 162 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.h | 35 |
4 files changed, 446 insertions, 31 deletions
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 431a03067d64..e83047cbb2da 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -277,6 +277,48 @@ static int insert_ablock(struct dm_array_info *info, uint64_t index, return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); } +/*----------------------------------------------------------------*/ + +static int __shadow_ablock(struct dm_array_info *info, dm_block_t b, + struct dm_block **block, struct array_block **ab) +{ + int inc; + int r = dm_tm_shadow_block(info->btree_info.tm, b, + &array_validator, block, &inc); + if (r) + return r; + + *ab = dm_block_data(*block); + if (inc) + inc_ablock_entries(info, *ab); + + return 0; +} + +/* + * The shadow op will often be a noop. Only insert if it really + * copied data. + */ +static int __reinsert_ablock(struct dm_array_info *info, unsigned index, + struct dm_block *block, dm_block_t b, + dm_block_t *root) +{ + int r = 0; + + if (dm_block_location(block) != b) { + /* + * dm_tm_shadow_block will have already decremented the old + * block, but it is still referenced by the btree. We + * increment to stop the insert decrementing it below zero + * when overwriting the old value. + */ + dm_tm_inc(info->btree_info.tm, b); + r = insert_ablock(info, index, block, root); + } + + return r; +} + /* * Looks up an array block in the btree. Then shadows it, and updates the * btree to point to this new shadow. 'root' is an input/output parameter @@ -286,49 +328,21 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, unsigned index, struct dm_block **block, struct array_block **ab) { - int r, inc; + int r; uint64_t key = index; dm_block_t b; __le64 block_le; - /* - * lookup - */ r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); if (r) return r; b = le64_to_cpu(block_le); - /* - * shadow - */ - r = dm_tm_shadow_block(info->btree_info.tm, b, - &array_validator, block, &inc); + r = __shadow_ablock(info, b, block, ab); if (r) return r; - *ab = dm_block_data(*block); - if (inc) - inc_ablock_entries(info, *ab); - - /* - * Reinsert. - * - * The shadow op will often be a noop. Only insert if it really - * copied data. - */ - if (dm_block_location(*block) != b) { - /* - * dm_tm_shadow_block will have already decremented the old - * block, but it is still referenced by the btree. We - * increment to stop the insert decrementing it below zero - * when overwriting the old value. - */ - dm_tm_inc(info->btree_info.tm, b); - r = insert_ablock(info, index, *block, root); - } - - return r; + return __reinsert_ablock(info, index, *block, b, root); } /* @@ -681,6 +695,72 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root, } EXPORT_SYMBOL_GPL(dm_array_resize); +static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab, + value_fn fn, void *context, unsigned base, unsigned new_nr) +{ + int r; + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(le32_to_cpu(ab->nr_entries)); + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = 0; i < new_nr; i++) { + r = fn(base + i, element_at(info, ab, i), context); + if (r) + return r; + + if (vt->inc) + vt->inc(vt->context, element_at(info, ab, i)); + } + + ab->nr_entries = cpu_to_le32(new_nr); + return 0; +} + +int dm_array_new(struct dm_array_info *info, dm_block_t *root, + uint32_t size, value_fn fn, void *context) +{ + int r; + struct dm_block *block; + struct array_block *ab; + unsigned block_index, end_block, size_of_block, max_entries; + + r = dm_array_empty(info, root); + if (r) + return r; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + end_block = dm_div_up(size, max_entries); + + for (block_index = 0; block_index != end_block; block_index++) { + r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); + if (r) + break; + + r = populate_ablock_with_values(info, ab, fn, context, + block_index * max_entries, + min(max_entries, size)); + if (r) { + unlock_ablock(info, block); + break; + } + + r = insert_ablock(info, block_index, block, root); + unlock_ablock(info, block); + if (r) + break; + + size -= max_entries; + } + + return r; +} +EXPORT_SYMBOL_GPL(dm_array_new); + int dm_array_del(struct dm_array_info *info, dm_block_t root) { return dm_btree_del(&info->btree_info, root); @@ -819,3 +899,89 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root, EXPORT_SYMBOL_GPL(dm_array_walk); /*----------------------------------------------------------------*/ + +static int load_ablock(struct dm_array_cursor *c) +{ + int r; + __le64 value_le; + uint64_t key; + + if (c->block) + unlock_ablock(c->info, c->block); + + c->block = NULL; + c->ab = NULL; + c->index = 0; + + r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le); + if (r) { + DMERR("dm_btree_cursor_get_value failed"); + dm_btree_cursor_end(&c->cursor); + + } else { + r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab); + if (r) { + DMERR("get_ablock failed"); + dm_btree_cursor_end(&c->cursor); + } + } + + return r; +} + +int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root, + struct dm_array_cursor *c) +{ + int r; + + memset(c, 0, sizeof(*c)); + c->info = info; + r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor); + if (r) { + DMERR("couldn't create btree cursor"); + return r; + } + + return load_ablock(c); +} +EXPORT_SYMBOL_GPL(dm_array_cursor_begin); + +void dm_array_cursor_end(struct dm_array_cursor *c) +{ + if (c->block) { + unlock_ablock(c->info, c->block); + dm_btree_cursor_end(&c->cursor); + } +} +EXPORT_SYMBOL_GPL(dm_array_cursor_end); + +int dm_array_cursor_next(struct dm_array_cursor *c) +{ + int r; + + if (!c->block) + return -ENODATA; + + c->index++; + + if (c->index >= le32_to_cpu(c->ab->nr_entries)) { + r = dm_btree_cursor_next(&c->cursor); + if (r) + return r; + + r = load_ablock(c); + if (r) + return r; + } + + return 0; +} +EXPORT_SYMBOL_GPL(dm_array_cursor_next); + +void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le) +{ + *value_le = element_at(c->info, c->ab, c->index); +} +EXPORT_SYMBOL_GPL(dm_array_cursor_get_value); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h index ea177d6fa58f..27ee49a55473 100644 --- a/drivers/md/persistent-data/dm-array.h +++ b/drivers/md/persistent-data/dm-array.h @@ -112,6 +112,25 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root, __dm_written_to_disk(value); /* + * Creates a new array populated with values provided by a callback + * function. This is more efficient than creating an empty array, + * resizing, and then setting values since that process incurs a lot of + * copying. + * + * Assumes 32bit values for now since it's only used by the cache hint + * array. + * + * info - describes the array + * root - the root block of the array on disk + * size - the number of entries in the array + * fn - the callback + * context - passed to the callback + */ +typedef int (*value_fn)(uint32_t index, void *value_le, void *context); +int dm_array_new(struct dm_array_info *info, dm_block_t *root, + uint32_t size, value_fn fn, void *context); + +/* * Frees a whole array. The value_type's decrement operation will be called * for all values in the array */ @@ -163,4 +182,37 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root, /*----------------------------------------------------------------*/ +/* + * Cursor api. + * + * This lets you iterate through all the entries in an array efficiently + * (it will preload metadata). + * + * I'm using a cursor, rather than a walk function with a callback because + * the cache target needs to iterate both the mapping and hint arrays in + * unison. + */ +struct dm_array_cursor { + struct dm_array_info *info; + struct dm_btree_cursor cursor; + + struct dm_block *block; + struct array_block *ab; + unsigned index; +}; + +int dm_array_cursor_begin(struct dm_array_info *info, + dm_block_t root, struct dm_array_cursor *c); +void dm_array_cursor_end(struct dm_array_cursor *c); + +uint32_t dm_array_cursor_index(struct dm_array_cursor *c); +int dm_array_cursor_next(struct dm_array_cursor *c); + +/* + * value_le is only valid while the cursor points at the current value. + */ +void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le); + +/*----------------------------------------------------------------*/ + #endif /* _LINUX_DM_ARRAY_H */ diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 2cc1877804c2..20a40329d84a 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, return walk_node(info, root, fn, context); } EXPORT_SYMBOL_GPL(dm_btree_walk); + +/*----------------------------------------------------------------*/ + +static void prefetch_values(struct dm_btree_cursor *c) +{ + unsigned i, nr; + __le64 value_le; + struct cursor_node *n = c->nodes + c->depth - 1; + struct btree_node *bn = dm_block_data(n->b); + struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm); + + BUG_ON(c->info->value_type.size != sizeof(value_le)); + + nr = le32_to_cpu(bn->header.nr_entries); + for (i = 0; i < nr; i++) { + memcpy(&value_le, value_ptr(bn, i), sizeof(value_le)); + dm_bm_prefetch(bm, le64_to_cpu(value_le)); + } +} + +static bool leaf_node(struct dm_btree_cursor *c) +{ + struct cursor_node *n = c->nodes + c->depth - 1; + struct btree_node *bn = dm_block_data(n->b); + + return le32_to_cpu(bn->header.flags) & LEAF_NODE; +} + +static int push_node(struct dm_btree_cursor *c, dm_block_t b) +{ + int r; + struct cursor_node *n = c->nodes + c->depth; + + if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) { + DMERR("couldn't push cursor node, stack depth too high"); + return -EINVAL; + } + + r = bn_read_lock(c->info, b, &n->b); + if (r) + return r; + + n->index = 0; + c->depth++; + + if (c->prefetch_leaves || !leaf_node(c)) + prefetch_values(c); + + return 0; +} + +static void pop_node(struct dm_btree_cursor *c) +{ + c->depth--; + unlock_block(c->info, c->nodes[c->depth].b); +} + +static int inc_or_backtrack(struct dm_btree_cursor *c) +{ + struct cursor_node *n; + struct btree_node *bn; + + for (;;) { + if (!c->depth) + return -ENODATA; + + n = c->nodes + c->depth - 1; + bn = dm_block_data(n->b); + + n->index++; + if (n->index < le32_to_cpu(bn->header.nr_entries)) + break; + + pop_node(c); + } + + return 0; +} + +static int find_leaf(struct dm_btree_cursor *c) +{ + int r = 0; + struct cursor_node *n; + struct btree_node *bn; + __le64 value_le; + + for (;;) { + n = c->nodes + c->depth - 1; + bn = dm_block_data(n->b); + + if (le32_to_cpu(bn->header.flags) & LEAF_NODE) + break; + + memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le)); + r = push_node(c, le64_to_cpu(value_le)); + if (r) { + DMERR("push_node failed"); + break; + } + } + + if (!r && (le32_to_cpu(bn->header.nr_entries) == 0)) + return -ENODATA; + + return r; +} + +int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, + bool prefetch_leaves, struct dm_btree_cursor *c) +{ + int r; + + c->info = info; + c->root = root; + c->depth = 0; + c->prefetch_leaves = prefetch_leaves; + + r = push_node(c, root); + if (r) + return r; + + return find_leaf(c); +} +EXPORT_SYMBOL_GPL(dm_btree_cursor_begin); + +void dm_btree_cursor_end(struct dm_btree_cursor *c) +{ + while (c->depth) + pop_node(c); +} +EXPORT_SYMBOL_GPL(dm_btree_cursor_end); + +int dm_btree_cursor_next(struct dm_btree_cursor *c) +{ + int r = inc_or_backtrack(c); + if (!r) { + r = find_leaf(c); + if (r) + DMERR("find_leaf failed"); + } + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_cursor_next); + +int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) +{ + if (c->depth) { + struct cursor_node *n = c->nodes + c->depth - 1; + struct btree_node *bn = dm_block_data(n->b); + + if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE) + return -EINVAL; + + *key = le64_to_cpu(*key_ptr(bn, n->index)); + memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size); + return 0; + + } else + return -ENODATA; +} +EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value); diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index c74301fa5a37..db9bd26adf31 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, int (*fn)(void *context, uint64_t *keys, void *leaf), void *context); + +/*----------------------------------------------------------------*/ + +/* + * Cursor API. This does not follow the rolling lock convention. Since we + * know the order that values are required we can issue prefetches to speed + * up iteration. Use on a single level btree only. + */ +#define DM_BTREE_CURSOR_MAX_DEPTH 16 + +struct cursor_node { + struct dm_block *b; + unsigned index; +}; + +struct dm_btree_cursor { + struct dm_btree_info *info; + dm_block_t root; + + bool prefetch_leaves; + unsigned depth; + struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; +}; + +/* + * Creates a fresh cursor. If prefetch_leaves is set then it is assumed + * the btree contains block indexes that will be prefetched. The cursor is + * quite large, so you probably don't want to put it on the stack. + */ +int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, + bool prefetch_leaves, struct dm_btree_cursor *c); +void dm_btree_cursor_end(struct dm_btree_cursor *c); +int dm_btree_cursor_next(struct dm_btree_cursor *c); +int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); + #endif /* _LINUX_DM_BTREE_H */ |