aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/persistent-data
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r--drivers/md/persistent-data/dm-array.c228
-rw-r--r--drivers/md/persistent-data/dm-array.h52
-rw-r--r--drivers/md/persistent-data/dm-btree.c162
-rw-r--r--drivers/md/persistent-data/dm-btree.h35
4 files changed, 446 insertions, 31 deletions
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 431a03067d64..e83047cbb2da 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -277,6 +277,48 @@ static int insert_ablock(struct dm_array_info *info, uint64_t index,
return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
}
+/*----------------------------------------------------------------*/
+
+static int __shadow_ablock(struct dm_array_info *info, dm_block_t b,
+ struct dm_block **block, struct array_block **ab)
+{
+ int inc;
+ int r = dm_tm_shadow_block(info->btree_info.tm, b,
+ &array_validator, block, &inc);
+ if (r)
+ return r;
+
+ *ab = dm_block_data(*block);
+ if (inc)
+ inc_ablock_entries(info, *ab);
+
+ return 0;
+}
+
+/*
+ * The shadow op will often be a noop. Only insert if it really
+ * copied data.
+ */
+static int __reinsert_ablock(struct dm_array_info *info, unsigned index,
+ struct dm_block *block, dm_block_t b,
+ dm_block_t *root)
+{
+ int r = 0;
+
+ if (dm_block_location(block) != b) {
+ /*
+ * dm_tm_shadow_block will have already decremented the old
+ * block, but it is still referenced by the btree. We
+ * increment to stop the insert decrementing it below zero
+ * when overwriting the old value.
+ */
+ dm_tm_inc(info->btree_info.tm, b);
+ r = insert_ablock(info, index, block, root);
+ }
+
+ return r;
+}
+
/*
* Looks up an array block in the btree. Then shadows it, and updates the
* btree to point to this new shadow. 'root' is an input/output parameter
@@ -286,49 +328,21 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
unsigned index, struct dm_block **block,
struct array_block **ab)
{
- int r, inc;
+ int r;
uint64_t key = index;
dm_block_t b;
__le64 block_le;
- /*
- * lookup
- */
r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
if (r)
return r;
b = le64_to_cpu(block_le);
- /*
- * shadow
- */
- r = dm_tm_shadow_block(info->btree_info.tm, b,
- &array_validator, block, &inc);
+ r = __shadow_ablock(info, b, block, ab);
if (r)
return r;
- *ab = dm_block_data(*block);
- if (inc)
- inc_ablock_entries(info, *ab);
-
- /*
- * Reinsert.
- *
- * The shadow op will often be a noop. Only insert if it really
- * copied data.
- */
- if (dm_block_location(*block) != b) {
- /*
- * dm_tm_shadow_block will have already decremented the old
- * block, but it is still referenced by the btree. We
- * increment to stop the insert decrementing it below zero
- * when overwriting the old value.
- */
- dm_tm_inc(info->btree_info.tm, b);
- r = insert_ablock(info, index, *block, root);
- }
-
- return r;
+ return __reinsert_ablock(info, index, *block, b, root);
}
/*
@@ -681,6 +695,72 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root,
}
EXPORT_SYMBOL_GPL(dm_array_resize);
+static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab,
+ value_fn fn, void *context, unsigned base, unsigned new_nr)
+{
+ int r;
+ unsigned i;
+ uint32_t nr_entries;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ BUG_ON(le32_to_cpu(ab->nr_entries));
+ BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = 0; i < new_nr; i++) {
+ r = fn(base + i, element_at(info, ab, i), context);
+ if (r)
+ return r;
+
+ if (vt->inc)
+ vt->inc(vt->context, element_at(info, ab, i));
+ }
+
+ ab->nr_entries = cpu_to_le32(new_nr);
+ return 0;
+}
+
+int dm_array_new(struct dm_array_info *info, dm_block_t *root,
+ uint32_t size, value_fn fn, void *context)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+ unsigned block_index, end_block, size_of_block, max_entries;
+
+ r = dm_array_empty(info, root);
+ if (r)
+ return r;
+
+ size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ max_entries = calc_max_entries(info->value_type.size, size_of_block);
+ end_block = dm_div_up(size, max_entries);
+
+ for (block_index = 0; block_index != end_block; block_index++) {
+ r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
+ if (r)
+ break;
+
+ r = populate_ablock_with_values(info, ab, fn, context,
+ block_index * max_entries,
+ min(max_entries, size));
+ if (r) {
+ unlock_ablock(info, block);
+ break;
+ }
+
+ r = insert_ablock(info, block_index, block, root);
+ unlock_ablock(info, block);
+ if (r)
+ break;
+
+ size -= max_entries;
+ }
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_new);
+
int dm_array_del(struct dm_array_info *info, dm_block_t root)
{
return dm_btree_del(&info->btree_info, root);
@@ -819,3 +899,89 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root,
EXPORT_SYMBOL_GPL(dm_array_walk);
/*----------------------------------------------------------------*/
+
+static int load_ablock(struct dm_array_cursor *c)
+{
+ int r;
+ __le64 value_le;
+ uint64_t key;
+
+ if (c->block)
+ unlock_ablock(c->info, c->block);
+
+ c->block = NULL;
+ c->ab = NULL;
+ c->index = 0;
+
+ r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
+ if (r) {
+ DMERR("dm_btree_cursor_get_value failed");
+ dm_btree_cursor_end(&c->cursor);
+
+ } else {
+ r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
+ if (r) {
+ DMERR("get_ablock failed");
+ dm_btree_cursor_end(&c->cursor);
+ }
+ }
+
+ return r;
+}
+
+int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root,
+ struct dm_array_cursor *c)
+{
+ int r;
+
+ memset(c, 0, sizeof(*c));
+ c->info = info;
+ r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor);
+ if (r) {
+ DMERR("couldn't create btree cursor");
+ return r;
+ }
+
+ return load_ablock(c);
+}
+EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
+
+void dm_array_cursor_end(struct dm_array_cursor *c)
+{
+ if (c->block) {
+ unlock_ablock(c->info, c->block);
+ dm_btree_cursor_end(&c->cursor);
+ }
+}
+EXPORT_SYMBOL_GPL(dm_array_cursor_end);
+
+int dm_array_cursor_next(struct dm_array_cursor *c)
+{
+ int r;
+
+ if (!c->block)
+ return -ENODATA;
+
+ c->index++;
+
+ if (c->index >= le32_to_cpu(c->ab->nr_entries)) {
+ r = dm_btree_cursor_next(&c->cursor);
+ if (r)
+ return r;
+
+ r = load_ablock(c);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_array_cursor_next);
+
+void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
+{
+ *value_le = element_at(c->info, c->ab, c->index);
+}
+EXPORT_SYMBOL_GPL(dm_array_cursor_get_value);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h
index ea177d6fa58f..27ee49a55473 100644
--- a/drivers/md/persistent-data/dm-array.h
+++ b/drivers/md/persistent-data/dm-array.h
@@ -112,6 +112,25 @@ int dm_array_resize(struct dm_array_info *info, dm_block_t root,
__dm_written_to_disk(value);
/*
+ * Creates a new array populated with values provided by a callback
+ * function. This is more efficient than creating an empty array,
+ * resizing, and then setting values since that process incurs a lot of
+ * copying.
+ *
+ * Assumes 32bit values for now since it's only used by the cache hint
+ * array.
+ *
+ * info - describes the array
+ * root - the root block of the array on disk
+ * size - the number of entries in the array
+ * fn - the callback
+ * context - passed to the callback
+ */
+typedef int (*value_fn)(uint32_t index, void *value_le, void *context);
+int dm_array_new(struct dm_array_info *info, dm_block_t *root,
+ uint32_t size, value_fn fn, void *context);
+
+/*
* Frees a whole array. The value_type's decrement operation will be called
* for all values in the array
*/
@@ -163,4 +182,37 @@ int dm_array_walk(struct dm_array_info *info, dm_block_t root,
/*----------------------------------------------------------------*/
+/*
+ * Cursor api.
+ *
+ * This lets you iterate through all the entries in an array efficiently
+ * (it will preload metadata).
+ *
+ * I'm using a cursor, rather than a walk function with a callback because
+ * the cache target needs to iterate both the mapping and hint arrays in
+ * unison.
+ */
+struct dm_array_cursor {
+ struct dm_array_info *info;
+ struct dm_btree_cursor cursor;
+
+ struct dm_block *block;
+ struct array_block *ab;
+ unsigned index;
+};
+
+int dm_array_cursor_begin(struct dm_array_info *info,
+ dm_block_t root, struct dm_array_cursor *c);
+void dm_array_cursor_end(struct dm_array_cursor *c);
+
+uint32_t dm_array_cursor_index(struct dm_array_cursor *c);
+int dm_array_cursor_next(struct dm_array_cursor *c);
+
+/*
+ * value_le is only valid while the cursor points at the current value.
+ */
+void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le);
+
+/*----------------------------------------------------------------*/
+
#endif /* _LINUX_DM_ARRAY_H */
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 2cc1877804c2..20a40329d84a 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
return walk_node(info, root, fn, context);
}
EXPORT_SYMBOL_GPL(dm_btree_walk);
+
+/*----------------------------------------------------------------*/
+
+static void prefetch_values(struct dm_btree_cursor *c)
+{
+ unsigned i, nr;
+ __le64 value_le;
+ struct cursor_node *n = c->nodes + c->depth - 1;
+ struct btree_node *bn = dm_block_data(n->b);
+ struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm);
+
+ BUG_ON(c->info->value_type.size != sizeof(value_le));
+
+ nr = le32_to_cpu(bn->header.nr_entries);
+ for (i = 0; i < nr; i++) {
+ memcpy(&value_le, value_ptr(bn, i), sizeof(value_le));
+ dm_bm_prefetch(bm, le64_to_cpu(value_le));
+ }
+}
+
+static bool leaf_node(struct dm_btree_cursor *c)
+{
+ struct cursor_node *n = c->nodes + c->depth - 1;
+ struct btree_node *bn = dm_block_data(n->b);
+
+ return le32_to_cpu(bn->header.flags) & LEAF_NODE;
+}
+
+static int push_node(struct dm_btree_cursor *c, dm_block_t b)
+{
+ int r;
+ struct cursor_node *n = c->nodes + c->depth;
+
+ if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) {
+ DMERR("couldn't push cursor node, stack depth too high");
+ return -EINVAL;
+ }
+
+ r = bn_read_lock(c->info, b, &n->b);
+ if (r)
+ return r;
+
+ n->index = 0;
+ c->depth++;
+
+ if (c->prefetch_leaves || !leaf_node(c))
+ prefetch_values(c);
+
+ return 0;
+}
+
+static void pop_node(struct dm_btree_cursor *c)
+{
+ c->depth--;
+ unlock_block(c->info, c->nodes[c->depth].b);
+}
+
+static int inc_or_backtrack(struct dm_btree_cursor *c)
+{
+ struct cursor_node *n;
+ struct btree_node *bn;
+
+ for (;;) {
+ if (!c->depth)
+ return -ENODATA;
+
+ n = c->nodes + c->depth - 1;
+ bn = dm_block_data(n->b);
+
+ n->index++;
+ if (n->index < le32_to_cpu(bn->header.nr_entries))
+ break;
+
+ pop_node(c);
+ }
+
+ return 0;
+}
+
+static int find_leaf(struct dm_btree_cursor *c)
+{
+ int r = 0;
+ struct cursor_node *n;
+ struct btree_node *bn;
+ __le64 value_le;
+
+ for (;;) {
+ n = c->nodes + c->depth - 1;
+ bn = dm_block_data(n->b);
+
+ if (le32_to_cpu(bn->header.flags) & LEAF_NODE)
+ break;
+
+ memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le));
+ r = push_node(c, le64_to_cpu(value_le));
+ if (r) {
+ DMERR("push_node failed");
+ break;
+ }
+ }
+
+ if (!r && (le32_to_cpu(bn->header.nr_entries) == 0))
+ return -ENODATA;
+
+ return r;
+}
+
+int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
+ bool prefetch_leaves, struct dm_btree_cursor *c)
+{
+ int r;
+
+ c->info = info;
+ c->root = root;
+ c->depth = 0;
+ c->prefetch_leaves = prefetch_leaves;
+
+ r = push_node(c, root);
+ if (r)
+ return r;
+
+ return find_leaf(c);
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_begin);
+
+void dm_btree_cursor_end(struct dm_btree_cursor *c)
+{
+ while (c->depth)
+ pop_node(c);
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_end);
+
+int dm_btree_cursor_next(struct dm_btree_cursor *c)
+{
+ int r = inc_or_backtrack(c);
+ if (!r) {
+ r = find_leaf(c);
+ if (r)
+ DMERR("find_leaf failed");
+ }
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_next);
+
+int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
+{
+ if (c->depth) {
+ struct cursor_node *n = c->nodes + c->depth - 1;
+ struct btree_node *bn = dm_block_data(n->b);
+
+ if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE)
+ return -EINVAL;
+
+ *key = le64_to_cpu(*key_ptr(bn, n->index));
+ memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size);
+ return 0;
+
+ } else
+ return -ENODATA;
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index c74301fa5a37..db9bd26adf31 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
int (*fn)(void *context, uint64_t *keys, void *leaf),
void *context);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Cursor API. This does not follow the rolling lock convention. Since we
+ * know the order that values are required we can issue prefetches to speed
+ * up iteration. Use on a single level btree only.
+ */
+#define DM_BTREE_CURSOR_MAX_DEPTH 16
+
+struct cursor_node {
+ struct dm_block *b;
+ unsigned index;
+};
+
+struct dm_btree_cursor {
+ struct dm_btree_info *info;
+ dm_block_t root;
+
+ bool prefetch_leaves;
+ unsigned depth;
+ struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH];
+};
+
+/*
+ * Creates a fresh cursor. If prefetch_leaves is set then it is assumed
+ * the btree contains block indexes that will be prefetched. The cursor is
+ * quite large, so you probably don't want to put it on the stack.
+ */
+int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
+ bool prefetch_leaves, struct dm_btree_cursor *c);
+void dm_btree_cursor_end(struct dm_btree_cursor *c);
+int dm_btree_cursor_next(struct dm_btree_cursor *c);
+int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);
+
#endif /* _LINUX_DM_BTREE_H */