summaryrefslogtreecommitdiffstats
path: root/drivers/md/persistent-data
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 11:44:27 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 11:44:27 -0800
commit37cae6ad4c484030fa972241533c32730ec79b7d (patch)
treea01a13982af7b326af37c729a5ad83adbe99322d /drivers/md/persistent-data
parent986248993d495aebffcdf0758ce28ab85aa4e9ff (diff)
parent8735a8134786fa4ef36dee65d7fa779b99ba5fe3 (diff)
downloadlinux-37cae6ad4c484030fa972241533c32730ec79b7d.tar.gz
linux-37cae6ad4c484030fa972241533c32730ec79b7d.zip
Merge tag 'dm-3.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull device-mapper update from Alasdair G Kergon: "The main addition here is a long-desired target framework to allow an SSD to be used as a cache in front of a slower device. Cache tuning is delegated to interchangeable policy modules so these can be developed independently of the mechanics needed to shuffle the data around. Other than that, kcopyd users acquire a throttling parameter, ioctl buffer usage gets streamlined, more mempool reliance is reduced and there are a few other bug fixes and tidy-ups." * tag 'dm-3.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (30 commits) dm cache: add cleaner policy dm cache: add mq policy dm: add cache target dm persistent data: add bitset dm persistent data: add transactional array dm thin: remove cells from stack dm bio prison: pass cell memory in dm persistent data: add btree_walk dm: add target num_write_bios fn dm kcopyd: introduce configurable throttling dm ioctl: allow message to return data dm ioctl: optimize functions without variable params dm ioctl: introduce ioctl_flags dm: merge io_pool and tio_pool dm: remove unused _rq_bio_info_cache dm: fix limits initialization when there are no data devices dm snapshot: add missing module aliases dm persistent data: set some btree fn parms const dm: refactor bio cloning dm: rename bio cloning functions ...
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r--drivers/md/persistent-data/Kconfig2
-rw-r--r--drivers/md/persistent-data/Makefile2
-rw-r--r--drivers/md/persistent-data/dm-array.c808
-rw-r--r--drivers/md/persistent-data/dm-array.h166
-rw-r--r--drivers/md/persistent-data/dm-bitset.c163
-rw-r--r--drivers/md/persistent-data/dm-bitset.h165
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c1
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h1
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c7
-rw-r--r--drivers/md/persistent-data/dm-btree.c52
-rw-r--r--drivers/md/persistent-data/dm-btree.h15
11 files changed, 1378 insertions, 4 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index ceb359050a59..19b268795415 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -1,6 +1,6 @@
config DM_PERSISTENT_DATA
tristate
- depends on BLK_DEV_DM && EXPERIMENTAL
+ depends on BLK_DEV_DM
select LIBCRC32C
select DM_BUFIO
---help---
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile
index d8e7cb767c1e..ff528792c358 100644
--- a/drivers/md/persistent-data/Makefile
+++ b/drivers/md/persistent-data/Makefile
@@ -1,5 +1,7 @@
obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o
dm-persistent-data-objs := \
+ dm-array.o \
+ dm-bitset.o \
dm-block-manager.o \
dm-space-map-common.o \
dm-space-map-disk.o \
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
new file mode 100644
index 000000000000..172147eb1d40
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-array.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "array"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The array is implemented as a fully populated btree, which points to
+ * blocks that contain the packed values. This is more space efficient
+ * than just using a btree since we don't store 1 key per value.
+ */
+struct array_block {
+ __le32 csum;
+ __le32 max_entries;
+ __le32 nr_entries;
+ __le32 value_size;
+ __le64 blocknr; /* Block this node is supposed to live in. */
+} __packed;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Validator methods. As usual we calculate a checksum, and also write the
+ * block location into the header (paranoia about ssds remapping areas by
+ * mistake).
+ */
+#define CSUM_XOR 595846735
+
+static void array_block_prepare_for_write(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t size_of_block)
+{
+ struct array_block *bh_le = dm_block_data(b);
+
+ bh_le->blocknr = cpu_to_le64(dm_block_location(b));
+ bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+ size_of_block - sizeof(__le32),
+ CSUM_XOR));
+}
+
+static int array_block_check(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t size_of_block)
+{
+ struct array_block *bh_le = dm_block_data(b);
+ __le32 csum_disk;
+
+ if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
+ DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
+ (unsigned long long) le64_to_cpu(bh_le->blocknr),
+ (unsigned long long) dm_block_location(b));
+ return -ENOTBLK;
+ }
+
+ csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+ size_of_block - sizeof(__le32),
+ CSUM_XOR));
+ if (csum_disk != bh_le->csum) {
+ DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
+ (unsigned) le32_to_cpu(csum_disk),
+ (unsigned) le32_to_cpu(bh_le->csum));
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+static struct dm_block_validator array_validator = {
+ .name = "array",
+ .prepare_for_write = array_block_prepare_for_write,
+ .check = array_block_check
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Functions for manipulating the array blocks.
+ */
+
+/*
+ * Returns a pointer to a value within an array block.
+ *
+ * index - The index into _this_ specific block.
+ */
+static void *element_at(struct dm_array_info *info, struct array_block *ab,
+ unsigned index)
+{
+ unsigned char *entry = (unsigned char *) (ab + 1);
+
+ entry += index * info->value_type.size;
+
+ return entry;
+}
+
+/*
+ * Utility function that calls one of the value_type methods on every value
+ * in an array block.
+ */
+static void on_entries(struct dm_array_info *info, struct array_block *ab,
+ void (*fn)(void *, const void *))
+{
+ unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
+
+ for (i = 0; i < nr_entries; i++)
+ fn(info->value_type.context, element_at(info, ab, i));
+}
+
+/*
+ * Increment every value in an array block.
+ */
+static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ if (vt->inc)
+ on_entries(info, ab, vt->inc);
+}
+
+/*
+ * Decrement every value in an array block.
+ */
+static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ if (vt->dec)
+ on_entries(info, ab, vt->dec);
+}
+
+/*
+ * Each array block can hold this many values.
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
+{
+ return (size_of_block - sizeof(struct array_block)) / value_size;
+}
+
+/*
+ * Allocate a new array block. The caller will need to unlock block.
+ */
+static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
+ uint32_t max_entries,
+ struct dm_block **block, struct array_block **ab)
+{
+ int r;
+
+ r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
+ if (r)
+ return r;
+
+ (*ab) = dm_block_data(*block);
+ (*ab)->max_entries = cpu_to_le32(max_entries);
+ (*ab)->nr_entries = cpu_to_le32(0);
+ (*ab)->value_size = cpu_to_le32(info->value_type.size);
+
+ return 0;
+}
+
+/*
+ * Pad an array block out with a particular value. Every instance will
+ * cause an increment of the value_type. new_nr must always be more than
+ * the current number of entries.
+ */
+static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
+ const void *value, unsigned new_nr)
+{
+ unsigned i;
+ uint32_t nr_entries;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+ BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
+
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = nr_entries; i < new_nr; i++) {
+ if (vt->inc)
+ vt->inc(vt->context, value);
+ memcpy(element_at(info, ab, i), value, vt->size);
+ }
+ ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Remove some entries from the back of an array block. Every value
+ * removed will be decremented. new_nr must be <= the current number of
+ * entries.
+ */
+static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
+ unsigned new_nr)
+{
+ unsigned i;
+ uint32_t nr_entries;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+ BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
+
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = nr_entries; i > new_nr; i--)
+ if (vt->dec)
+ vt->dec(vt->context, element_at(info, ab, i - 1));
+ ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Read locks a block, and coerces it to an array block. The caller must
+ * unlock 'block' when finished.
+ */
+static int get_ablock(struct dm_array_info *info, dm_block_t b,
+ struct dm_block **block, struct array_block **ab)
+{
+ int r;
+
+ r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
+ if (r)
+ return r;
+
+ *ab = dm_block_data(*block);
+ return 0;
+}
+
+/*
+ * Unlocks an array block.
+ */
+static int unlock_ablock(struct dm_array_info *info, struct dm_block *block)
+{
+ return dm_tm_unlock(info->btree_info.tm, block);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Btree manipulation.
+ */
+
+/*
+ * Looks up an array block in the btree, and then read locks it.
+ *
+ * index is the index of the index of the array_block, (ie. the array index
+ * / max_entries).
+ */
+static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
+ unsigned index, struct dm_block **block,
+ struct array_block **ab)
+{
+ int r;
+ uint64_t key = index;
+ __le64 block_le;
+
+ r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
+ if (r)
+ return r;
+
+ return get_ablock(info, le64_to_cpu(block_le), block, ab);
+}
+
+/*
+ * Insert an array block into the btree. The block is _not_ unlocked.
+ */
+static int insert_ablock(struct dm_array_info *info, uint64_t index,
+ struct dm_block *block, dm_block_t *root)
+{
+ __le64 block_le = cpu_to_le64(dm_block_location(block));
+
+ __dm_bless_for_disk(block_le);
+ return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
+}
+
+/*
+ * Looks up an array block in the btree. Then shadows it, and updates the
+ * btree to point to this new shadow. 'root' is an input/output parameter
+ * for both the current root block, and the new one.
+ */
+static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
+ unsigned index, struct dm_block **block,
+ struct array_block **ab)
+{
+ int r, inc;
+ uint64_t key = index;
+ dm_block_t b;
+ __le64 block_le;
+
+ /*
+ * lookup
+ */
+ r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
+ if (r)
+ return r;
+ b = le64_to_cpu(block_le);
+
+ /*
+ * shadow
+ */
+ r = dm_tm_shadow_block(info->btree_info.tm, b,
+ &array_validator, block, &inc);
+ if (r)
+ return r;
+
+ *ab = dm_block_data(*block);
+ if (inc)
+ inc_ablock_entries(info, *ab);
+
+ /*
+ * Reinsert.
+ *
+ * The shadow op will often be a noop. Only insert if it really
+ * copied data.
+ */
+ if (dm_block_location(*block) != b)
+ r = insert_ablock(info, index, *block, root);
+
+ return r;
+}
+
+/*
+ * Allocate an new array block, and fill it with some values.
+ */
+static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
+ uint32_t max_entries,
+ unsigned block_index, uint32_t nr,
+ const void *value, dm_block_t *root)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
+ if (r)
+ return r;
+
+ fill_ablock(info, ab, value, nr);
+ r = insert_ablock(info, block_index, block, root);
+ unlock_ablock(info, block);
+
+ return r;
+}
+
+static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
+ unsigned begin_block, unsigned end_block,
+ unsigned max_entries, const void *value,
+ dm_block_t *root)
+{
+ int r = 0;
+
+ for (; !r && begin_block != end_block; begin_block++)
+ r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
+
+ return r;
+}
+
+/*
+ * There are a bunch of functions involved with resizing an array. This
+ * structure holds information that commonly needed by them. Purely here
+ * to reduce parameter count.
+ */
+struct resize {
+ /*
+ * Describes the array.
+ */
+ struct dm_array_info *info;
+
+ /*
+ * The current root of the array. This gets updated.
+ */
+ dm_block_t root;
+
+ /*
+ * Metadata block size. Used to calculate the nr entries in an
+ * array block.
+ */
+ size_t size_of_block;
+
+ /*
+ * Maximum nr entries in an array block.
+ */
+ unsigned max_entries;
+
+ /*
+ * nr of completely full blocks in the array.
+ *
+ * 'old' refers to before the resize, 'new' after.
+ */
+ unsigned old_nr_full_blocks, new_nr_full_blocks;
+
+ /*
+ * Number of entries in the final block. 0 iff only full blocks in
+ * the array.
+ */
+ unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;
+
+ /*
+ * The default value used when growing the array.
+ */
+ const void *value;
+};
+
+/*
+ * Removes a consecutive set of array blocks from the btree. The values
+ * in block are decremented as a side effect of the btree remove.
+ *
+ * begin_index - the index of the first array block to remove.
+ * end_index - the one-past-the-end value. ie. this block is not removed.
+ */
+static int drop_blocks(struct resize *resize, unsigned begin_index,
+ unsigned end_index)
+{
+ int r;
+
+ while (begin_index != end_index) {
+ uint64_t key = begin_index++;
+ r = dm_btree_remove(&resize->info->btree_info, resize->root,
+ &key, &resize->root);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Calculates how many blocks are needed for the array.
+ */
+static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
+ unsigned nr_entries_in_last_block)
+{
+ return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
+}
+
+/*
+ * Shrink an array.
+ */
+static int shrink(struct resize *resize)
+{
+ int r;
+ unsigned begin, end;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ /*
+ * Lose some blocks from the back?
+ */
+ if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
+ begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
+ resize->new_nr_entries_in_last_block);
+ end = total_nr_blocks_needed(resize->old_nr_full_blocks,
+ resize->old_nr_entries_in_last_block);
+
+ r = drop_blocks(resize, begin, end);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Trim the new tail block
+ */
+ if (resize->new_nr_entries_in_last_block) {
+ r = shadow_ablock(resize->info, &resize->root,
+ resize->new_nr_full_blocks, &block, &ab);
+ if (r)
+ return r;
+
+ trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
+ unlock_ablock(resize->info, block);
+ }
+
+ return 0;
+}
+
+/*
+ * Grow an array.
+ */
+static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ r = shadow_ablock(resize->info, &resize->root,
+ resize->old_nr_full_blocks, &block, &ab);
+ if (r)
+ return r;
+
+ fill_ablock(resize->info, ab, resize->value, new_nr_entries);
+ unlock_ablock(resize->info, block);
+
+ return r;
+}
+
+static int grow_add_tail_block(struct resize *resize)
+{
+ return insert_new_ablock(resize->info, resize->size_of_block,
+ resize->max_entries,
+ resize->new_nr_full_blocks,
+ resize->new_nr_entries_in_last_block,
+ resize->value, &resize->root);
+}
+
+static int grow_needs_more_blocks(struct resize *resize)
+{
+ int r;
+
+ if (resize->old_nr_entries_in_last_block > 0) {
+ r = grow_extend_tail_block(resize, resize->max_entries);
+ if (r)
+ return r;
+ }
+
+ r = insert_full_ablocks(resize->info, resize->size_of_block,
+ resize->old_nr_full_blocks,
+ resize->new_nr_full_blocks,
+ resize->max_entries, resize->value,
+ &resize->root);
+ if (r)
+ return r;
+
+ if (resize->new_nr_entries_in_last_block)
+ r = grow_add_tail_block(resize);
+
+ return r;
+}
+
+static int grow(struct resize *resize)
+{
+ if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
+ return grow_needs_more_blocks(resize);
+
+ else if (resize->old_nr_entries_in_last_block)
+ return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
+
+ else
+ return grow_add_tail_block(resize);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * These are the value_type functions for the btree elements, which point
+ * to array blocks.
+ */
+static void block_inc(void *context, const void *value)
+{
+ __le64 block_le;
+ struct dm_array_info *info = context;
+
+ memcpy(&block_le, value, sizeof(block_le));
+ dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+}
+
+static void block_dec(void *context, const void *value)
+{
+ int r;
+ uint64_t b;
+ __le64 block_le;
+ uint32_t ref_count;
+ struct dm_block *block;
+ struct array_block *ab;
+ struct dm_array_info *info = context;
+
+ memcpy(&block_le, value, sizeof(block_le));
+ b = le64_to_cpu(block_le);
+
+ r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
+ if (r) {
+ DMERR_LIMIT("couldn't get reference count for block %llu",
+ (unsigned long long) b);
+ return;
+ }
+
+ if (ref_count == 1) {
+ /*
+ * We're about to drop the last reference to this ablock.
+ * So we need to decrement the ref count of the contents.
+ */
+ r = get_ablock(info, b, &block, &ab);
+ if (r) {
+ DMERR_LIMIT("couldn't get array block %llu",
+ (unsigned long long) b);
+ return;
+ }
+
+ dec_ablock_entries(info, ab);
+ unlock_ablock(info, block);
+ }
+
+ dm_tm_dec(info->btree_info.tm, b);
+}
+
+static int block_equal(void *context, const void *value1, const void *value2)
+{
+ return !memcmp(value1, value2, sizeof(__le64));
+}
+
+/*----------------------------------------------------------------*/
+
+void dm_array_info_init(struct dm_array_info *info,
+ struct dm_transaction_manager *tm,
+ struct dm_btree_value_type *vt)
+{
+ struct dm_btree_value_type *bvt = &info->btree_info.value_type;
+
+ memcpy(&info->value_type, vt, sizeof(info->value_type));
+ info->btree_info.tm = tm;
+ info->btree_info.levels = 1;
+
+ bvt->context = info;
+ bvt->size = sizeof(__le64);
+ bvt->inc = block_inc;
+ bvt->dec = block_dec;
+ bvt->equal = block_equal;
+}
+EXPORT_SYMBOL_GPL(dm_array_info_init);
+
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
+{
+ return dm_btree_empty(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_empty);
+
+static int array_resize(struct dm_array_info *info, dm_block_t root,
+ uint32_t old_size, uint32_t new_size,
+ const void *value, dm_block_t *new_root)
+{
+ int r;
+ struct resize resize;
+
+ if (old_size == new_size)
+ return 0;
+
+ resize.info = info;
+ resize.root = root;
+ resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ resize.max_entries = calc_max_entries(info->value_type.size,
+ resize.size_of_block);
+
+ resize.old_nr_full_blocks = old_size / resize.max_entries;
+ resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
+ resize.new_nr_full_blocks = new_size / resize.max_entries;
+ resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
+ resize.value = value;
+
+ r = ((new_size > old_size) ? grow : shrink)(&resize);
+ if (r)
+ return r;
+
+ *new_root = resize.root;
+ return 0;
+}
+
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+ uint32_t old_size, uint32_t new_size,
+ const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ int r = array_resize(info, root, old_size, new_size, value, new_root);
+ __dm_unbless_for_disk(value);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_resize);
+
+int dm_array_del(struct dm_array_info *info, dm_block_t root)
+{
+ return dm_btree_del(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_del);
+
+int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, void *value_le)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+ size_t size_of_block;
+ unsigned entry, max_entries;
+
+ size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ max_entries = calc_max_entries(info->value_type.size, size_of_block);
+
+ r = lookup_ablock(info, root, index / max_entries, &block, &ab);
+ if (r)
+ return r;
+
+ entry = index % max_entries;
+ if (entry >= le32_to_cpu(ab->nr_entries))
+ r = -ENODATA;
+ else
+ memcpy(value_le, element_at(info, ab, entry),
+ info->value_type.size);
+
+ unlock_ablock(info, block);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_get_value);
+
+static int array_set_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, const void *value, dm_block_t *new_root)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+ size_t size_of_block;
+ unsigned max_entries;
+ unsigned entry;
+ void *old_value;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ max_entries = calc_max_entries(info->value_type.size, size_of_block);
+
+ r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
+ if (r)
+ return r;
+ *new_root = root;
+
+ entry = index % max_entries;
+ if (entry >= le32_to_cpu(ab->nr_entries)) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ old_value = element_at(info, ab, entry);
+ if (vt->dec &&
+ (!vt->equal || !vt->equal(vt->context, old_value, value))) {
+ vt->dec(vt->context, old_value);
+ if (vt->inc)
+ vt->inc(vt->context, value);
+ }
+
+ memcpy(old_value, value, info->value_type.size);
+
+out:
+ unlock_ablock(info, block);
+ return r;
+}
+
+int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ int r;
+
+ r = array_set_value(info, root, index, value, new_root);
+ __dm_unbless_for_disk(value);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_set_value);
+
+struct walk_info {
+ struct dm_array_info *info;
+ int (*fn)(void *context, uint64_t key, void *leaf);
+ void *context;
+};
+
+static int walk_ablock(void *context, uint64_t *keys, void *leaf)
+{
+ struct walk_info *wi = context;
+
+ int r;
+ unsigned i;
+ __le64 block_le;
+ unsigned nr_entries, max_entries;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ memcpy(&block_le, leaf, sizeof(block_le));
+ r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
+ if (r)
+ return r;
+
+ max_entries = le32_to_cpu(ab->max_entries);
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = 0; i < nr_entries; i++) {
+ r = wi->fn(wi->context, keys[0] * max_entries + i,
+ element_at(wi->info, ab, i));
+
+ if (r)
+ break;
+ }
+
+ unlock_ablock(wi->info, block);
+ return r;
+}
+
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+ int (*fn)(void *, uint64_t key, void *leaf),
+ void *context)
+{
+ struct walk_info wi;
+
+ wi.info = info;
+ wi.fn = fn;
+ wi.context = context;
+
+ return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
+}
+EXPORT_SYMBOL_GPL(dm_array_walk);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h
new file mode 100644
index 000000000000..ea177d6fa58f
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_ARRAY_H
+#define _LINUX_DM_ARRAY_H
+
+#include "dm-btree.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The dm-array is a persistent version of an array. It packs the data
+ * more efficiently than a btree which will result in less disk space use,
+ * and a performance boost. The element get and set operations are still
+ * O(ln(n)), but with a much smaller constant.
+ *
+ * The value type structure is reused from the btree type to support proper
+ * reference counting of values.
+ *
+ * The arrays implicitly know their length, and bounds are checked for
+ * lookups and updated. It doesn't store this in an accessible place
+ * because it would waste a whole metadata block. Make sure you store the
+ * size along with the array root in your encompassing data.
+ *
+ * Array entries are indexed via an unsigned integer starting from zero.
+ * Arrays are not sparse; if you resize an array to have 'n' entries then
+ * 'n - 1' will be the last valid index.
+ *
+ * Typical use:
+ *
+ * a) initialise a dm_array_info structure. This describes the array
+ * values and ties it into a specific transaction manager. It holds no
+ * instance data; the same info can be used for many similar arrays if
+ * you wish.
+ *
+ * b) Get yourself a root. The root is the index of a block of data on the
+ * disk that holds a particular instance of an array. You may have a
+ * pre existing root in your metadata that you wish to use, or you may
+ * want to create a brand new, empty array with dm_array_empty().
+ *
+ * Like the other data structures in this library, dm_array objects are
+ * immutable between transactions. Update functions will return you the
+ * root for a _new_ array. If you've incremented the old root, via
+ * dm_tm_inc(), before calling the update function you may continue to use
+ * it in parallel with the new root.
+ *
+ * c) resize an array with dm_array_resize().
+ *
+ * d) Get a value from the array with dm_array_get_value().
+ *
+ * e) Set a value in the array with dm_array_set_value().
+ *
+ * f) Walk an array of values in index order with dm_array_walk(). More
+ * efficient than making many calls to dm_array_get_value().
+ *
+ * g) Destroy the array with dm_array_del(). This tells the transaction
+ * manager that you're no longer using this data structure so it can
+ * recycle it's blocks. (dm_array_dec() would be a better name for it,
+ * but del is in keeping with dm_btree_del()).
+ */
+
+/*
+ * Describes an array. Don't initialise this structure yourself, use the
+ * init function below.
+ */
+struct dm_array_info {
+ struct dm_transaction_manager *tm;
+ struct dm_btree_value_type value_type;
+ struct dm_btree_info btree_info;
+};
+
+/*
+ * Sets up a dm_array_info structure. You don't need to do anything with
+ * this structure when you finish using it.
+ *
+ * info - the structure being filled in.
+ * tm - the transaction manager that should supervise this structure.
+ * vt - describes the leaf values.
+ */
+void dm_array_info_init(struct dm_array_info *info,
+ struct dm_transaction_manager *tm,
+ struct dm_btree_value_type *vt);
+
+/*
+ * Create an empty, zero length array.
+ *
+ * info - describes the array
+ * root - on success this will be filled out with the root block
+ */
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root);
+
+/*
+ * Resizes the array.
+ *
+ * info - describes the array
+ * root - the root block of the array on disk
+ * old_size - the caller is responsible for remembering the size of
+ * the array
+ * new_size - can be bigger or smaller than old_size
+ * value - if we're growing the array the new entries will have this value
+ * new_root - on success, points to the new root block
+ *
+ * If growing the inc function for 'value' will be called the appropriate
+ * number of times. So if the caller is holding a reference they may want
+ * to drop it.
+ */
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+ uint32_t old_size, uint32_t new_size,
+ const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value);
+
+/*
+ * Frees a whole array. The value_type's decrement operation will be called
+ * for all values in the array
+ */
+int dm_array_del(struct dm_array_info *info, dm_block_t root);
+
+/*
+ * Lookup a value in the array
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - the value to be read. Will be in on-disk format of course.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, void *value);
+
+/*
+ * Set an entry in the array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - value to be written to disk. Make sure you confirm the value is
+ * in on-disk format with__dm_bless_for_disk() before calling.
+ * new_root - the new root block
+ *
+ * The old value being overwritten will be decremented, the new value
+ * incremented.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value);
+
+/*
+ * Walk through all the entries in an array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * fn - called back for every element
+ * context - passed to the callback
+ */
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t key, void *leaf),
+ void *context);
+
+/*----------------------------------------------------------------*/
+
+#endif /* _LINUX_DM_ARRAY_H */
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
new file mode 100644
index 000000000000..cd9a86d4cdf0
--- /dev/null
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-bitset.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "bitset"
+#define BITS_PER_ARRAY_ENTRY 64
+
+/*----------------------------------------------------------------*/
+
+static struct dm_btree_value_type bitset_bvt = {
+ .context = NULL,
+ .size = sizeof(__le64),
+ .inc = NULL,
+ .dec = NULL,
+ .equal = NULL,
+};
+
+/*----------------------------------------------------------------*/
+
+void dm_disk_bitset_init(struct dm_transaction_manager *tm,
+ struct dm_disk_bitset *info)
+{
+ dm_array_info_init(&info->array_info, tm, &bitset_bvt);
+ info->current_index_set = false;
+}
+EXPORT_SYMBOL_GPL(dm_disk_bitset_init);
+
+int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root)
+{
+ return dm_array_empty(&info->array_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_empty);
+
+int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t old_nr_entries, uint32_t new_nr_entries,
+ bool default_value, dm_block_t *new_root)
+{
+ uint32_t old_blocks = dm_div_up(old_nr_entries, BITS_PER_ARRAY_ENTRY);
+ uint32_t new_blocks = dm_div_up(new_nr_entries, BITS_PER_ARRAY_ENTRY);
+ __le64 value = default_value ? cpu_to_le64(~0) : cpu_to_le64(0);
+
+ __dm_bless_for_disk(&value);
+ return dm_array_resize(&info->array_info, root, old_blocks, new_blocks,
+ &value, new_root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_resize);
+
+int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root)
+{
+ return dm_array_del(&info->array_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_del);
+
+int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
+ dm_block_t *new_root)
+{
+ int r;
+ __le64 value;
+
+ if (!info->current_index_set)
+ return 0;
+
+ value = cpu_to_le64(info->current_bits);
+
+ __dm_bless_for_disk(&value);
+ r = dm_array_set_value(&info->array_info, root, info->current_index,
+ &value, new_root);
+ if (r)
+ return r;
+
+ info->current_index_set = false;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_flush);
+
+static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t array_index)
+{
+ int r;
+ __le64 value;
+
+ r = dm_array_get_value(&info->array_info, root, array_index, &value);
+ if (r)
+ return r;
+
+ info->current_bits = le64_to_cpu(value);
+ info->current_index_set = true;
+ info->current_index = array_index;
+ return 0;
+}
+
+static int get_array_entry(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root)
+{
+ int r;
+ unsigned array_index = index / BITS_PER_ARRAY_ENTRY;
+
+ if (info->current_index_set) {
+ if (info->current_index == array_index)
+ return 0;
+
+ r = dm_bitset_flush(info, root, new_root);
+ if (r)
+ return r;
+ }
+
+ return read_bits(info, root, array_index);
+}
+
+int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root)
+{
+ int r;
+ unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+ r = get_array_entry(info, root, index, new_root);
+ if (r)
+ return r;
+
+ set_bit(b, (unsigned long *) &info->current_bits);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
+
+int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root)
+{
+ int r;
+ unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+ r = get_array_entry(info, root, index, new_root);
+ if (r)
+ return r;
+
+ clear_bit(b, (unsigned long *) &info->current_bits);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
+
+int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root, bool *result)
+{
+ int r;
+ unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+ r = get_array_entry(info, root, index, new_root);
+ if (r)
+ return r;
+
+ *result = test_bit(b, (unsigned long *) &info->current_bits);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_test_bit);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
new file mode 100644
index 000000000000..e1b9bea14aa1
--- /dev/null
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_BITSET_H
+#define _LINUX_DM_BITSET_H
+
+#include "dm-array.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * This bitset type is a thin wrapper round a dm_array of 64bit words. It
+ * uses a tiny, one word cache to reduce the number of array lookups and so
+ * increase performance.
+ *
+ * Like the dm-array that it's based on, the caller needs to keep track of
+ * the size of the bitset separately. The underlying dm-array implicitly
+ * knows how many words it's storing and will return -ENODATA if you try
+ * and access an out of bounds word. However, an out of bounds bit in the
+ * final word will _not_ be detected, you have been warned.
+ *
+ * Bits are indexed from zero.
+
+ * Typical use:
+ *
+ * a) Initialise a dm_disk_bitset structure with dm_disk_bitset_init().
+ * This describes the bitset and includes the cache. It's not called it
+ * dm_bitset_info in line with other data structures because it does
+ * include instance data.
+ *
+ * b) Get yourself a root. The root is the index of a block of data on the
+ * disk that holds a particular instance of an bitset. You may have a
+ * pre existing root in your metadata that you wish to use, or you may
+ * want to create a brand new, empty bitset with dm_bitset_empty().
+ *
+ * Like the other data structures in this library, dm_bitset objects are
+ * immutable between transactions. Update functions will return you the
+ * root for a _new_ array. If you've incremented the old root, via
+ * dm_tm_inc(), before calling the update function you may continue to use
+ * it in parallel with the new root.
+ *
+ * Even read operations may trigger the cache to be flushed and as such
+ * return a root for a new, updated bitset.
+ *
+ * c) resize a bitset with dm_bitset_resize().
+ *
+ * d) Set a bit with dm_bitset_set_bit().
+ *
+ * e) Clear a bit with dm_bitset_clear_bit().
+ *
+ * f) Test a bit with dm_bitset_test_bit().
+ *
+ * g) Flush all updates from the cache with dm_bitset_flush().
+ *
+ * h) Destroy the bitset with dm_bitset_del(). This tells the transaction
+ * manager that you're no longer using this data structure so it can
+ * recycle it's blocks. (dm_bitset_dec() would be a better name for it,
+ * but del is in keeping with dm_btree_del()).
+ */
+
+/*
+ * Opaque object. Unlike dm_array_info, you should have one of these per
+ * bitset. Initialise with dm_disk_bitset_init().
+ */
+struct dm_disk_bitset {
+ struct dm_array_info array_info;
+
+ uint32_t current_index;
+ uint64_t current_bits;
+
+ bool current_index_set:1;
+};
+
+/*
+ * Sets up a dm_disk_bitset structure. You don't need to do anything with
+ * this structure when you finish using it.
+ *
+ * tm - the transaction manager that should supervise this structure
+ * info - the structure being initialised
+ */
+void dm_disk_bitset_init(struct dm_transaction_manager *tm,
+ struct dm_disk_bitset *info);
+
+/*
+ * Create an empty, zero length bitset.
+ *
+ * info - describes the bitset
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root);
+
+/*
+ * Resize the bitset.
+ *
+ * info - describes the bitset
+ * old_root - the root block of the array on disk
+ * old_nr_entries - the number of bits in the old bitset
+ * new_nr_entries - the number of bits you want in the new bitset
+ * default_value - the value for any new bits
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t old_root,
+ uint32_t old_nr_entries, uint32_t new_nr_entries,
+ bool default_value, dm_block_t *new_root);
+
+/*
+ * Frees the bitset.
+ */
+int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root);
+
+/*
+ * Set a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root);
+
+/*
+ * Clears a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root);
+
+/*
+ * Tests a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block (cached values may have been written)
+ * result - the bit value you're after
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root, bool *result);
+
+/*
+ * Flush any cached changes to disk.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
+ dm_block_t *new_root);
+
+/*----------------------------------------------------------------*/
+
+#endif /* _LINUX_DM_BITSET_H */
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 28c3ed072a79..81b513890e2b 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -613,6 +613,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
return dm_bufio_write_dirty_buffers(bm->bufio);
}
+EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
void dm_bm_set_read_only(struct dm_block_manager *bm)
{
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index accbb05f17b6..37d367bb9aa8 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -64,6 +64,7 @@ struct ro_spine {
void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
int exit_ro_spine(struct ro_spine *s);
int ro_step(struct ro_spine *s, dm_block_t new_child);
+void ro_pop(struct ro_spine *s);
struct btree_node *ro_node(struct ro_spine *s);
struct shadow_spine {
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index f199a0c4ed04..cf9fd676ae44 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -164,6 +164,13 @@ int ro_step(struct ro_spine *s, dm_block_t new_child)
return r;
}
+void ro_pop(struct ro_spine *s)
+{
+ BUG_ON(!s->count);
+ --s->count;
+ unlock_block(s->info, s->nodes[s->count]);
+}
+
struct btree_node *ro_node(struct ro_spine *s)
{
struct dm_block *block;
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 4caf66918cdb..35865425e4b4 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -807,3 +807,55 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
return r ? r : count;
}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+
+/*
+ * FIXME: We shouldn't use a recursive algorithm when we have limited stack
+ * space. Also this only works for single level trees.
+ */
+static int walk_node(struct ro_spine *s, dm_block_t block,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context)
+{
+ int r;
+ unsigned i, nr;
+ struct btree_node *n;
+ uint64_t keys;
+
+ r = ro_step(s, block);
+ n = ro_node(s);
+
+ nr = le32_to_cpu(n->header.nr_entries);
+ for (i = 0; i < nr; i++) {
+ if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
+ r = walk_node(s, value64(n, i), fn, context);
+ if (r)
+ goto out;
+ } else {
+ keys = le64_to_cpu(*key_ptr(n, i));
+ r = fn(context, &keys, value_ptr(n, i));
+ if (r)
+ goto out;
+ }
+ }
+
+out:
+ ro_pop(s);
+ return r;
+}
+
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context)
+{
+ int r;
+ struct ro_spine spine;
+
+ BUG_ON(info->levels > 1);
+
+ init_ro_spine(&spine, info);
+ r = walk_node(&spine, root, fn, context);
+ exit_ro_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_walk);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index a2cd50441ca1..8672d159e0b5 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -58,21 +58,21 @@ struct dm_btree_value_type {
* somewhere.) This method is _not_ called for insertion of a new
* value: It is assumed the ref count is already 1.
*/
- void (*inc)(void *context, void *value);
+ void (*inc)(void *context, const void *value);
/*
* This value is being deleted. The btree takes care of freeing
* the memory pointed to by @value. Often the del function just
* needs to decrement a reference count somewhere.
*/
- void (*dec)(void *context, void *value);
+ void (*dec)(void *context, const void *value);
/*
* A test for equality between two values. When a value is
* overwritten with a new one, the old one has the dec method
* called _unless_ the new and old value are deemed equal.
*/
- int (*equal)(void *context, void *value1, void *value2);
+ int (*equal)(void *context, const void *value1, const void *value2);
};
/*
@@ -142,4 +142,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
uint64_t *result_keys);
+/*
+ * Iterate through the a btree, calling fn() on each entry.
+ * It only works for single level trees and is internally recursive, so
+ * monitor stack usage carefully.
+ */
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context);
+
#endif /* _LINUX_DM_BTREE_H */