We implement extents in the filter by checking, block by block, whether
each block is present in the overlay (in which case it is non-sparse);
if it is not, we call through to the underlying plugin.
---
filters/cow/blk.h | 3 ++
filters/cow/blk.c | 10 +++++
filters/cow/cow.c | 102 ++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 111 insertions(+), 4 deletions(-)
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index 28ef3b4a..5eb30794 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -54,6 +54,9 @@ extern void blk_free (void);
/* Allocate or resize the overlay and bitmap. */
extern int blk_set_size (uint64_t new_size);
+/* Reports the status of the block in the overlay via *present, *trimmed. */
+extern void blk_status (uint64_t blknum, bool *present, bool *trimmed);
+
/* Read a single block from the overlay or plugin. */
extern int blk_read (struct nbdkit_next_ops *next_ops, void *nxdata,
uint64_t blknum, uint8_t *block, int *err)
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 9e85920f..4a8adfb9 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -177,6 +177,16 @@ blk_set_allocated (uint64_t blknum)
bitmap_set_blk (&bm, blknum, true);
}
+/* Sets *present from blk_is_allocated and *trimmed to false.  A hack:
+ * normally hidden in the blk module, but needed to calculate extents.
+ */
+void
+blk_status (uint64_t blknum, bool *present, bool *trimmed)
+{
+ *present = blk_is_allocated (blknum);
+ *trimmed = false;
+}
+
/* These are the block operations. They always read or write a single
* whole block of size ‘blksize’.
*/
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 92358375..d12565e6 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -141,9 +141,6 @@ cow_prepare (struct nbdkit_next_ops *next_ops, void *nxdata,
return r >= 0 ? 0 : -1;
}
-/* Whatever the underlying plugin can or can't do, we can write, we
- * cannot trim or detect extents, and we can flush.
- */
static int
cow_can_write (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle)
{
@@ -159,7 +156,7 @@ cow_can_trim (struct nbdkit_next_ops *next_ops, void *nxdata, void
*handle)
static int
cow_can_extents (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle)
{
- return 0;
+ return 1;
}
static int
@@ -499,6 +496,102 @@ cow_cache (struct nbdkit_next_ops *next_ops, void *nxdata,
return 0;
}
+/* Extents, computed block-by-block from the overlay or the plugin. */
+static int
+cow_extents (struct nbdkit_next_ops *next_ops, void *nxdata,
+             void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+             struct nbdkit_extents *extents, int *err)
+{
+  const bool can_extents = next_ops->can_extents (nxdata);
+  const bool req_one = flags & NBDKIT_FLAG_REQ_ONE;
+  uint64_t end, blknum;
+  uint64_t remaining; /* 64 bit: aligned length may exceed UINT32_MAX. */
+
+  /* To make this easier, align the requested extents to whole blocks. */
+  end = offset + count;
+  offset = ROUND_DOWN (offset, BLKSIZE);
+  end = ROUND_UP (end, BLKSIZE);
+  remaining = end - offset;
+  blknum = offset / BLKSIZE;
+
+  assert (IS_ALIGNED (offset, BLKSIZE));
+  assert (IS_ALIGNED (remaining, BLKSIZE));
+  assert (remaining > 0); /* We must make forward progress. */
+
+  /* We hold the lock for the whole time, even when requesting extents
+   * from the plugin, because we want to present an atomic picture of
+   * the current state.
+   */
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
+
+  while (remaining > 0) {
+    bool present, trimmed;
+    struct nbdkit_extent e;
+
+    blk_status (blknum, &present, &trimmed);
+
+    /* Present in the overlay. */
+    if (present) {
+      e.offset = offset;
+      e.length = BLKSIZE;
+
+      if (trimmed)
+        e.type = NBDKIT_EXTENT_HOLE|NBDKIT_EXTENT_ZERO;
+      else
+        e.type = 0;
+
+      if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
+        *err = errno;
+        return -1;
+      }
+    }
+
+    /* Not present in the overlay, but we can ask the plugin. */
+    else if (can_extents) {
+      size_t i;
+
+      CLEANUP_EXTENTS_FREE struct nbdkit_extents *extents2 =
+        nbdkit_extents_full (next_ops, nxdata, BLKSIZE, offset, flags, err);
+      if (extents2 == NULL)
+        return -1;
+
+      for (i = 0; i < nbdkit_extents_count (extents2); ++i) {
+        e = nbdkit_get_extent (extents2, i);
+        if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
+          *err = errno;
+          return -1;
+        }
+      }
+    }
+
+    /* Otherwise assume the block is non-sparse. */
+    else {
+      e.offset = offset;
+      e.length = BLKSIZE;
+      e.type = 0;
+
+      if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
+        *err = errno;
+        return -1;
+      }
+    }
+
+    blknum++;
+    offset += BLKSIZE;
+    remaining -= BLKSIZE;
+
+    /* If the caller only wanted the first extent, and we've managed
+     * to add at least one extent to the list, then we can drop out
+     * now.  (Note calling nbdkit_add_extent above does not mean the
+     * extent got added since it might be before the first offset.)
+     */
+    if (req_one && nbdkit_extents_count (extents) > 0)
+      break;
+  }
+
+  return 0;
+}
+
static struct nbdkit_filter filter = {
.name = "cow",
.longname = "nbdkit copy-on-write (COW) filter",
@@ -521,6 +614,7 @@ static struct nbdkit_filter filter = {
.zero = cow_zero,
.flush = cow_flush,
.cache = cow_cache,
+ .extents = cow_extents,
};
NBDKIT_REGISTER_FILTER(filter)
--
2.29.0.rc2