The NBD protocol is adding an extension to let servers advertise
initialization state to the client: whether the image contains holes,
and whether it is known to read as all zeroes. For memory-based
plugins, it is fairly easy to advertise several cases: data and memory
are usually sparse and detecting zero is easy (requires new functions
to the sparse_array common code), although since the sparse array is
reused between consecutive clients, a later client might get different
answers than the first client. The null and full plugins are
obviously zero. The zero plugin doesn't return any data, so it
doesn't need changes. The info plugin is never sparse, but can be all
zeroes in base64exportname mode.
Adding initial state support to file-based and language binding
plugins will be done separately.
Testing of this addition relies on a contemporary patch to libnbd
adding a new nbd_get_init_flags() function for reading the advertised
initial state, then demonstrating changes in state observable from the
memory plugin over successive clients.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
common/sparse/sparse.c | 30 +++++++++++++++++-
common/sparse/sparse.h | 19 +++++++++++-
docs/nbdkit-plugin.pod | 23 ++++++++++++++
include/nbdkit-plugin.h | 5 ++-
plugins/data/data.c | 21 ++++++++++++-
plugins/full/full.c | 18 ++++++++++-
plugins/info/info.c | 15 ++++++++-
plugins/memory/memory.c | 19 ++++++++++++
plugins/null/null.c | 18 ++++++++++-
server/plugins.c | 10 ++++--
tests/Makefile.am | 2 ++
tests/test-memory-init.sh | 65 +++++++++++++++++++++++++++++++++++++++
12 files changed, 236 insertions(+), 9 deletions(-)
create mode 100755 tests/test-memory-init.sh
diff --git a/common/sparse/sparse.c b/common/sparse/sparse.c
index 0acfa1f..ed593f1 100644
--- a/common/sparse/sparse.c
+++ b/common/sparse/sparse.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2017-2019 Red Hat Inc.
+ * Copyright (C) 2017-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -44,6 +44,7 @@
#include <nbdkit-plugin.h>
#include "iszero.h"
+#include "rounding.h"
#include "sparse.h"
/* Two level directory for the sparse array.
@@ -67,6 +68,8 @@
* images, plus some architectures have much larger page sizes than
* others making behaviour inconsistent across arches.
*
+ * 4. We can easily track how much of the image is allocated.
+ *
* To achieve this we use a B-Tree-like structure. The L1 directory
* contains an ordered, non-overlapping, non-contiguous list of
* (offset, pointer to L2 directory).
@@ -103,6 +106,8 @@ struct l1_entry {
struct sparse_array {
struct l1_entry *l1_dir; /* L1 directory. */
size_t l1_size; /* Number of entries in L1 directory. */
+ size_t used_pages; /* Number of non-NULL L2 entries. */
+ uint64_t max_pages; /* Maximum L2 pages if fully allocated. */
bool debug;
};
@@ -140,6 +145,8 @@ alloc_sparse_array (bool debug)
return NULL;
sa->l1_dir = NULL;
sa->l1_size = 0;
+ sa->used_pages = 0;
+ sa->max_pages = 0;
sa->debug = debug;
return sa;
}
@@ -254,6 +261,7 @@ lookup (struct sparse_array *sa, uint64_t offset, bool create,
return NULL;
}
l2_dir[o] = page;
+ sa->used_pages++;
}
if (!page)
return NULL;
@@ -355,6 +363,7 @@ sparse_array_zero (struct sparse_array *sa, uint32_t count, uint64_t
offset)
__func__, offset);
free (*l2_page);
*l2_page = NULL;
+ sa->used_pages--;
}
}
@@ -398,3 +407,22 @@ sparse_array_extents (struct sparse_array *sa,
return 0;
}
+
+void
+sparse_array_set_size (struct sparse_array *sa, uint64_t size)
+{
+ assert (size <= INT64_MAX);
+ sa->max_pages = DIV_ROUND_UP (size, PAGE_SIZE);
+}
+
+int
+sparse_array_is_sparse (struct sparse_array *sa)
+{
+ return sa->used_pages < sa->max_pages;
+}
+
+int
+sparse_array_is_zero (struct sparse_array *sa)
+{
+ return !sa->used_pages;
+}
diff --git a/common/sparse/sparse.h b/common/sparse/sparse.h
index 704ba32..6234ffe 100644
--- a/common/sparse/sparse.h
+++ b/common/sparse/sparse.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2017-2019 Red Hat Inc.
+ * Copyright (C) 2017-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -91,4 +91,21 @@ extern int sparse_array_extents (struct sparse_array *sa,
uint32_t count, uint64_t offset,
struct nbdkit_extents *extents);
+/* Set the expected maximum size of the sparse array.
+ *
+ * May be called after writing initial contents, as long as those contents
+ * were not placed beyond the new expected size.
+ */
+extern void sparse_array_set_size (struct sparse_array *sa, uint64_t size);
+
+/* Return true if the array is sparse.
+ *
+ * The results are reliable only after a call to sparse_array_set_size.
+ */
+extern int sparse_array_is_sparse (struct sparse_array *sa);
+
+/* Return true if the array reads as all zeroes. */
+extern int sparse_array_is_zero (struct sparse_array *sa);
+
+
#endif /* NBDKIT_SPARSE_H */
diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod
index 41bffb7..d55aafd 100644
--- a/docs/nbdkit-plugin.pod
+++ b/docs/nbdkit-plugin.pod
@@ -712,6 +712,29 @@ This callback is not required. If omitted, then we return
C<NBDKIT_CACHE_NONE> if the C<.cache> callback is missing, or
C<NBDKIT_CACHE_NATIVE> if it is defined.
+=head2 C<.init_sparse>
+
+ int init_sparse (void *handle);
+
+This is called during the option negotiation phase to find out if the
+plugin knows whether the image starts out as sparse (that is, at least
+one unallocated hole, regardless of what might be read from that hole).
+
+This callback is not required. If omitted, then we return C<0>.
+
+=head2 C<.init_zero>
+
+ int init_zero (void *handle);
+
+This is called during the option negotiation phase to find out if the
+plugin knows whether the image starts out reading entirely as zero
+(regardless of whether the image is sparse). This bit of information
+is most useful to a client that plans to copy an image from elsewhere
+over to a just-created file exposed by the server, because the client
+can decide if it can bypass a potentially lengthy pre-zeroing pass.
+
+This callback is not required. If omitted, then we return C<0>.
+
=head2 C<.pread>
int pread (void *handle, void *buf, uint32_t count, uint64_t offset,
diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h
index b4ecf65..77bd23e 100644
--- a/include/nbdkit-plugin.h
+++ b/include/nbdkit-plugin.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2019 Red Hat Inc.
+ * Copyright (C) 2013-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -136,6 +136,9 @@ struct nbdkit_plugin {
int (*can_fast_zero) (void *handle);
int (*preconnect) (int readonly);
+
+ int (*init_sparse) (void *handle);
+ int (*init_zero) (void *handle);
};
extern void nbdkit_set_error (int err);
diff --git a/plugins/data/data.c b/plugins/data/data.c
index 1e5cb0f..ff8b272 100644
--- a/plugins/data/data.c
+++ b/plugins/data/data.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018-2019 Red Hat Inc.
+ * Copyright (C) 2018-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -314,6 +314,7 @@ data_config_complete (void)
if (size == -1)
size = data_size;
nbdkit_debug ("final size: %" PRIi64, size);
+ sparse_array_set_size (sa, size);
return 0;
}
@@ -378,6 +379,22 @@ data_can_fast_zero (void *handle)
return 1;
}
+/* Does current client start with a sparse image. */
+static int
+data_init_sparse (void *handle)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
+ return sparse_array_is_sparse (sa);
+}
+
+/* Does current client start with all zeroes. */
+static int
+data_init_zero (void *handle)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
+ return sparse_array_is_zero (sa);
+}
+
/* Read data. */
static int
data_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
@@ -455,6 +472,8 @@ static struct nbdkit_plugin plugin = {
.can_fua = data_can_fua,
.can_cache = data_can_cache,
.can_fast_zero = data_can_fast_zero,
+ .init_sparse = data_init_sparse,
+ .init_zero = data_init_zero,
.pread = data_pread,
.pwrite = data_pwrite,
.zero = data_zero,
diff --git a/plugins/full/full.c b/plugins/full/full.c
index 0b69a8c..9fb8d85 100644
--- a/plugins/full/full.c
+++ b/plugins/full/full.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2017-2019 Red Hat Inc.
+ * Copyright (C) 2017-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -111,6 +111,20 @@ full_can_cache (void *handle)
return NBDKIT_CACHE_NATIVE;
}
+/* Always sparse. */
+static int
+full_init_sparse (void *handle)
+{
+ return 1;
+}
+
+/* Always reads as zero. */
+static int
+full_init_zero (void *handle)
+{
+ return 1;
+}
+
/* Read data. */
static int
full_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
@@ -167,6 +181,8 @@ static struct nbdkit_plugin plugin = {
.get_size = full_get_size,
.can_multi_conn = full_can_multi_conn,
.can_cache = full_can_cache,
+ .init_sparse = full_init_sparse,
+ .init_zero = full_init_zero,
.pread = full_pread,
.pwrite = full_pwrite,
.trim = full_trim,
diff --git a/plugins/info/info.c b/plugins/info/info.c
index 329a368..29bff77 100644
--- a/plugins/info/info.c
+++ b/plugins/info/info.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2017-2019 Red Hat Inc.
+ * Copyright (C) 2017-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -50,6 +50,7 @@
#include <nbdkit-plugin.h>
#include "byte-swapping.h"
+#include "iszero.h"
#include "tvdiff.h"
/* The mode. */
@@ -380,6 +381,17 @@ info_can_cache (void *handle)
return NBDKIT_CACHE_NATIVE;
}
+/* Initially zero in some cases. */
+static int
+info_init_zero (void *handle)
+{
+ struct handle *h = handle;
+
+ if (mode == MODE_TIME || mode == MODE_UPTIME || mode == MODE_CONNTIME)
+ return 0;
+ return is_zero(h->data, h->len);
+}
+
static void
update_time (struct handle *h)
{
@@ -444,6 +456,7 @@ static struct nbdkit_plugin plugin = {
.get_size = info_get_size,
.can_multi_conn = info_can_multi_conn,
.can_cache = info_can_cache,
+ .init_zero = info_init_zero,
.pread = info_pread,
/* In this plugin, errno is preserved properly along error return
* paths from failed system calls.
diff --git a/plugins/memory/memory.c b/plugins/memory/memory.c
index a96252b..bfc1249 100644
--- a/plugins/memory/memory.c
+++ b/plugins/memory/memory.c
@@ -101,6 +101,7 @@ memory_config_complete (void)
nbdkit_error ("you must specify size=<SIZE> on the command line");
return -1;
}
+ sparse_array_set_size (sa, size);
return 0;
}
@@ -154,6 +155,22 @@ memory_can_fast_zero (void *handle)
return 1;
}
+/* Does current client start with a sparse image. */
+static int
+memory_init_sparse (void *handle)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
+ return sparse_array_is_sparse (sa);
+}
+
+/* Does current client start with all zeroes. */
+static int
+memory_init_zero (void *handle)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
+ return sparse_array_is_zero (sa);
+}
+
/* Read data. */
static int
memory_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
@@ -231,6 +248,8 @@ static struct nbdkit_plugin plugin = {
.can_multi_conn = memory_can_multi_conn,
.can_cache = memory_can_cache,
.can_fast_zero = memory_can_fast_zero,
+ .init_sparse = memory_init_sparse,
+ .init_zero = memory_init_zero,
.pread = memory_pread,
.pwrite = memory_pwrite,
.zero = memory_zero,
diff --git a/plugins/null/null.c b/plugins/null/null.c
index 559cb81..bea908c 100644
--- a/plugins/null/null.c
+++ b/plugins/null/null.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2017-2019 Red Hat Inc.
+ * Copyright (C) 2017-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -107,6 +107,20 @@ null_can_fast_zero (void *handle)
return 1;
}
+/* Always sparse. */
+static int
+null_init_sparse (void *handle)
+{
+ return 1;
+}
+
+/* Always reads as zero. */
+static int
+null_init_zero (void *handle)
+{
+ return 1;
+}
+
/* Read data. */
static int
null_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
@@ -175,6 +189,8 @@ static struct nbdkit_plugin plugin = {
.can_multi_conn = null_can_multi_conn,
.can_cache = null_can_cache,
.can_fast_zero = null_can_fast_zero,
+ .init_sparse = null_init_sparse,
+ .init_zero = null_init_zero,
.pread = null_pread,
.pwrite = null_pwrite,
.zero = null_zero,
diff --git a/server/plugins.c b/server/plugins.c
index 9b98cc6..040dc71 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -435,14 +435,20 @@ plugin_can_cache (struct backend *b, struct connection *conn, void
*handle)
static int
plugin_init_sparse (struct backend *b, struct connection *conn, void *handle)
{
- /* TODO Allow plugin to control this. */
+ struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+
+ if (p->plugin.init_sparse)
+ return p->plugin.init_sparse (handle);
return 0;
}
static int
plugin_init_zero (struct backend *b, struct connection *conn, void *handle)
{
- /* TODO Allow plugin to control this. */
+ struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+
+ if (p->plugin.init_zero)
+ return p->plugin.init_zero (handle);
return 0;
}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ea6b147..4e036a3 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -134,6 +134,7 @@ EXTRA_DIST = \
test-log.sh \
test-long-name.sh \
test.lua \
+ test-memory-init.sh \
test-memory-largest.sh \
test-memory-largest-for-qemu.sh \
test-nbd-extents.sh \
@@ -600,6 +601,7 @@ TESTS += \
# memory plugin test.
LIBGUESTFS_TESTS += test-memory
TESTS += test-memory-largest.sh test-memory-largest-for-qemu.sh
+TESTS += test-memory-init.sh
test_memory_SOURCES = test-memory.c test.h
test_memory_CFLAGS = $(WARNINGS_CFLAGS) $(LIBGUESTFS_CFLAGS)
diff --git a/tests/test-memory-init.sh b/tests/test-memory-init.sh
new file mode 100755
index 0000000..11a526a
--- /dev/null
+++ b/tests/test-memory-init.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the initial state of the memory plugin.
+
+source ./functions.sh
+set -e
+
+requires nbdsh -c 'exit (not hasattr (h, "get_init_flags"))'
+
+sock=`mktemp -u`
+files="memory-init.pid $sock"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Run nbdkit with the memory plugin.
+start_nbdkit -P memory-init.pid -U $sock memory 1M
+
+# The image should start as sparse/zero, before we write to it
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c 'assert (h.get_init_flags () == nbd.INIT_SPARSE | nbd.INIT_ZERO)' \
+ -c 'h.pwrite ("hello world".encode (), 0)'
+
+# The image is still sparse, but no longer zero, before we fill it
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c 'assert (h.get_init_flags () == nbd.INIT_SPARSE)' \
+ -c 'h.pwrite (("1" * (1024*1024)).encode (), 0)'
+
+# The image is neither sparse nor zero, before we wipe it
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c 'assert (h.get_init_flags () == 0)' \
+ -c 'h.zero (1024 * 1024, 0)'
+
+# Once again, the image is sparse/zero
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c 'assert (h.get_init_flags () == nbd.INIT_SPARSE | nbd.INIT_ZERO)'
--
2.24.1