When we added support for .extents, we had nbdkit unconditionally
support structured replies if the client requests them, and the
plugin's .can_extents has no impact on what the server advertises.
However, while the plugin API doesn't care whether the client
requested SR, there is still a case to be made for allowing a filter
to prevent SR, at least for testing purposese (such as comparison on
what a client does with no block status vs. a block status that always
reports allocated). Enhance the filter API to allow an SR inhibit,
and wire up the existing noextents filter to expose this option.
In particular, doing this found that 'qemu-nbd --list' from qemu 4.2
is rather picky: it hangs up on a server that replies with
NBD_REP_ERR_POLICY, rather than silently proceeding without SR support
(at least libnbd is more tolerant).
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/nbdkit-filter.pod | 22 ++++++++++++
filters/noextents/nbdkit-noextents-filter.pod | 17 ++++++---
server/internal.h | 1 +
server/filters.c | 24 +++++++++++++
server/plugins.c | 12 +++++++
server/protocol-handshake-newstyle.c | 18 ++++++++++
server/protocol-handshake.c | 11 +++---
include/nbdkit-filter.h | 3 ++
filters/noextents/noextents.c | 28 +++++++++++++++
TODO | 4 ++-
tests/test-eflags.sh | 36 +++++++++++++++++++
11 files changed, 167 insertions(+), 9 deletions(-)
diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index 6e2bea61..cd8ba255 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -419,4 +419,26 @@ than once for the same connection, they should return the same
value;
similarly, the filter may cache the results of each counterpart in
C<next_ops> for a given connection rather than repeating calls.
+=head2 C<.can_sr>
+
+ int (*can_sr) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
+
+This function exists to allow a filter to prevent a client from seeing
+support for structured replies, which in turn prevents clients from
+attempting sparse reads or querying extents. This function has no
+counterpart in plugins, because none of the version 2 API callbacks
+change semantics based on whether the client requested structured
+replies (the C<.extents> callback can still be used by filters
+regardless of whether the client enables structured replies or
+requests the "base:allocation" context for querying extents).
+
+If there is an error, the callback should call C<nbdkit_error> with an
+error message and return C<-1>. If this function is called more than
+once for the same connection, it should return the same value;
+similarly, the filter may cache the result of the counterpart in
+C<next_ops> for a given connection rather than repeating calls.
+
+This function is optional, and defaults to true if omitted.
+
=head2 C<.pread>
int (*pread) (struct nbdkit_next_ops *next_ops, void *nxdata,
diff --git a/filters/noextents/nbdkit-noextents-filter.pod
b/filters/noextents/nbdkit-noextents-filter.pod
index 47223928..1a74e203 100644
--- a/filters/noextents/nbdkit-noextents-filter.pod
+++ b/filters/noextents/nbdkit-noextents-filter.pod
@@ -11,7 +11,8 @@ nbdkit-noextents-filter - disable extents in the underlying plugin
“Extents” are a feature of the NBD protocol / nbdkit which allow the
client to detect sparse regions of the underlying disk.
C<nbdkit-noextents-filter> disables this so that the plugin appears to
-be fully allocated.
+be fully allocated. The effects of this filter are only observable
+from a client which is able to request structured replies.
This filter can be useful when combined with L<nbdkit-file-plugin(1)>
serving a file from a file system known to have poor C<lseek(2)>
@@ -19,9 +20,17 @@ performance (C<tmpfs> is known to be one such system).
=head1 PARAMETERS
-There are no parameters specific to nbdkit-noextents-filter. Any
-parameters are passed through to and processed by the underlying
-plugin in the normal way.
+=over 4
+
+=item B<structured=false>
+
+Controls whether a client can request structured replies, which are a
+pre-requisite for supporting sparse reads and extent queries. The
+parameter defaults to true (so that a client that requests extent
+support sees a fully-allocated image), but can be set to false to
+force the client to be unable to query extents at all.
+
+=back
=head1 SEE ALSO
diff --git a/server/internal.h b/server/internal.h
index 29a89606..c115c50f 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -274,6 +274,7 @@ struct backend {
int (*is_rotational) (struct backend *, struct connection *conn);
int (*can_trim) (struct backend *, struct connection *conn);
int (*can_zero) (struct backend *, struct connection *conn);
+ int (*can_sr) (struct backend *, struct connection *conn);
int (*can_extents) (struct backend *, struct connection *conn);
int (*can_fua) (struct backend *, struct connection *conn);
int (*can_multi_conn) (struct backend *, struct connection *conn);
diff --git a/server/filters.c b/server/filters.c
index 14ca0cc6..17403e89 100644
--- a/server/filters.c
+++ b/server/filters.c
@@ -314,6 +314,13 @@ next_can_zero (void *nxdata)
return b_conn->b->can_zero (b_conn->b, b_conn->conn);
}
+static int
+next_can_sr (void *nxdata)
+{
+ struct b_conn *b_conn = nxdata;
+ return b_conn->b->can_sr (b_conn->b, b_conn->conn);
+}
+
static int
next_can_extents (void *nxdata)
{
@@ -442,6 +449,7 @@ static struct nbdkit_next_ops next_ops = {
.is_rotational = next_is_rotational,
.can_trim = next_can_trim,
.can_zero = next_can_zero,
+ .can_sr = next_can_sr,
.can_extents = next_can_extents,
.can_fua = next_can_fua,
.can_multi_conn = next_can_multi_conn,
@@ -586,6 +594,21 @@ filter_can_zero (struct backend *b, struct connection *conn)
return f->backend.next->can_zero (f->backend.next, conn);
}
+static int
+filter_can_sr (struct backend *b, struct connection *conn)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ void *handle = connection_get_handle (conn, f->backend.i);
+ struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+ debug ("%s: can_sr", f->name);
+
+ if (f->filter.can_sr)
+ return f->filter.can_sr (&next_ops, &nxdata, handle);
+ else
+ return f->backend.next->can_sr (f->backend.next, conn);
+}
+
static int
filter_can_extents (struct backend *b, struct connection *conn)
{
@@ -818,6 +841,7 @@ static struct backend filter_functions = {
.is_rotational = filter_is_rotational,
.can_trim = filter_can_trim,
.can_zero = filter_can_zero,
+ .can_sr = filter_can_sr,
.can_extents = filter_can_extents,
.can_fua = filter_can_fua,
.can_multi_conn = filter_can_multi_conn,
diff --git a/server/plugins.c b/server/plugins.c
index e217e6b8..d83af78a 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -401,6 +401,17 @@ plugin_can_zero (struct backend *b, struct connection *conn)
return plugin_can_write (b, conn);
}
+static int
+plugin_can_sr (struct backend *b, struct connection *conn)
+{
+ /* Nothing in the v2 API depends on whether the client uses or
+ * avoids structured replies; .extents is still useful to filters
+ * even without SR. So this is hard-coded to true, with no exposure
+ * to the plugin interface; only filters can change it.
+ */
+ return 1;
+}
+
static int
plugin_can_extents (struct backend *b, struct connection *conn)
{
@@ -761,6 +772,7 @@ static struct backend plugin_functions = {
.is_rotational = plugin_is_rotational,
.can_trim = plugin_can_trim,
.can_zero = plugin_can_zero,
+ .can_sr = plugin_can_sr,
.can_extents = plugin_can_extents,
.can_fua = plugin_can_fua,
.can_multi_conn = plugin_can_multi_conn,
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
index e0136de1..518de703 100644
--- a/server/protocol-handshake-newstyle.c
+++ b/server/protocol-handshake-newstyle.c
@@ -233,6 +233,7 @@ negotiate_handshake_newstyle_options (struct connection *conn)
char data[MAX_OPTION_LENGTH+1];
struct new_handshake_finish handshake_finish;
const char *optname;
+ int r;
for (nr_options = 0; nr_options < MAX_NR_OPTIONS; ++nr_options) {
if (conn_recv_full (conn, &new_option, sizeof new_option,
@@ -481,6 +482,20 @@ negotiate_handshake_newstyle_options (struct connection *conn)
debug ("newstyle negotiation: %s: client requested structured replies",
name_of_nbd_opt (option));
+ r = backend->can_sr (backend, conn);
+ if (r == -1)
+ return -1;
+ if (!r) {
+ /* Must fail with ERR_UNSUP for qemu 4.2 to remain happy;
+ * but failing with ERR_POLICY would have been nicer.
+ */
+ if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_UNSUP) == -1)
+ return -1;
+ debug ("newstyle negotiation: %s: filter disabled structured replies",
+ name_of_nbd_opt (option));
+ break;
+ }
+
if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
return -1;
@@ -499,6 +514,9 @@ negotiate_handshake_newstyle_options (struct connection *conn)
if (conn_recv_full (conn, data, optlen, "read: %s: %m", optname) ==
-1)
return -1;
+ /* Note that we support base:allocation whether or not the plugin
+ * supports can_extents.
+ */
if (!conn->structured_replies) {
if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
== -1)
diff --git a/server/protocol-handshake.c b/server/protocol-handshake.c
index 0f3bd280..2cd9f01f 100644
--- a/server/protocol-handshake.c
+++ b/server/protocol-handshake.c
@@ -122,10 +122,13 @@ protocol_compute_eflags (struct connection *conn, uint16_t *flags)
conn->emulate_cache = fl == NBDKIT_CACHE_EMULATE;
}
- /* The result of this is not returned to callers here (or at any
- * time during the handshake). However it makes sense to do it once
- * per connection and store the result in the handle anyway. This
- * protocol_compute_eflags function is a bit misnamed XXX.
+ /* The result of this is not returned to callers here (in fact,
+ * earlier in the handshake, we unconditionally advertise
+ * base:allocation support to a client that asks). However it makes
+ * sense to learn once per connection whether filters can use
+ * extents, regardless of whether the client will do so, so store
+ * the result in the handle anyway. This protocol_compute_eflags
+ * function is a bit misnamed XXX.
*/
fl = backend->can_extents (backend, conn);
if (fl == -1)
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index 94f17789..83ff3aae 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -71,6 +71,7 @@ struct nbdkit_next_ops {
int (*is_rotational) (void *nxdata);
int (*can_trim) (void *nxdata);
int (*can_zero) (void *nxdata);
+ int (*can_sr) (void *nxdata);
int (*can_extents) (void *nxdata);
int (*can_fua) (void *nxdata);
int (*can_multi_conn) (void *nxdata);
@@ -139,6 +140,8 @@ struct nbdkit_filter {
void *handle);
int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
+ int (*can_sr) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
int (*can_extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
diff --git a/filters/noextents/noextents.c b/filters/noextents/noextents.c
index e39723cd..c95b7e53 100644
--- a/filters/noextents/noextents.c
+++ b/filters/noextents/noextents.c
@@ -32,8 +32,33 @@
#include <config.h>
+#include <string.h>
+
#include <nbdkit-filter.h>
+/* Whether to permit SR */
+static int structured = 1;
+
+static int
+noextents_config (nbdkit_next_config *next, void *nxdata,
+ const char *key, const char *value)
+{
+ if (strcmp (key, "structured") == 0)
+ return structured = nbdkit_parse_bool (value);
+
+ return next (nxdata, key, value);
+}
+
+#define noextents_config_help \
+ "structured=<BOOL> Set to false to inhibit structured replies (default
true).\n"
+
+static int
+noextents_can_sr (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ return structured;
+}
+
static int
noextents_can_extents (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle)
@@ -45,6 +70,9 @@ static struct nbdkit_filter filter = {
.name = "noextents",
.longname = "nbdkit noextents filter",
.version = PACKAGE_VERSION,
+ .config = noextents_config,
+ .config_help = noextents_config_help,
+ .can_sr = noextents_can_sr,
.can_extents = noextents_can_extents,
};
diff --git a/TODO b/TODO
index 332400b3..87621791 100644
--- a/TODO
+++ b/TODO
@@ -198,4 +198,5 @@ using ‘#define NBDKIT_API_VERSION <version>’.
that supports .extents, the v2 protocol would allow us to at least
synthesize NBD_REPLY_TYPE_OFFSET_HOLE for less network traffic, even
though the plugin will still have to fully populate the .pread
- buffer; the v3 protocol should make sparse reads more direct.
+ buffer; the v3 protocol should make sparse reads more direct. Also,
+ the filter callback .can_sr may make sense for a v3 plugin.
* Parameters should be systematized so that they aren't just (key,
value) strings. nbdkit should know the possible keys for the plugin
diff --git a/tests/test-eflags.sh b/tests/test-eflags.sh
index b3724170..7a064bbb 100755
--- a/tests/test-eflags.sh
+++ b/tests/test-eflags.sh
@@ -87,6 +87,8 @@ fail ()
#----------------------------------------------------------------------
# can_write=false
+#
+# nbdkit supports DF if client requests SR.
do_nbdkit <<'EOF'
case "$1" in
@@ -98,6 +100,22 @@ EOF
[ $eflags -eq $(( HAS_FLAGS|READ_ONLY|SEND_DF )) ] ||
fail "expected HAS_FLAGS|READ_ONLY|SEND_DF"
+#----------------------------------------------------------------------
+# --filter=noextents structured=false
+# can_write=false
+#
+# The noextents filter can force no SR, and thus no DF.
+
+late_args="structured=false" do_nbdkit --filter=noextents
<<'EOF'
+case "$1" in
+ get_size) echo 1M ;;
+ *) exit 2 ;;
+esac
+EOF
+
+[ $eflags -eq $(( HAS_FLAGS|READ_ONLY )) ] ||
+ fail "expected HAS_FLAGS|READ_ONLY"
+
#----------------------------------------------------------------------
# -r
# can_write=false
@@ -147,6 +165,24 @@ EOF
[ $eflags -eq $(( HAS_FLAGS|SEND_DF )) ] ||
fail "expected HAS_FLAGS|SEND_DF"
+#----------------------------------------------------------------------
+# --filter=nozero
+# --filter=noextents structured=false
+# can_write=true
+#
+# Absolute minimum in flags.
+
+late_args="structured=false" do_nbdkit --filter=nozero --filter=noextents
<<'EOF'
+case "$1" in
+ get_size) echo 1M ;;
+ can_write) exit 0 ;;
+ *) exit 2 ;;
+esac
+EOF
+
+[ $eflags -eq $(( HAS_FLAGS )) ] ||
+ fail "expected HAS_FLAGS"
+
#----------------------------------------------------------------------
# -r
# can_write=true
--
2.20.1