The tri-state return pattern has proved valuable in other cases where
a backend wants to opt in or out of nbdkit fallbacks. Using a
tri-state return pattern at the backend level unifies the different
semantics between plugin and filter .can_zero return values, and makes
it possible for plugins to use cached results of .can_zero rather than
calling into the plugin on each .zero request. As in other recent
patches, it is easy to write an sh script that demonstrates a
resulting speedup from the caching. All filters are in-tree and do
not have a promise of API compatability, so it's easy to update all of
them (the log filter is okay as-is, the nozero filter is easy to
update, and no other filter overrides .can_zero). But for backwards
compatibility reasons, we cannot change the semantics of the plugin
.can_zero return value while remaining at API version 2; so that
remains a bool, but now selects between EMULATE or NATIVE at the
backend level.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/nbdkit-filter.pod | 44 ++++++++++++++++++++++++++++++-----------
server/backend.c | 2 +-
server/plugins.c | 33 ++++++++++++++++---------------
include/nbdkit-filter.h | 4 ++++
filters/nozero/nozero.c | 9 ++++++++-
5 files changed, 63 insertions(+), 29 deletions(-)
diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index 3333d6b5..c83fcbfe 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -393,16 +393,37 @@ fail.
These intercept the corresponding plugin methods, and control feature
bits advertised to the client.
-Of note, the C<.can_zero> callback in the filter controls whether the
-server advertises zero support to the client, which is slightly
-different semantics than the plugin; that is,
-C<next_ops-E<gt>can_zero> always returns true for a plugin, even when
-the plugin's own C<.can_zero> callback returned false, because nbdkit
-implements a fallback to C<.pwrite> at the plugin layer.
+Of note, the semantics of C<.can_zero> callback in the filter are
+slightly different from the plugin, and must be one of three success
+values visible only to filters:
+
+=over 4
+
+=item C<NBDKIT_ZERO_NONE>
+
+Completely suppress advertisement of write zero support (this can only
+be done from filters, not plugins).
+
+=item C<NBDKIT_ZERO_EMULATE>
+
+Inform nbdkit that write zeroes should immediately fall back to
+C<.pwrite> emulation without trying C<.zero> (this value is returned
+by C<next_ops-E<gt>can_zero> if the plugin returned false in its
+C<.can_zero>).
+
+=item C<NBDKIT_ZERO_NATIVE>
+
+Inform nbdkit that write zeroes should attempt to use C<.zero>,
+although it may still fall back to C<.pwrite> emulation for C<ENOTSUP>
+or C<EOPNOTSUPP> failures (this value is returned by
+C<next_ops-E<gt>can_zero> if the plugin returned true in its
+C<.can_zero>).
+
+=back
Remember that most of the feature check functions return merely a
-boolean success value, while C<.can_fua> and C<.can_cache> have three
-success values.
+boolean success value, while C<.can_zero>, C<.can_fua> and
+C<.can_cache> have three success values.
The difference between C<.can_fua> values may affect choices made in
the filter: when splitting a write request that requested FUA from the
@@ -514,9 +535,10 @@ value to return to the client.
This intercepts the plugin C<.zero> method and can be used to modify
zero requests.
-This function will not be called if C<.can_zero> returned false; in
-turn, the filter should not call C<next_ops-E<gt>zero> if
-C<next_ops-E<gt>can_zero> did not return true.
+This function will not be called if C<.can_zero> returned
+C<NBDKIT_ZERO_NONE>; in turn, the filter should not call
+C<next_ops-E<gt>zero> if C<next_ops-E<gt>can_zero> returned
+C<NBDKIT_ZERO_NONE>.
On input, the parameter C<flags> may include C<NBDKIT_FLAG_MAY_TRIM>
unconditionally, and C<NBDKIT_FLAG_FUA> based on the result of
diff --git a/server/backend.c b/server/backend.c
index 196b48e4..1e3a0109 100644
--- a/server/backend.c
+++ b/server/backend.c
@@ -207,7 +207,7 @@ backend_can_zero (struct backend *b, struct connection *conn)
if (h->can_zero == -1) {
r = backend_can_write (b, conn);
if (r != 1) {
- h->can_zero = 0;
+ h->can_zero = NBDKIT_ZERO_NONE;
return r;
}
h->can_zero = b->can_zero (b, conn);
diff --git a/server/plugins.c b/server/plugins.c
index c8f4af90..bff4cd47 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -354,19 +354,26 @@ static int
plugin_can_zero (struct backend *b, struct connection *conn)
{
struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+ int r;
assert (connection_get_handle (conn, 0));
- /* Note the special case here: the plugin's .can_zero controls only
- * whether we call .zero; while the backend expects .can_zero to
- * return whether to advertise zero support. Since we ALWAYS know
- * how to fall back to .pwrite in plugin_zero(), we ignore the
- * difference between the plugin's true or false return, and only
- * call it to catch a -1 failure during negotiation. */
- if (p->plugin.can_zero &&
- p->plugin.can_zero (connection_get_handle (conn, 0)) == -1)
+ /* Note the special case here: the plugin's .can_zero returns a bool
+ * which controls only whether we call .zero; while the backend
+ * expects .can_zero to return a tri-state on level of support.
+ */
+ if (p->plugin.can_zero) {
+ r = p->plugin.can_zero (connection_get_handle (conn, 0));
+ if (r == -1)
+ return -1;
+ return r ? NBDKIT_ZERO_NATIVE : NBDKIT_ZERO_EMULATE;
+ }
+ if (p->plugin.zero || p->plugin._zero_old)
+ return NBDKIT_ZERO_NATIVE;
+ r = backend_can_write (b, conn);
+ if (r == -1)
return -1;
- return plugin_can_write (b, conn);
+ return r ? NBDKIT_ZERO_EMULATE : NBDKIT_ZERO_NONE;
}
static int
@@ -570,7 +577,6 @@ plugin_zero (struct backend *b, struct connection *conn,
bool fua = flags & NBDKIT_FLAG_FUA;
bool emulate = false;
bool need_flush = false;
- int can_zero = 1; /* TODO cache this per-connection? */
assert (connection_get_handle (conn, 0));
@@ -580,13 +586,8 @@ plugin_zero (struct backend *b, struct connection *conn,
}
if (!count)
return 0;
- if (p->plugin.can_zero) {
- /* Calling backend_can_zero would answer the wrong question */
- can_zero = p->plugin.can_zero (connection_get_handle (conn, 0));
- assert (can_zero != -1);
- }
- if (can_zero) {
+ if (backend_can_zero (b, conn) == NBDKIT_ZERO_NATIVE) {
errno = 0;
if (p->plugin.zero)
r = p->plugin.zero (connection_get_handle (conn, 0), count, offset,
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index 29d92755..6232e0e2 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -43,6 +43,10 @@
extern "C" {
#endif
+#define NBDKIT_ZERO_NONE 0
+#define NBDKIT_ZERO_EMULATE 1
+#define NBDKIT_ZERO_NATIVE 2
+
struct nbdkit_extent {
uint64_t offset;
uint64_t length;
diff --git a/filters/nozero/nozero.c b/filters/nozero/nozero.c
index 964cce9f..d916657e 100644
--- a/filters/nozero/nozero.c
+++ b/filters/nozero/nozero.c
@@ -94,7 +94,14 @@ nozero_prepare (struct nbdkit_next_ops *next_ops, void *nxdata, void
*handle)
static int
nozero_can_zero (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle)
{
- return zeromode != NONE;
+ switch (zeromode) {
+ case NONE:
+ return NBDKIT_ZERO_NONE;
+ case EMULATE:
+ return NBDKIT_ZERO_EMULATE;
+ default:
+ return next_ops->can_zero (nxdata);
+ }
}
static int
--
2.21.0