While our plugin code always advertises WRITE_ZEROES on writable
connections (because we can emulate .zero by calling .pwrite),
and FUA support when .flush exists (because we can emulate the
persistence by calling .flush), it is conceivable that a filter
may want to explicitly avoid advertising particular bits. (More
to the point, I plan on writing a 'nozero' filter that hides
WRITE_ZEROES support, at least for the purposes of timing
comparisons between server emulation and the client having to
send zeroes over the wire). Furthermore, since we only have
to honor FUA support on write requests, we do not need to
advertise it on a readonly connection.
Wire up two new backend callbacks, as well as exposing them to the
filter interface (for now, we intentionally do not expose .can_zero
to the plugin interface, and later patches will worry about exposing
.can_fua as part of updating the plugin interface to support flags).
This change breaks filter API; a filter compiled against an earlier
nbdkit will not run against this version of nbdkit. We haven't
promised ABI stability yet, in part because a filter compiled
against an older nbdkit needs to be aware of any new entry points
(and other upcoming changes, such as adding FUA flag support);
conversely, a filter compiled against a newer nbdkit is liable to
try to use new pointers in nbdkit_next_ops that are not part of
the current nbdkit, and while we have _struct_size to protect what
we call in the filter, we do not have a similar mechanism to
protect what the filter can call back in our code.
As such, this patch can keep the public interface logically grouped
by inserting members in the middle of the struct, rather than
worrying about having to stick additions at the end.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/nbdkit-filter.pod | 21 ++++++++++++++-
include/nbdkit-filter.h | 8 +++++-
src/connections.c | 29 +++++++++++++++++---
src/filters.c | 70 ++++++++++++++++++++++++++++++++++++++++++++-----
src/internal.h | 2 ++
src/plugins.c | 22 ++++++++++++++++
6 files changed, 140 insertions(+), 12 deletions(-)
diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index be3c7a0..dbd4385 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -338,7 +338,26 @@ should call C<nbdkit_error> with an error message and return
C<-1>.
int (*can_trim) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
-These intercept the corresponding plugin methods.
+These intercept the corresponding plugin methods, and control feature
+bits advertised to the client.
+
+If there is an error, the callback should call C<nbdkit_error> with an
+error message and return C<-1>.
+
+=head2 C<.can_zero>
+
+=head2 C<.can_fua>
+
+ int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
+ int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
+
+These control feature bits that are advertised to the client. These
+functions have no counterpart in plugins, because nbdkit can always
+emulate zero by using pwrite and emulate Forced Unit Access (FUA) by
+using flush; but a filter can implement these to force a bypass of the
+nbdkit fallbacks.
If there is an error, the callback should call C<nbdkit_error> with an
error message and return C<-1>.
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index 8ea9919..2ff2902 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2017 Red Hat Inc.
+ * Copyright (C) 2013-2018 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,6 +57,8 @@ struct nbdkit_next_ops {
int (*can_flush) (void *nxdata);
int (*is_rotational) (void *nxdata);
int (*can_trim) (void *nxdata);
+ int (*can_zero) (void *nxdata);
+ int (*can_fua) (void *nxdata);
int (*pread) (void *nxdata, void *buf, uint32_t count, uint64_t offset);
int (*pwrite) (void *nxdata,
@@ -111,6 +113,10 @@ struct nbdkit_filter {
void *handle);
int (*can_trim) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
+ int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
+ int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
int (*pread) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle, void *buf, uint32_t count, uint64_t offset);
diff --git a/src/connections.c b/src/connections.c
index 75c2c2d..5a7b2d8 100644
--- a/src/connections.c
+++ b/src/connections.c
@@ -80,6 +80,8 @@ struct connection {
int can_flush;
int is_rotational;
int can_trim;
+ int can_zero;
+ int can_fua;
int using_tls;
int sockin, sockout;
@@ -426,7 +428,13 @@ compute_eflags (struct connection *conn, uint16_t *flags)
conn->readonly = 1;
}
if (!conn->readonly) {
- eflags |= NBD_FLAG_SEND_WRITE_ZEROES;
+ fl = backend->can_zero (backend, conn);
+ if (fl == -1)
+ return -1;
+ if (fl) {
+ eflags |= NBD_FLAG_SEND_WRITE_ZEROES;
+ conn->can_zero = 1;
+ }
fl = backend->can_trim (backend, conn);
if (fl == -1)
@@ -435,13 +443,21 @@ compute_eflags (struct connection *conn, uint16_t *flags)
eflags |= NBD_FLAG_SEND_TRIM;
conn->can_trim = 1;
}
+
+ fl = backend->can_fua (backend, conn);
+ if (fl == -1)
+ return -1;
+ if (fl) {
+ eflags |= NBD_FLAG_SEND_FUA;
+ conn->can_fua = 1;
+ }
}
fl = backend->can_flush (backend, conn);
if (fl == -1)
return -1;
if (fl) {
- eflags |= NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA;
+ eflags |= NBD_FLAG_SEND_FLUSH;
conn->can_flush = 1;
}
@@ -880,7 +896,7 @@ validate_request (struct connection *conn,
*error = EINVAL;
return false;
}
- if (!conn->can_flush && (flags & NBD_CMD_FLAG_FUA)) {
+ if (!conn->can_fua && (flags & NBD_CMD_FLAG_FUA)) {
nbdkit_error ("invalid request: FUA flag not supported");
*error = EINVAL;
return false;
@@ -909,6 +925,13 @@ validate_request (struct connection *conn,
return false;
}
+ /* Zero allowed? */
+ if (!conn->can_zero && cmd == NBD_CMD_WRITE_ZEROES) {
+ nbdkit_error ("invalid request: write zeroes operation not supported");
+ *error = EINVAL;
+ return false;
+ }
+
return true; /* Command validates. */
}
diff --git a/src/filters.c b/src/filters.c
index 2804cba..87c6e97 100644
--- a/src/filters.c
+++ b/src/filters.c
@@ -297,6 +297,20 @@ next_can_trim (void *nxdata)
return b_conn->b->can_trim (b_conn->b, b_conn->conn);
}
+static int
+next_can_zero (void *nxdata)
+{
+ struct b_conn *b_conn = nxdata;
+ return b_conn->b->can_zero (b_conn->b, b_conn->conn);
+}
+
+static int
+next_can_fua (void *nxdata)
+{
+ struct b_conn *b_conn = nxdata;
+ return b_conn->b->can_fua (b_conn->b, b_conn->conn);
+}
+
static int
next_pread (void *nxdata, void *buf, uint32_t count, uint64_t offset)
{
@@ -348,6 +362,8 @@ static struct nbdkit_next_ops next_ops = {
.flush = next_flush,
.trim = next_trim,
.zero = next_zero,
+ .can_zero = next_can_zero,
+ .can_fua = next_can_fua,
};
static int
@@ -455,6 +471,36 @@ filter_can_trim (struct backend *b, struct connection *conn)
return f->backend.next->can_trim (f->backend.next, conn);
}
+static int
+filter_can_zero (struct backend *b, struct connection *conn)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ void *handle = connection_get_handle (conn, f->backend.i);
+ struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+ debug ("can_zero");
+
+ if (f->filter.can_zero)
+ return f->filter.can_zero (&next_ops, &nxdata, handle);
+ else
+ return f->backend.next->can_zero (f->backend.next, conn);
+}
+
+static int
+filter_can_fua (struct backend *b, struct connection *conn)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ void *handle = connection_get_handle (conn, f->backend.i);
+ struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+ debug ("can_fua");
+
+ if (f->filter.can_fua)
+ return f->filter.can_fua (&next_ops, &nxdata, handle);
+ else
+ return f->backend.next->can_fua (f->backend.next, conn);
+}
+
static int
filter_pread (struct backend *b, struct connection *conn,
void *buf, uint32_t count, uint64_t offset,
@@ -582,6 +628,8 @@ static struct backend filter_functions = {
.flush = filter_flush,
.trim = filter_trim,
.zero = filter_zero,
+ .can_zero = filter_can_zero,
+ .can_fua = filter_can_fua,
};
/* Register and load a filter. */
@@ -626,15 +674,23 @@ filter_register (struct backend *next, size_t index, const char
*filename,
exit (EXIT_FAILURE);
}
- /* Since the filter might be much older than the current version of
- * nbdkit, only copy up to the self-declared _struct_size of the
- * filter and zero out the rest. If the filter is much newer then
- * we'll only call the "old" fields.
+ /* Providing a stable filter ABI is much harder than a stable plugin
+ * ABI. Any newer filter compiled against a larger struct
+ * nbdkit_next_ops could potentially dereference fields we did not
+ * provide; and any older filter compiled against a smaller struct
+ * nbdkit_filter may miss intercepting functions that are vital to
+ * avoid corrupting the client-visible data. So for now, we insist
+ * on an exact match in _struct_size, and promise that
+ * nbdkit_next_ops won't change size without also changing
+ * _struct_size.
*/
size = sizeof f->filter; /* our struct */
- memset (&f->filter, 0, size);
- if (size > filter->_struct_size)
- size = filter->_struct_size;
+ if (filter->_struct_size != size) {
+ fprintf (stderr,
+ "%s: %s: filter compiled against incompatible nbdkit version\n",
+ program_name, f->filename);
+ exit (EXIT_FAILURE);
+ }
memcpy (&f->filter, filter, size);
/* Only filter.name is required. */
diff --git a/src/internal.h b/src/internal.h
index 3cbfde5..2f03279 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -191,6 +191,8 @@ struct backend {
int (*flush) (struct backend *, struct connection *conn, uint32_t flags);
int (*trim) (struct backend *, struct connection *conn, uint32_t count, uint64_t
offset, uint32_t flags);
int (*zero) (struct backend *, struct connection *conn, uint32_t count, uint64_t
offset, uint32_t flags);
+ int (*can_zero) (struct backend *, struct connection *conn);
+ int (*can_fua) (struct backend *, struct connection *conn);
};
/* plugins.c */
diff --git a/src/plugins.c b/src/plugins.c
index dba3e24..3468b6d 100644
--- a/src/plugins.c
+++ b/src/plugins.c
@@ -358,6 +358,26 @@ plugin_can_trim (struct backend *b, struct connection *conn)
return p->plugin.trim != NULL;
}
+static int
+plugin_can_zero (struct backend *b, struct connection *conn)
+{
+ debug ("can_zero");
+
+ // We always allow .zero to fall back to .write, so plugins don't
+ // need to override this
+ return plugin_can_write (b, conn);
+}
+
+static int
+plugin_can_fua (struct backend *b, struct connection *conn)
+{
+ debug ("can_fua");
+
+ // TODO - wire FUA flag support into plugins. Until then, this copies
+ // can_flush, since that's how we emulate FUA
+ return plugin_can_flush (b, conn);
+}
+
static int
plugin_pread (struct backend *b, struct connection *conn,
void *buf, uint32_t count, uint64_t offset, uint32_t flags)
@@ -535,6 +555,8 @@ static struct backend plugin_functions = {
.flush = plugin_flush,
.trim = plugin_trim,
.zero = plugin_zero,
+ .can_zero = plugin_can_zero,
+ .can_fua = plugin_can_fua,
};
/* Register and load a plugin. */
--
2.14.3