When using cache=unsafe, we are already stating that the plugin will
not necessarily ever get our cached data written back, but locally
everything is already consistent (we grab a shared lock before
consulting the cache), so we can behave as if client flush requests
always works in a multi-conn safe manner, even without bothering with
fdatasync() on our cache file.
When using cache=writethrough, our behavior under multiple connections
is at the mercy of the plugin (if connection A and B both write, but
only B flushes, we can only guarantee that the data from A was flushed
if the plugin advertises multi-conn; as otherwise, the write by
connection A may still be in a cache that was not flushed by B's
command). But when using cache=writeback, we are guaranteed that only
one connection is ever writing back to the server at a time (that is,
once we flush, we are flushing data from ALL connections), so we can
always advertise multi-conn.
---
This is fallout from IRC discussion we had earlier today about whether
it was safe to enable multi-conn on the ssh plugin.
After re-reading
https://sourceforge.net/p/nbd/mailman/nbd-general/?viewmonth=201610&v...,
my take is that the original goal for MULTI_CONN is that when clear,
if connection A and B both write, then both must FLUSH to guarantee
that their writes reached persistent storage (if only one flushes, the
other connection may still have cached data that remains unflushed).
But when MULTI_CONN is set, only one of the two connections has to
issue a flush after both connections have received replies to all
writes that are intended to be made persistent. And the way we share
our cache for cache=unsafe and cache=writeback meets that goal.
filters/cache/cache.c | 56 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 54 insertions(+), 2 deletions(-)
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index b979f256..9e5a3e80 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -257,6 +257,53 @@ cache_can_fast_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
return 1;
}
+/* Override the plugin's .can_flush, if we are cache=unsafe */
+static int
+cache_can_flush (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ if (cache_mode == CACHE_MODE_UNSAFE)
+ return 1;
+ return next_ops->can_flush (nxdata);
+}
+
+
+/* Override the plugin's .can_fua, if we are cache=unsafe */
+static int
+cache_can_fua (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ if (cache_mode == CACHE_MODE_UNSAFE)
+ return NBDKIT_FUA_NATIVE;
+ return next_ops->can_fua (nxdata);
+}
+
+/* Override the plugin's .can_multi_conn, if we are not cache=writethrough */
+static int
+cache_can_multi_conn (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ /* For CACHE_MODE_NONE, we always advertise a no-op flush because
+ * our local cache access is consistent between connections, and we
+ * don't care about persisting the data to the underlying plugin.
+ *
+ * For CACHE_MODE_WRITEBACK, things are more subtle: we only write
+ * to the plugin during NBD_CMD_FLUSH, at which point that one
+ * connection writes back ALL cached blocks regardless of which
+ * connection originally wrote them, so a client can be assured that
+ * blocks from all connections have reached the plugin's permanent
+ * storage with only one connection having to send a flush.
+ *
+ * But for CACHE_MODE_WRITETHROUGH, we are at the mercy of the
+ * plugin; data written by connection A is not guaranteed to be made
+ * persistent by a flush from connection B unless the plugin itself
+ * supports multi-conn.
+ */
+ if (cache_mode !== CACHE_MODE_WRITETHROUGH)
+ return 1;
+ return next_ops->can_multi_conn (nxdata);
+}
+
/* Read data. */
static int
cache_pread (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -352,7 +399,8 @@ cache_pwrite (struct nbdkit_next_ops *next_ops, void *nxdata,
}
if ((flags & NBDKIT_FLAG_FUA) &&
- next_ops->can_fua (nxdata) == NBDKIT_FUA_EMULATE) {
+ (cache_mode == CACHE_MODE_UNSAFE ||
+ next_ops->can_fua (nxdata) == NBDKIT_FUA_EMULATE)) {
flags &= ~NBDKIT_FLAG_FUA;
need_flush = true;
}
@@ -442,7 +490,8 @@ cache_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
flags &= ~NBDKIT_FLAG_MAY_TRIM;
if ((flags & NBDKIT_FLAG_FUA) &&
- next_ops->can_fua (nxdata) == NBDKIT_FUA_EMULATE) {
+ (cache_mode == CACHE_MODE_UNSAFE ||
+ next_ops->can_fua (nxdata) == NBDKIT_FUA_EMULATE)) {
flags &= ~NBDKIT_FLAG_FUA;
need_flush = true;
}
@@ -639,6 +688,9 @@ static struct nbdkit_filter filter = {
.get_size = cache_get_size,
.can_cache = cache_can_cache,
.can_fast_zero = cache_can_fast_zero,
+ .can_flush = cache_can_flush,
+ .can_fua = cache_can_fua,
+ .can_multi_conn = cache_can_multi_conn,
.pread = cache_pread,
.pwrite = cache_pwrite,
.zero = cache_zero,
--
2.30.1