The NBD spec documents NBD_CMD_CACHE as an optional extension,
although it is rather vague on what semantics are required (future
spec additions may add flags to the cache command, where the use of a
flag requires specific caching behavior or an error if that behavior
is not possible). Unlike NBD_CMD_WRITE_ZEROES, we do not want to
default to an emulation (calling .pread and ignoring the results works
for some cases like local file systems, but actually penalizes other
cases like network access); still the code for doing a .pread and
ignoring the result is common enough to warrant having .can_cache
return a tri-state (similar to .can_fua). This patch wires up the
backend for the new entry points as well as the emulation handling,
although until later patches actually expose the new callbacks for
filters and plugins, a client cannot observe any difference yet.
Note that the NBD spec states that some older clients call
NBD_CMD_CACHE without first checking whether NBD_FLAG_SEND_CACHE is
set; we choose to flag cache requests from such clients as invalid.
Also, for bisection reasons, this patch treats any use of a filter as
a forced .can_cache of NBDKIT_CACHE_NONE to rather than passthrough to
the plugin's state. Once all affected filters are patched to handle
cache requests correctly, a later patch will then switch the filter
default to passthrough for the sake of remaining filters.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/nbdkit-protocol.pod | 8 ++++++++
common/protocol/protocol.h | 2 ++
include/nbdkit-common.h | 4 ++++
server/internal.h | 5 +++++
server/filters.c | 33 ++++++++++++++++++++++++++++++++-
server/plugins.c | 34 ++++++++++++++++++++++++++++++++++
server/protocol-handshake.c | 9 +++++++++
server/protocol.c | 26 ++++++++++++++++++++++++++
8 files changed, 120 insertions(+), 1 deletion(-)
diff --git a/docs/nbdkit-protocol.pod b/docs/nbdkit-protocol.pod
index f706cfd..ad470bd 100644
--- a/docs/nbdkit-protocol.pod
+++ b/docs/nbdkit-protocol.pod
@@ -152,6 +152,14 @@ when structured replies are in effect. However, the flag is a no-op
until we extend the plugin API to allow a fragmented read in the first
place.
+=item C<NBD_CMD_CACHE>
+
+Supported in nbdkit E<ge> 1.13.4.
+
+This protocol extension allows a client to inform the server about
+intent to access a portion of the export, to allow the server an
+opportunity to cache things appropriately.
+
=item Resize Extension
I<Not supported>.
diff --git a/common/protocol/protocol.h b/common/protocol/protocol.h
index c27104c..e938643 100644
--- a/common/protocol/protocol.h
+++ b/common/protocol/protocol.h
@@ -95,6 +95,7 @@ extern const char *name_of_nbd_flag (int);
#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6)
#define NBD_FLAG_SEND_DF (1 << 7)
#define NBD_FLAG_CAN_MULTI_CONN (1 << 8)
+#define NBD_FLAG_SEND_CACHE (1 << 10)
/* NBD options (new style handshake only). */
extern const char *name_of_nbd_opt (int);
@@ -217,6 +218,7 @@ extern const char *name_of_nbd_cmd (int);
#define NBD_CMD_DISC 2 /* Disconnect. */
#define NBD_CMD_FLUSH 3
#define NBD_CMD_TRIM 4
+#define NBD_CMD_CACHE 5
#define NBD_CMD_WRITE_ZEROES 6
#define NBD_CMD_BLOCK_STATUS 7
diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h
index 636a789..5257d99 100644
--- a/include/nbdkit-common.h
+++ b/include/nbdkit-common.h
@@ -65,6 +65,10 @@ extern "C" {
#define NBDKIT_FUA_EMULATE 1
#define NBDKIT_FUA_NATIVE 2
+#define NBDKIT_CACHE_NONE 0
+#define NBDKIT_CACHE_EMULATE 1
+#define NBDKIT_CACHE_NATIVE 2
+
#define NBDKIT_EXTENT_HOLE (1<<0) /* Same as NBD_STATE_HOLE */
#define NBDKIT_EXTENT_ZERO (1<<1) /* Same as NBD_STATE_ZERO */
diff --git a/server/internal.h b/server/internal.h
index 67fccfc..2ee5e23 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -170,6 +170,8 @@ struct connection {
bool can_zero;
bool can_fua;
bool can_multi_conn;
+ bool can_cache;
+ bool emulate_cache;
bool can_extents;
bool using_tls;
bool structured_replies;
@@ -276,6 +278,7 @@ struct backend {
int (*can_extents) (struct backend *, struct connection *conn);
int (*can_fua) (struct backend *, struct connection *conn);
int (*can_multi_conn) (struct backend *, struct connection *conn);
+ int (*can_cache) (struct backend *, struct connection *conn);
int (*pread) (struct backend *, struct connection *conn, void *buf,
uint32_t count, uint64_t offset, uint32_t flags, int *err);
@@ -290,6 +293,8 @@ struct backend {
int (*extents) (struct backend *, struct connection *conn, uint32_t count,
uint64_t offset, uint32_t flags,
struct nbdkit_extents *extents, int *err);
+ int (*cache) (struct backend *, struct connection *conn, uint32_t count,
+ uint64_t offset, uint32_t flags, int *err);
};
/* plugins.c */
diff --git a/server/filters.c b/server/filters.c
index b73e74f..e456fbf 100644
--- a/server/filters.c
+++ b/server/filters.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -573,6 +573,18 @@ filter_can_multi_conn (struct backend *b, struct connection *conn)
return f->backend.next->can_multi_conn (f->backend.next, conn);
}
+static int
+filter_can_cache (struct backend *b, struct connection *conn)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+
+ debug ("%s: can_cache", f->name);
+
+ /* FIXME: Default to f->backend.next->can_cache, once all filters
+ have been audited */
+ return NBDKIT_CACHE_NONE;
+}
+
static int
filter_pread (struct backend *b, struct connection *conn,
void *buf, uint32_t count, uint64_t offset,
@@ -702,6 +714,23 @@ filter_extents (struct backend *b, struct connection *conn,
extents, err);
}
+static int
+filter_cache (struct backend *b, struct connection *conn,
+ uint32_t count, uint64_t offset,
+ uint32_t flags, int *err)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+
+ assert (flags == 0);
+
+ debug ("%s: cache count=%" PRIu32 " offset=%" PRIu64 "
flags=0x%" PRIx32,
+ f->name, count, offset, flags);
+
+ /* FIXME: Allow filter to rewrite request */
+ return f->backend.next->cache (f->backend.next, conn,
+ count, offset, flags, err);
+}
+
static struct backend filter_functions = {
.free = filter_free,
.thread_model = filter_thread_model,
@@ -726,12 +755,14 @@ static struct backend filter_functions = {
.can_extents = filter_can_extents,
.can_fua = filter_can_fua,
.can_multi_conn = filter_can_multi_conn,
+ .can_cache = filter_can_cache,
.pread = filter_pread,
.pwrite = filter_pwrite,
.flush = filter_flush,
.trim = filter_trim,
.zero = filter_zero,
.extents = filter_extents,
+ .cache = filter_cache,
};
/* Register and load a filter. */
diff --git a/server/plugins.c b/server/plugins.c
index e26d133..cb9a50c 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -448,6 +448,17 @@ plugin_can_multi_conn (struct backend *b, struct connection *conn)
return 0; /* assume false */
}
+static int
+plugin_can_cache (struct backend *b, struct connection *conn)
+{
+ assert (connection_get_handle (conn, 0));
+
+ debug ("can_cache");
+
+ /* FIXME: return default based on plugin->cache */
+ return NBDKIT_CACHE_NONE;
+}
+
/* Plugins and filters can call this to set the true errno, in cases
* where !errno_is_preserved.
*/
@@ -693,6 +704,27 @@ plugin_extents (struct backend *b, struct connection *conn,
return r;
}
+static int
+plugin_cache (struct backend *b, struct connection *conn,
+ uint32_t count, uint64_t offset, uint32_t flags,
+ int *err)
+{
+ struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+ int r = -1;
+
+ assert (connection_get_handle (conn, 0));
+ assert (!flags);
+
+ debug ("cache count=%" PRIu32 " offset=%" PRIu64, count, offset);
+
+ /* FIXME: assert plugin->cache and call it */
+ assert (false);
+
+ if (r == -1)
+ *err = get_error (p);
+ return r;
+}
+
static struct backend plugin_functions = {
.free = plugin_free,
.thread_model = plugin_thread_model,
@@ -717,12 +749,14 @@ static struct backend plugin_functions = {
.can_extents = plugin_can_extents,
.can_fua = plugin_can_fua,
.can_multi_conn = plugin_can_multi_conn,
+ .can_cache = plugin_can_cache,
.pread = plugin_pread,
.pwrite = plugin_pwrite,
.flush = plugin_flush,
.trim = plugin_trim,
.zero = plugin_zero,
.extents = plugin_extents,
+ .cache = plugin_cache,
};
/* Register and load a plugin. */
diff --git a/server/protocol-handshake.c b/server/protocol-handshake.c
index d8cde77..0f3bd28 100644
--- a/server/protocol-handshake.c
+++ b/server/protocol-handshake.c
@@ -113,6 +113,15 @@ protocol_compute_eflags (struct connection *conn, uint16_t *flags)
}
}
+ fl = backend->can_cache (backend, conn);
+ if (fl == -1)
+ return -1;
+ if (fl) {
+ eflags |= NBD_FLAG_SEND_CACHE;
+ conn->can_cache = true;
+ conn->emulate_cache = fl == NBDKIT_CACHE_EMULATE;
+ }
+
/* The result of this is not returned to callers here (or at any
* time during the handshake). However it makes sense to do it once
* per connection and store the result in the handle anyway. This
diff --git a/server/protocol.c b/server/protocol.c
index 01d4c71..792b1ac 100644
--- a/server/protocol.c
+++ b/server/protocol.c
@@ -76,6 +76,7 @@ validate_request (struct connection *conn,
/* Validate cmd, offset, count. */
switch (cmd) {
case NBD_CMD_READ:
+ case NBD_CMD_CACHE:
case NBD_CMD_WRITE:
case NBD_CMD_TRIM:
case NBD_CMD_WRITE_ZEROES:
@@ -180,6 +181,14 @@ validate_request (struct connection *conn,
return false;
}
+ /* Cache allowed? */
+ if (!conn->can_cache && cmd == NBD_CMD_CACHE) {
+ nbdkit_error ("invalid request: %s: cache operation not supported",
+ name_of_nbd_cmd (cmd));
+ *error = EINVAL;
+ return false;
+ }
+
/* Block status allowed? */
if (cmd == NBD_CMD_BLOCK_STATUS) {
if (!conn->structured_replies) {
@@ -254,6 +263,23 @@ handle_request (struct connection *conn,
return err;
break;
+ case NBD_CMD_CACHE:
+ if (conn->emulate_cache) {
+ static char buf[MAX_REQUEST_SIZE]; /* data sink, never read */
+ uint32_t limit;
+
+ while (count) {
+ limit = MIN (count, sizeof buf);
+ if (backend->pread (backend, conn, buf, limit, offset, flags,
+ &err) == -1)
+ return err;
+ count -= limit;
+ }
+ }
+ else if (backend->cache (backend, conn, count, offset, 0, &err) == -1)
+ return err;
+ break;
+
case NBD_CMD_WRITE_ZEROES:
if (!(flags & NBD_CMD_FLAG_NO_HOLE))
f |= NBDKIT_FLAG_MAY_TRIM;
--
2.20.1