This is about the simplest implementation of NBD Structured Replies
(SR) that we can do right now.
It accepts NBD_OPT_STRUCTURED_REPLIES when negotiated by the client,
but only sends back the simplest possible SR when required to by
NBD_CMD_READ. The rest of the time it will send back simple replies
as before. We do not modify the plugin API so plugins are unable to
send complex SRs.
Also we do not handle human-readable error messages yet because that
would require changes in how we handle nbdkit_error().
Also we do not understand NBD_CMD_FLAG_DF, but that seems to be OK
because (a) we don't advertize the feature and (b) we only send back a
single chunk anyway.
---
docs/nbdkit-protocol.pod | 6 +-
server/protocol.h | 59 +++++++++---
plugins/nbd/nbd.c | 4 +-
server/connections.c | 189 ++++++++++++++++++++++++++++++++-------
tests/test-layers.c | 2 +-
5 files changed, 212 insertions(+), 48 deletions(-)
diff --git a/docs/nbdkit-protocol.pod b/docs/nbdkit-protocol.pod
index c44db6a..68438fa 100644
--- a/docs/nbdkit-protocol.pod
+++ b/docs/nbdkit-protocol.pod
@@ -122,7 +122,11 @@ Supported in nbdkit E<ge> 1.9.9.
=item Structured Replies
-I<Not supported>.
+Supported in nbdkit E<ge> 1.11.8.
+
+However we don’t expose the capability to send structured replies to
+plugins yet, nor do we send human-readable error messages using this
+facility.
=item Block Status
diff --git a/server/protocol.h b/server/protocol.h
index 003fc45..0aadd46 100644
--- a/server/protocol.h
+++ b/server/protocol.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -98,12 +98,13 @@ extern const char *name_of_nbd_flag (int);
/* NBD options (new style handshake only). */
extern const char *name_of_nbd_opt (int);
-#define NBD_OPT_EXPORT_NAME 1
-#define NBD_OPT_ABORT 2
-#define NBD_OPT_LIST 3
-#define NBD_OPT_STARTTLS 5
-#define NBD_OPT_INFO 6
-#define NBD_OPT_GO 7
+#define NBD_OPT_EXPORT_NAME 1
+#define NBD_OPT_ABORT 2
+#define NBD_OPT_LIST 3
+#define NBD_OPT_STARTTLS 5
+#define NBD_OPT_INFO 6
+#define NBD_OPT_GO 7
+#define NBD_OPT_STRUCTURED_REPLY 8
extern const char *name_of_nbd_rep (int);
#define NBD_REP_ACK 1
@@ -144,15 +145,49 @@ struct request {
uint32_t count; /* Request length. */
} __attribute__((packed));
-/* Reply (server -> client). */
-struct reply {
- uint32_t magic; /* NBD_REPLY_MAGIC. */
+/* Simple reply (server -> client). */
+struct simple_reply {
+ uint32_t magic; /* NBD_SIMPLE_REPLY_MAGIC. */
uint32_t error; /* NBD_SUCCESS or one of NBD_E*. */
uint64_t handle; /* Opaque handle. */
} __attribute__((packed));
-#define NBD_REQUEST_MAGIC 0x25609513
-#define NBD_REPLY_MAGIC 0x67446698
+/* Structured reply (server -> client). */
+struct structured_reply {
+ uint32_t magic; /* NBD_STRUCTURED_REPLY_MAGIC. */
+ uint16_t flags; /* NBD_REPLY_FLAG_* */
+ uint16_t type; /* NBD_REPLY_TYPE_* */
+ uint64_t handle; /* Opaque handle. */
+ uint32_t length; /* Length of payload which follows. */
+} __attribute__((packed));
+
+struct structured_reply_offset_data {
+ uint64_t offset; /* offset */
+ /* Followed by data. */
+} __attribute__((packed));
+
+struct structured_reply_error {
+ uint32_t error; /* NBD_E* error number */
+ uint16_t len; /* Length of human readable error. */
+ /* Followed by human readable error string. */
+} __attribute__((packed));
+
+#define NBD_REQUEST_MAGIC 0x25609513
+#define NBD_SIMPLE_REPLY_MAGIC 0x67446698
+#define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef
+
+/* Structured reply flags. */
+extern const char *name_of_nbd_reply_flag (int);
+#define NBD_REPLY_FLAG_DONE (1<<0)
+
+/* Structured reply types. */
+extern const char *name_of_nbd_reply_type (int);
+#define NBD_REPLY_TYPE_NONE 0
+#define NBD_REPLY_TYPE_OFFSET_DATA 1
+#define NBD_REPLY_TYPE_OFFSET_HOLE 2
+#define NBD_REPLY_TYPE_BLOCK_STATUS 3
+#define NBD_REPLY_TYPE_ERROR 32769
+#define NBD_REPLY_TYPE_ERROR_OFFSET 32770
/* NBD commands. */
extern const char *name_of_nbd_cmd (int);
diff --git a/plugins/nbd/nbd.c b/plugins/nbd/nbd.c
index 674f4a4..2f494cd 100644
--- a/plugins/nbd/nbd.c
+++ b/plugins/nbd/nbd.c
@@ -345,7 +345,7 @@ nbd_request (struct handle *h, uint16_t flags, uint16_t type, uint64_t
offset,
static int
nbd_reply_raw (struct handle *h, int *fd)
{
- struct reply rep;
+ struct simple_reply rep;
struct transaction *trans;
void *buf;
uint32_t count;
@@ -353,7 +353,7 @@ nbd_reply_raw (struct handle *h, int *fd)
*fd = -1;
if (read_full (h->fd, &rep, sizeof rep) < 0)
return nbd_mark_dead (h);
- if (be32toh (rep.magic) != NBD_REPLY_MAGIC)
+ if (be32toh (rep.magic) != NBD_SIMPLE_REPLY_MAGIC)
return nbd_mark_dead (h);
nbdkit_debug ("received reply for cookie %#" PRIx64 ", status %s",
rep.handle, name_of_nbd_error(be32toh (rep.error)));
diff --git a/server/connections.c b/server/connections.c
index 51d4912..aeb27f8 100644
--- a/server/connections.c
+++ b/server/connections.c
@@ -86,6 +86,7 @@ struct connection {
bool can_fua;
bool can_multi_conn;
bool using_tls;
+ bool structured_replies;
int sockin, sockout;
connection_recv_function recv;
@@ -905,6 +906,26 @@ _negotiate_handshake_newstyle_options (struct connection *conn)
break;
+ case NBD_OPT_STRUCTURED_REPLY:
+ if (optlen != 0) {
+ if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID)
+ == -1)
+ return -1;
+ if (conn_recv_full (conn, data, optlen,
+ "read: %s: %m", name_of_nbd_opt (option)) == -1)
+ return -1;
+ continue;
+ }
+
+ debug ("newstyle negotiation: %s: client requested structured replies",
+ name_of_nbd_opt (option));
+
+ if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1)
+ return -1;
+
+ conn->structured_replies = true;
+ break;
+
default:
/* Unknown option. */
if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_UNSUP) == -1)
@@ -1224,12 +1245,123 @@ nbd_errno (int error)
}
}
+static int
+send_simple_reply (struct connection *conn,
+ uint64_t handle, uint16_t cmd,
+ const char *buf, uint32_t count,
+ uint32_t error)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
+ struct simple_reply reply;
+ int r;
+
+ reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
+ reply.handle = handle;
+ reply.error = htobe32 (nbd_errno (error));
+
+ r = conn->send (conn, &reply, sizeof reply);
+ if (r == -1) {
+ nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
+ return set_status (conn, -1);
+ }
+
+ /* Send the read data buffer. */
+ if (cmd == NBD_CMD_READ && !error) {
+ r = conn->send (conn, buf, count);
+ if (r == -1) {
+ nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+ return set_status (conn, -1);
+ }
+ }
+
+ return 1; /* command processed ok */
+}
+
+static int
+send_structured_reply_read (struct connection *conn,
+ uint64_t handle, uint16_t cmd,
+ const char *buf, uint32_t count, uint64_t offset)
+{
+ /* Once we are really using structured replies and sending data back
+ * in chunks, we'll be able to grab the write lock for each chunk,
+ * allowing other threads to interleave replies. As we're not doing
+ * that yet we acquire the lock for the whole function.
+ */
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
+ struct structured_reply reply;
+ struct structured_reply_offset_data offset_data;
+ int r;
+
+ assert (cmd == NBD_CMD_READ);
+
+ reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
+ reply.handle = handle;
+ reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
+ reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
+ reply.length = htobe32 (sizeof offset_data + count);
+
+ r = conn->send (conn, &reply, sizeof reply);
+ if (r == -1) {
+ nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
+ return set_status (conn, -1);
+ }
+
+ /* Send the offset + read data buffer. */
+ offset_data.offset = htobe64 (offset);
+ r = conn->send (conn, &offset_data, sizeof offset_data);
+ if (r == -1) {
+ nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+ return set_status (conn, -1);
+ }
+
+ r = conn->send (conn, buf, count);
+ if (r == -1) {
+ nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+ return set_status (conn, -1);
+ }
+
+ return 1; /* command processed ok */
+}
+
+static int
+send_structured_reply_error (struct connection *conn,
+ uint64_t handle, uint16_t cmd, uint32_t error)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
+ struct structured_reply reply;
+ struct structured_reply_error error_data;
+ int r;
+
+ reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
+ reply.handle = handle;
+ reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
+ reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
+ reply.length = htobe32 (sizeof error_data + 0 /* no human readable error */);
+
+ r = conn->send (conn, &reply, sizeof reply);
+ if (r == -1) {
+ nbdkit_error ("write error reply: %m");
+ return set_status (conn, -1);
+ }
+
+ /* Send the error. */
+ error_data.error = htobe32 (error);
+ error_data.len = htobe16 (0);
+ r = conn->send (conn, &error_data, sizeof error_data);
+ if (r == -1) {
+ nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
+ return set_status (conn, -1);
+ }
+ /* No human readable error message at the moment. */
+
+ return 1; /* command processed ok */
+}
+
static int
recv_request_send_reply (struct connection *conn)
{
int r;
struct request request;
- struct reply reply;
uint16_t cmd, flags;
uint32_t magic, count, error = 0;
uint64_t offset;
@@ -1317,40 +1449,33 @@ recv_request_send_reply (struct connection *conn)
/* Send the reply packet. */
send_reply:
- {
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
- if (get_status (conn) < 0)
- return -1;
- reply.magic = htobe32 (NBD_REPLY_MAGIC);
- reply.handle = request.handle;
- reply.error = htobe32 (nbd_errno (error));
+ if (get_status (conn) < 0)
+ return -1;
- if (error != 0) {
- /* Since we're about to send only the limited NBD_E* errno to the
- * client, don't lose the information about what really happened
- * on the server side. Make sure there is a way for the operator
- * to retrieve the real error.
- */
- debug ("sending error reply: %s", strerror (error));
- }
-
- r = conn->send (conn, &reply, sizeof reply);
- if (r == -1) {
- nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
- return set_status (conn, -1);
- }
-
- /* Send the read data buffer. */
- if (cmd == NBD_CMD_READ && !error) {
- r = conn->send (conn, buf, count);
- if (r == -1) {
- nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
- return set_status (conn, -1);
- }
- }
+ if (error != 0) {
+ /* Since we're about to send only the limited NBD_E* errno to the
+ * client, don't lose the information about what really happened
+ * on the server side. Make sure there is a way for the operator
+ * to retrieve the real error.
+ */
+ debug ("sending error reply: %s", strerror (error));
}
- return 1; /* command processed ok */
+ /* Currently we prefer to send simple replies for everything except
+ * where we have to (ie. NBD_CMD_READ when structured_replies have
+ * been negotiated). However this prevents us from sending
+ * human-readable error messages to the client, so we should
+ * reconsider this in future.
+ */
+ if (conn->structured_replies && cmd == NBD_CMD_READ) {
+ if (!error)
+ return send_structured_reply_read (conn, request.handle, cmd,
+ buf, count, offset);
+ else
+ return send_structured_reply_error (conn, request.handle, cmd, error);
+ }
+ else
+ return send_simple_reply (conn, request.handle, cmd, buf, count, error);
}
/* Write buffer to conn->sockout and either succeed completely
diff --git a/tests/test-layers.c b/tests/test-layers.c
index c8d15d2..6149c02 100644
--- a/tests/test-layers.c
+++ b/tests/test-layers.c
@@ -92,7 +92,7 @@ main (int argc, char *argv[])
struct new_handshake_finish handshake_finish;
uint16_t eflags;
struct request request;
- struct reply reply;
+ struct simple_reply reply;
char data[512];
#ifndef HAVE_EXIT_WITH_PARENT
--
2.20.1