Support receiving headers for 64-bit replies if extended headers were
negotiated. We already insist that the server not send us too much
payload in one reply, so we can exploit that and continue to use
h->payload_left as a 32-bit length for the rest of the payload
parsing. The NBD protocol specifically documents that extended mode
takes precedence over structured replies, and that there are no simple
replies in extended mode. We can also take advantage that the cookie
field is in the same offset in all the various reply types. Checking
that the server sent back the correct offset adds a bit more verbosity
for the sake of nicer debug messages.
Note that if we negotiate extended headers, but a non-compliant server
replies with a non-extended header, this patch will stall waiting for
the server to send more bytes rather than noticing that the magic
number is wrong (for aio operations, you'll get a magic number
mismatch once you send a second command that elicits a reply; but for
blocking operations, we basically deadlock). The easy alternative
would be to read just the first 4 bytes of magic, then determine how
many more bytes to expect; but that would require more states and
syscalls, and not worth it since the typical server will be compliant.
The other alternative is what the next patch implements: teaching
REPLY.RECV_REPLY to handle short reads that were at least long enough
to transmit magic to specifically look for magic number mismatch.
At this point, h->extended_headers is permanently false (we can't
enable it until all other aspects of the protocol have likewise been
converted).
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
v4: don't normalize length in-place [Laszlo], rebase on earlier
changes, better debug reporting if offsets mismatch
---
lib/internal.h | 1 +
generator/states-reply.c | 89 ++++++++++++++++++++++++----------
generator/states-reply-chunk.c | 40 +++++++++++----
3 files changed, 95 insertions(+), 35 deletions(-)
diff --git a/lib/internal.h b/lib/internal.h
index 2e4f987c..9df6a685 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -244,6 +244,7 @@ struct nbd_handle {
union reply_header {
struct nbd_simple_reply simple;
struct nbd_structured_reply structured;
+ struct nbd_extended_reply extended;
/* The wire formats share magic and cookie at the same offsets;
* provide aliases for one less level of typing.
*/
diff --git a/generator/states-reply.c b/generator/states-reply.c
index 0d200513..05301ae8 100644
--- a/generator/states-reply.c
+++ b/generator/states-reply.c
@@ -68,22 +68,30 @@ REPLY.START:
*/
ssize_t r;
- /* We read all replies initially as if they are simple replies, but
- * check the magic in CHECK_REPLY_MAGIC below. This works because
- * the structured_reply header is larger, and because the last
- * member of a simple reply, cookie, is coincident between the two
- * structs (an intentional design decision in the NBD spec when
- * structured replies were added).
+ /* With extended headers, there is only one size to read, so we can
+ * do it all in one syscall. But for older structured replies, we
+ * don't know if we have a simple or structured reply until we read
+ * the magic number, requiring a two-part read with
+ * CHECK_REPLY_MAGIC below. This works because the structured_reply
+ * header is larger, and because the last member of a simple reply,
+ * cookie, is coincident between all three structs (intentional
+ * design decisions in the NBD spec when structured and extended
+ * replies were added).
*/
ASSERT_MEMBER_ALIAS (union reply_header, simple.magic, magic);
ASSERT_MEMBER_ALIAS (union reply_header, simple.cookie, cookie);
ASSERT_MEMBER_ALIAS (union reply_header, structured.magic, magic);
ASSERT_MEMBER_ALIAS (union reply_header, structured.cookie, cookie);
+ ASSERT_MEMBER_ALIAS (union reply_header, extended.magic, magic);
+ ASSERT_MEMBER_ALIAS (union reply_header, extended.cookie, cookie);
assert (h->reply_cmd == NULL);
assert (h->rlen == 0);
h->rbuf = &h->sbuf.reply.hdr;
- h->rlen = sizeof h->sbuf.reply.hdr.simple;
+ if (h->extended_headers)
+ h->rlen = sizeof h->sbuf.reply.hdr.extended;
+ else
+ h->rlen = sizeof h->sbuf.reply.hdr.simple;
r = h->sock->ops->recv (h, h->sock, h->rbuf, h->rlen);
if (r == -1) {
@@ -129,10 +137,25 @@ REPLY.CHECK_REPLY_MAGIC:
uint64_t cookie;
magic = be32toh (h->sbuf.reply.hdr.magic);
- if (magic == NBD_SIMPLE_REPLY_MAGIC) {
+ switch (magic) {
+ case NBD_SIMPLE_REPLY_MAGIC:
+ if (h->extended_headers)
+ /* Server is non-compliant, and we've already read more bytes
+ * than a simple header contains; no recovery possible
+ */
+ goto invalid;
+
+ /* All other payload checks handled in the simple payload engine */
SET_NEXT_STATE (%SIMPLE_REPLY.START);
- }
- else if (magic == NBD_STRUCTURED_REPLY_MAGIC) {
+ break;
+
+ case NBD_STRUCTURED_REPLY_MAGIC:
+ if (h->extended_headers)
+ /* Server is non-compliant, and we've already read more bytes
+ * than a structured header contains; no recovery possible
+ */
+ goto invalid;
+
/* We've only read the bytes that fill hdr.simple. But
* hdr.structured is longer, so prepare to read the remaining
* bytes. We depend on the memory aliasing in union sbuf to
@@ -145,23 +168,29 @@ REPLY.CHECK_REPLY_MAGIC:
h->rlen = sizeof h->sbuf.reply.hdr.structured;
h->rlen -= sizeof h->sbuf.reply.hdr.simple;
SET_NEXT_STATE (%RECV_STRUCTURED_REMAINING);
- }
- else {
- /* We've probably lost synchronization. */
- SET_NEXT_STATE (%.DEAD);
- set_error (0, "invalid reply magic 0x%" PRIx32, magic);
-#if 0 /* uncomment to see desynchronized data */
- nbd_internal_hexdump (&h->sbuf.reply.hdr.simple,
- sizeof (h->sbuf.reply.hdr.simple),
- stderr);
-#endif
- return 0;
+ break;
+
+ case NBD_EXTENDED_REPLY_MAGIC:
+ if (!h->extended_headers)
+ /* Server is non-compliant. We could continue reading bytes up
+ * to the length of an extended reply to regain sync, but old
+ * servers are unlikely to send this magic, so it's just as easy
+ * to punt.
+ */
+ goto invalid;
+
+ /* All other payload checks handled in the chunk payload engine */
+ SET_NEXT_STATE (%CHUNK_REPLY.START);
+ break;
+
+ default:
+ goto invalid;
}
- /* NB: This works for both simple and structured replies, even
- * though we haven't finished reading the structured header yet,
- * because the cookie is stored at the same offset. See the
- * STATIC_ASSERT above in state REPLY.START that confirmed this.
+ /* NB: This works for all three reply types, even though we haven't
+ * finished reading a structured header yet, because the cookie is
+ * stored at the same offset. See the ASSERT_MEMBER_ALIAS above in
+ * state REPLY.START that confirmed this.
*/
h->chunks_received++;
cookie = be64toh (h->sbuf.reply.hdr.cookie);
@@ -175,6 +204,16 @@ REPLY.CHECK_REPLY_MAGIC:
h->reply_cmd = cmd;
return 0;
+ invalid:
+ SET_NEXT_STATE (%.DEAD); /* We've probably lost synchronization. */
+ set_error (0, "invalid or unexpected reply magic 0x%" PRIx32, magic);
+#if 0 /* uncomment to see desynchronized data */
+ nbd_internal_hexdump (&h->sbuf.reply.hdr.simple,
+ sizeof (h->sbuf.reply.hdr.simple),
+ stderr);
+#endif
+ return 0;
+
REPLY.RECV_STRUCTURED_REMAINING:
switch (recv_into_rbuf (h)) {
case -1: SET_NEXT_STATE (%.DEAD); return 0;
diff --git a/generator/states-reply-chunk.c b/generator/states-reply-chunk.c
index 1758ac34..17bb5149 100644
--- a/generator/states-reply-chunk.c
+++ b/generator/states-reply-chunk.c
@@ -69,11 +69,17 @@ STATE_MACHINE {
REPLY.CHUNK_REPLY.START:
struct command *cmd = h->reply_cmd;
uint16_t flags, type;
- uint32_t length;
+ uint64_t length;
+ uint64_t offset = -1;
flags = be16toh (h->sbuf.reply.hdr.structured.flags);
type = be16toh (h->sbuf.reply.hdr.structured.type);
- length = be32toh (h->sbuf.reply.hdr.structured.length);
+ if (h->extended_headers) {
+ length = be64toh (h->sbuf.reply.hdr.extended.length);
+ offset = be64toh (h->sbuf.reply.hdr.extended.offset);
+ }
+ else
+ length = be32toh (h->sbuf.reply.hdr.structured.length);
/* Reject a server that replies with too much information, but don't
* reject a single structured reply to NBD_CMD_READ on the largest
@@ -83,13 +89,14 @@ REPLY.CHUNK_REPLY.START:
* not worth keeping the connection alive.
*/
if (length > MAX_REQUEST_SIZE + sizeof h->sbuf.reply.payload.offset_data) {
- set_error (0, "invalid server reply length %" PRIu32, length);
+ set_error (0, "invalid server reply length %" PRIu64, length);
SET_NEXT_STATE (%.DEAD);
return 0;
}
/* Skip an unexpected structured reply, including to an unknown cookie. */
- if (cmd == NULL || !h->structured_replies)
+ if (cmd == NULL || !h->structured_replies ||
+ (h->extended_headers && offset != cmd->offset))
goto resync;
h->payload_left = length;
@@ -504,7 +511,8 @@ REPLY.CHUNK_REPLY.RECV_BS_ENTRIES:
REPLY.CHUNK_REPLY.RESYNC:
struct command *cmd = h->reply_cmd;
uint16_t type;
- uint32_t length;
+ uint64_t length;
+ uint64_t offset = -1;
assert (h->rbuf == NULL);
switch (recv_into_rbuf (h)) {
@@ -524,11 +532,23 @@ REPLY.CHUNK_REPLY.RESYNC:
return 0;
}
type = be16toh (h->sbuf.reply.hdr.structured.type);
- length = be32toh (h->sbuf.reply.hdr.structured.length);
- debug (h, "unexpected reply type %u or payload length %" PRIu32
- " for cookie %" PRIu64 " and command %" PRIu32
- ", this is probably a server bug",
- type, length, cmd->cookie, cmd->type);
+ if (h->extended_headers) {
+ length = be64toh (h->sbuf.reply.hdr.extended.length);
+ offset = be64toh (h->sbuf.reply.hdr.extended.offset);
+ if (offset != cmd->offset)
+ debug (h, "unexpected reply offset %" PRIu64 " for cookie %"
PRIu64
+ " and command %" PRIu32 ", this is probably a server
bug",
+ length, cmd->cookie, cmd->type);
+ else
+ offset = -1;
+ }
+ else
+ length = be32toh (h->sbuf.reply.hdr.structured.length);
+ if (offset == -1)
+ debug (h, "unexpected reply type %u or payload length %" PRIu64
+ " for cookie %" PRIu64 " and command %" PRIu32
+ ", this is probably a server bug",
+ type, length, cmd->cookie, cmd->type);
if (cmd->error == 0)
cmd->error = EPROTO;
SET_NEXT_STATE (%FINISH);
--
2.41.0