Instead of killing the connection when the server gives us an
unexpected structured reply for a known command cookie, we can just
ignore any payload, then mark that particular command as failed with
EPROTO and proceed to the next command. For ease of review, this
patch just uses the new state for a server that replies with an
un-negotiated structured reply to a client normally expecting only
simple replies; the next patch will then tweak other error handling
spots within structured reply handling that can also utilize this
state instead of going to %.DEAD.
A compliant server will never trigger this state; but I was able to
test it by quickly hacking nbdkit as follows:
| diff --git i/server/protocol.c w/server/protocol.c
| index 2ac77055..99f1bec1 100644
| --- i/server/protocol.c
| +++ w/server/protocol.c
| @@ -738,7 +738,7 @@ protocol_recv_request_send_reply (void)
| * us from sending human-readable error messages to the client, so
| * we should reconsider this in future.
| */
| - if (conn->structured_replies &&
| + if (/*conn->structured_replies &&*/
| (cmd == NBD_CMD_READ || cmd == NBD_CMD_BLOCK_STATUS)) {
| if (!error) {
| if (cmd == NBD_CMD_READ)
With the following command line:
$ ./run /path/to/nbdkit --no-sr -U - memory 1M --run 'nbdsh -u "$uri" -c
"\
try:
h.pread(1, 0)
except nbd.Error as ex:
print(ex.string)
h.flush()
print(h.get_size())
"'
the pre-patch results show that we killed the connection without
reading the server's reply payload:
nbd_pread: server sent unexpected structured reply
nbdkit: memory.0: error: read request: Connection reset by peer
nbdsh: command line script failed: nbd_flush: invalid state: DEAD: the handle must be
connected with the server: Invalid argument
while post-patch, we are now able to successfully flush despite the
intermediate failure to read:
nbd_pread: read: command failed: Protocol error
1048576
Running with LIBNBD_DEBUG=1 gives more details about the client's
observation of a server bug.
---
generator/state_machine.ml | 9 ++++++-
generator/states-reply-structured.c | 38 +++++++++++++++++++++++++----
2 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/generator/state_machine.ml b/generator/state_machine.ml
index 2cfad54..4d4635f 100644
--- a/generator/state_machine.ml
+++ b/generator/state_machine.ml
@@ -1,6 +1,6 @@
(* hey emacs, this is OCaml code: -*- tuareg -*- *)
(* nbd client library in userspace: state machine definition
- * Copyright (C) 2013-2020 Red Hat Inc.
+ * Copyright (C) 2013-2022 Red Hat Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -871,6 +871,13 @@ and
external_events = [];
};
+ State {
+ default_state with
+ name = "RESYNC";
+ comment = "Ignore payload of an unexpected structured reply";
+ external_events = [];
+ };
+
State {
default_state with
name = "FINISH";
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index 2159798..752468c 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -48,11 +48,6 @@ STATE_MACHINE {
/* We've only read the simple_reply. The structured_reply is longer,
* so read the remaining part.
*/
- if (!h->structured_replies) {
- set_error (0, "server sent unexpected structured reply");
- SET_NEXT_STATE(%.DEAD);
- return 0;
- }
h->rbuf = &h->sbuf;
h->rbuf += sizeof h->sbuf.simple_reply;
h->rlen = sizeof h->sbuf.sr.structured_reply;
@@ -98,6 +93,13 @@ STATE_MACHINE {
return 0;
}
+ if (!h->structured_replies) {
+ h->rbuf = NULL;
+ h->rlen = length;
+ SET_NEXT_STATE (%RESYNC);
+ return 0;
+ }
+
switch (type) {
case NBD_REPLY_TYPE_NONE:
if (length != 0) {
@@ -513,6 +515,32 @@ STATE_MACHINE {
}
return 0;
+ REPLY.STRUCTURED_REPLY.RESYNC:
+ struct command *cmd = h->reply_cmd;
+ uint16_t type;
+ uint32_t length;
+
+ assert (cmd);
+ assert (h->rbuf == NULL);
+ switch (recv_into_rbuf (h)) {
+ case -1: SET_NEXT_STATE (%.DEAD); return 0;
+ case 1:
+ save_reply_state (h);
+ SET_NEXT_STATE (%.READY);
+ return 0;
+ case 0:
+ type = be16toh (h->sbuf.sr.structured_reply.type);
+ length = be32toh (h->sbuf.sr.structured_reply.length);
+ debug (h, "unexpected reply type %u or payload length %" PRIu32
+ " for cookie %" PRIu64 " and command %" PRIu32
+ ", this is probably a server bug",
+ type, length, cmd->cookie, cmd->type);
+ if (cmd->error == 0)
+ cmd->error = EPROTO;
+ SET_NEXT_STATE (%FINISH);
+ return 0;
+ }
+
REPLY.STRUCTURED_REPLY.FINISH:
uint16_t flags;
--
2.37.1