As already noted in our state machine, a client that batches up a
large read followed by large writes, coupled with a server that only
processes commands in order, can result in deadlock: the server won't
read more until we unblock its ability to write out its reply to our
first command, but we aren't willing to read until we are done writing
out our second command. Break the deadlock by teaching the generator
that while we are in the middle of writing a command, we must remain
responsive to read_notify events; if the server has data for us to
read, we should consume that before jumping back into the middle of
issuing our command. Consuming a reply can invalidate sbuf, so we
also have to drop an assertion in PREPARE_WRITE_PAYLOAD.
---
generator/generator | 26 ++++++++++++++++++--------
generator/states-issue-command.c | 25 ++++++++++++++++++++++++-
lib/internal.h | 1 +
3 files changed, 43 insertions(+), 9 deletions(-)
diff --git a/generator/generator b/generator/generator
index a4ad362..5c84a5d 100755
--- a/generator/generator
+++ b/generator/generator
@@ -620,12 +620,6 @@ and issue_command_state_machine = [
State {
default_state with
name = "START";
- (* XXX There's a possible deadlock here if a server cannot
- * handle multiple requests pipelined on a single connection.
- * We could try to issue a command and block, but reads might
- * be available. It should be possible to break this with
- * another state.
- *)
comment = "Begin issuing a command to the remote server";
external_events = [];
};
@@ -634,7 +628,15 @@ and issue_command_state_machine = [
default_state with
name = "SEND_REQUEST";
comment = "Sending a request to the remote server";
- external_events = [ NotifyWrite, "" ];
+ external_events = [ NotifyWrite, "";
+ NotifyRead, "PAUSE_SEND_REQUEST" ];
+ };
+
+ State {
+ default_state with
+ name = "PAUSE_SEND_REQUEST";
+ comment = "Interrupt send request to receive an earlier command's
reply";
+ external_events = [];
};
State {
@@ -648,7 +650,15 @@ and issue_command_state_machine = [
default_state with
name = "SEND_WRITE_PAYLOAD";
comment = "Sending the write payload to the remote server";
- external_events = [ NotifyWrite, "" ];
+ external_events = [ NotifyWrite, "";
+ NotifyRead, "PAUSE_WRITE_PAYLOAD" ];
+ };
+
+State {
+ default_state with
+ name = "PAUSE_WRITE_PAYLOAD";
+ comment = "Interrupt write payload to receive an earlier command's
reply";
+ external_events = [];
};
State {
diff --git a/generator/states-issue-command.c b/generator/states-issue-command.c
index e24ea34..3a5980d 100644
--- a/generator/states-issue-command.c
+++ b/generator/states-issue-command.c
@@ -25,6 +25,15 @@
assert (conn->cmds_to_issue != NULL);
cmd = conn->cmds_to_issue;
+ /* Were we interrupted by reading a reply to an earlier command? */
+ if (conn->wlen) {
+ if (conn->in_write_payload)
+ SET_NEXT_STATE(%SEND_WRITE_PAYLOAD);
+ else
+ SET_NEXT_STATE(%SEND_REQUEST);
+ return 0;
+ }
+
conn->sbuf.request.magic = htobe32 (NBD_REQUEST_MAGIC);
conn->sbuf.request.flags = htobe16 (cmd->flags);
conn->sbuf.request.type = htobe16 (cmd->type);
@@ -43,12 +52,18 @@
}
return 0;
+ ISSUE_COMMAND.PAUSE_SEND_REQUEST:
+ assert (conn->wlen);
+ assert (conn->cmds_to_issue != NULL);
+ conn->in_write_payload = false;
+ SET_NEXT_STATE (%^REPLY.START);
+ return 0;
+
ISSUE_COMMAND.PREPARE_WRITE_PAYLOAD:
struct command_in_flight *cmd;
assert (conn->cmds_to_issue != NULL);
cmd = conn->cmds_to_issue;
- assert (cmd->handle == be64toh (conn->sbuf.request.handle));
if (cmd->type == NBD_CMD_WRITE) {
conn->wbuf = cmd->data;
conn->wlen = cmd->count;
@@ -65,9 +80,17 @@
}
return 0;
+ ISSUE_COMMAND.PAUSE_WRITE_PAYLOAD:
+ assert (conn->wlen);
+ assert (conn->cmds_to_issue != NULL);
+ conn->in_write_payload = true;
+ SET_NEXT_STATE (%^REPLY.START);
+ return 0;
+
ISSUE_COMMAND.FINISH:
struct command_in_flight *cmd;
+ assert (!conn->wlen);
assert (conn->cmds_to_issue != NULL);
cmd = conn->cmds_to_issue;
conn->cmds_to_issue = cmd->next;
diff --git a/lib/internal.h b/lib/internal.h
index 2d6ad9d..91c056c 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -185,6 +185,7 @@ struct nbd_connection {
* acknowledge them.
*/
struct command_in_flight *cmds_to_issue, *cmds_in_flight, *cmds_done;
+ bool in_write_payload;
};
struct meta_context {
--
2.20.1