On Tue, May 21, 2019 at 10:09:30AM -0500, Eric Blake wrote:
As already noted in our state machine, a client that batches up a
large read followed by large writes, coupled with a server that only
processes commands in order, can result in deadlock (the server won't
read more until we unblock its ability to write out its reply to our
first command; but we aren't willing to read until we are done writing
out our second command). Break the deadlock by teaching the generator
that while we are in the middle of writing a command, we must remain
responsive to read_notify events; if the server has data for us to
read, we should consume that before jumping back into the middle of
our command issue (and consuming a reply can invalidate sbuf, so we
have to drop an assertion in PREPARE_WRITE_PAYLOAD).
---
generator/generator | 20 ++++++++++++++++++--
generator/states-issue-command.c | 25 ++++++++++++++++++++++++-
generator/states-reply.c | 5 ++++-
lib/internal.h | 1 +
4 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/generator/generator b/generator/generator
index a4ad362..23b3cbf 100755
--- a/generator/generator
+++ b/generator/generator
@@ -634,7 +634,15 @@ and issue_command_state_machine = [
default_state with
name = "SEND_REQUEST";
comment = "Sending a request to the remote server";
- external_events = [ NotifyWrite, "" ];
+ external_events = [ NotifyWrite, "";
+ NotifyRead, "PAUSE_SEND_REQUEST" ];
+ };
+
+ State {
+ default_state with
+ name = "PAUSE_SEND_REQUEST";
+ comment = "Interrupt send request to receive an earlier command's
reply";
+ external_events = [];
};
State {
@@ -648,7 +656,15 @@ and issue_command_state_machine = [
default_state with
name = "SEND_WRITE_PAYLOAD";
comment = "Sending the write payload to the remote server";
- external_events = [ NotifyWrite, "" ];
+ external_events = [ NotifyWrite, "";
+ NotifyRead, "PAUSE_WRITE_PAYLOAD" ];
+ };
+
+State {
+ default_state with
+ name = "PAUSE_WRITE_PAYLOAD";
+ comment = "Interrupt write payload to receive an earlier command's
reply";
+ external_events = [];
};
State {
diff --git a/generator/states-issue-command.c b/generator/states-issue-command.c
index e24ea34..3a5980d 100644
--- a/generator/states-issue-command.c
+++ b/generator/states-issue-command.c
@@ -25,6 +25,15 @@
assert (conn->cmds_to_issue != NULL);
cmd = conn->cmds_to_issue;
+ /* Were we interrupted by reading a reply to an earlier command? */
+ if (conn->wlen) {
+ if (conn->in_write_payload)
+ SET_NEXT_STATE(%SEND_WRITE_PAYLOAD);
+ else
+ SET_NEXT_STATE(%SEND_REQUEST);
+ return 0;
+ }
+
conn->sbuf.request.magic = htobe32 (NBD_REQUEST_MAGIC);
conn->sbuf.request.flags = htobe16 (cmd->flags);
conn->sbuf.request.type = htobe16 (cmd->type);
@@ -43,12 +52,18 @@
}
return 0;
+ ISSUE_COMMAND.PAUSE_SEND_REQUEST:
+ assert (conn->wlen);
+ assert (conn->cmds_to_issue != NULL);
+ conn->in_write_payload = false;
+ SET_NEXT_STATE (%^REPLY.START);
+ return 0;
+
ISSUE_COMMAND.PREPARE_WRITE_PAYLOAD:
struct command_in_flight *cmd;
assert (conn->cmds_to_issue != NULL);
cmd = conn->cmds_to_issue;
- assert (cmd->handle == be64toh (conn->sbuf.request.handle));
if (cmd->type == NBD_CMD_WRITE) {
conn->wbuf = cmd->data;
conn->wlen = cmd->count;
@@ -65,9 +80,17 @@
}
return 0;
+ ISSUE_COMMAND.PAUSE_WRITE_PAYLOAD:
+ assert (conn->wlen);
+ assert (conn->cmds_to_issue != NULL);
+ conn->in_write_payload = true;
+ SET_NEXT_STATE (%^REPLY.START);
+ return 0;
+
ISSUE_COMMAND.FINISH:
struct command_in_flight *cmd;
+ assert (!conn->wlen);
assert (conn->cmds_to_issue != NULL);
cmd = conn->cmds_to_issue;
conn->cmds_to_issue = cmd->next;
diff --git a/generator/states-reply.c b/generator/states-reply.c
index 45362d4..6bb503a 100644
--- a/generator/states-reply.c
+++ b/generator/states-reply.c
@@ -118,7 +118,10 @@
else
conn->cmds_done = cmd;
- SET_NEXT_STATE (%.READY);
+ if (conn->cmds_to_issue)
+ SET_NEXT_STATE (%^ISSUE_COMMAND.START);
+ else
+ SET_NEXT_STATE (%.READY);
return 0;
} /* END STATE MACHINE */
diff --git a/lib/internal.h b/lib/internal.h
index 3f2b729..466af9d 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -182,6 +182,7 @@ struct nbd_connection {
* acknowledge them.
*/
struct command_in_flight *cmds_to_issue, *cmds_in_flight, *cmds_done;
+ bool in_write_payload;
};
struct meta_context {
Yes, I think this is correct.
However, I also think that cmds_to_issue might be replaced by a single
command pointer. (If I'm wrong in my reasoning, we'll very quickly
see an assert fail if something tries to overwrite a non-NULL
conn->cmd_to_issue pointer).
Rich.
--
Richard Jones, Virtualization Group, Red Hat
http://people.redhat.com/~rjones
Read my programming and virtualization blog:
http://rwmj.wordpress.com
virt-df lists disk usage of guests without needing to install any
software inside the virtual machine. Supports Linux and Windows.
http://people.redhat.com/~rjones/virt-df/