Having the client polling thread perform an O(n) loop over all known
in-flight commands after each time the poll woke up is somewhat
inefficient, and in a multi-threaded setup requires additional locking
beyond libnbd to track the set of known command handles. Better is a
way for aio commands to call a notify callback the moment a specific
command is ready to complete, and then a separate thread can gather
the final completion status using just libnbd's locking, making the
polling loop more efficient. This also provides an opportunity to
clean up any opaque data and/or change the final command status (for
example, writing a strict validator for nbd_aio_pread_structured can
change the command from success to failure if the server violated
protocol by not returning chunks to cover the entire read).
We also want the client to be aware of any issued/in-flight commands
that failed because they were stranded when the state machine moved to
CLOSED or DEAD. Previously, nbd_aio_command_completed() would never
locate such stranded commands, but adding a common point to fire the
notifier for such commands makes it also possible to move those
commands to the completion queue.
This patch sets up the framework, with observable effects for stranded
commands per the testsuite changes, but nothing yet actually sets the
notify callback; that will come in the next patch.
---
generator/states-reply.c | 8 ++++++++
generator/states.c | 29 +++++++++++++++++++++++++++++
lib/internal.h | 2 ++
tests/server-death.c | 17 ++++++++++++++---
4 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/generator/states-reply.c b/generator/states-reply.c
index 88274bc..c5cd790 100644
--- a/generator/states-reply.c
+++ b/generator/states-reply.c
@@ -180,6 +180,14 @@ save_reply_state (struct nbd_handle *h)
else
h->cmds_done = cmd;
+ /* Notify the user */
+ if (cmd->cb.notify) {
+ int error = cmd->error;
+
+ if (cmd->cb.notify (cmd->cb.opaque, handle, &error) == -1 &&
error)
+ cmd->error = error;
+ }
+
SET_NEXT_STATE (%.READY);
return 0;
diff --git a/generator/states.c b/generator/states.c
index deea73c..c9c3ef7 100644
--- a/generator/states.c
+++ b/generator/states.c
@@ -111,6 +111,31 @@ send_from_wbuf (struct nbd_handle *h)
return 0; /* move to next state */
}
+/* Forcefully fail any remaining in-flight commands in list */
+void abort_commands (struct nbd_handle *h,
+ struct command_in_flight **list)
+{
+ struct command_in_flight *prev_cmd, *cmd;
+
+ for (cmd = *list, prev_cmd = NULL;
+ cmd != NULL;
+ prev_cmd = cmd, cmd = cmd->next) {
+ if (cmd->cb.notify && cmd->type != NBD_CMD_DISC) {
+ int error = cmd->error ? cmd->error : ENOTCONN;
+
+ if (cmd->cb.notify (cmd->cb.opaque, cmd->handle, &error) == -1
&& error)
+ cmd->error = error;
+ }
+ if (cmd->error == 0)
+ cmd->error = ENOTCONN;
+ }
+ if (prev_cmd) {
+ prev_cmd->next = h->cmds_done;
+ h->cmds_done = *list;
+ *list = NULL;
+ }
+}
+
/*----- End of prologue. -----*/
/* STATE MACHINE */ {
@@ -127,6 +152,8 @@ send_from_wbuf (struct nbd_handle *h)
DEAD:
/* The caller should have used set_error() before reaching here */
assert (nbd_get_error ());
+ abort_commands (h, &h->cmds_to_issue);
+ abort_commands (h, &h->cmds_in_flight);
if (h->sock) {
h->sock->ops->close (h->sock);
h->sock = NULL;
@@ -134,6 +161,8 @@ send_from_wbuf (struct nbd_handle *h)
return -1;
CLOSED:
+ abort_commands (h, &h->cmds_to_issue);
+ abort_commands (h, &h->cmds_in_flight);
if (h->sock) {
h->sock->ops->close (h->sock);
h->sock = NULL;
diff --git a/lib/internal.h b/lib/internal.h
index 15f4b64..59074c2 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -239,6 +239,7 @@ typedef int (*extent_fn) (void *data, const char *metacontext,
uint64_t offset,
uint32_t *entries, size_t nr_entries, int *error);
typedef int (*read_fn) (void *data, const void *buf, size_t count,
uint64_t offset, int *error, int status);
+typedef int (*notify_fn) (void *data, int64_t handle, int *error);
struct command_cb {
void *opaque;
@@ -246,6 +247,7 @@ struct command_cb {
extent_fn extent;
read_fn read;
} fn;
+ notify_fn notify;
};
struct command_in_flight {
diff --git a/tests/server-death.c b/tests/server-death.c
index d490753..f8747e4 100644
--- a/tests/server-death.c
+++ b/tests/server-death.c
@@ -32,6 +32,8 @@ int
main (int argc, char *argv[])
{
struct nbd_handle *nbd;
+ int err;
+ const char *msg;
char buf[512];
int64_t handle;
char pidfile[] = "/tmp/libnbd-test-disconnectXXXXXX";
@@ -123,16 +125,25 @@ main (int argc, char *argv[])
goto fail;
}
- /* Proof that the read was stranded */
- if (nbd_aio_peek_command_completed (nbd) != 0) {
+ /* Detection of the dead server completes all remaining in-flight commands */
+ if (nbd_aio_peek_command_completed (nbd) != handle) {
fprintf (stderr, "%s: test failed: nbd_aio_peek_command_completed\n",
argv[0]);
goto fail;
}
- if (nbd_aio_command_completed (nbd, handle) != 0) {
+ if (nbd_aio_command_completed (nbd, handle) != -1) {
fprintf (stderr, "%s: test failed: nbd_aio_command_completed\n", argv[0]);
goto fail;
}
+ msg = nbd_get_error ();
+ err = nbd_get_errno ();
+ printf ("error: \"%s\"\n", msg);
+ printf ("errno: %d (%s)\n", err, strerror (err));
+ if (err != ENOTCONN) {
+ fprintf (stderr, "%s: test failed: unexpected errno %d (%s)\n", argv[0],
+ err, strerror (err));
+ goto fail;
+ }
close (fd);
unlink (pidfile);
--
2.20.1