We can easily demonstrate a memory leak when repeatedly calling
nbd_aio_pread from nbdsh in the wrong state. True, that isn't
something a sane client would normally do, but it's worth fixing.
The culprit: when nbd_aio_pread returns a cookie, we guarantee to
clean up the closure, but if we fail early and never scheduled the
command, nothing ever cleans up the closure. We could document this
as intentional, and force the user to always clean up their closure on
failure. However, this is not nice, for two reasons. First, a
documentation change to codify existing practice would put more burden
on clients - every client that calls a C API with closures is now on
the hook to add boilerplate to avoid a memory leak; and the fact that
we can easily demonstrate the memory leak in nbdsh means this solution
does not scale. Second, consider nbd_pread_structured: that call
takes a chunk closure parameter, and can fail for multiple reasons,
either because it was called in the wrong state (in which case we were
not freeing the closure) or because it succeeded at getting a server
reponse where the server failed (in which case the closure is freed by
virtue of getting the server response). As the caller can't
distinguish which, telling them to free on failure could result in
double-frees.
So the best plan of attack is to document that ALL functions that take
a closure argument promise to eventually free the closure, even if the
API fails, and to declare our memory leak as a bug to be fixed in
libnbd proper. To do that, the generator will now blindly free
anything that has not been cleared by the unlocked_* helper, which in
turn are now careful to clear any callback once it has successfully
been copied into somewhere that will guarantee subsequent cleanup
(whether as part of the state machine, or by the fact that
nbd_internal_command_common now cleans up on all error paths).
Yes, our change means that any client that WAS checking for nbd_aio_*
returning -1 for cookies and manually cleaning up is now performing a
double free, but as argued above, such clients are unlikely because of
the extra boilerplate it would entail, and because of the fact that
most clients are sane enough to not trigger error paths that can fail
client-side without queueing up a trip to the server (for example, no
one intentionally writes a client that calls nbd_aio_pread before
nbd_connect_*, except as part of our testsuite in errors.c), to have
noticed the problem under valgrind before now.
---
docs/libnbd.pod | 2 +-
generator/C.ml | 13 ++++++++
lib/debug.c | 1 +
lib/opt.c | 5 ++++
lib/rw.c | 33 ++++++++++++++++----
tests/closure-lifetimes.c | 63 ++++++++++++++++++++++++++++++++++++++-
tests/newstyle-limited.c | 18 ++++++++++-
7 files changed, 127 insertions(+), 8 deletions(-)
diff --git a/docs/libnbd.pod b/docs/libnbd.pod
index 2c3742c..f2ba3bb 100644
--- a/docs/libnbd.pod
+++ b/docs/libnbd.pod
@@ -773,7 +773,7 @@ because you can use closures to achieve the same effect.
You can associate an optional free function with callbacks. Libnbd
will call this function when the callback will not be called again by
-libnbd.
+libnbd, including in the case where the API fails.
This can be used to free associated C<user_data>. For example:
diff --git a/generator/C.ml b/generator/C.ml
index 280b319..4bdcf60 100644
--- a/generator/C.ml
+++ b/generator/C.ml
@@ -553,6 +553,19 @@ let generate_lib_api_c () =
print_trace_leave ret;
pr "\n"
);
+ (* Finish any closures not transferred to state machine. *)
+ List.iter (
+ function
+ | Closure { cbname } ->
+ pr " FREE_CALLBACK (%s_callback);\n" cbname
+ | _ -> ()
+ ) args;
+ List.iter (
+ function
+ | OClosure { cbname } ->
+ pr " FREE_CALLBACK (%s_callback);\n" cbname
+ | OFlags _ -> ()
+ ) optargs;
if is_locked then (
pr " if (h->public_state != get_next_state (h))\n";
pr " h->public_state = get_next_state (h);\n";
diff --git a/lib/debug.c b/lib/debug.c
index 1b503d9..b598ad3 100644
--- a/lib/debug.c
+++ b/lib/debug.c
@@ -54,6 +54,7 @@ nbd_unlocked_set_debug_callback (struct nbd_handle *h,
nbd_unlocked_clear_debug_callback (h);
h->debug_callback = *debug_callback;
+ SET_CALLBACK_TO_NULL (*debug_callback);
return 0;
}
diff --git a/lib/opt.c b/lib/opt.c
index 003ecf8..6ea8326 100644
--- a/lib/opt.c
+++ b/lib/opt.c
@@ -156,6 +156,7 @@ nbd_unlocked_opt_list (struct nbd_handle *h, nbd_list_callback *list)
if (nbd_unlocked_aio_opt_list (h, &l, &c) == -1)
return -1;
+ SET_CALLBACK_TO_NULL (*list);
if (wait_for_option (h) == -1)
return -1;
if (s.err) {
@@ -172,6 +173,7 @@ nbd_unlocked_aio_opt_go (struct nbd_handle *h,
{
h->opt_current = NBD_OPT_GO;
h->opt_cb.completion = *complete;
+ SET_CALLBACK_TO_NULL (*complete);
if (nbd_internal_run (h, cmd_issue) == -1)
debug (h, "option queued, ignoring state machine failure");
@@ -190,6 +192,7 @@ nbd_unlocked_aio_opt_info (struct nbd_handle *h,
h->opt_current = NBD_OPT_INFO;
h->opt_cb.completion = *complete;
+ SET_CALLBACK_TO_NULL (*complete);
if (nbd_internal_run (h, cmd_issue) == -1)
debug (h, "option queued, ignoring state machine failure");
@@ -219,7 +222,9 @@ nbd_unlocked_aio_opt_list (struct nbd_handle *h, nbd_list_callback
*list,
assert (CALLBACK_IS_NULL (h->opt_cb.fn.list));
h->opt_cb.fn.list = *list;
+ SET_CALLBACK_TO_NULL (*list);
h->opt_cb.completion = *complete;
+ SET_CALLBACK_TO_NULL (*complete);
h->opt_current = NBD_OPT_LIST;
if (nbd_internal_run (h, cmd_issue) == -1)
debug (h, "option queued, ignoring state machine failure");
diff --git a/lib/rw.c b/lib/rw.c
index 9f2909b..4b8eeaf 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -72,6 +72,7 @@ nbd_unlocked_pread_structured (struct nbd_handle *h, void *buf,
if (cookie == -1)
return -1;
+ assert (CALLBACK_IS_NULL (*chunk));
return wait_for_command (h, cookie);
}
@@ -163,6 +164,7 @@ nbd_unlocked_block_status (struct nbd_handle *h,
if (cookie == -1)
return -1;
+ assert (CALLBACK_IS_NULL (*extent));
return wait_for_command (h, cookie);
}
@@ -176,11 +178,11 @@ nbd_internal_command_common (struct nbd_handle *h,
if (h->disconnect_request) {
set_error (EINVAL, "cannot request more commands after NBD_CMD_DISC");
- return -1;
+ goto err;;
}
if (h->in_flight == INT_MAX) {
set_error (ENOMEM, "too many commands already in flight");
- return -1;
+ goto err;
}
switch (type) {
@@ -190,7 +192,7 @@ nbd_internal_command_common (struct nbd_handle *h,
if (count > MAX_REQUEST_SIZE) {
set_error (ERANGE, "request too large: maximum request size is %d",
MAX_REQUEST_SIZE);
- return -1;
+ goto err;
}
break;
@@ -203,7 +205,7 @@ nbd_internal_command_common (struct nbd_handle *h,
if (count > UINT32_MAX) {
set_error (ERANGE, "request too large: maximum request size is %"
PRIu32,
UINT32_MAX);
- return -1;
+ goto err;
}
break;
}
@@ -211,7 +213,7 @@ nbd_internal_command_common (struct nbd_handle *h,
cmd = calloc (1, sizeof *cmd);
if (cmd == NULL) {
set_error (errno, "calloc");
- return -1;
+ goto err;
}
cmd->flags = flags;
cmd->type = type;
@@ -257,6 +259,17 @@ nbd_internal_command_common (struct nbd_handle *h,
}
return cmd->cookie;
+
+ err:
+ /* Since we did not queue the command, we must free the callbacks. */
+ if (cb) {
+ if (type == NBD_CMD_BLOCK_STATUS)
+ FREE_CALLBACK (cb->fn.extent);
+ if (type == NBD_CMD_READ)
+ FREE_CALLBACK (cb->fn.chunk);
+ FREE_CALLBACK (cb->completion);
+ }
+ return -1;
}
int64_t
@@ -276,6 +289,7 @@ nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
return -1;
}
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
buf, &cb);
}
@@ -301,6 +315,8 @@ nbd_unlocked_aio_pread_structured (struct nbd_handle *h, void *buf,
return -1;
}
+ SET_CALLBACK_TO_NULL (*chunk);
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, flags, NBD_CMD_READ, offset, count,
buf, &cb);
}
@@ -329,6 +345,7 @@ nbd_unlocked_aio_pwrite (struct nbd_handle *h, const void *buf,
return -1;
}
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, flags, NBD_CMD_WRITE, offset, count,
(void *) buf, &cb);
}
@@ -350,6 +367,7 @@ nbd_unlocked_aio_flush (struct nbd_handle *h,
return -1;
}
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, 0, NBD_CMD_FLUSH, 0, 0,
NULL, &cb);
}
@@ -387,6 +405,7 @@ nbd_unlocked_aio_trim (struct nbd_handle *h,
return -1;
}
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, flags, NBD_CMD_TRIM, offset, count,
NULL, &cb);
}
@@ -413,6 +432,7 @@ nbd_unlocked_aio_cache (struct nbd_handle *h,
return -1;
}
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, 0, NBD_CMD_CACHE, offset, count,
NULL, &cb);
}
@@ -457,6 +477,7 @@ nbd_unlocked_aio_zero (struct nbd_handle *h,
return -1;
}
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, flags, NBD_CMD_WRITE_ZEROES, offset,
count, NULL, &cb);
}
@@ -493,6 +514,8 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
return -1;
}
+ SET_CALLBACK_TO_NULL (*extent);
+ SET_CALLBACK_TO_NULL (*completion);
return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
count, NULL, &cb);
}
diff --git a/tests/closure-lifetimes.c b/tests/closure-lifetimes.c
index 70788b6..a7a1e46 100644
--- a/tests/closure-lifetimes.c
+++ b/tests/closure-lifetimes.c
@@ -1,5 +1,5 @@
/* NBD client library in userspace
- * Copyright (C) 2013-2019 Red Hat Inc.
+ * Copyright (C) 2013-2020 Red Hat Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -42,6 +42,8 @@ static unsigned debug_fn_called;
static unsigned debug_fn_freed;
static unsigned read_cb_called;
static unsigned read_cb_freed;
+static unsigned block_status_cb_called;
+static unsigned block_status_cb_freed;
static unsigned completion_cb_called;
static unsigned completion_cb_freed;
@@ -74,6 +76,21 @@ read_cb_free (void *opaque)
read_cb_freed++;
}
+static int
+block_status_cb (void *opaque, const char *meta, uint64_t offset,
+ uint32_t *entries, size_t nr_entries, int *error)
+{
+ assert (!block_status_cb_freed);
+ block_status_cb_called++;
+ return 0;
+}
+
+static void
+block_status_cb_free (void *opaque)
+{
+ read_cb_freed++;
+}
+
static int
completion_cb (void *opaque, int *error)
{
@@ -168,5 +185,49 @@ main (int argc, char *argv[])
assert (read_cb_freed == 1);
assert (completion_cb_freed == 1);
+ /* Test command callbacks are freed if the command fails client-side,
+ * whether from calling in wrong state or because of no server support.
+ */
+ block_status_cb_called = block_status_cb_freed =
+ completion_cb_called = completion_cb_freed = 0;
+ nbd = nbd_create ();
+ if (nbd == NULL) NBD_ERROR;
+ /* Intentionally omit a call to:
+ * nbd_add_meta_context (nbd, LIBNBD_CONTEXT_BASE_ALLOCATION);
+ */
+ cookie = nbd_aio_block_status (nbd, sizeof buf, 0,
+ (nbd_extent_callback) { .callback = block_status_cb,
+ .free = block_status_cb_free },
+ (nbd_completion_callback) { .callback = completion_cb,
+ .free = completion_cb_free
},
+ 0);
+ if (cookie != -1) {
+ fprintf (stderr, "%s: Expecting block_status failure\n", argv[0]);
+ exit (EXIT_FAILURE);
+ }
+ assert (block_status_cb_freed == 1);
+ assert (completion_cb_freed == 1);
+
+ block_status_cb_called = block_status_cb_freed =
+ completion_cb_called = completion_cb_freed = 0;
+
+ if (nbd_connect_command (nbd, nbdkit) == -1) NBD_ERROR;
+
+ cookie = nbd_aio_block_status (nbd, sizeof buf, 0,
+ (nbd_extent_callback) { .callback = block_status_cb,
+ .free = block_status_cb_free },
+ (nbd_completion_callback) { .callback = completion_cb,
+ .free = completion_cb_free
},
+ 0);
+ if (cookie != -1) {
+ fprintf (stderr, "%s: Expecting block_status failure\n", argv[0]);
+ exit (EXIT_FAILURE);
+ }
+ assert (block_status_cb_freed == 1);
+ assert (completion_cb_freed == 1);
+
+ nbd_kill_subprocess (nbd, 0);
+ nbd_close (nbd);
+
exit (EXIT_SUCCESS);
}
diff --git a/tests/newstyle-limited.c b/tests/newstyle-limited.c
index fd89620..4479a14 100644
--- a/tests/newstyle-limited.c
+++ b/tests/newstyle-limited.c
@@ -84,6 +84,17 @@ list_cb (void *opaque, const char *name, const char *description)
exit (EXIT_FAILURE);
}
+static bool list_freed = false;
+static void
+free_list_cb (void *opaque)
+{
+ if (list_freed) {
+ fprintf (stderr, "%s: list callback mistakenly freed twice", progname);
+ exit (EXIT_FAILURE);
+ }
+ list_freed = true;
+}
+
int
main (int argc, char *argv[])
{
@@ -144,10 +155,15 @@ main (int argc, char *argv[])
fprintf (stderr, "unexpected state after negotiating\n");
exit (EXIT_FAILURE);
}
- if (nbd_opt_list (nbd, (nbd_list_callback) { .callback = list_cb }) != -1) {
+ if (nbd_opt_list (nbd, (nbd_list_callback) { .callback = list_cb,
+ .free = free_list_cb }) != -1) {
fprintf (stderr, "nbd_opt_list: expected failure\n");
exit (EXIT_FAILURE);
}
+ if (!list_freed) {
+ fprintf (stderr, "nbd_opt_list: list closure memory leak\n");
+ exit (EXIT_FAILURE);
+ }
if (nbd_get_errno () != ENOTSUP) {
fprintf (stderr, "%s: wrong errno value after failed opt_list\n",
argv[0]);
exit (EXIT_FAILURE);
--
2.28.0