On 07/27/22 18:30, Richard W.M. Jones wrote:
qemu-nbd doesn't call gnutls_bye to cleanly shut down the
connection
after we send NBD_CMD_DISC. When copying from a qemu-nbd server (or
any operation which calls nbd_shutdown) you will see errors like this:
$ nbdcopy nbds://foo?tls-certificates=/var/tmp/pki null:
nbds://foo?tls-certificates=/var/tmp/pki: nbd_shutdown: gnutls_record_recv: The TLS
connection was non-properly terminated.
Relatedly you may also see:
nbd_shutdown: gnutls_record_recv: Error in the pull function.
This commit suppresses the error in the case where we know that we
have shut down writes (which happens after NBD_CMD_DISC has been sent
on the wire).
---
interop/interop.c | 9 ---------
lib/crypto.c | 17 +++++++++++++++++
lib/internal.h | 1 +
3 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/interop/interop.c b/interop/interop.c
index 036545bd82..cce9407aa1 100644
--- a/interop/interop.c
+++ b/interop/interop.c
@@ -226,19 +226,10 @@ main (int argc, char *argv[])
/* XXX In future test more operations here. */
-#if !TLS
- /* XXX qemu doesn't shut down the connection nicely (using
- * gnutls_bye) and because of this the following call will fail
- * with:
- *
- * nbd_shutdown: gnutls_record_recv: The TLS connection was
- * non-properly terminated.
- */
if (nbd_shutdown (nbd, 0) == -1) {
fprintf (stderr, "%s\n", nbd_get_error ());
exit (EXIT_FAILURE);
}
-#endif
nbd_close (nbd);
diff --git a/lib/crypto.c b/lib/crypto.c
index ffba4bda9b..a2832b1585 100644
--- a/lib/crypto.c
+++ b/lib/crypto.c
@@ -189,6 +189,22 @@ tls_recv (struct nbd_handle *h, struct socket *sock, void *buf,
size_t len)
errno = EAGAIN;
return -1;
}
+ if (h->tls_shut_writes &&
+ (r == GNUTLS_E_PULL_ERROR || r == GNUTLS_E_PREMATURE_TERMINATION)) {
+ /* qemu-nbd doesn't call gnutls_bye to cleanly shut down the
+ * connection after we send NBD_CMD_DISC, instead it simply
+ * closes the connection. On the client side we see
+ * "gnutls_record_recv: The TLS connection was non-properly
+ * terminated" or "gnutls_record_recv: Error in the pull
+ * function.".
+ *
+ * If we see these errors after we shut down the write side
+ * (h->tls_shut_writes), which happens after we have sent
+ * NBD_CMD_DISC on the wire, downgrade them to a debug message.
+ */
+ debug (h, "gnutls_record_recv: %s", gnutls_strerror (r));
+ return 0; /* EOF */
+ }
set_error (0, "gnutls_record_recv: %s", gnutls_strerror (r));
errno = EIO;
return -1;
@@ -236,6 +252,7 @@ tls_shut_writes (struct nbd_handle *h, struct socket *sock)
return false;
if (r != 0)
debug (h, "ignoring gnutls_bye failure: %s", gnutls_strerror (r));
+ h->tls_shut_writes = true;
return sock->u.tls.oldsock->ops->shut_writes (h, sock->u.tls.oldsock);
}
diff --git a/lib/internal.h b/lib/internal.h
index 4121a5cdaa..8dbe2e4568 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -307,6 +307,7 @@ struct nbd_handle {
struct command *reply_cmd;
bool disconnect_request; /* True if we've queued NBD_CMD_DISC */
+ bool tls_shut_writes; /* Used by lib/crypto.c to track disconnect. */
};
struct meta_context {
Looks reasonable.
Acked-by: Laszlo Ersek <lersek(a)redhat.com>