On 04/01/22 20:13, Richard W.M. Jones wrote:
On Fri, Apr 01, 2022 at 12:28:08PM +0200, Laszlo Ersek wrote:
> Consider the call tree again:
>
> start_conversion() [conversion.c]
> start_nbd_server() [nbd.c]
> open_listening_socket() [nbd.c]
> bind_tcpip_socket() [nbd.c]
> socket()
> bind()
> listen()
> start_nbdkit() [nbd.c]
> fork()
> /* child */
> execlp()
> close()
> wait_for_nbd_server_to_start() [nbd.c]
> connect_to_nbdkit() [nbd.c]
> connect()
> open_data_connection() [ssh.c]
> /* "-R 0:localhost:<port>" */
>
> Right after we fork(), inside (indirectly) start_nbd_server(), two file
> descriptors exist that point to the same file description that points to
> the same listening socket. In the child, execlp() preserves one of the
> file descriptors (that's the point of socket activation), while in the
> rest of start_nbd_server(), the parent close()s the other file descriptor.
>
> By the time the parent reaches connect(), there will have been no
> opportunity for the listening socket not to exist (unless, in the child,
> execlp() fails, or the new image exits very quickly). Therefore we don't
> need to test the child (by connecting to it and reading NBDMAGIC); we can
> just expose the listening socket to "ssh -R".
>
> Thus, remove the wait_for_nbd_server_to_start() and connect_to_nbdkit()
> functions.
>
> This eliminates the following -- harmless -- message during conversion,
> emitted by the nbdkit server child on the p2v machine, to the serial port:
>
>> nbdkit: file[1]: error: reading initial client flags: conn->recv:
>> Connection reset by peer
>
> nbdkit complains because, after sending "NBDMAGIC", it would like to send
> "IHAVEOPT" and the handshake flags as well to the client. But the client
> -- namely wait_for_nbd_server_to_start() -- stops reading from the socket
> right after seeing "NBDMAGIC" (8 bytes). So that's a "broken
pipe"
> situation (RST) for the sending party (the server).
>
> Ref:
https://listman.redhat.com/archives/libguestfs/2022-March/028528.html
> Suggested-by: Richard W.M. Jones <rjones(a)redhat.com>
> Signed-off-by: Laszlo Ersek <lersek(a)redhat.com>
> ---
>
> Notes:
> v2:
> - new patch [Rich]
>
> p2v.h | 1 -
> conversion.c | 6 -
> nbd.c | 116 --------------------
> 3 files changed, 123 deletions(-)
>
> diff --git a/p2v.h b/p2v.h
> index c55f64317dde..20c4ac7e506a 100644
> --- a/p2v.h
> +++ b/p2v.h
> @@ -113,7 +113,6 @@ extern int scp_file (struct config *config, const char *target,
const char *loca
> /* nbd.c */
> extern void test_nbd_server (void);
> extern pid_t start_nbd_server (int *port, const char *device);
> -extern int wait_for_nbd_server_to_start (int port);
> const char *get_nbd_error (void);
>
> /* utils.c */
> diff --git a/conversion.c b/conversion.c
> index 8bc4119fb47c..b9af47deda74 100644
> --- a/conversion.c
> +++ b/conversion.c
> @@ -155,267 +155,261 @@ int
> start_conversion (struct config *config,
> void (*notify_ui) (int type, const char *data))
> {
> int ret = -1;
> int status;
> size_t i, len;
> const size_t nr_disks = guestfs_int_count_strings (config->disks);
> time_t now;
> struct tm tm;
> CLEANUP_FREE struct data_conn *data_conns = NULL;
> CLEANUP_FREE char *remote_dir = NULL;
> char tmpdir[] = "/tmp/p2v.XXXXXX";
> char name_file[] = "/tmp/p2v.XXXXXX/name";
> char physical_xml_file[] = "/tmp/p2v.XXXXXX/physical.xml";
> char wrapper_script[] = "/tmp/p2v.XXXXXX/virt-v2v-wrapper.sh";
> char dmesg_file[] = "/tmp/p2v.XXXXXX/dmesg";
> char lscpu_file[] = "/tmp/p2v.XXXXXX/lscpu";
> char lspci_file[] = "/tmp/p2v.XXXXXX/lspci";
> char lsscsi_file[] = "/tmp/p2v.XXXXXX/lsscsi";
> char lsusb_file[] = "/tmp/p2v.XXXXXX/lsusb";
> char p2v_version_file[] = "/tmp/p2v.XXXXXX/p2v-version";
> int inhibit_fd = -1;
>
> #if DEBUG_STDERR
> print_config (config, stderr);
> fprintf (stderr, "\n");
> #endif
>
> set_control_h (NULL);
> set_running (1);
> set_cancel_requested (0);
>
> inhibit_fd = inhibit_power_saving ();
> #ifdef DEBUG_STDERR
> if (inhibit_fd == -1)
> fprintf (stderr, "warning: virt-p2v cannot inhibit power saving during
conversion.\n");
> #endif
>
> data_conns = malloc (sizeof (struct data_conn) * nr_disks);
> if (data_conns == NULL)
> error (EXIT_FAILURE, errno, "malloc");
>
> for (i = 0; config->disks[i] != NULL; ++i) {
> data_conns[i].h = NULL;
> data_conns[i].nbd_pid = 0;
> data_conns[i].nbd_remote_port = -1;
> }
>
> /* Start the data connections and NBD server processes, one per disk. */
> for (i = 0; config->disks[i] != NULL; ++i) {
> int nbd_local_port;
> CLEANUP_FREE char *device = NULL;
>
> if (config->disks[i][0] == '/') {
> device = strdup (config->disks[i]);
> if (!device) {
> perror ("strdup");
> cleanup_data_conns (data_conns, nr_disks);
> exit (EXIT_FAILURE);
> }
> }
> else if (asprintf (&device, "/dev/%s", config->disks[i]) == -1)
{
> perror ("asprintf");
> cleanup_data_conns (data_conns, nr_disks);
> exit (EXIT_FAILURE);
> }
>
> if (notify_ui) {
> CLEANUP_FREE char *msg;
> if (asprintf (&msg,
> _("Starting local NBD server for %s ..."),
> config->disks[i]) == -1)
> error (EXIT_FAILURE, errno, "asprintf");
> notify_ui (NOTIFY_STATUS, msg);
> }
>
> /* Start NBD server listening on the given port number. */
> data_conns[i].nbd_pid = start_nbd_server (&nbd_local_port, device);
> if (data_conns[i].nbd_pid == 0) {
> set_conversion_error ("NBD server error: %s", get_nbd_error ());
> goto out;
> }
>
> - /* Wait for NBD server to start up and listen. */
> - if (wait_for_nbd_server_to_start (nbd_local_port) == -1) {
> - set_conversion_error ("NBD server error: %s", get_nbd_error ());
> - goto out;
> - }
> -
> if (notify_ui) {
> CLEANUP_FREE char *msg;
> if (asprintf (&msg,
> _("Opening data connection for %s ..."),
> config->disks[i]) == -1)
> error (EXIT_FAILURE, errno, "asprintf");
> notify_ui (NOTIFY_STATUS, msg);
> }
>
> /* Open the SSH data connection, with reverse port forwarding
> * back to the NBD server.
> */
> data_conns[i].h = open_data_connection (config, nbd_local_port,
> &data_conns[i].nbd_remote_port);
> if (data_conns[i].h == NULL) {
> const char *err = get_ssh_error ();
>
> set_conversion_error ("could not open data connection over SSH to the
conversion server: %s", err);
> goto out;
> }
>
> #if DEBUG_STDERR
> fprintf (stderr,
> "%s: data connection for %s: SSH remote port %d, local port
%d\n",
> g_get_prgname (), device,
> data_conns[i].nbd_remote_port,
> nbd_local_port);
> #endif
> }
>
> /* Create a remote directory name which will be used for libvirt
> * XML, log files and other stuff. We don't delete this directory
> * after the run because (a) it's useful for debugging and (b) it
> * only contains small files.
> *
> * NB: This path MUST NOT require shell quoting.
> */
> time (&now);
> gmtime_r (&now, &tm);
> if (asprintf (&remote_dir,
> "/tmp/virt-p2v-%04d%02d%02d-XXXXXXXX",
> tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday) == -1) {
> perror ("asprintf");
> cleanup_data_conns (data_conns, nr_disks);
> exit (EXIT_FAILURE);
> }
> len = strlen (remote_dir);
> guestfs_int_random_string (&remote_dir[len-8], 8);
> if (notify_ui)
> notify_ui (NOTIFY_LOG_DIR, remote_dir);
>
> /* Generate the local temporary directory. */
> if (mkdtemp (tmpdir) == NULL) {
> perror ("mkdtemp");
> cleanup_data_conns (data_conns, nr_disks);
> exit (EXIT_FAILURE);
> }
> memcpy (name_file, tmpdir, strlen (tmpdir));
> memcpy (physical_xml_file, tmpdir, strlen (tmpdir));
> memcpy (wrapper_script, tmpdir, strlen (tmpdir));
> memcpy (dmesg_file, tmpdir, strlen (tmpdir));
> memcpy (lscpu_file, tmpdir, strlen (tmpdir));
> memcpy (lspci_file, tmpdir, strlen (tmpdir));
> memcpy (lsscsi_file, tmpdir, strlen (tmpdir));
> memcpy (lsusb_file, tmpdir, strlen (tmpdir));
> memcpy (p2v_version_file, tmpdir, strlen (tmpdir));
>
> /* Generate the static files. */
> generate_name (config, name_file);
> generate_physical_xml (config, data_conns, physical_xml_file);
> generate_wrapper_script (config, remote_dir, wrapper_script);
> generate_system_data (dmesg_file,
> lscpu_file, lspci_file, lsscsi_file, lsusb_file);
> generate_p2v_version_file (p2v_version_file);
>
> /* Open the control connection. This also creates remote_dir. */
> if (notify_ui)
> notify_ui (NOTIFY_STATUS, _("Setting up the control connection
..."));
>
> set_control_h (start_remote_connection (config, remote_dir));
> if (control_h == NULL) {
> set_conversion_error ("could not open control connection over SSH to the
conversion server: %s",
> get_ssh_error ());
> goto out;
> }
>
> /* Copy the static files to the remote dir. */
>
> /* These three files must not fail, so check for errors here. */
> if (scp_file (config, remote_dir,
> name_file, physical_xml_file, wrapper_script, NULL) == -1) {
> set_conversion_error ("scp: %s: %s",
> remote_dir, get_ssh_error ());
> goto out;
> }
>
> /* It's not essential that these files are copied, so ignore errors. */
> ignore_value (scp_file (config, remote_dir,
> dmesg_file, lscpu_file, lspci_file, lsscsi_file,
> lsusb_file, p2v_version_file, NULL));
>
> /* Do the conversion. This runs until virt-v2v exits. */
> if (notify_ui)
> notify_ui (NOTIFY_STATUS, _("Doing conversion ..."));
>
> if (mexp_printf (control_h,
> /* To simplify things in the wrapper script, it
> * writes virt-v2v's exit status to
> * /remote_dir/status, and here we read that and
> * exit the ssh shell with the same status.
> */
> "%s/virt-v2v-wrapper.sh; "
> "exit $(< %s/status)\n",
> remote_dir, remote_dir) == -1) {
> set_conversion_error ("mexp_printf: virt-v2v: %m");
> goto out;
> }
>
> /* Read output from the virt-v2v process and echo it through the
> * notify function, until virt-v2v closes the connection.
> */
> while (!is_cancel_requested ()) {
> char buf[257];
> ssize_t r;
>
> r = read (mexp_get_fd (control_h), buf, sizeof buf - 1);
> if (r == -1) {
> /* See comment about this in miniexpect.c. */
> if (errno == EIO)
> break; /* EOF */
> set_conversion_error ("read: %m");
> goto out;
> }
> if (r == 0)
> break; /* EOF */
> buf[r] = '\0';
> if (notify_ui)
> notify_ui (NOTIFY_REMOTE_MESSAGE, buf);
> }
>
> if (is_cancel_requested ()) {
> set_conversion_error ("cancelled by user");
> if (notify_ui)
> notify_ui (NOTIFY_STATUS, _("Conversion cancelled by user."));
> goto out;
> }
>
> if (notify_ui)
> notify_ui (NOTIFY_STATUS, _("Control connection closed by remote."));
>
> ret = 0;
> out:
> if (control_h) {
> mexp_h *h = control_h;
> set_control_h (NULL);
> status = mexp_close (h);
>
> if (status == -1) {
> set_conversion_error ("mexp_close: %m");
> ret = -1;
> }
> else if (ret == 0 &&
> WIFEXITED (status) &&
> WEXITSTATUS (status) != 0) {
> set_conversion_error ("virt-v2v exited with status %d",
> WEXITSTATUS (status));
> ret = -1;
> }
> }
> cleanup_data_conns (data_conns, nr_disks);
>
> if (inhibit_fd >= 0)
> close (inhibit_fd);
>
> set_running (0);
>
> return ret;
> }
> diff --git a/nbd.c b/nbd.c
> index cd1b9ab6197b..39ce16d2b1d9 100644
> --- a/nbd.c
> +++ b/nbd.c
> @@ -50,7 +50,6 @@ static bool nbd_exit_with_parent;
> static pid_t start_nbdkit (const char *device, int *fds, size_t nr_fds);
> static int open_listening_socket (int **fds, size_t *nr_fds);
> static int bind_tcpip_socket (const char *port, int **fds, size_t *nr_fds);
> -static int connect_to_nbdkit (int dest_port);
>
> static char *nbd_error;
>
> @@ -279,202 +278,87 @@ static int
> bind_tcpip_socket (const char *port, int **fds_rtn, size_t *nr_fds_rtn)
> {
> struct addrinfo *ai = NULL;
> struct addrinfo hints;
> struct addrinfo *a;
> int err;
> int *fds = NULL;
> size_t nr_fds;
> int addr_in_use = 0;
>
> memset (&hints, 0, sizeof hints);
> hints.ai_family = AF_UNSPEC;
> hints.ai_flags = AI_PASSIVE;
> hints.ai_socktype = SOCK_STREAM;
>
> err = getaddrinfo ("localhost", port, &hints, &ai);
> if (err != 0) {
> #if DEBUG_STDERR
> fprintf (stderr, "%s: getaddrinfo: localhost: %s: %s", g_get_prgname
(),
> port, gai_strerror (err));
> #endif
> return -1;
> }
>
> nr_fds = 0;
>
> for (a = ai; a != NULL; a = a->ai_next) {
> int sock, opt;
>
> sock = socket (a->ai_family, a->ai_socktype, a->ai_protocol);
> if (sock == -1)
> error (EXIT_FAILURE, errno, "socket");
>
> opt = 1;
> if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof opt) == -1)
> perror ("setsockopt: SO_REUSEADDR");
>
> #ifdef IPV6_V6ONLY
> if (a->ai_family == PF_INET6) {
> if (setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof opt) == -1)
> perror ("setsockopt: IPv6 only");
> }
> #endif
>
> if (bind (sock, a->ai_addr, a->ai_addrlen) == -1) {
> if (errno == EADDRINUSE) {
> addr_in_use = 1;
> close (sock);
> continue;
> }
> perror ("bind");
> close (sock);
> continue;
> }
>
> if (listen (sock, SOMAXCONN) == -1) {
> perror ("listen");
> close (sock);
> continue;
> }
>
> nr_fds++;
> fds = realloc (fds, sizeof (int) * nr_fds);
> if (!fds)
> error (EXIT_FAILURE, errno, "realloc");
> fds[nr_fds-1] = sock;
> }
>
> freeaddrinfo (ai);
>
> if (nr_fds == 0 && addr_in_use) {
> #if DEBUG_STDERR
> fprintf (stderr, "%s: unable to bind to localhost:%s: %s\n",
> g_get_prgname (), port, strerror (EADDRINUSE));
> #endif
> return -1;
> }
>
> #if DEBUG_STDERR
> fprintf (stderr, "%s: bound to localhost:%s (%zu socket(s))\n",
> g_get_prgname (), port, nr_fds);
> #endif
>
> *fds_rtn = fds;
> *nr_fds_rtn = nr_fds;
> return 0;
> }
> -
> -/**
> - * Wait for nbdkit to start and be listening for connections.
> - */
> -int
> -wait_for_nbd_server_to_start (int port)
> -{
> - int sockfd = -1;
> - int result = -1;
> - time_t start_t, now_t;
> - struct timespec half_sec = { .tv_sec = 0, .tv_nsec = 500000000 };
> - struct timeval timeout = { .tv_usec = 0 };
> - char magic[8]; /* NBDMAGIC */
> - size_t bytes_read = 0;
> - ssize_t recvd;
> -
> - time (&start_t);
> -
> - for (;;) {
> - time (&now_t);
> -
> - if (now_t - start_t >= WAIT_NBD_TIMEOUT) {
> - set_nbd_error ("timed out waiting for nbdkit to start");
> - goto cleanup;
> - }
> -
> - sockfd = connect_to_nbdkit (port);
> - if (sockfd >= 0)
> - break;
> -
> - nanosleep (&half_sec, NULL);
> - }
> -
> - time (&now_t);
> - timeout.tv_sec = (start_t + WAIT_NBD_TIMEOUT) - now_t;
> - if (setsockopt (sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) ==
-1) {
> - set_nbd_error ("waiting for nbdkit to start: setsockopt(SO_RCVTIMEO):
%m");
> - goto cleanup;
> - }
> -
> - do {
> - recvd = recv (sockfd, magic, sizeof magic - bytes_read, 0);
> -
> - if (recvd == -1) {
> - set_nbd_error ("waiting for nbdkit to start: recv: %m");
> - goto cleanup;
> - }
> -
> - bytes_read += recvd;
> - } while (bytes_read < sizeof magic);
> -
> - if (memcmp (magic, "NBDMAGIC", sizeof magic) != 0) {
> - set_nbd_error ("waiting for nbdkit to start: "
> - "'NBDMAGIC' was not received from nbdkit");
> - goto cleanup;
> - }
> -
> - result = 0;
> - cleanup:
> - if (sockfd >= 0)
> - close (sockfd);
> -
> - return result;
> -}
> -
> -/**
> - * Connect to C<localhost:dest_port>, resolving the address using
> - * L<getaddrinfo(3)>.
> - *
> - * This may involve multiple connections - to IPv4 and IPv6 for
> - * instance.
> - */
> -static int
> -connect_to_nbdkit (int dest_port)
> -{
> - struct addrinfo hints;
> - struct addrinfo *results, *rp;
> - char dest_port_str[16];
> - int r, sockfd = -1;
> -
> - snprintf (dest_port_str, sizeof dest_port_str, "%d", dest_port);
> -
> - memset (&hints, 0, sizeof hints);
> - hints.ai_family = AF_UNSPEC; /* allow IPv4 or IPv6 */
> - hints.ai_socktype = SOCK_STREAM;
> - hints.ai_flags = AI_NUMERICSERV; /* numeric dest port number */
> - hints.ai_protocol = 0; /* any protocol */
> -
> - r = getaddrinfo ("localhost", dest_port_str, &hints, &results);
> - if (r != 0) {
> - set_nbd_error ("getaddrinfo: localhost/%s: %s",
> - dest_port_str, gai_strerror (r));
> - return -1;
> - }
> -
> - for (rp = results; rp != NULL; rp = rp->ai_next) {
> - sockfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
> - if (sockfd == -1)
> - continue;
> -
> - /* Connect. */
> - if (connect (sockfd, rp->ai_addr, rp->ai_addrlen) == -1) {
> - set_nbd_error ("waiting for nbdkit to start: "
> - "connect to localhost/%s: %m", dest_port_str);
> - close (sockfd);
> - sockfd = -1;
> - continue;
> - }
> -
> - break;
> - }
> -
> - freeaddrinfo (results);
> - return sockfd;
> -}
I think it's fine to take this patch if you want. My main concern was
the this check was required for some reason (to actually wait for
nbdkit to start listening), but AIUI with socket activation the socket
is always listening, so the check is never necessary.
Reviewed-by: Richard W.M. Jones <rjones(a)redhat.com>