We currently track the PIDs of the NBD servers (nbdkit only, at this
point) so that we can kill and reap them in cleanup_data_conns().
cleanup_data_conns() is called from three kinds of contexts:
(1) in start_conversion(), followed immediately by exit (EXIT_FAILURE);
(2) at the end of start_conversion() in GUI mode, where we return to
start_conversion_thread(), and then the conversion thread -- a
detached thread -- exits almost immediately;
(3) at the end of start_conversion() in kernel mode. Here we return to
kernel_conversion(), then main(), and soon exit the process.
As shown above, whenever we intend to kill and reap the nbdkit processes,
in any of these contexts, the parent *thread's* disappearance is also
imminent.
This gives rise to the idea of (a) removing the tracking & explicit
killing of nbdkit processes by PID, and (b) telling the kernel to kill the
nbdkit processes *upon* parent thread disappearance.
And that's what the "--exit-with-parent" option of nbdkit does -- at least
on Linux <
https://libguestfs.org/nbdkit-captive.1.html#EXIT-WITH-PARENT>
--, so replace the tracking with "--exit-with-parent".
... I have no clue how to test this patch (or how the patch affects
non-Linux host OSes).
Ref:
https://listman.redhat.com/archives/libguestfs/2022-March/028475.html
Suggested-by: Richard W.M. Jones <rjones(a)redhat.com>
Signed-off-by: Laszlo Ersek <lersek(a)redhat.com>
---
conversion.c | 12 +++---------
nbd.c | 9 +++++----
p2v.h | 1 -
3 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/conversion.c b/conversion.c
index 8bc4119fb47c..da162d8690e3 100644
--- a/conversion.c
+++ b/conversion.c
@@ -155,267 +155,267 @@ int
start_conversion (struct config *config,
void (*notify_ui) (int type, const char *data))
{
int ret = -1;
int status;
size_t i, len;
const size_t nr_disks = guestfs_int_count_strings (config->disks);
time_t now;
struct tm tm;
CLEANUP_FREE struct data_conn *data_conns = NULL;
CLEANUP_FREE char *remote_dir = NULL;
char tmpdir[] = "/tmp/p2v.XXXXXX";
char name_file[] = "/tmp/p2v.XXXXXX/name";
char physical_xml_file[] = "/tmp/p2v.XXXXXX/physical.xml";
char wrapper_script[] = "/tmp/p2v.XXXXXX/virt-v2v-wrapper.sh";
char dmesg_file[] = "/tmp/p2v.XXXXXX/dmesg";
char lscpu_file[] = "/tmp/p2v.XXXXXX/lscpu";
char lspci_file[] = "/tmp/p2v.XXXXXX/lspci";
char lsscsi_file[] = "/tmp/p2v.XXXXXX/lsscsi";
char lsusb_file[] = "/tmp/p2v.XXXXXX/lsusb";
char p2v_version_file[] = "/tmp/p2v.XXXXXX/p2v-version";
int inhibit_fd = -1;
#if DEBUG_STDERR
print_config (config, stderr);
fprintf (stderr, "\n");
#endif
set_control_h (NULL);
set_running (1);
set_cancel_requested (0);
inhibit_fd = inhibit_power_saving ();
#ifdef DEBUG_STDERR
if (inhibit_fd == -1)
fprintf (stderr, "warning: virt-p2v cannot inhibit power saving during
conversion.\n");
#endif
data_conns = malloc (sizeof (struct data_conn) * nr_disks);
if (data_conns == NULL)
error (EXIT_FAILURE, errno, "malloc");
for (i = 0; config->disks[i] != NULL; ++i) {
data_conns[i].h = NULL;
- data_conns[i].nbd_pid = 0;
data_conns[i].nbd_remote_port = -1;
}
/* Start the data connections and NBD server processes, one per disk. */
for (i = 0; config->disks[i] != NULL; ++i) {
int nbd_local_port;
CLEANUP_FREE char *device = NULL;
+ pid_t nbd_pid;
if (config->disks[i][0] == '/') {
device = strdup (config->disks[i]);
if (!device) {
perror ("strdup");
cleanup_data_conns (data_conns, nr_disks);
exit (EXIT_FAILURE);
}
}
else if (asprintf (&device, "/dev/%s", config->disks[i]) == -1) {
perror ("asprintf");
cleanup_data_conns (data_conns, nr_disks);
exit (EXIT_FAILURE);
}
if (notify_ui) {
CLEANUP_FREE char *msg;
if (asprintf (&msg,
_("Starting local NBD server for %s ..."),
config->disks[i]) == -1)
error (EXIT_FAILURE, errno, "asprintf");
notify_ui (NOTIFY_STATUS, msg);
}
/* Start NBD server listening on the given port number. */
- data_conns[i].nbd_pid = start_nbd_server (&nbd_local_port, device);
- if (data_conns[i].nbd_pid == 0) {
+ nbd_pid = start_nbd_server (&nbd_local_port, device);
+ if (nbd_pid == 0) {
set_conversion_error ("NBD server error: %s", get_nbd_error ());
goto out;
}
/* Wait for NBD server to start up and listen. */
if (wait_for_nbd_server_to_start (nbd_local_port) == -1) {
set_conversion_error ("NBD server error: %s", get_nbd_error ());
goto out;
}
if (notify_ui) {
CLEANUP_FREE char *msg;
if (asprintf (&msg,
_("Opening data connection for %s ..."),
config->disks[i]) == -1)
error (EXIT_FAILURE, errno, "asprintf");
notify_ui (NOTIFY_STATUS, msg);
}
/* Open the SSH data connection, with reverse port forwarding
* back to the NBD server.
*/
data_conns[i].h = open_data_connection (config, nbd_local_port,
&data_conns[i].nbd_remote_port);
if (data_conns[i].h == NULL) {
const char *err = get_ssh_error ();
set_conversion_error ("could not open data connection over SSH to the
conversion server: %s", err);
goto out;
}
#if DEBUG_STDERR
fprintf (stderr,
"%s: data connection for %s: SSH remote port %d, local port
%d\n",
g_get_prgname (), device,
data_conns[i].nbd_remote_port,
nbd_local_port);
#endif
}
/* Create a remote directory name which will be used for libvirt
* XML, log files and other stuff. We don't delete this directory
* after the run because (a) it's useful for debugging and (b) it
* only contains small files.
*
* NB: This path MUST NOT require shell quoting.
*/
time (&now);
gmtime_r (&now, &tm);
if (asprintf (&remote_dir,
"/tmp/virt-p2v-%04d%02d%02d-XXXXXXXX",
tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday) == -1) {
perror ("asprintf");
cleanup_data_conns (data_conns, nr_disks);
exit (EXIT_FAILURE);
}
len = strlen (remote_dir);
guestfs_int_random_string (&remote_dir[len-8], 8);
if (notify_ui)
notify_ui (NOTIFY_LOG_DIR, remote_dir);
/* Generate the local temporary directory. */
if (mkdtemp (tmpdir) == NULL) {
perror ("mkdtemp");
cleanup_data_conns (data_conns, nr_disks);
exit (EXIT_FAILURE);
}
memcpy (name_file, tmpdir, strlen (tmpdir));
memcpy (physical_xml_file, tmpdir, strlen (tmpdir));
memcpy (wrapper_script, tmpdir, strlen (tmpdir));
memcpy (dmesg_file, tmpdir, strlen (tmpdir));
memcpy (lscpu_file, tmpdir, strlen (tmpdir));
memcpy (lspci_file, tmpdir, strlen (tmpdir));
memcpy (lsscsi_file, tmpdir, strlen (tmpdir));
memcpy (lsusb_file, tmpdir, strlen (tmpdir));
memcpy (p2v_version_file, tmpdir, strlen (tmpdir));
/* Generate the static files. */
generate_name (config, name_file);
generate_physical_xml (config, data_conns, physical_xml_file);
generate_wrapper_script (config, remote_dir, wrapper_script);
generate_system_data (dmesg_file,
lscpu_file, lspci_file, lsscsi_file, lsusb_file);
generate_p2v_version_file (p2v_version_file);
/* Open the control connection. This also creates remote_dir. */
if (notify_ui)
notify_ui (NOTIFY_STATUS, _("Setting up the control connection ..."));
set_control_h (start_remote_connection (config, remote_dir));
if (control_h == NULL) {
set_conversion_error ("could not open control connection over SSH to the
conversion server: %s",
get_ssh_error ());
goto out;
}
/* Copy the static files to the remote dir. */
/* These three files must not fail, so check for errors here. */
if (scp_file (config, remote_dir,
name_file, physical_xml_file, wrapper_script, NULL) == -1) {
set_conversion_error ("scp: %s: %s",
remote_dir, get_ssh_error ());
goto out;
}
/* It's not essential that these files are copied, so ignore errors. */
ignore_value (scp_file (config, remote_dir,
dmesg_file, lscpu_file, lspci_file, lsscsi_file,
lsusb_file, p2v_version_file, NULL));
/* Do the conversion. This runs until virt-v2v exits. */
if (notify_ui)
notify_ui (NOTIFY_STATUS, _("Doing conversion ..."));
if (mexp_printf (control_h,
/* To simplify things in the wrapper script, it
* writes virt-v2v's exit status to
* /remote_dir/status, and here we read that and
* exit the ssh shell with the same status.
*/
"%s/virt-v2v-wrapper.sh; "
"exit $(< %s/status)\n",
remote_dir, remote_dir) == -1) {
set_conversion_error ("mexp_printf: virt-v2v: %m");
goto out;
}
/* Read output from the virt-v2v process and echo it through the
* notify function, until virt-v2v closes the connection.
*/
while (!is_cancel_requested ()) {
char buf[257];
ssize_t r;
r = read (mexp_get_fd (control_h), buf, sizeof buf - 1);
if (r == -1) {
/* See comment about this in miniexpect.c. */
if (errno == EIO)
break; /* EOF */
set_conversion_error ("read: %m");
goto out;
}
if (r == 0)
break; /* EOF */
buf[r] = '\0';
if (notify_ui)
notify_ui (NOTIFY_REMOTE_MESSAGE, buf);
}
if (is_cancel_requested ()) {
set_conversion_error ("cancelled by user");
if (notify_ui)
notify_ui (NOTIFY_STATUS, _("Conversion cancelled by user."));
goto out;
}
if (notify_ui)
notify_ui (NOTIFY_STATUS, _("Control connection closed by remote."));
ret = 0;
out:
if (control_h) {
mexp_h *h = control_h;
set_control_h (NULL);
status = mexp_close (h);
if (status == -1) {
set_conversion_error ("mexp_close: %m");
ret = -1;
}
else if (ret == 0 &&
WIFEXITED (status) &&
WEXITSTATUS (status) != 0) {
set_conversion_error ("virt-v2v exited with status %d",
WEXITSTATUS (status));
ret = -1;
}
}
cleanup_data_conns (data_conns, nr_disks);
if (inhibit_fd >= 0)
close (inhibit_fd);
set_running (0);
return ret;
}
@@ -436,25 +436,19 @@ static void
cleanup_data_conns (struct data_conn *data_conns, size_t nr)
{
size_t i;
for (i = 0; i < nr; ++i) {
if (data_conns[i].h != NULL) {
/* Because there is no SSH prompt (ssh -N), the only way to kill
* these ssh connections is to send a signal. Just closing the
* pipe doesn't do anything.
*/
kill (mexp_get_pid (data_conns[i].h), SIGHUP);
mexp_close (data_conns[i].h);
}
-
- if (data_conns[i].nbd_pid > 0) {
- /* Kill NBD process and clean up. */
- kill (data_conns[i].nbd_pid, SIGTERM);
- waitpid (data_conns[i].nbd_pid, NULL, 0);
- }
}
}
/**
* Write the guest name into C<filename>.
*/
diff --git a/nbd.c b/nbd.c
index e30b4a2195cb..dcedd0a52dce 100644
--- a/nbd.c
+++ b/nbd.c
@@ -187,50 +187,51 @@ static pid_t
start_nbdkit (const char *device, int *fds, size_t nr_fds)
{
pid_t pid;
CLEANUP_FREE char *file_str = NULL;
#if DEBUG_STDERR
fprintf (stderr, "starting nbdkit for %s using socket activation\n",
device);
#endif
if (asprintf (&file_str, "file=%s", device) == -1)
error (EXIT_FAILURE, errno, "asprintf");
pid = fork ();
if (pid == -1) {
set_nbd_error ("fork: %m");
return 0;
}
if (pid == 0) { /* Child. */
close (0);
if (open ("/dev/null", O_RDONLY) == -1) {
perror ("open: /dev/null");
_exit (EXIT_FAILURE);
}
socket_activation (fds, nr_fds);
execlp ("nbdkit",
"nbdkit",
- "-r", /* readonly (vital!) */
- "-f", /* don't fork */
- "file", /* file plugin */
- file_str, /* a device like file=/dev/sda */
+ "-r", /* readonly (vital!) */
+ "--exit-with-parent", /* don't fork, and exit when the parent
thread
+ * does */
+ "file", /* file plugin */
+ file_str, /* a device like file=/dev/sda */
NULL);
perror ("nbdkit");
_exit (EXIT_FAILURE);
}
/* Parent. */
return pid;
}
/**
* Open a listening socket on an unused local port and return it.
*
* Returns the port number on success or C<-1> on error.
*
* The file descriptor(s) bound are returned in the array *fds, *nr_fds.
* The caller must free the array.
*/
diff --git a/p2v.h b/p2v.h
index c55f64317dde..0846a2f959fa 100644
--- a/p2v.h
+++ b/p2v.h
@@ -85,7 +85,6 @@ extern void gui_conversion (struct config *);
/* conversion.c */
struct data_conn { /* Data per NBD connection / physical disk. */
mexp_h *h; /* miniexpect handle to ssh */
- pid_t nbd_pid; /* NBD server PID */
int nbd_remote_port; /* remote NBD port on conversion server */
};
--
2.19.1.3.g30247aa5d201