On 9/30/19 11:32 AM, Richard W.M. Jones wrote:
This adds new APIs for running a local NBD server and connecting to
it
using systemd socket activation (instead of stdin/stdout).
This includes interop tests against nbdkit and qemu-nbd which I
believe are the only NBD servers supporting socket activation. (If we
find others then we can add more interop tests in future.)
The upstream spec for systemd socket activation is here:
http://0pointer.de/blog/projects/socket-activation.html
---
+++ b/generator/states-connect-socket-activation.c
+/* This is baked into the systemd socket activation API. */
+#define FIRST_SOCKET_ACTIVATION_FD 3
+
+/* Prepare environment for calling execvpe when doing systemd socket
+ * activation. Takes the current environment and copies it. Removes
+ * any existing LISTEN_PID or LISTEN_FDS and replaces them with new
+ * variables. env[0] is "LISTEN_PID=..." which is filled in by
+ * CONNECT_SA.START, and env[1] is "LISTEN_FDS=1".
+ */
I know that getenv()/setenv()/putenv() tend to prefer sorted environ,
but I also think that exec HAS to handle a hand-built environ that is
not sorted, so you should be okay with this shortcut.
+static char **
+prepare_socket_activation_environment (void)
+{
+ char **env = NULL;
+ char *p0 = NULL, *p1 = NULL;
+ size_t i, len;
+ void *vp;
+
+ p0 = strdup ("LISTEN_PID=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+ if (p0 == NULL)
+ goto err;
+ p1 = strdup ("LISTEN_FDS=1");
+ if (p1 == NULL)
+ goto err;
+
+ /* Copy the current environment. */
+ env = nbd_internal_copy_string_list (environ);
POSIX says the external symbol 'environ' has to exist for linking
purposes, but also states that no standard header is required to declare
it. You may want to add an 'extern char **environ;' line before this
function for portability. On the other hand, gnulib documents that on
newer Mac OS, even that doesn't work, where the solution is '#define
environ (*_NSGetEnviron())'. I guess we'll deal with it when somebody
actually reports compilation failure.
+
+ /* Remove any existing LISTEN_PID or LISTEN_FDS instances. */
+ for (i = 2; env[i] != NULL; ++i) {
+ if (strncmp (env[i], "LISTEN_PID=", 11) == 0 ||
+ strncmp (env[i], "LISTEN_FDS=", 11) == 0) {
+ memmove (&env[i], &env[i+1],
+ sizeof (char *) * (nbd_internal_string_list_length (&env[i])));
+ i--;
+ }
+ }
Lots of O(N) traversals of the list, but this probably isn't our hot
spot, and so probably not worth optimizing further.
+STATE_MACHINE {
+ CONNECT_SA.START:
+#ifdef HAVE_EXECVPE
+ size_t len;
+ int s;
+ struct sockaddr_un addr;
+ char **env;
+ pid_t pid;
+ int flags;
+
+ assert (!h->sock);
+ assert (h->argv);
+ assert (h->argv[0]);
+
+ /* Use /tmp instead of TMPDIR because we must ensure the path is
+ * short enough to store in the sockaddr_un. On some platforms this
+ * may cause problems so we may need to revisit it. XXX
+ */
+ h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX");
+ if (h->sa_tmpdir == NULL) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "strdup");
+ return 0;
+ }
+ if (mkdtemp (h->sa_tmpdir) == NULL) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "mkdtemp");
+ /* Avoid cleanup in nbd_close. */
+ free (h->sa_tmpdir);
+ h->sa_tmpdir = NULL;
+ return 0;
+ }
+
+ h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock");
+ if (h->sa_sockpath == NULL) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "strdup");
+ return 0;
+ }
+
+ len = strlen (h->sa_tmpdir);
+ memcpy (h->sa_sockpath, h->sa_tmpdir, len);
Is it worth using:
asprintf (&h->sa_sockpath, "%s/sock", h->sa_tmpdir);
for less code? asprintf might not be standard, but we already require
execvpe, which probably means asprintf is available. But your
open-coded variant works, too.
+
+ s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (s == -1) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "socket");
+ return 0;
+ }
I guess the child process can add O_NONBLOCK if they want it.
+
+ addr.sun_family = AF_UNIX;
+ memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1);
+ if (bind (s, (struct sockaddr *) &addr, sizeof addr) == -1) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "bind: %s", h->sa_sockpath);
+ close (s);
+ return 0;
+ }
+
+ if (listen (s, 1) == -1) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "listen");
+ close (s);
+ return 0;
+ }
+
+ env = prepare_socket_activation_environment ();
+ if (!env) {
+ SET_NEXT_STATE (%.DEAD);
+ close (s);
+ return 0;
+ }
+
+ pid = fork ();
+ if (pid == -1) {
+ SET_NEXT_STATE (%.DEAD);
+ set_error (errno, "fork");
+ close (s);
+ nbd_internal_free_string_list (env);
+ return 0;
+ }
+ if (pid == 0) { /* child - run command */
+ if (s != FIRST_SOCKET_ACTIVATION_FD) {
+ dup2 (s, FIRST_SOCKET_ACTIVATION_FD);
+ close (s);
+ }
+ else {
+ /* We must unset CLOEXEC on the fd. (dup2 above does this
+ * implicitly because CLOEXEC is set on the fd, not on the
+ * socket).
+ */
+ flags = fcntl (s, F_GETFD, 0);
+ if (flags == -1) {
+ nbd_internal_fork_safe_perror ("fcntl: F_GETFD");
+ _exit (126);
+ }
+ if (fcntl (s, F_SETFD, flags & ~FD_CLOEXEC) == -1) {
+ nbd_internal_fork_safe_perror ("fcntl: F_SETFD");
+ _exit (126);
+ }
+ }
+
Looks correct.
+ char buf[32];
+ const char *v =
+ nbd_internal_fork_safe_itoa ((long) getpid (), buf, sizeof buf);
+ strcpy (&env[0][11], v);
We're using the magic '11' in several places, maybe it's worth a #define
to make it obvious it is strlen("LISTEN_PID=") ?
+
+ /* Restore SIGPIPE back to SIG_DFL. */
+ signal (SIGPIPE, SIG_DFL);
+
+ execvpe (h->argv[0], h->argv, env);
+ nbd_internal_fork_safe_perror (h->argv[0]);
+ if (errno == ENOENT)
+ _exit (127);
+ else
+ _exit (126);
+ }
+
+ /* Parent. */
+ close (s);
+ nbd_internal_free_string_list (env);
+ h->pid = pid;
+
+ h->connaddrlen = sizeof addr;
+ memcpy (&h->connaddr, &addr, h->connaddrlen);
+ SET_NEXT_STATE (%^CONNECT.START);
+ return 0;
+
+#else /* !HAVE_EXECVPE */
+ SET_NEXT_STATE (%.DEAD)
+ set_error (ENOTSUP, "platform does not support socket activation");
+ return 0;
+#endif
We probably ought to add a matching nbd_supports_socket_activation()
feature function.
Or, it would be possible to create a fallback for execvpe() on platforms
that lack it by using execlpe() and our own path-walker utility
function. Can be done as a followup patch. If we do that, then the
mere presence of LIBNBD_HAVE_NBD_CONNECT_SA is witness enough of the
functionality, rather than needing a runtime probe.
+++ b/lib/connect.c
+
+int
+nbd_unlocked_aio_connect_socket_activation (struct nbd_handle *h, char **argv)
+{
+ char **copy;
+
+ copy = nbd_internal_copy_string_list (argv);
+ if (!copy) {
+ set_error (errno, "copy_string_list");
+ return -1;
+ }
+
+ if (h->argv)
+ nbd_internal_free_string_list (h->argv);
How can h->argv ever be previously set?
+ h->argv = copy;
+
+ return nbd_internal_run (h, cmd_connect_sa);
+}
diff --git a/lib/handle.c b/lib/handle.c
index 2af25fe..a7f2c79 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -129,6 +129,16 @@ nbd_close (struct nbd_handle *h)
free_cmd_list (h->cmds_in_flight);
free_cmd_list (h->cmds_done);
nbd_internal_free_string_list (h->argv);
+ if (h->sa_sockpath) {
+ if (h->pid > 0)
+ kill (h->pid, SIGTERM);
+ unlink (h->sa_sockpath);
+ free (h->sa_sockpath);
+ }
+ if (h->sa_tmpdir) {
+ rmdir (h->sa_tmpdir);
+ free (h->sa_tmpdir);
+ }
free (h->unixsocket);
free (h->hostname);
free (h->port);
Somewhat pre-existing: we have a waitpid() here (good, so we don't hang
on to a zombie process), but we are relying on the child process to
gracefully go away (whether for connect_command when stdin closes, or
for connect_sa on receipt of SIGTERM). Do we need a retry loop that
escalates to SIGKILL if the child process does not quickly respond to
the initial condition? On the other hand, the fact that our waitpid()
blocks until the child changes status means that if a child ever wedges,
the fact that we wedge too gives some visibility to the client that it's
not libnbd's fault and that they need to get the bug fixed in their
child process.
I think it is ready to push. We may still need further tweaks, but
that's often the case.
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3226
Virtualization:
qemu.org |
libvirt.org