Socket activation (aka systemd socket activation) is a simple protocol
that lets you pass in an opened, listening socket to a server.
Supporting socket activation allows you to use a modern superserver to
serve infrequent NBD requests without needing nbdkit to be running the
whole time.
Although the protocol was invented by systemd, it has been implemented
in a few other places, and the protocol is almost trivially simple.
This implementation is based on the one in libvirt.
Thanks: Dan Berrange, libvirt team
---
.gitignore | 1 +
docs/nbdkit.pod | 67 +++++++++++-
src/main.c | 111 ++++++++++++++++++++
tests/Makefile.am | 10 +-
tests/test-socket-activation.c | 226 +++++++++++++++++++++++++++++++++++++++++
5 files changed, 411 insertions(+), 4 deletions(-)
create mode 100644 tests/test-socket-activation.c
diff --git a/.gitignore b/.gitignore
index 4a51dea..f8918a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@ Makefile.in
/tests/test-perl
/tests/test-python
/tests/test-ruby
+/tests/test-socket-activation
/tests/test-streaming
/tests/test-xz
/test-driver
diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod
index 2b11eed..ad02ec3 100644
--- a/docs/nbdkit.pod
+++ b/docs/nbdkit.pod
@@ -192,9 +192,11 @@ This option implies I<--foreground>.
Don't fork. Handle a single NBD connection on stdin/stdout. After
stdin closes, the server exits.
-You can use this option to run nbdkit from inetd, systemd or similar
+You can use this option to run nbdkit from inetd or similar
superservers; or just for testing; or if you want to run nbdkit in a
-non-conventional way.
+non-conventional way. Note that if you want to run nbdkit from
+systemd, then it may be better to use L</SOCKET ACTIVATION> instead of
+this option.
This option implies I<--foreground>.
@@ -263,6 +265,48 @@ To list all the options supported by a plugin, do:
nbdkit --help file
+=head1 SOCKET ACTIVATION
+
+nbdkit supports socket activation (sometimes called systemd socket
+activation). This is a simple protocol where instead of nbdkit itself
+opening the listening socket(s), the parent process (typically
+systemd) passes in pre-opened file descriptors. Socket activation
+lets you serve infrequent NBD requests using a superserver without
+needing nbdkit to be running the whole time.
+
+Socket activation is triggered when both the C<LISTEN_FDS> and
+C<LISTEN_PID> environment variables are set. In this mode using
+I<-i>, I<-p>, I<--run>, I<-s> or I<-U> flags on the command line is
+illegal and will cause an error. Also in this mode nbdkit does not
+fork into the background (ie. I<-f> is implied).
+
+=head2 Using socket activation with systemd
+
+To use nbdkit with socket activation from systemd, create a unit file
+ending in C<.socket> (eg. C</etc/systemd/system/nbdkit.socket>)
+containing:
+
+ [Unit]
+ Description=NBDKit Network Block Device server
+
+ [Socket]
+ ListenStream=0.0.0.0:10809
+
+ [Install]
+ WantedBy=sockets.target
+
+There are various formats for the C<ListenStream> key. See
+L<systemd.socket(5)> for more information.
+
+Also create a service unit (eg. C</etc/systemd/system/nbdkit.service>)
+containing:
+
+ [Service]
+ ExecStart=/usr/sbin/nbdkit file file=/path/to/serve
+
+For more information on systemd and socket activation, see
+L<http://0pointer.de/blog/projects/socket-activation.html>
+
=head1 CAPTIVE NBDKIT
You can run nbdkit as a "captive process", using the I<--run> option.
@@ -382,8 +426,23 @@ This signal is ignored.
=back
+=head1 ENVIRONMENT VARIABLES
+
+=over 4
+
+=item C<LISTEN_FDS>
+
+=item C<LISTEN_PID>
+
+If present in the environment when nbdkit starts up, these trigger
+L</SOCKET ACTIVATION>.
+
+=back
+
=head1 SEE ALSO
+Other nbdkit manual pages:
+
L<nbdkit-plugin(3)>,
L<nbdkit-curl-plugin(1)>,
L<nbdkit-example1-plugin(1)>,
@@ -398,6 +457,10 @@ L<nbdkit-python-plugin(3)>,
L<nbdkit-vddk-plugin(1)>.
L<nbdkit-xz-plugin(1)>.
+Other manual pages of interest:
+
+L<systemd.socket(5)>.
+
=head1 AUTHORS
Richard W.M. Jones
diff --git a/src/main.c b/src/main.c
index 9453cce..e935da2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -54,6 +54,8 @@
#include "nbdkit-plugin.h"
#include "internal.h"
+#define FIRST_SOCKET_ACTIVATION_FD 3 /* defined by systemd ABI */
+
static char *make_random_fifo (void);
static void open_plugin_so (const char *filename);
static void start_serving (void);
@@ -64,6 +66,7 @@ static void write_pidfile (void);
static void fork_into_background (void);
static uid_t parseuser (const char *);
static gid_t parsegroup (const char *);
+static unsigned int get_socket_activation (void);
const char *exportname; /* -e */
int foreground; /* -f */
@@ -77,6 +80,7 @@ int listen_stdin; /* -s */
char *unixsocket; /* -U */
const char *user, *group; /* -u & -g */
int verbose; /* -v */
+unsigned int socket_activation /* $LISTEN_FDS and $LISTEN_PID set */;
volatile int quit;
@@ -157,6 +161,9 @@ main (int argc, char *argv[])
tls_init ();
+ /* Returns 0 if no socket activation, or the number of FDs. */
+ socket_activation = get_socket_activation ();
+
for (;;) {
c = getopt_long (argc, argv, short_options, long_options, &option_index);
if (c == -1)
@@ -172,6 +179,11 @@ main (int argc, char *argv[])
dump_plugin = 1;
}
else if (strcmp (long_options[option_index].name, "run") == 0) {
+ if (socket_activation) {
+ fprintf (stderr, "%s: cannot use socket activation with --run flag\n",
+ program_name);
+ exit (EXIT_FAILURE);
+ }
run = optarg;
foreground = 1;
}
@@ -196,6 +208,11 @@ main (int argc, char *argv[])
break;
case 'i':
+ if (socket_activation) {
+ fprintf (stderr, "%s: cannot use socket activation with -i flag\n",
+ program_name);
+ exit (EXIT_FAILURE);
+ }
ipaddr = optarg;
break;
@@ -214,6 +231,11 @@ main (int argc, char *argv[])
break;
case 'p':
+ if (socket_activation) {
+ fprintf (stderr, "%s: cannot use socket activation with -p flag\n",
+ program_name);
+ exit (EXIT_FAILURE);
+ }
port = optarg;
break;
@@ -222,10 +244,20 @@ main (int argc, char *argv[])
break;
case 's':
+ if (socket_activation) {
+ fprintf (stderr, "%s: cannot use socket activation with -s flag\n",
+ program_name);
+ exit (EXIT_FAILURE);
+ }
listen_stdin = 1;
break;
case 'U':
+ if (socket_activation) {
+ fprintf (stderr, "%s: cannot use socket activation with -U flag\n",
+ program_name);
+ exit (EXIT_FAILURE);
+ }
if (strcmp (optarg, "-") == 0)
unixsocket = make_random_fifo ();
else
@@ -454,6 +486,7 @@ start_serving (void)
{
int *socks;
size_t nr_socks;
+ size_t i;
/* If the user has mixed up -p/-U/-s options, then give an error.
*
@@ -470,6 +503,27 @@ start_serving (void)
set_up_signals ();
+ /* Socket activation -- we are handling connections on pre-opened
+ * file descriptors [FIRST_SOCKET_ACTIVATION_FD ..
+ * FIRST_SOCKET_ACTIVATION_FD+nr_socks-1].
+ */
+ if (socket_activation) {
+ nr_socks = socket_activation;
+ debug ("using socket activation, nr_socks = %zu", nr_socks);
+ socks = malloc (sizeof (int) * nr_socks);
+ if (socks == NULL) {
+ perror ("malloc");
+ exit (EXIT_FAILURE);
+ }
+ for (i = 0; i < nr_socks; ++i)
+ socks[i] = FIRST_SOCKET_ACTIVATION_FD + i;
+ change_user ();
+ write_pidfile ();
+ accept_incoming_connections (socks, nr_socks);
+ free_listening_sockets (socks, nr_socks); /* also closes them */
+ return;
+ }
+
/* Handling a single connection on stdin/stdout. */
if (listen_stdin) {
change_user ();
@@ -752,3 +806,60 @@ parsegroup (const char *id)
return grp->gr_gid;
}
+
+/* Detect socket activation from the environment.
+ *
+ * Returns 0 if socket activation is not in use, otherwise the number
+ * of file descriptors that were passed in.  Those descriptors are
+ * numbered consecutively starting at FIRST_SOCKET_ACTIVATION_FD.
+ *
+ * See also virGetListenFDs in libvirt.org:src/util/virutil.c
+ */
+static unsigned int
+get_socket_activation (void)
+{
+  const char *value;
+  unsigned int listen_pid, count, n;
+
+  /* $LISTEN_PID must be present, numeric, and name this process. */
+  value = getenv ("LISTEN_PID");
+  if (!value)
+    return 0;
+  if (sscanf (value, "%u", &listen_pid) != 1) {
+    fprintf (stderr, "%s: malformed %s environment variable (ignored)\n",
+             program_name, "LISTEN_PID");
+    return 0;
+  }
+  if (listen_pid != getpid ()) {
+    fprintf (stderr, "%s: %s was not for us (ignored)\n",
+             program_name, "LISTEN_PID");
+    return 0;
+  }
+
+  /* $LISTEN_FDS must be present and numeric. */
+  value = getenv ("LISTEN_FDS");
+  if (!value)
+    return 0;
+  if (sscanf (value, "%u", &count) != 1) {
+    fprintf (stderr, "%s: malformed %s environment variable (ignored)\n",
+             program_name, "LISTEN_FDS");
+    return 0;
+  }
+
+  /* Remove both variables so that they are not passed on to any
+   * child processes we might start.
+   */
+  unsetenv ("LISTEN_FDS");
+  unsetenv ("LISTEN_PID");
+
+  /* Mark every passed-in descriptor close-on-exec so that it does not
+   * leak into child processes.  If that fails the descriptor is
+   * probably invalid, which means socket activation has gone wrong,
+   * so give up.
+   */
+  for (n = 0; n < count; ++n) {
+    int fd = FIRST_SOCKET_ACTIVATION_FD + n;
+
+    if (fcntl (fd, F_SETFD, FD_CLOEXEC) != -1)
+      continue;
+
+    fprintf (stderr, "%s: socket activation: "
+             "invalid file descriptor fd = %d: %m\n",
+             program_name, fd);
+    exit (EXIT_FAILURE);
+  }
+
+  return count;
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index afb9975..389bcfa 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -83,7 +83,14 @@ TESTS = \
test-single.sh \
test-captive.sh \
test-random-sock.sh \
- test-ipv4.sh
+ test-ipv4.sh \
+ test-socket-activation
+
+check_PROGRAMS = \
+ test-socket-activation
+
+test_socket_activation_SOURCES = test-socket-activation.c
+test_socket_activation_CFLAGS = $(WARNINGS_CFLAGS)
# In-depth tests need libguestfs, since that is a convenient way to
# drive qemu.
@@ -103,7 +110,6 @@ check_LTLIBRARIES = libtest.la
libtest_la_SOURCES = test.c test.h
libtest_la_CFLAGS = $(WARNINGS_CFLAGS)
-check_PROGRAMS =
check_DATA =
check_SCRIPTS =
diff --git a/tests/test-socket-activation.c b/tests/test-socket-activation.c
new file mode 100644
index 0000000..5ecdde1
--- /dev/null
+++ b/tests/test-socket-activation.c
@@ -0,0 +1,226 @@
+/* nbdkit
+ * Copyright (C) 2017 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Test socket activation.
+ *
+ * We cannot use the test framework for this since the framework
+ * always uses the -U flag which is incompatible with socket
+ * activation. Unfortunately this does mean we duplicate some code
+ * from the test framework.
+ *
+ * It's *almost* possible to test this from a shell script
+ * (cf. test-ipv4.sh) but as far as I can tell setting LISTEN_PID
+ * correctly is impossible from shell.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#define FIRST_SOCKET_ACTIVATION_FD 3
+
+#define NBDKIT_START_TIMEOUT 30 /* seconds */
+
+#define NBDKIT_PLUGIN(name) \
+ "../plugins/" name "/.libs/nbdkit-" name "-plugin.so"
+
+/* Declare program_name. */
+#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME == 1
+#include <errno.h>
+#define program_name program_invocation_short_name
+#else
+#define program_name "nbdkit"
+#endif
+
+static char tmpdir[] = "/tmp/nbdkitXXXXXX";
+static char sockpath[] = "/tmp/nbdkitXXXXXX/sock";
+static char pidpath[] = "/tmp/nbdkitXXXXXX/pid";
+
+static pid_t pid = 0;
+
+/* atexit handler: stop nbdkit if it is running, then remove the
+ * temporary files and the directory which contained them.  Errors
+ * are not checked.
+ */
+static void
+cleanup (void)
+{
+  char *const paths[] = { pidpath, sockpath };
+  size_t k;
+
+  /* pid is only positive in the parent after a successful fork. */
+  if (pid > 0)
+    kill (pid, SIGTERM);
+
+  for (k = 0; k < sizeof paths / sizeof paths[0]; ++k)
+    unlink (paths[k]);
+
+  /* The directory must be removed last, once it is empty. */
+  rmdir (tmpdir);
+}
+
+/* Test driver.
+ *
+ * Creates a listening Unix domain socket in a temporary directory,
+ * places it on FIRST_SOCKET_ACTIVATION_FD, then forks and execs nbdkit
+ * with LISTEN_FDS and LISTEN_PID set.  Once nbdkit has written its
+ * pidfile, connects to the socket by path and checks that the server
+ * sends the "NBDMAGIC" string.
+ *
+ * Exits with EXIT_SUCCESS if the test passed, else EXIT_FAILURE.  The
+ * child and the temporary files are removed by the atexit handler.
+ */
+int
+main (int argc, char *argv[])
+{
+  int sock;
+  struct sockaddr_un addr;
+  char pid_str[16];
+  size_t i, len;
+  char magic[8];
+  ssize_t r;
+  int exited;
+
+  if (mkdtemp (tmpdir) == NULL) {
+    perror ("mkdtemp");
+    exit (EXIT_FAILURE);
+  }
+  /* sockpath and pidpath begin with the same template as tmpdir, so
+   * overwrite that prefix with the directory name mkdtemp chose.
+   */
+  len = strlen (tmpdir);
+  memcpy (sockpath, tmpdir, len);
+  memcpy (pidpath, tmpdir, len);
+
+  atexit (cleanup);
+
+  /* Open the listening socket which will be passed into nbdkit. */
+  sock = socket (AF_UNIX, SOCK_STREAM /* NB do not use SOCK_CLOEXEC */, 0);
+  if (sock == -1) {
+    perror ("socket");
+    exit (EXIT_FAILURE);
+  }
+
+  /* Clear the whole structure so that bind and connect are never
+   * handed uninitialized bytes.
+   */
+  memset (&addr, 0, sizeof addr);
+  addr.sun_family = AF_UNIX;
+  len = strlen (sockpath);
+  memcpy (addr.sun_path, sockpath, len+1 /* trailing \0 */);
+
+  if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    perror (sockpath);
+    exit (EXIT_FAILURE);
+  }
+
+  if (listen (sock, 1) == -1) {
+    perror ("listen");
+    exit (EXIT_FAILURE);
+  }
+
+  /* The socket has to be on FIRST_SOCKET_ACTIVATION_FD, so move it
+   * there if it was given a different number.
+   */
+  if (sock != FIRST_SOCKET_ACTIVATION_FD) {
+    if (dup2 (sock, FIRST_SOCKET_ACTIVATION_FD) == -1) {
+      perror ("dup2");
+      exit (EXIT_FAILURE);
+    }
+    close (sock);
+    /* Follow the duplicate, otherwise the parent would later close
+     * the stale number a second time and leave the real one open.
+     */
+    sock = FIRST_SOCKET_ACTIVATION_FD;
+  }
+
+  /* Run nbdkit. */
+  pid = fork ();
+  if (pid == -1) {
+    perror ("fork");
+    exit (EXIT_FAILURE);
+  }
+  if (pid == 0) {
+    /* Run nbdkit in the child.  LISTEN_PID is set here, after the
+     * fork, because it has to name the process which execs nbdkit.
+     */
+    setenv ("LISTEN_FDS", "1", 1);
+    snprintf (pid_str, sizeof pid_str, "%d", (int) getpid ());
+    setenv ("LISTEN_PID", pid_str, 1);
+
+    execlp ("../src/nbdkit",
+            "nbdkit",
+            "-P", pidpath,
+            "-o",
+            "-v",
+            NBDKIT_PLUGIN ("example1"), NULL);
+    perror ("exec: nbdkit");
+    _exit (EXIT_FAILURE);
+  }
+
+  /* We don't need the listening socket now. */
+  close (sock);
+
+  /* Wait for the pidfile to turn up, which indicates that nbdkit has
+   * started up successfully and is ready to serve requests.  However
+   * if 'pid' exits in this time it indicates a failure to start up.
+   * Also there is a timeout in case nbdkit hangs.
+   */
+  for (i = 0; i < NBDKIT_START_TIMEOUT; ++i) {
+    exited = waitpid (pid, NULL, WNOHANG) == pid;
+
+    if (!exited && kill (pid, 0) == -1) {
+      if (errno == ESRCH)
+        exited = 1;
+      else
+        perror ("kill");
+    }
+
+    if (exited) {
+      fprintf (stderr,
+               "%s FAILED: nbdkit exited before starting to serve files\n",
+               program_name);
+      pid = 0;                  /* nothing left for cleanup to kill */
+      exit (EXIT_FAILURE);
+    }
+
+    if (access (pidpath, F_OK) == 0)
+      break;
+
+    sleep (1);
+  }
+
+  /* The loop only runs to completion if the pidfile never appeared.
+   * Fail here rather than blocking forever in read below.
+   */
+  if (i == NBDKIT_START_TIMEOUT) {
+    fprintf (stderr,
+             "%s FAILED: timed out waiting for nbdkit to start\n",
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+
+  /* Now nbdkit is supposed to be listening on the Unix domain socket
+   * (which it got via the listening socket that we passed down to it,
+   * not from the path), so we should be able to connect to the Unix
+   * domain socket by its path and receive an NBD magic string.
+   */
+  sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+  if (sock == -1) {
+    perror ("socket");
+    exit (EXIT_FAILURE);
+  }
+
+  /* Reuse addr which was set up above. */
+  if (connect (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    perror (sockpath);
+    exit (EXIT_FAILURE);
+  }
+
+  /* Only a real read error has a meaningful errno; a short read is
+   * reported as a missing magic string instead.
+   */
+  r = read (sock, magic, sizeof magic);
+  if (r == -1) {
+    perror ("read");
+    exit (EXIT_FAILURE);
+  }
+
+  if (r != (ssize_t) sizeof magic ||
+      memcmp (magic, "NBDMAGIC", sizeof magic) != 0) {
+    fprintf (stderr, "%s FAILED: did not read magic string from server\n",
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+
+  close (sock);
+
+  /* Test succeeded. */
+  exit (EXIT_SUCCESS);
+}
--
2.10.2