On platforms which support it (only Linux currently) nbdkit can act as
a vsock server. Guests running on the host see a raw NBD socket which
they can connect to by opening an AF_VSOCK connection. (Currently
libnbd is the only NBD client which supports this.)
The current limitations are:
* There is no access control. Any guest which has vsock enabled can
open the socket.
* nbdkit can only listen on either TCP/IP or AF_VSOCK, not both at
the same time. (The same currently applies to TCP/IP vs AF_UNIX so
this is not a new restriction).
* Lacks a test because you cannot use vsock to communicate host to
host.
See:
https://wiki.qemu.org/Features/VirtioVsock
Thanks: Stefan Hajnoczi and Eric Blake
---
configure.ac | 2 ++
docs/nbdkit-service.pod | 47 +++++++++++++++++++++++++-
docs/nbdkit.pod | 5 +++
docs/synopsis.txt | 2 +-
server/internal.h | 2 ++
server/main.c | 37 +++++++++++++++++---
server/options.h | 2 ++
server/sockets.c | 75 +++++++++++++++++++++++++++++++++++++++++
8 files changed, 165 insertions(+), 7 deletions(-)
diff --git a/configure.ac b/configure.ac
index 83eefb6..bd34b8f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -203,6 +203,8 @@ AC_CHECK_HEADERS([\
sys/prctl.h \
sys/procctl.h])
+AC_CHECK_HEADERS([linux/vm_sockets.h], [], [], [#include <sys/socket.h>])
+
dnl Check for functions in libc, all optional.
AC_CHECK_FUNCS([\
accept4 \
diff --git a/docs/nbdkit-service.pod b/docs/nbdkit-service.pod
index ad5ca50..e743628 100644
--- a/docs/nbdkit-service.pod
+++ b/docs/nbdkit-service.pod
@@ -82,6 +82,50 @@ Using I<--log=syslog> forces all messages to go to the system
log.
Debug messages (I<-v>/I<--verbose>) always go to standard error and
are never sent to the system log.
+=head1 AF_VSOCK
+
+On Linux nbdkit supports the C<AF_VSOCK> address family / protocol.
+This allows you to serve NBD devices into virtual machines without
+using a regular network connection.
+
+B<Note> that this is different from the usual case where you present
+NBD as a virtual block device to a guest (which the guest sees as
+something like a SATA or virtio-scsi disk). With C<AF_VSOCK> the
+virtual machine sees a raw NBD socket which it can connect to by
+opening an C<AF_VSOCK> connection. Only libnbd supports C<AF_VSOCK>
+NBD client connections at the time of writing (2019). For more about
+this protocol, see
L<https://wiki.qemu.org/Features/VirtioVsock>
+
+=head2 AF_VSOCK example
+
+To set up an C<AF_VSOCK> server, use for example:
+
+ nbdkit --vsock [--port PORT] memory 1G
+
+The optional I<-p>/I<--port> argument is used to change the
+C<AF_VSOCK> port number. These port numbers exist in a different
+namespace from TCP/IP port numbers. Also unlike TCP, the port numbers
+are 32 bit. The default port is 10809.
+
+The guest that wishes to access nbdkit must be configured for
+virtio-vsock. On the qemu command line use:
+
+ qemu ... -device vhost-vsock-pci,id=vhost-vsock-pci0
+
+For libvirt add this element to the C<<< <devices> >>> section:
+
+ <vsock/>
+
+If you see the error C<unable to open vhost-vsock device> then you may
+have to unload the VMCI transport on the host:
+
+ modprobe -r vmw_vsock_vmci_transport
+
+Once nbdkit and the guest are running, from inside the guest you can
+connect to nbdkit on the host using libnbd:
+
+ nbdsh -c 'h.connect_vsock(2, 10809)' -c 'print(h.get_size())'
+
=head1 ENVIRONMENT VARIABLES
=over 4
@@ -102,7 +146,8 @@ L<systemd(1)>,
L<systemd.socket(5)>,
L<syslog(3)>,
L<rsyslogd(8)>,
-L<journalctl(1)>.
+L<journalctl(1)>,
+L<nbdsh(1)>.
=head1 AUTHORS
diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod
index 0fa3061..094cfca 100644
--- a/docs/nbdkit.pod
+++ b/docs/nbdkit.pod
@@ -444,6 +444,11 @@ into the background (but not required).
Print the version number of nbdkit and exit.
+=item B<--vsock>
+
+Use the AF_VSOCK protocol (instead of TCP/IP). Use I<-p>/I<--port> to
+choose the AF_VSOCK port (default 10809). See L<nbdkit-service(1)/AF_VSOCK>.
+
=back
=head1 PLUGIN NAME
diff --git a/docs/synopsis.txt b/docs/synopsis.txt
index 5fc57fd..a6b6028 100644
--- a/docs/synopsis.txt
+++ b/docs/synopsis.txt
@@ -12,7 +12,7 @@ nbdkit [-D|--debug PLUGIN|FILTER.FLAG=N]
[--tls-certificates /path/to/certificates]
[--tls-psk /path/to/pskfile] [--tls-verify-peer]
[-U|--unix SOCKET] [-u|--user USER]
- [-v|--verbose] [-V|--version]
+ [-v|--verbose] [-V|--version] [--vsock]
PLUGIN [[KEY=]VALUE [KEY=VALUE [...]]]
nbdkit --dump-config
diff --git a/server/internal.h b/server/internal.h
index 167da59..5e11e1a 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -454,6 +454,8 @@ extern int *bind_unix_socket (size_t *)
__attribute__((__nonnull__ (1)));
extern int *bind_tcpip_socket (size_t *)
__attribute__((__nonnull__ (1)));
+extern int *bind_vsock (size_t *)
+ __attribute__((__nonnull__ (1)));
extern void accept_incoming_connections (int *socks, size_t nr_socks)
__attribute__((__nonnull__ (1)));
extern void free_listening_sockets (int *socks, size_t nr_socks)
diff --git a/server/main.c b/server/main.c
index 5623149..115fa98 100644
--- a/server/main.c
+++ b/server/main.c
@@ -45,6 +45,11 @@
#include <syslog.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/socket.h>
+
+#ifdef HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#endif
#include <pthread.h>
@@ -85,6 +90,7 @@ bool tls_verify_peer; /* --tls-verify-peer */
char *unixsocket; /* -U */
const char *user, *group; /* -u & -g */
bool verbose; /* -v */
+bool vsock; /* --vsock */
unsigned int socket_activation /* $LISTEN_FDS and $LISTEN_PID set */;
/* The currently loaded plugin. */
@@ -329,6 +335,16 @@ main (int argc, char *argv[])
tls_verify_peer = true;
break;
+ case VSOCK_OPTION:
+#ifdef AF_VSOCK
+ vsock = true;
+ break;
+#else
+ fprintf (stderr, "%s: AF_VSOCK is not supported on this platform\n",
+ program_name);
+ exit (EXIT_FAILURE);
+#endif
+
case 'e':
exportname = optarg;
if (strnlen (exportname, NBD_MAX_STRING + 1) > NBD_MAX_STRING) {
@@ -826,15 +842,22 @@ start_serving (void)
size_t nr_socks;
size_t i;
- /* If the user has mixed up -p/-U/-s options, then give an error.
+ /* If the user has mixed up -p/--run/-s/-U/--vsock options, then
+ * give an error.
*
* XXX Actually the server could easily be extended to handle both
* TCP/IP and Unix sockets, or even multiple TCP/IP ports.
*/
- if ((port && unixsocket) || (port && listen_stdin) ||
- (unixsocket && listen_stdin) || (listen_stdin && run)) {
+ if ((port && unixsocket) ||
+ (port && listen_stdin) ||
+ (unixsocket && listen_stdin) ||
+ (listen_stdin && run) ||
+ (vsock && unixsocket) ||
+ (vsock && listen_stdin) ||
+ (vsock && run)) {
fprintf (stderr,
- "%s: -p, -U and -s options cannot appear at the same time\n",
+ "%s: -p, --run, -s, -U or --vsock options cannot be used "
+ "in this combination\n",
program_name);
exit (EXIT_FAILURE);
}
@@ -873,9 +896,13 @@ start_serving (void)
return;
}
- /* Handling multiple connections on TCP/IP or a Unix domain socket. */
+ /* Handling multiple connections on TCP/IP, Unix domain socket or
+ * AF_VSOCK.
+ */
if (unixsocket)
socks = bind_unix_socket (&nr_socks);
+ else if (vsock)
+ socks = bind_vsock (&nr_socks);
else
socks = bind_tcpip_socket (&nr_socks);
diff --git a/server/options.h b/server/options.h
index c74e0b8..56dda10 100644
--- a/server/options.h
+++ b/server/options.h
@@ -55,6 +55,7 @@ enum {
TLS_CERTIFICATES_OPTION,
TLS_PSK_OPTION,
TLS_VERIFY_PEER_OPTION,
+ VSOCK_OPTION,
};
static const char *short_options = "D:e:fg:i:nop:P:rst:u:U:vV";
@@ -100,6 +101,7 @@ static const struct option long_options[] = {
{ "user", required_argument, NULL, 'u' },
{ "verbose", no_argument, NULL, 'v' },
{ "version", no_argument, NULL, 'V' },
+ { "vsock", no_argument, NULL, VSOCK_OPTION },
{ NULL },
};
diff --git a/server/sockets.c b/server/sockets.c
index 3514c69..2af5600 100644
--- a/server/sockets.c
+++ b/server/sockets.c
@@ -35,6 +35,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
+#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <poll.h>
@@ -47,6 +48,10 @@
#include <netinet/tcp.h>
#include <netdb.h>
+#ifdef HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#endif
+
#ifdef HAVE_LIBSELINUX
#include <selinux/selinux.h>
#endif
@@ -247,6 +252,76 @@ bind_tcpip_socket (size_t *nr_socks)
return socks;
}
+/* Create the listening AF_VSOCK socket (used when --vsock is given).
+ *
+ * The socket is bound to VMADDR_CID_ANY on the port named by the
+ * -p/--port setting, or on port 10809 if no port was given, and is
+ * then put into the listening state.
+ *
+ * On success this returns a malloc'd array containing the single
+ * listening file descriptor and stores 1 in *nr_socks.  Every
+ * failure path exits the process, so NULL is never returned.
+ */
+int *
+bind_vsock (size_t *nr_socks)
+{
+#ifdef AF_VSOCK
+  uint32_t vsock_port;
+  int sock;
+  int *ret;
+  struct sockaddr_vm addr;
+
+  if (port == NULL)
+    vsock_port = 10809;
+  else {
+    /* --port parameter must be numeric for vsock, unless
+     * /etc/services is extended but that seems unlikely. XXX
+     */
+    if (nbdkit_parse_uint32_t ("port", port, &vsock_port) == -1)
+      exit (EXIT_FAILURE);
+  }
+
+  /* Any platform with AF_VSOCK also supports SOCK_CLOEXEC so there is
+   * no fallback path.
+   */
+  sock = socket (AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
+  if (sock == -1) {
+    perror ("bind_vsock: socket");
+    exit (EXIT_FAILURE);
+  }
+
+  memset (&addr, 0, sizeof addr);
+  addr.svm_family = AF_VSOCK;
+  addr.svm_cid = VMADDR_CID_ANY;
+  addr.svm_port = vsock_port;
+
+  if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    /* Use a fixed prefix: 'unixsocket' is unrelated to vsock mode. */
+    perror ("bind_vsock: bind");
+    exit (EXIT_FAILURE);
+  }
+
+  if (listen (sock, SOMAXCONN) == -1) {
+    perror ("listen");
+    exit (EXIT_FAILURE);
+  }
+
+  ret = malloc (sizeof *ret);
+  if (!ret) {
+    perror ("malloc");
+    exit (EXIT_FAILURE);
+  }
+  ret[0] = sock;
+  *nr_socks = 1;
+
+  /* It's not easy to get the actual CID here.
+   * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
+   * opening /dev/vsock which is not accessible to non-root users.
+   * bind above doesn't update the sockaddr. Using getsockname
+   * doesn't work.
+   */
+  debug ("bound to vsock any:%" PRIu32, addr.svm_port);
+
+  return ret;
+
+#else
+  /* Can't happen because main() checks if AF_VSOCK is defined and
+   * prevents vsock from being set, so this function can never be
+   * called.
+   */
+  abort ();
+#endif
+}
+
+
void
free_listening_sockets (int *socks, size_t nr_socks)
{
--
2.23.0