This patch merely adds the framework to compile the nbd plugin either
as standalone (the version frozen in time to this commit) or as a
client of the brand-new libnbd (a later patch will actually enable
that part). Since libnbd does not yet have a stable API, falling back
to the standalone version makes sense for a while longer.
The configure check requires at least 0.1.3 (at the moment, Fedora 29
has access to pre-built 0.1.4).
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
configure.ac | 19 +
plugins/nbd/nbd-standalone.c | 1369 ++++++++++++++++++++++++++++++++++
plugins/nbd/Makefile.am | 17 +-
3 files changed, 1403 insertions(+), 2 deletions(-)
create mode 100644 plugins/nbd/nbd-standalone.c
diff --git a/configure.ac b/configure.ac
index 2757630f..7d8fbd9f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -711,6 +711,23 @@ AS_IF([test "$with_zlib" != "no"],[
])
AM_CONDITIONAL([HAVE_ZLIB],[test "x$ZLIB_LIBS" != "x"])
+dnl Check for libnbd (only if you want to compile the full nbd plugin).
+AC_ARG_WITH([libnbd],
+ [AS_HELP_STRING([--without-libnbd],
+ [disable nbd plugin @<:@default=check@:>@])],
+ [],
+ [with_libnbd=check])
+AS_IF([test "$with_libnbd" != "no"],[
+ PKG_CHECK_MODULES([LIBNBD], [libnbd >= 0.1.3],[
+ AC_SUBST([LIBNBD_CFLAGS])
+ AC_SUBST([LIBNBD_LIBS])
+ AC_DEFINE([HAVE_LIBNBD],[1],[libnbd found at compile time.])
+ ],
+ [AC_MSG_WARN([libnbd >= 0.1.3 not found, nbd plugin will be crippled])])
+])
+# Conditional is hard-wired off below; placeholder test: "x$LIBNBD_LIBS" != "x"
+AM_CONDITIONAL([HAVE_LIBNBD], [false])
+
dnl Check for liblzma (only if you want to compile the xz filter).
AC_ARG_WITH([liblzma],
[AS_HELP_STRING([--without-liblzma],
@@ -984,6 +1001,8 @@ feature "iso .................................... " \
test "x$HAVE_ISO_TRUE" = "x"
feature "libvirt ................................ " \
test "x$HAVE_LIBVIRT_TRUE" = "x"
+feature "nbd .................................... " \
+ test "x$HAVE_LIBNBD_TRUE" = "x"
feature "ssh .................................... " \
test "x$HAVE_SSH_TRUE" = "x"
feature "tar .................................... " \
diff --git a/plugins/nbd/nbd-standalone.c b/plugins/nbd/nbd-standalone.c
new file mode 100644
index 00000000..d176dd5f
--- /dev/null
+++ b/plugins/nbd/nbd-standalone.c
@@ -0,0 +1,1369 @@
+/* nbdkit
+ * Copyright (C) 2017-2019 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <assert.h>
+#include <pthread.h>
+#include <semaphore.h>
+
+#define NBDKIT_API_VERSION 2
+
+#include <nbdkit-plugin.h>
+#include "protocol.h"
+#include "byte-swapping.h"
+#include "cleanup.h"
+
+/* The per-transaction details.  One of these is allocated for every
+ * request sent to the server and stays on the owning handle's list
+ * until its final reply has been matched. */
+struct transaction {
+ uint64_t cookie; /* sent as the request's handle; used to match replies */
+ sem_t sem; /* posted by the reader thread once the transaction is done */
+ void *buf; /* where read payload is stored, or NULL */
+ uint64_t offset; /* offset of the original request */
+ uint32_t count; /* size of buf in bytes; 0 when buf is NULL */
+ uint32_t err; /* first error seen in any chunk, then the final result */
+ struct nbdkit_extents *extents; /* receives block status results, or NULL */
+ struct transaction *next; /* next entry on the handle's list */
+};
+
+/* The per-connection handle */
+struct handle {
+ /* These fields are read-only once initialized */
+ int fd; /* socket connected to the NBD server */
+ int flags; /* export flags reported by the server during handshake */
+ int64_t size; /* export size reported by the server during handshake */
+ bool structured; /* server acknowledged NBD_OPT_STRUCTURED_REPLY */
+ bool extents; /* server accepted the base:allocation meta context */
+ pthread_t reader; /* thread running nbd_reader for this connection */
+
+ /* Prevents concurrent threads from interleaving writes to server */
+ pthread_mutex_t write_lock;
+
+ pthread_mutex_t trans_lock; /* Covers access to all fields below */
+ struct transaction *trans; /* singly linked list of in-flight transactions */
+ uint64_t unique; /* source of the next transaction cookie */
+ bool dead; /* set once the connection can no longer be used */
+};
+
+/* Connect to server via absolute name of Unix socket; allocated by
+ * nbdkit_absolute_path in nbd_config and released in nbd_unload */
+static char *sockname;
+
+/* Connect to server via TCP socket */
+static const char *hostname;
+static const char *port;
+
+/* Human-readable server description, built in nbd_config_complete and
+ * released in nbd_unload */
+static char *servname;
+
+/* Name of export on remote server, default '', ignored for oldstyle */
+static const char *export;
+
+/* Number of retries */
+static unsigned long retry;
+
+/* True to share single server connection among all clients */
+static bool shared;
+static struct handle *shared_handle;
+
+/* Forward declarations needed by nbd_unload and nbd_config_complete */
+static struct handle *nbd_open_handle (int readonly);
+static void nbd_close_handle (struct handle *h);
+
+/* Release global state: close the shared server connection if one
+ * was opened, then free the strings allocated during configuration. */
+static void
+nbd_unload (void)
+{
+  /* shared_handle is still NULL when nbd_config_complete never ran or
+     could not connect, so never hand NULL to nbd_close_handle. */
+  if (shared && shared_handle)
+    nbd_close_handle (shared_handle);
+  free (sockname);
+  free (servname);
+}
+
+/* Called for each key=value passed on the command line.  This plugin
+ * accepts socket=<sockname> or hostname=<hostname>/port=<port>
+ * (exactly one connection required), and optional parameters
+ * export=<name>, retry=<n> and shared=<bool>.
+ *
+ * Returns 0 when the parameter was recognized and stored, or -1 after
+ * reporting an error through nbdkit_error (unknown key, unparsable
+ * value, or failure to build the absolute socket path).
+ */
+static int
+nbd_config (const char *key, const char *value)
+{
+  char *end;
+  int r;
+
+  if (strcmp (key, "socket") == 0) {
+    /* See FILENAMES AND PATHS in nbdkit-plugin(3) */
+    free (sockname);
+    sockname = nbdkit_absolute_path (value);
+    if (!sockname)
+      return -1;
+  }
+  else if (strcmp (key, "hostname") == 0)
+    hostname = value;
+  else if (strcmp (key, "port") == 0)
+    port = value;
+  else if (strcmp (key, "export") == 0)
+    export = value;
+  else if (strcmp (key, "retry") == 0) {
+    errno = 0;
+    retry = strtoul (value, &end, 0);
+    /* Reject empty input, out-of-range values, and trailing garbage
+       such as "5x" that strtoul would otherwise silently accept. */
+    if (value == end || *end != '\0' || errno) {
+      nbdkit_error ("could not parse retry as integer (%s)", value);
+      return -1;
+    }
+  }
+  else if (strcmp (key, "shared") == 0) {
+    r = nbdkit_parse_bool (value);
+    if (r == -1)
+      return -1;
+    shared = r;
+  }
+  else {
+    nbdkit_error ("unknown parameter '%s'", key);
+    return -1;
+  }
+
+  return 0;
+}
+
+/* Check the user passed exactly one socket description, fill in the
+ * defaults for port and export, build the human-readable servname,
+ * and open the shared connection when shared=true was requested.
+ * Returns 0 on success, or -1 after reporting an error. */
+static int
+nbd_config_complete (void)
+{
+  int r;
+
+  if (sockname) {
+    struct sockaddr_un sock;
+
+    if (hostname || port) {
+      nbdkit_error ("cannot mix Unix socket and TCP hostname/port parameters");
+      return -1;
+    }
+    if (strlen (sockname) > sizeof sock.sun_path) {
+      nbdkit_error ("socket file name too large");
+      return -1;
+    }
+    servname = strdup (sockname);
+    if (!servname) {
+      nbdkit_error ("strdup: %m");
+      return -1;
+    }
+  }
+  else {
+    if (!hostname) {
+      nbdkit_error ("must supply socket= or hostname= of external NBD server");
+      return -1;
+    }
+    if (!port)
+      port = "10809";
+    /* A ':' in the host means a literal IPv6 address; bracket it so
+       the port separator stays unambiguous. */
+    if (strchr (hostname, ':'))
+      r = asprintf (&servname, "[%s]:%s", hostname, port);
+    else
+      r = asprintf (&servname, "%s:%s", hostname, port);
+    if (r < 0) {
+      nbdkit_error ("asprintf: %m");
+      /* asprintf leaves the pointer undefined on failure, and
+         nbd_unload will free it later. */
+      servname = NULL;
+      return -1;
+    }
+  }
+
+  if (!export)
+    export = "";
+
+  if (shared && (shared_handle = nbd_open_handle (false)) == NULL)
+    return -1;
+  return 0;
+}
+
+/* Help text describing every key=value parameter accepted by nbd_config. */
+#define nbd_config_help \
+ "socket=<SOCKNAME> The Unix socket to connect to.\n" \
+ "hostname=<HOST> The hostname for the TCP socket to connect to.\n" \
+ "port=<PORT> TCP port or service name to use (default 10809).\n" \
+ "export=<NAME> Export name to connect to (default \"\").\n" \
+ "retry=<N> Retry connection up to N seconds (default 0).\n" \
+ "shared=<BOOL> True to share one server connection among all clients,\n" \
+ " rather than a connection per client (default false).\n"
+
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
+
+/* Fill buf with exactly len bytes read from fd.  Returns 0 once all
+ * bytes have arrived, or -1 with errno set; reaching end-of-file
+ * before len bytes were read is reported as EBADMSG.  EINTR and
+ * EAGAIN are retried. */
+static int
+read_full (int fd, void *buf, size_t len)
+{
+  char *cursor = buf;
+  size_t remaining = len;
+
+  while (remaining > 0) {
+    ssize_t got = read (fd, cursor, remaining);
+
+    if (got > 0) {
+      cursor += got;
+      remaining -= got;
+    }
+    else if (got == 0) {
+      /* Unexpected EOF */
+      errno = EBADMSG;
+      return -1;
+    }
+    else if (errno != EINTR && errno != EAGAIN)
+      return -1;
+  }
+  return 0;
+}
+
+/* Push all len bytes of buf to fd.  Returns 0 once everything has
+ * been written, or -1 with errno set.  EINTR and EAGAIN are retried. */
+static int
+write_full (int fd, const void *buf, size_t len)
+{
+  const char *cursor = buf;
+  size_t remaining = len;
+
+  while (remaining > 0) {
+    ssize_t put = write (fd, cursor, remaining);
+
+    if (put >= 0) {
+      cursor += put;
+      remaining -= put;
+      continue;
+    }
+    if (errno != EINTR && errno != EAGAIN)
+      return -1;
+  }
+  return 0;
+}
+
+/* Flag the connection as permanently broken.  Used during the
+ * transmission phase once resynchronizing with the server is
+ * hopeless, so that every later client request fails.  Always
+ * returns -1 so callers can write "return nbd_mark_dead (h);". */
+static int
+nbd_mark_dead (struct handle *h)
+{
+  const int saved_errno = errno;
+
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
+  if (h->dead) {
+    /* Already dead: only make sure the caller sees some errno */
+    if (saved_errno == 0)
+      errno = ESHUTDOWN;
+  }
+  else {
+    nbdkit_debug ("permanent failure while talking to server %s: %m",
+                  servname);
+    h->dead = true;
+  }
+  /* NBD only accepts a limited set of errno values over the wire, and
+     nbdkit converts all other values to EINVAL.  If we died due to an
+     errno value that cannot transmit over the wire, translate it to
+     ESHUTDOWN instead. */
+  switch (saved_errno) {
+  case EPIPE:
+  case EBADMSG:
+    nbdkit_set_error (ESHUTDOWN);
+    break;
+  default:
+    break;
+  }
+  return -1;
+}
+
+/* Look up the in-flight transaction whose cookie matches, holding
+   trans_lock for the duration of the search.  When remove is true the
+   match is also unlinked from the handle's list.  Returns the
+   transaction, or NULL if no entry carries that cookie. */
+static struct transaction *
+find_trans_by_cookie (struct handle *h, uint64_t cookie, bool remove)
+{
+  struct transaction **link;
+  struct transaction *found = NULL;
+
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
+  for (link = &h->trans; *link != NULL; link = &(*link)->next) {
+    if ((*link)->cookie != cookie)
+      continue;
+    found = *link;
+    if (remove)
+      *link = found->next;
+    break;
+  }
+  return found;
+}
+
+/* Serialize one request header, followed by the count bytes at buf
+   when buf is non-NULL, onto the server socket.  write_lock is held
+   for the whole call so header and payload are never interleaved
+   with another thread's request.  Returns 0 on success or -1 on
+   write failure. */
+static int
+nbd_request_raw (struct handle *h, uint16_t flags, uint16_t type,
+                 uint64_t offset, uint32_t count, uint64_t cookie,
+                 const void *buf)
+{
+  struct request header = {
+    .magic = htobe32 (NBD_REQUEST_MAGIC),
+    .flags = htobe16 (flags),
+    .type = htobe16 (type),
+    .handle = cookie, /* Opaque to server, so endianness doesn't matter */
+    .offset = htobe64 (offset),
+    .count = htobe32 (count),
+  };
+  int rc;
+
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->write_lock);
+  nbdkit_debug ("sending request type %d (%s), flags %#x, offset %#" PRIx64
+                ", count %#x, cookie %#" PRIx64, type, name_of_nbd_cmd (type),
+                flags, offset, count, cookie);
+  rc = write_full (h->fd, &header, sizeof header);
+  if (rc != 0 || buf == NULL)
+    return rc;
+  return write_full (h->fd, buf, count);
+}
+
+/* Perform the request half of a transaction.  On success, return the
+   transaction; on error return NULL with errno set to a non-zero
+   value (nbd_reply asserts this when it is handed NULL).
+
+   req_buf, when non-NULL, is count bytes of payload sent after the
+   request header.  rep_buf, when non-NULL, is where count bytes of
+   reply payload will be stored.  extents, when non-NULL, receives
+   block status results. */
+static struct transaction *
+nbd_request_full (struct handle *h, uint16_t flags, uint16_t type,
+                  uint64_t offset, uint32_t count, const void *req_buf,
+                  void *rep_buf, struct nbdkit_extents *extents)
+{
+  int err;
+  struct transaction *trans;
+  uint64_t cookie = 0;
+  bool dead;
+
+  trans = calloc (1, sizeof *trans);
+  if (!trans) {
+    nbdkit_error ("unable to track transaction: %m");
+    /* Still in sync with server, so don't mark connection dead */
+    return NULL;
+  }
+  if (sem_init (&trans->sem, 0, 0)) {
+    nbdkit_error ("unable to create semaphore: %m");
+    /* Still in sync with server, so don't mark connection dead */
+    free (trans);
+    return NULL;
+  }
+  trans->buf = rep_buf;
+  trans->count = rep_buf ? count : 0;
+  trans->offset = offset;
+  trans->extents = extents;
+  {
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
+    dead = h->dead;
+    if (!dead) {
+      cookie = trans->cookie = h->unique++;
+      trans->next = h->trans;
+      h->trans = trans;
+    }
+  }
+
+  if (dead) {
+    /* No system call failed on this path, so errno would otherwise be
+       stale or even 0; report the dead connection explicitly. */
+    errno = ESHUTDOWN;
+  }
+  else {
+    if (nbd_request_raw (h, flags, type, offset, count, cookie, req_buf) == 0)
+      return trans;
+    /* NOTE(review): this assumes the transaction is still on the list;
+       confirm the reader thread cannot have unlinked it already. */
+    trans = find_trans_by_cookie (h, cookie, true);
+  }
+
+  err = errno;
+  if (sem_destroy (&trans->sem))
+    abort ();
+  free (trans);
+  nbd_mark_dead (h);
+  errno = err;
+  return NULL;
+}
+
+/* Shorthand for nbd_request_full when no extra buffers are involved:
+   no request payload, no reply buffer and no extents list.  Returns
+   whatever nbd_request_full returns (the transaction, or NULL). */
+static struct transaction *
+nbd_request (struct handle *h, uint16_t flags, uint16_t type, uint64_t offset,
+ uint32_t count)
+{
+ return nbd_request_full (h, flags, type, offset, count, NULL, NULL, NULL);
+}
+
+/* Read a reply, and look up the corresponding transaction.
+   Return the server's non-negative answer (converted to local errno
+   value) on success, or -1 on read failure.  If structured replies
+   were negotiated, trans_out is set to NULL if there are still more replies
+   expected.
+
+   Any protocol violation or read failure marks the connection dead
+   via nbd_mark_dead, since the stream cannot be resynchronized. */
+static int
+nbd_reply_raw (struct handle *h, struct transaction **trans_out)
+{
+  union {
+    struct simple_reply simple;
+    struct structured_reply structured;
+  } rep;
+  struct transaction *trans;
+  void *buf = NULL;
+  CLEANUP_FREE char *payload = NULL;
+  uint32_t count;
+  uint32_t id;
+  struct block_descriptor *extents = NULL;
+  size_t nextents = 0;
+  int error = NBD_SUCCESS;
+  bool more = false;
+  uint32_t len = 0; /* 0 except for structured reads */
+  uint64_t offset = 0; /* if len, absolute offset of structured read chunk */
+  bool zero = false; /* if len, whether to read or memset */
+  uint16_t errlen;
+
+  *trans_out = NULL;
+  /* magic and handle overlap between simple and structured replies */
+  if (read_full (h->fd, &rep, sizeof rep.simple))
+    return nbd_mark_dead (h);
+  rep.simple.magic = be32toh (rep.simple.magic);
+  switch (rep.simple.magic) {
+  case NBD_SIMPLE_REPLY_MAGIC:
+    nbdkit_debug ("received simple reply for cookie %#" PRIx64 ", status %s",
+                  rep.simple.handle,
+                  name_of_nbd_error (be32toh (rep.simple.error)));
+    error = be32toh (rep.simple.error);
+    break;
+  case NBD_STRUCTURED_REPLY_MAGIC:
+    if (!h->structured) {
+      nbdkit_error ("structured response without negotiation");
+      return nbd_mark_dead (h);
+    }
+    /* Read the part of the structured header that extends past the
+       simple header already consumed above. */
+    if (read_full (h->fd, sizeof rep.simple + (char *) &rep,
+                   sizeof rep - sizeof rep.simple))
+      return nbd_mark_dead (h);
+    rep.structured.flags = be16toh (rep.structured.flags);
+    rep.structured.type = be16toh (rep.structured.type);
+    rep.structured.length = be32toh (rep.structured.length);
+    nbdkit_debug ("received structured reply %s for cookie %#" PRIx64
+                  ", payload length %" PRId32,
+                  name_of_nbd_reply_type (rep.structured.type),
+                  rep.structured.handle, rep.structured.length);
+    if (rep.structured.length > 64 * 1024 * 1024) {
+      nbdkit_error ("structured reply length is suspiciously large: %" PRId32,
+                    rep.structured.length);
+      return nbd_mark_dead (h);
+    }
+    if (rep.structured.length) {
+      /* Special case for OFFSET_DATA in order to read tail of chunk
+         directly into final buffer later on */
+      len = (rep.structured.type == NBD_REPLY_TYPE_OFFSET_DATA &&
+             rep.structured.length > sizeof offset) ? sizeof offset :
+        rep.structured.length;
+      payload = malloc (len);
+      if (!payload) {
+        nbdkit_error ("reading structured reply payload: %m");
+        return nbd_mark_dead (h);
+      }
+      if (read_full (h->fd, payload, len))
+        return nbd_mark_dead (h);
+      len = 0;
+    }
+    more = !(rep.structured.flags & NBD_REPLY_FLAG_DONE);
+    switch (rep.structured.type) {
+    case NBD_REPLY_TYPE_NONE:
+      if (rep.structured.length) {
+        nbdkit_error ("NBD_REPLY_TYPE_NONE with invalid payload");
+        return nbd_mark_dead (h);
+      }
+      if (more) {
+        nbdkit_error ("NBD_REPLY_TYPE_NONE without done flag");
+        return nbd_mark_dead (h);
+      }
+      break;
+    case NBD_REPLY_TYPE_OFFSET_DATA:
+      if (rep.structured.length <= sizeof offset) {
+        nbdkit_error ("structured reply OFFSET_DATA too small");
+        return nbd_mark_dead (h);
+      }
+      memcpy (&offset, payload, sizeof offset);
+      offset = be64toh (offset);
+      len = rep.structured.length - sizeof offset;
+      break;
+    case NBD_REPLY_TYPE_OFFSET_HOLE:
+      if (rep.structured.length != sizeof offset + sizeof len) {
+        nbdkit_error ("structured reply OFFSET_HOLE size incorrect");
+        return nbd_mark_dead (h);
+      }
+      memcpy (&offset, payload, sizeof offset);
+      offset = be64toh (offset);
+      /* The hole length follows the 64-bit offset in the payload */
+      memcpy (&len, payload + sizeof offset, sizeof len);
+      len = be32toh (len);
+      if (!len) {
+        nbdkit_error ("structured reply OFFSET_HOLE length incorrect");
+        return nbd_mark_dead (h);
+      }
+      zero = true;
+      break;
+    case NBD_REPLY_TYPE_BLOCK_STATUS:
+      if (!h->extents) {
+        nbdkit_error ("block status response without negotiation");
+        return nbd_mark_dead (h);
+      }
+      /* Payload is a 32-bit context id followed by whole descriptors */
+      if (rep.structured.length < sizeof *extents ||
+          rep.structured.length % sizeof *extents != sizeof id) {
+        nbdkit_error ("structured reply BLOCK_STATUS size incorrect");
+        return nbd_mark_dead (h);
+      }
+      nextents = rep.structured.length / sizeof *extents;
+      extents = (struct block_descriptor *) &payload[sizeof id];
+      memcpy (&id, payload, sizeof id);
+      id = be32toh (id);
+      nbdkit_debug ("parsing %zu extents for context id %" PRId32,
+                    nextents, id);
+      break;
+    default:
+      if (!NBD_REPLY_TYPE_IS_ERR (rep.structured.type)) {
+        nbdkit_error ("received unexpected structured reply %s",
+                      name_of_nbd_reply_type (rep.structured.type));
+        return nbd_mark_dead (h);
+      }
+
+      if (rep.structured.length < sizeof error + sizeof errlen) {
+        nbdkit_error ("structured reply error size incorrect");
+        return nbd_mark_dead (h);
+      }
+      memcpy (&errlen, payload + sizeof error, sizeof errlen);
+      errlen = be16toh (errlen);
+      if (errlen > rep.structured.length - sizeof error - sizeof errlen) {
+        nbdkit_error ("structured reply error message size incorrect");
+        return nbd_mark_dead (h);
+      }
+      memcpy (&error, payload, sizeof error);
+      error = be32toh (error);
+      if (errlen)
+        nbdkit_debug ("received structured error %s with message: %.*s",
+                      name_of_nbd_error (error), (int) errlen,
+                      payload + sizeof error + sizeof errlen);
+      else
+        nbdkit_debug ("received structured error %s without message",
+                      name_of_nbd_error (error));
+    }
+    break;
+
+  default:
+    nbdkit_error ("received unexpected magic in reply: %#" PRIx32,
+                  rep.simple.magic);
+    return nbd_mark_dead (h);
+  }
+
+  /* Only unlink the transaction when this is its final chunk */
+  trans = find_trans_by_cookie (h, rep.simple.handle, !more);
+  if (!trans) {
+    nbdkit_error ("reply with unexpected cookie %#" PRIx64, rep.simple.handle);
+    return nbd_mark_dead (h);
+  }
+
+  buf = trans->buf;
+  count = trans->count;
+  if (nextents) {
+    if (!trans->extents) {
+      nbdkit_error ("block status response to a non-status command");
+      return nbd_mark_dead (h);
+    }
+    offset = trans->offset;
+    for (size_t i = 0; i < nextents; i++) {
+      /* We rely on the fact that NBDKIT_EXTENT_* match NBD_STATE_* */
+      if (nbdkit_add_extent (trans->extents, offset,
+                             be32toh (extents[i].length),
+                             be32toh (extents[i].status_flags)) == -1) {
+        error = errno;
+        break;
+      }
+      offset += be32toh (extents[i].length);
+    }
+  }
+  if (buf && h->structured && rep.simple.magic == NBD_SIMPLE_REPLY_MAGIC) {
+    nbdkit_error ("simple read reply when structured was expected");
+    return nbd_mark_dead (h);
+  }
+  if (len) {
+    if (!buf) {
+      nbdkit_error ("structured read response to a non-read command");
+      return nbd_mark_dead (h);
+    }
+    if (offset < trans->offset || offset > INT64_MAX ||
+        offset + len > trans->offset + count) {
+      nbdkit_error ("structured read reply with unexpected offset/length");
+      return nbd_mark_dead (h);
+    }
+    buf = (char *) buf + offset - trans->offset;
+    if (zero) {
+      memset (buf, 0, len);
+      buf = NULL;
+    }
+    else
+      count = len;
+  }
+
+  /* Thanks to structured replies, we must preserve an error in any
+     earlier chunk for replay during the final chunk. */
+  if (!more) {
+    *trans_out = trans;
+    if (!error)
+      error = trans->err;
+  }
+  else if (error && !trans->err)
+    trans->err = error;
+
+  /* Convert from wire value to local errno, and perform any final read */
+  switch (error) {
+  case NBD_SUCCESS:
+    if (buf && read_full (h->fd, buf, count))
+      return nbd_mark_dead (h);
+    return 0;
+  case NBD_EPERM:
+    return EPERM;
+  case NBD_EIO:
+    return EIO;
+  case NBD_ENOMEM:
+    return ENOMEM;
+  default:
+    nbdkit_debug ("unexpected error %d, squashing to EINVAL", error);
+    /* fallthrough */
+  case NBD_EINVAL:
+    return EINVAL;
+  case NBD_ENOSPC:
+    return ENOSPC;
+  case NBD_EOVERFLOW:
+    return EOVERFLOW;
+  case NBD_ESHUTDOWN:
+    return ESHUTDOWN;
+  }
+}
+
+/* Reader thread body.  Keeps pulling replies off the socket and
+   waking the thread waiting on each completed transaction until the
+   handle is marked dead; then fails every transaction still queued
+   with ESHUTDOWN so that no waiter is left blocked.  The argument is
+   the struct handle of the connection; the return value is always
+   NULL. */
+void *
+nbd_reader (void *handle)
+{
+  struct handle *h = handle;
+
+  for (;;) {
+    struct transaction *finished;
+    int status = nbd_reply_raw (h, &finished);
+
+    if (status >= 0 && finished == NULL)
+      nbdkit_debug ("partial reply handled, waiting for final reply");
+    else if (status >= 0) {
+      finished->err = status;
+      if (sem_post (&finished->sem)) {
+        nbdkit_error ("failed to post semaphore: %m");
+        abort ();
+      }
+    }
+
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
+    if (h->dead)
+      break;
+  }
+
+  /* Clean up any stranded in-flight requests */
+  for (;;) {
+    struct transaction *stranded;
+
+    {
+      ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
+      stranded = h->trans;
+      if (stranded)
+        h->trans = stranded->next;
+    }
+    if (stranded == NULL)
+      return NULL;
+    stranded->err = ESHUTDOWN;
+    if (sem_post (&stranded->sem)) {
+      nbdkit_error ("failed to post semaphore: %m");
+      abort ();
+    }
+  }
+}
+
+/* Perform the reply half of a transaction: block until the reader
+   thread posts the transaction's semaphore, then release the
+   transaction.  Returns 0 on success, or -1 with errno set to the
+   recorded error.  A NULL trans (the request half failed) yields -1
+   with the caller's errno left untouched. */
+static int
+nbd_reply (struct handle *h, struct transaction *trans)
+{
+  int rc;
+  int result;
+
+  if (trans == NULL) {
+    assert (errno);
+    return -1;
+  }
+
+  do
+    rc = sem_wait (&trans->sem);
+  while (rc == -1 && errno == EINTR);
+
+  if (rc != 0) {
+    nbdkit_debug ("failed to wait on semaphore: %m");
+    result = EIO;
+  }
+  else
+    result = trans->err;
+
+  if (sem_destroy (&trans->sem))
+    abort ();
+  free (trans);
+  errno = result;
+  return result ? -1 : 0;
+}
+
+/* Receive response to @option into @reply, and consume any
+ payload. If @payload is non-NULL, caller must free *payload. Return
+ 0 on success, or -1 if communication to server is no longer
+ possible.
+
+ On return the fields of @reply have been converted to host byte
+ order. *payload is set to NULL when the reply carried no payload;
+ otherwise it points at replylen bytes followed by a NUL byte. */
+static int
+nbd_newstyle_recv_option_reply (struct handle *h, uint32_t option,
+ struct fixed_new_option_reply *reply,
+ void **payload)
+{
+ CLEANUP_FREE char *buffer = NULL;
+
+ if (payload)
+ *payload = NULL;
+ if (read_full (h->fd, reply, sizeof *reply)) {
+ nbdkit_error ("unable to read option reply: %m");
+ return -1;
+ }
+ /* Convert the header from network to host byte order in place */
+ reply->magic = be64toh (reply->magic);
+ reply->option = be32toh (reply->option);
+ reply->reply = be32toh (reply->reply);
+ reply->replylen = be32toh (reply->replylen);
+ /* The reply must echo the option that is currently being negotiated */
+ if (reply->magic != NBD_REP_MAGIC || reply->option != option) {
+ nbdkit_error ("unexpected option reply");
+ return -1;
+ }
+ if (reply->replylen) {
+ if (reply->reply == NBD_REP_ACK) {
+ nbdkit_error ("NBD_REP_ACK should not have replylen %" PRId32,
+ reply->replylen);
+ return -1;
+ }
+ /* Cap the allocation made on behalf of the server at 16 MiB */
+ if (reply->replylen > 16 * 1024 * 1024) {
+ nbdkit_error ("option reply length is suspiciously large: %" PRId32,
+ reply->replylen);
+ return -1;
+ }
+ /* buffer is a string for NBD_REP_ERR_*; adding a NUL terminator
+ makes that string easier to use, without hurting other reply
+ types where buffer is not a string */
+ buffer = malloc (reply->replylen + 1);
+ if (!buffer) {
+ nbdkit_error ("malloc: %m");
+ return -1;
+ }
+ if (read_full (h->fd, buffer, reply->replylen)) {
+ nbdkit_error ("unable to read option reply payload: %m");
+ return -1;
+ }
+ buffer[reply->replylen] = '\0';
+ if (!payload)
+ nbdkit_debug ("ignoring option reply payload");
+ else {
+ /* Hand ownership to the caller; clearing buffer keeps the
+ CLEANUP_FREE attribute from releasing it on return */
+ *payload = buffer;
+ buffer = NULL;
+ }
+ }
+ return 0;
+}
+
+/* Attempt to negotiate structured reads, block status, and NBD_OPT_GO.
+   Return 1 if haggling completed, 0 if haggling failed but
+   NBD_OPT_EXPORT_NAME is still viable, or -1 on inability to connect.
+
+   On a return of 1, h->size and h->flags hold the values from the
+   server's NBD_INFO_EXPORT reply; h->structured and h->extents record
+   which of the optional features the server accepted. */
+static int
+nbd_newstyle_haggle (struct handle *h)
+{
+  const char *const query = "base:allocation";
+  struct new_option opt;
+  uint32_t exportnamelen = htobe32 (strlen (export));
+  uint32_t nrqueries = htobe32 (1);
+  uint32_t querylen = htobe32 (strlen (query));
+  /* For now, we make no NBD_INFO_* requests, relying on the server to
+     send its defaults. TODO: nbdkit should let plugins report block
+     sizes, at which point we should request NBD_INFO_BLOCK_SIZE and
+     obey any sizes set by server. */
+  uint16_t nrinfos = htobe16 (0);
+  struct fixed_new_option_reply reply;
+
+  nbdkit_debug ("trying NBD_OPT_STRUCTURED_REPLY");
+  opt.version = htobe64 (NEW_VERSION);
+  opt.option = htobe32 (NBD_OPT_STRUCTURED_REPLY);
+  opt.optlen = htobe32 (0);
+  if (write_full (h->fd, &opt, sizeof opt)) {
+    nbdkit_error ("unable to request NBD_OPT_STRUCTURED_REPLY: %m");
+    return -1;
+  }
+  if (nbd_newstyle_recv_option_reply (h, NBD_OPT_STRUCTURED_REPLY, &reply,
+                                      NULL) < 0)
+    return -1;
+  if (reply.reply == NBD_REP_ACK) {
+    nbdkit_debug ("structured replies enabled, trying NBD_OPT_SET_META_CONTEXT");
+    h->structured = true;
+
+    opt.version = htobe64 (NEW_VERSION);
+    opt.option = htobe32 (NBD_OPT_SET_META_CONTEXT);
+    opt.optlen = htobe32 (sizeof exportnamelen + strlen (export) +
+                          sizeof nrqueries + sizeof querylen + strlen (query));
+    if (write_full (h->fd, &opt, sizeof opt) ||
+        write_full (h->fd, &exportnamelen, sizeof exportnamelen) ||
+        write_full (h->fd, export, strlen (export)) ||
+        write_full (h->fd, &nrqueries, sizeof nrqueries) ||
+        write_full (h->fd, &querylen, sizeof querylen) ||
+        write_full (h->fd, query, strlen (query))) {
+      nbdkit_error ("unable to request NBD_OPT_SET_META_CONTEXT: %m");
+      return -1;
+    }
+    if (nbd_newstyle_recv_option_reply (h, NBD_OPT_SET_META_CONTEXT, &reply,
+                                        NULL) < 0)
+      return -1;
+    if (reply.reply == NBD_REP_META_CONTEXT) {
+      /* Cheat: we asked for exactly one context. We could double
+         check that the server is replying with exactly the
+         "base:allocation" context, and then remember the id it tells
+         us to later confirm that responses to NBD_CMD_BLOCK_STATUS
+         match up; but in the absence of multiple contexts, it's
+         easier to just assume the server is compliant, and will reuse
+         the same id, without bothering to check further. */
+      nbdkit_debug ("extents enabled");
+      h->extents = true;
+      /* The context reply must be followed by the closing ACK */
+      if (nbd_newstyle_recv_option_reply (h, NBD_OPT_SET_META_CONTEXT, &reply,
+                                          NULL) < 0)
+        return -1;
+    }
+    if (reply.reply != NBD_REP_ACK) {
+      if (h->extents) {
+        nbdkit_error ("unexpected response to set meta context");
+        return -1;
+      }
+      nbdkit_debug ("ignoring meta context response %s",
+                    name_of_nbd_rep (reply.reply));
+    }
+  }
+  else {
+    nbdkit_debug ("structured replies disabled");
+  }
+
+  /* Try NBD_OPT_GO */
+  nbdkit_debug ("trying NBD_OPT_GO");
+  opt.version = htobe64 (NEW_VERSION);
+  opt.option = htobe32 (NBD_OPT_GO);
+  opt.optlen = htobe32 (sizeof exportnamelen + strlen (export) +
+                        sizeof nrinfos);
+  if (write_full (h->fd, &opt, sizeof opt) ||
+      write_full (h->fd, &exportnamelen, sizeof exportnamelen) ||
+      write_full (h->fd, export, strlen (export)) ||
+      write_full (h->fd, &nrinfos, sizeof nrinfos)) {
+    nbdkit_error ("unable to request NBD_OPT_GO: %m");
+    return -1;
+  }
+  while (1) {
+    /* Freed automatically at the end of every iteration */
+    CLEANUP_FREE void *buffer = NULL;
+    struct fixed_new_option_reply_info_export *reply_export;
+    uint16_t info;
+
+    if (nbd_newstyle_recv_option_reply (h, NBD_OPT_GO, &reply, &buffer) < 0)
+      return -1;
+    switch (reply.reply) {
+    case NBD_REP_INFO:
+      /* Parse payload, but ignore all except NBD_INFO_EXPORT */
+      if (reply.replylen < 2) {
+        nbdkit_error ("NBD_REP_INFO reply too short");
+        return -1;
+      }
+      memcpy (&info, buffer, sizeof info);
+      info = be16toh (info);
+      switch (info) {
+      case NBD_INFO_EXPORT:
+        if (reply.replylen != sizeof *reply_export) {
+          nbdkit_error ("NBD_INFO_EXPORT reply wrong size");
+          return -1;
+        }
+        reply_export = buffer;
+        h->size = be64toh (reply_export->exportsize);
+        h->flags = be16toh (reply_export->eflags);
+        break;
+      default:
+        nbdkit_debug ("ignoring server info %d", info);
+        break;
+      }
+      break;
+    case NBD_REP_ACK:
+      /* End of replies, valid if server already sent NBD_INFO_EXPORT,
+         observable since h->flags must contain NBD_FLAG_HAS_FLAGS */
+      assert (!buffer);
+      if (!h->flags) {
+        nbdkit_error ("server omitted NBD_INFO_EXPORT reply to NBD_OPT_GO");
+        return -1;
+      }
+      nbdkit_debug ("NBD_OPT_GO complete");
+      return 1;
+    case NBD_REP_ERR_UNSUP:
+      /* Special case this failure to fall back to NBD_OPT_EXPORT_NAME */
+      nbdkit_debug ("server lacks NBD_OPT_GO support");
+      return 0;
+    default:
+      /* Unexpected. Either the server sent a legitimate error or an
+         unexpected reply, but either way, we can't connect. */
+      if (NBD_REP_IS_ERR (reply.reply)) {
+        if (reply.replylen)
+          nbdkit_error ("server rejected NBD_OPT_GO with %s: %s",
+                        name_of_nbd_rep (reply.reply), (char *) buffer);
+        else
+          nbdkit_error ("server rejected NBD_OPT_GO with %s",
+                        name_of_nbd_rep (reply.reply));
+      }
+      else
+        nbdkit_error ("server used unexpected reply %s to NBD_OPT_GO",
+                      name_of_nbd_rep (reply.reply));
+      return -1;
+    }
+  }
+}
+
+/* Connect to the Unix socket named by sockname.  Returns the
+   connected fd on success, or -1 after reporting the error; no
+   descriptor is left open on failure. */
+static int
+nbd_connect_unix (void)
+{
+  struct sockaddr_un sock = { .sun_family = AF_UNIX };
+  int fd;
+
+  nbdkit_debug ("connecting to Unix socket name=%s", sockname);
+  fd = socket (AF_UNIX, SOCK_STREAM, 0);
+  if (fd < 0) {
+    nbdkit_error ("socket: %m");
+    return -1;
+  }
+
+  /* We already validated length during nbd_config_complete */
+  assert (strlen (sockname) <= sizeof sock.sun_path);
+  memcpy (sock.sun_path, sockname, strlen (sockname));
+  if (connect (fd, (const struct sockaddr *) &sock, sizeof sock) < 0) {
+    /* Report first so %m still reflects connect's errno, then
+       release the socket instead of leaking it. */
+    nbdkit_error ("connect: %m");
+    close (fd);
+    return -1;
+  }
+  return fd;
+}
+
+/* Connect to hostname:port over TCP, trying each address returned by
+   getaddrinfo in turn, and enable TCP_NODELAY on the result.  Returns
+   the connected fd on success, or -1 after reporting the error; no
+   descriptor is left open on failure. */
+static int
+nbd_connect_tcp (void)
+{
+  struct addrinfo hints = { .ai_family = AF_UNSPEC,
+                            .ai_socktype = SOCK_STREAM, };
+  struct addrinfo *result, *rp;
+  int r;
+  const int optval = 1;
+  int fd = -1;
+  int err = 0;
+
+  nbdkit_debug ("connecting to TCP socket host=%s port=%s", hostname, port);
+  r = getaddrinfo (hostname, port, &hints, &result);
+  if (r != 0) {
+    nbdkit_error ("getaddrinfo: %s", gai_strerror (r));
+    return -1;
+  }
+
+  assert (result != NULL);
+
+  for (rp = result; rp; rp = rp->ai_next) {
+    fd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
+    if (fd == -1) {
+      err = errno;
+      continue;
+    }
+    if (connect (fd, rp->ai_addr, rp->ai_addrlen) != -1)
+      break;
+    /* Remember why this address failed before close can change errno */
+    err = errno;
+    close (fd);
+    fd = -1;
+  }
+  freeaddrinfo (result);
+  if (rp == NULL) {
+    /* Every candidate was already closed inside the loop; closing fd
+       again here would hit a stale or invalid descriptor. */
+    errno = err;
+    nbdkit_error ("connect: %m");
+    return -1;
+  }
+
+  if (setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &optval,
+                  sizeof (int)) == -1) {
+    nbdkit_error ("cannot set TCP_NODELAY option: %m");
+    close (fd);
+    return -1;
+  }
+  return fd;
+}
+
+/* Create the shared or per-connection handle.
+ *
+ * Allocates a zeroed handle, connects to the server (nbd_connect_unix
+ * when sockname is set, nbd_connect_tcp otherwise), performs either the
+ * oldstyle or the newstyle handshake to fill in h->size and h->flags,
+ * initializes the two mutexes and starts the reader thread running
+ * nbd_reader.
+ *
+ * If readonly is non-zero, NBD_FLAG_READ_ONLY is forced into h->flags
+ * after the handshake.
+ *
+ * Returns the new handle, or NULL on failure.  Most failures are
+ * reported here with nbdkit_error; a failed connect or a negative
+ * result from nbd_newstyle_haggle jumps to the error path without an
+ * additional message from this function.  On the error path the socket
+ * is closed (if open) and the handle is freed.
+ */
+static struct handle *
+nbd_open_handle (int readonly)
+{
+ struct handle *h;
+ struct old_handshake old;
+ uint64_t version;
+
+ h = calloc (1, sizeof *h);
+ if (h == NULL) {
+ nbdkit_error ("malloc: %m");
+ return NULL;
+ }
+
+ /* The label "retry" and the variable "retry" tested below are two
+ * different things (labels have their own namespace in C).  While the
+ * variable is non-zero, a failed connect decrements it, sleeps one
+ * second and tries again; the post-decrement is also applied on the
+ * final failing attempt.
+ */
+ retry:
+ if (sockname)
+ h->fd = nbd_connect_unix ();
+ else
+ h->fd = nbd_connect_tcp ();
+ if (h->fd == -1) {
+ if (retry--) {
+ sleep (1);
+ goto retry;
+ }
+ goto err;
+ }
+
+ /* old and new handshake share same meaning of first 16 bytes */
+ if (read_full (h->fd, &old, offsetof (struct old_handshake, exportsize))) {
+ nbdkit_error ("unable to read magic: %m");
+ goto err;
+ }
+ if (strncmp (old.nbdmagic, "NBDMAGIC", sizeof old.nbdmagic)) {
+ nbdkit_error ("wrong magic, %s is not an NBD server", servname);
+ goto err;
+ }
+ version = be64toh (old.version);
+ if (version == OLD_VERSION) {
+ nbdkit_debug ("trying oldstyle connection");
+ /* Read the remainder of the oldstyle header, i.e. everything from
+ * the exportsize field to the end of the struct.
+ */
+ if (read_full (h->fd,
+ (char *) &old + offsetof (struct old_handshake, exportsize),
+ sizeof old - offsetof (struct old_handshake, exportsize))) {
+ nbdkit_error ("unable to read old handshake: %m");
+ goto err;
+ }
+ h->size = be64toh (old.exportsize);
+ h->flags = be16toh (old.eflags);
+ }
+ else if (version == NEW_VERSION) {
+ uint16_t gflags;
+ uint32_t cflags;
+ struct new_option opt;
+ struct new_handshake_finish finish;
+ size_t expect;
+
+ nbdkit_debug ("trying newstyle connection");
+ if (read_full (h->fd, &gflags, sizeof gflags)) {
+ nbdkit_error ("unable to read global flags: %m");
+ goto err;
+ }
+ gflags = be16toh (gflags);
+ /* Reply with whichever of NBD_FLAG_FIXED_NEWSTYLE and
+ * NBD_FLAG_NO_ZEROES the server offered, and nothing else.
+ */
+ cflags = htobe32 (gflags & (NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES));
+ if (write_full (h->fd, &cflags, sizeof cflags)) {
+ nbdkit_error ("unable to return global flags: %m");
+ goto err;
+ }
+
+ /* Prefer NBD_OPT_GO if possible */
+ if (gflags & NBD_FLAG_FIXED_NEWSTYLE) {
+ /* rc < 0 is fatal; rc == 0 falls back to NBD_OPT_EXPORT_NAME by
+ * jumping into the else branch below; any other value skips the
+ * fallback (presumably nbd_newstyle_haggle has then already filled
+ * in h->size and h->flags - confirm against that function).
+ */
+ int rc = nbd_newstyle_haggle (h);
+ if (rc < 0)
+ goto err;
+ if (!rc)
+ goto export_name;
+ }
+ else {
+ export_name:
+ /* Option haggling untried or failed, use older NBD_OPT_EXPORT_NAME */
+ nbdkit_debug ("trying NBD_OPT_EXPORT_NAME");
+ opt.version = htobe64 (NEW_VERSION);
+ opt.option = htobe32 (NBD_OPT_EXPORT_NAME);
+ opt.optlen = htobe32 (strlen (export));
+ if (write_full (h->fd, &opt, sizeof opt) ||
+ write_full (h->fd, export, strlen (export))) {
+ nbdkit_error ("unable to request export '%s': %m", export);
+ goto err;
+ }
+ /* When the server offered NBD_FLAG_NO_ZEROES (echoed back above),
+ * the reply is read without its trailing zeroes field.
+ */
+ expect = sizeof finish;
+ if (gflags & NBD_FLAG_NO_ZEROES)
+ expect -= sizeof finish.zeroes;
+ if (read_full (h->fd, &finish, expect)) {
+ nbdkit_error ("unable to read new handshake: %m");
+ goto err;
+ }
+ h->size = be64toh (finish.exportsize);
+ h->flags = be16toh (finish.eflags);
+ }
+ }
+ else {
+ nbdkit_error ("unexpected version %#" PRIx64, version);
+ goto err;
+ }
+ if (readonly)
+ h->flags |= NBD_FLAG_READ_ONLY;
+
+ /* Spawn a dedicated reader thread */
+ /* The pthread_* calls return an error number instead of setting
+ * errno, so the result is stored in errno for %m to print.  Each
+ * failure destroys whatever mutexes were already initialized before
+ * taking the common error path.
+ */
+ if ((errno = pthread_mutex_init (&h->write_lock, NULL))) {
+ nbdkit_error ("failed to initialize write mutex: %m");
+ goto err;
+ }
+ if ((errno = pthread_mutex_init (&h->trans_lock, NULL))) {
+ nbdkit_error ("failed to initialize transaction mutex: %m");
+ pthread_mutex_destroy (&h->write_lock);
+ goto err;
+ }
+ if ((errno = pthread_create (&h->reader, NULL, nbd_reader, h))) {
+ nbdkit_error ("failed to initialize reader thread: %m");
+ pthread_mutex_destroy (&h->write_lock);
+ pthread_mutex_destroy (&h->trans_lock);
+ goto err;
+ }
+
+ return h;
+
+ /* Every "goto err" happens after h->fd has been assigned, so it is
+ * either a connected socket or -1 from a failed connect.
+ */
+ err:
+ if (h->fd >= 0)
+ close (h->fd);
+ free (h);
+ return NULL;
+}
+
+/* Provide the handle for a new client connection.
+ *
+ * When sharing is enabled every client receives shared_handle and the
+ * readonly argument is not consulted; otherwise a dedicated handle is
+ * created by nbd_open_handle, which returns NULL on failure.
+ */
+static void *
+nbd_open (int readonly)
+{
+  if (!shared)
+    return nbd_open_handle (readonly);
+
+  return shared_handle;
+}
+
+/* Tear down a shared or per-connection handle and release everything
+ * it owns: the reader thread, the socket, both mutexes and the handle
+ * memory itself.
+ */
+static void
+nbd_close_handle (struct handle *h)
+{
+  /* While the connection is not marked dead, send NBD_CMD_DISC and shut
+   * down the write side of the socket.  The result of the request is
+   * deliberately not checked.
+   */
+  if (!h->dead) {
+    nbd_request_raw (h, 0, NBD_CMD_DISC, 0, 0, 0, NULL);
+    shutdown (h->fd, SHUT_WR);
+  }
+
+  /* pthread_join hands back its error number; keep it in errno so that
+   * %m can print it.
+   */
+  errno = pthread_join (h->reader, NULL);
+  if (errno != 0)
+    nbdkit_debug ("failed to join reader thread: %m");
+
+  close (h->fd);
+  pthread_mutex_destroy (&h->write_lock);
+  pthread_mutex_destroy (&h->trans_lock);
+  free (h);
+}
+
+/* Release the handle of a client connection.
+ *
+ * Nothing is done when sharing is enabled, because nbd_open handed out
+ * shared_handle rather than a handle of its own in that mode.
+ */
+static void
+nbd_close (void *handle)
+{
+  if (shared)
+    return;
+
+  nbd_close_handle (handle);
+}
+
+/* Report the export size recorded in the handle. */
+static int64_t
+nbd_get_size (void *handle)
+{
+  const struct handle *conn = handle;
+
+  return conn->size;
+}
+
+/* Writing is possible unless NBD_FLAG_READ_ONLY is set in the handle's
+ * flags.  Returns 1 when writable, 0 otherwise.
+ */
+static int
+nbd_can_write (void *handle)
+{
+  const struct handle *conn = handle;
+
+  return (conn->flags & NBD_FLAG_READ_ONLY) == 0;
+}
+
+/* Flush support: returns the NBD_FLAG_SEND_FLUSH bit of the handle's
+ * flags as-is (non-zero when set, 0 when clear).
+ */
+static int
+nbd_can_flush (void *handle)
+{
+  const struct handle *conn = handle;
+
+  return conn->flags & NBD_FLAG_SEND_FLUSH;
+}
+
+/* Rotational hint: returns the NBD_FLAG_ROTATIONAL bit of the handle's
+ * flags as-is (non-zero when set, 0 when clear).
+ */
+static int
+nbd_is_rotational (void *handle)
+{
+  return ((struct handle *) handle)->flags & NBD_FLAG_ROTATIONAL;
+}
+
+/* Trim support: returns the NBD_FLAG_SEND_TRIM bit of the handle's
+ * flags as-is (non-zero when set, 0 when clear).
+ */
+static int
+nbd_can_trim (void *handle)
+{
+  const struct handle *conn = handle;
+
+  return conn->flags & NBD_FLAG_SEND_TRIM;
+}
+
+/* Write-zeroes support: returns the NBD_FLAG_SEND_WRITE_ZEROES bit of
+ * the handle's flags as-is (non-zero when set, 0 when clear).
+ */
+static int
+nbd_can_zero (void *handle)
+{
+  return ((struct handle *) handle)->flags & NBD_FLAG_SEND_WRITE_ZEROES;
+}
+
+/* FUA support: NBDKIT_FUA_NATIVE when NBD_FLAG_SEND_FUA is set in the
+ * handle's flags, NBDKIT_FUA_NONE otherwise.
+ */
+static int
+nbd_can_fua (void *handle)
+{
+  const struct handle *conn = handle;
+
+  if (conn->flags & NBD_FLAG_SEND_FUA)
+    return NBDKIT_FUA_NATIVE;
+  return NBDKIT_FUA_NONE;
+}
+
+/* Multi-connection support: returns the NBD_FLAG_CAN_MULTI_CONN bit of
+ * the handle's flags as-is (non-zero when set, 0 when clear).
+ */
+static int
+nbd_can_multi_conn (void *handle)
+{
+  const struct handle *conn = handle;
+
+  return conn->flags & NBD_FLAG_CAN_MULTI_CONN;
+}
+
+/* Cache support: NBDKIT_CACHE_NATIVE when NBD_FLAG_SEND_CACHE is set in
+ * the handle's flags, NBDKIT_CACHE_NONE otherwise.
+ */
+static int
+nbd_can_cache (void *handle)
+{
+  const struct handle *conn = handle;
+
+  return (conn->flags & NBD_FLAG_SEND_CACHE) ? NBDKIT_CACHE_NATIVE
+                                             : NBDKIT_CACHE_NONE;
+}
+
+/* Extents support: returns the extents member of the handle unchanged. */
+static int
+nbd_can_extents (void *handle)
+{
+  return ((struct handle *) handle)->extents;
+}
+
+/* Read count bytes starting at offset.
+ *
+ * Submits NBD_CMD_READ through nbd_request_full, passing buf in the
+ * second-to-last argument, and returns whatever nbd_reply yields for
+ * the resulting transaction.  No nbdkit flags are accepted (asserted).
+ */
+static int
+nbd_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
+           uint32_t flags)
+{
+  struct handle *conn = handle;
+
+  assert (!flags);
+  return nbd_reply (conn,
+                    nbd_request_full (conn, 0, NBD_CMD_READ, offset, count,
+                                      NULL, buf, NULL));
+}
+
+/* Write count bytes from buf starting at offset.
+ *
+ * Submits NBD_CMD_WRITE through nbd_request_full and returns whatever
+ * nbd_reply yields for the resulting transaction.  NBDKIT_FLAG_FUA is
+ * the only flag accepted (asserted) and is translated to
+ * NBD_CMD_FLAG_FUA on the request.
+ */
+static int
+nbd_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
+            uint32_t flags)
+{
+  struct handle *conn = handle;
+  int cmd_flags = 0;
+
+  assert (!(flags & ~NBDKIT_FLAG_FUA));
+  if (flags & NBDKIT_FLAG_FUA)
+    cmd_flags = NBD_CMD_FLAG_FUA;
+
+  return nbd_reply (conn,
+                    nbd_request_full (conn, cmd_flags, NBD_CMD_WRITE, offset,
+                                      count, buf, NULL, NULL));
+}
+
+/* Write count zero bytes starting at offset.
+ *
+ * Submits NBD_CMD_WRITE_ZEROES and returns whatever nbd_reply yields.
+ * Only NBDKIT_FLAG_FUA and NBDKIT_FLAG_MAY_TRIM are accepted, and the
+ * handle must have NBD_FLAG_SEND_WRITE_ZEROES set (both asserted).
+ * NBD_CMD_FLAG_NO_HOLE is sent unless NBDKIT_FLAG_MAY_TRIM was given;
+ * NBDKIT_FLAG_FUA is translated to NBD_CMD_FLAG_FUA.
+ */
+static int
+nbd_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+  struct handle *h = handle;
+  int cmd_flags;
+
+  assert (!(flags & ~(NBDKIT_FLAG_FUA | NBDKIT_FLAG_MAY_TRIM)));
+  assert (h->flags & NBD_FLAG_SEND_WRITE_ZEROES);
+
+  cmd_flags = (flags & NBDKIT_FLAG_MAY_TRIM) ? 0 : NBD_CMD_FLAG_NO_HOLE;
+  if (flags & NBDKIT_FLAG_FUA)
+    cmd_flags |= NBD_CMD_FLAG_FUA;
+
+  return nbd_reply (h, nbd_request (h, cmd_flags, NBD_CMD_WRITE_ZEROES,
+                                    offset, count));
+}
+
+/* Trim count bytes starting at offset.
+ *
+ * Submits NBD_CMD_TRIM and returns whatever nbd_reply yields.
+ * NBDKIT_FLAG_FUA is the only flag accepted (asserted) and is
+ * translated to NBD_CMD_FLAG_FUA on the request.
+ */
+static int
+nbd_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+  struct handle *conn = handle;
+  int cmd_flags = 0;
+
+  assert (!(flags & ~NBDKIT_FLAG_FUA));
+  if (flags & NBDKIT_FLAG_FUA)
+    cmd_flags = NBD_CMD_FLAG_FUA;
+
+  return nbd_reply (conn, nbd_request (conn, cmd_flags, NBD_CMD_TRIM,
+                                       offset, count));
+}
+
+/* Flush the export.
+ *
+ * Submits NBD_CMD_FLUSH with zero offset and length and returns
+ * whatever nbd_reply yields.  No nbdkit flags are accepted (asserted).
+ */
+static int
+nbd_flush (void *handle, uint32_t flags)
+{
+  struct handle *conn = handle;
+
+  assert (!flags);
+  return nbd_reply (conn, nbd_request (conn, 0, NBD_CMD_FLUSH, 0, 0));
+}
+
+/* Query extent information for count bytes starting at offset.
+ *
+ * Submits NBD_CMD_BLOCK_STATUS through nbd_request_full, passing the
+ * extents object as the last argument, and returns whatever nbd_reply
+ * yields.  NBDKIT_FLAG_REQ_ONE is the only flag accepted and the handle
+ * must have extents enabled (both asserted); the flag is translated to
+ * NBD_CMD_FLAG_REQ_ONE on the request.
+ */
+static int
+nbd_extents (void *handle, uint32_t count, uint64_t offset,
+             uint32_t flags, struct nbdkit_extents *extents)
+{
+  struct handle *h = handle;
+  int cmd_flags = 0;
+
+  assert (!(flags & ~NBDKIT_FLAG_REQ_ONE) && h->extents);
+  if (flags & NBDKIT_FLAG_REQ_ONE)
+    cmd_flags = NBD_CMD_FLAG_REQ_ONE;
+
+  return nbd_reply (h,
+                    nbd_request_full (h, cmd_flags, NBD_CMD_BLOCK_STATUS,
+                                      offset, count, NULL, NULL, extents));
+}
+
+/* Ask for count bytes starting at offset to be cached.
+ *
+ * Submits NBD_CMD_CACHE and returns whatever nbd_reply yields.  No
+ * nbdkit flags are accepted (asserted).
+ */
+static int
+nbd_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+  struct handle *conn = handle;
+
+  assert (!flags);
+  return nbd_reply (conn, nbd_request (conn, 0, NBD_CMD_CACHE, offset,
+                                       count));
+}
+
+/* Callback table handed to nbdkit by NBDKIT_REGISTER_PLUGIN below. */
+static struct nbdkit_plugin plugin = {
+  /* Identification. */
+  .name               = "nbd",
+  .longname           = "nbdkit nbd plugin",
+  .version            = PACKAGE_VERSION,
+
+  /* Plugin lifetime and configuration. */
+  .config             = nbd_config,
+  .config_complete    = nbd_config_complete,
+  .config_help        = nbd_config_help,
+  .unload             = nbd_unload,
+
+  /* Per-connection lifetime. */
+  .open               = nbd_open,
+  .close              = nbd_close,
+
+  /* Export properties and capabilities. */
+  .get_size           = nbd_get_size,
+  .can_write          = nbd_can_write,
+  .can_flush          = nbd_can_flush,
+  .is_rotational      = nbd_is_rotational,
+  .can_trim           = nbd_can_trim,
+  .can_zero           = nbd_can_zero,
+  .can_fua            = nbd_can_fua,
+  .can_multi_conn     = nbd_can_multi_conn,
+  .can_cache          = nbd_can_cache,
+  .can_extents        = nbd_can_extents,
+
+  /* Data path. */
+  .pread              = nbd_pread,
+  .pwrite             = nbd_pwrite,
+  .zero               = nbd_zero,
+  .trim               = nbd_trim,
+  .flush              = nbd_flush,
+  .extents            = nbd_extents,
+  .cache              = nbd_cache,
+
+  /* See nbdkit-plugin(3) for the meaning of this setting. */
+  .errno_is_preserved = 1,
+};
+
+NBDKIT_REGISTER_PLUGIN (plugin)
diff --git a/plugins/nbd/Makefile.am b/plugins/nbd/Makefile.am
index 7368e591..bfc2a838 100644
--- a/plugins/nbd/Makefile.am
+++ b/plugins/nbd/Makefile.am
@@ -1,5 +1,5 @@
# nbdkit
-# Copyright (C) 2017 Red Hat Inc.
+# Copyright (C) 2017-2019 Red Hat Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -36,7 +36,6 @@ EXTRA_DIST = nbdkit-nbd-plugin.pod
plugin_LTLIBRARIES = nbdkit-nbd-plugin.la
nbdkit_nbd_plugin_la_SOURCES = \
- nbd.c \
$(top_srcdir)/include/nbdkit-plugin.h
nbdkit_nbd_plugin_la_CPPFLAGS = \
@@ -54,6 +53,20 @@ nbdkit_nbd_plugin_la_LIBADD = \
$(top_builddir)/common/protocol/libprotocol.la \
$(top_builddir)/common/utils/libutils.la
+# TODO: drop the standalone version, which is locked at nbdkit 1.13.4 behavior,
+# once libnbd is more commonly available with a stable API.
+if HAVE_LIBNBD
+nbdkit_nbd_plugin_la_SOURCES += \
+ nbd.c
+nbdkit_nbd_plugin_la_CFLAGS += \
+ $(LIBNBD_CFLAGS)
+nbdkit_nbd_plugin_la_LIBADD += \
+ $(LIBNBD_LIBS)
+else !HAVE_LIBNBD
+nbdkit_nbd_plugin_la_SOURCES += \
+ nbd-standalone.c
+endif !HAVE_LIBNBD
+
if HAVE_POD
man_MANS = nbdkit-nbd-plugin.1
--
2.20.1