[nbdkit PATCH] nbd: Drop nbd-standalone fallback
by Eric Blake
When we first used libnbd (nbdkit 1.14), it was brand new, so we left
in the ability to build a limited nbd plugin without the use of libnbd
for comparison. But now that libnbd has had more time under its belt,
and we have learned that our nbd-standalone code is not getting much
testing, it's time to follow through with our plan to finish off the
old code, and now build the nbd plugin only when libnbd is present.
Note that ./configure --disable-nbd-plugin is now synonymous with
./configure --without-libnbd; but I left both spellings working for
now. In the future, we may want to remove --disable-nbd-plugin
(reverting commit 0a190bd4).
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
configure.ac | 10 +-
plugins/nbd/Makefile.am | 37 +-
tests/Makefile.am | 8 +-
plugins/nbd/nbd-standalone.c | 1367 ----------------------------------
4 files changed, 17 insertions(+), 1405 deletions(-)
delete mode 100644 plugins/nbd/nbd-standalone.c
diff --git a/configure.ac b/configure.ac
index f28eb43f..3024a2c8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -838,10 +838,10 @@ AS_IF([test "$with_zlib" != "no"],[
])
AM_CONDITIONAL([HAVE_ZLIB],[test "x$ZLIB_LIBS" != "x"])
-dnl Check for libnbd (only if you want to compile the full nbd plugin).
+dnl Check for libnbd (only if you want to compile the nbd plugin).
AC_ARG_WITH([libnbd],
[AS_HELP_STRING([--without-libnbd],
- [disable full nbd plugin @<:@default=check@:>@])],
+ [disable nbd plugin @<:@default=check@:>@])],
[],
[with_libnbd=check])
AS_IF([test "$with_libnbd" != "no"],[
@@ -852,14 +852,14 @@ AS_IF([test "$with_libnbd" != "no"],[
],
[AC_MSG_WARN([libnbd >= 0.9.8 not found, nbd plugin will be crippled])])
])
-AM_CONDITIONAL([HAVE_LIBNBD], [test "x$LIBNBD_LIBS" != "x"])
-dnl Also you can disable the whole nbd plugin.
+dnl For backwards compatibility, we have a second way to disable the nbd plugin.
AC_ARG_ENABLE([nbd-plugin],
[AS_HELP_STRING([--disable-nbd-plugin], [disable nbd plugin])],
[],
[enable_nbd_plugin=yes])
-AM_CONDITIONAL([HAVE_NBD_PLUGIN],[test "x$enable_nbd_plugin" = "xyes"])
+AM_CONDITIONAL([HAVE_LIBNBD],
+ [test "x$LIBNBD_LIBS" != "x" && test "x$enable_nbd_plugin" = "xyes"])
dnl Check for liblzma (only if you want to compile the xz filter).
AC_ARG_WITH([liblzma],
diff --git a/plugins/nbd/Makefile.am b/plugins/nbd/Makefile.am
index 8ea9e166..4f6def32 100644
--- a/plugins/nbd/Makefile.am
+++ b/plugins/nbd/Makefile.am
@@ -1,5 +1,5 @@
# nbdkit
-# Copyright (C) 2017-2019 Red Hat Inc.
+# Copyright (C) 2017-2020 Red Hat Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -33,12 +33,13 @@ include $(top_srcdir)/common-rules.mk
EXTRA_DIST = nbdkit-nbd-plugin.pod
-if HAVE_NBD_PLUGIN
+if HAVE_LIBNBD
plugin_LTLIBRARIES = nbdkit-nbd-plugin.la
nbdkit_nbd_plugin_la_SOURCES = \
$(top_srcdir)/include/nbdkit-plugin.h \
+ nbd.c \
$(NULL)
nbdkit_nbd_plugin_la_CPPFLAGS = \
@@ -47,41 +48,19 @@ nbdkit_nbd_plugin_la_CPPFLAGS = \
-I$(top_srcdir)/common/utils \
-I$(top_srcdir)/server \
$(NULL)
-nbdkit_nbd_plugin_la_CFLAGS = $(WARNINGS_CFLAGS)
+nbdkit_nbd_plugin_la_CFLAGS = \
+ $(WARNINGS_CFLAGS) \
+ $(LIBNBD_CFLAGS) \
+ $(NULL)
nbdkit_nbd_plugin_la_LDFLAGS = \
-module -avoid-version -shared \
-Wl,--version-script=$(top_srcdir)/plugins/plugins.syms \
$(NULL)
nbdkit_nbd_plugin_la_LIBADD = \
$(top_builddir)/common/utils/libutils.la \
- $(NULL)
-
-# TODO: drop standalone version, which is locked at nbdkit 1.13.4 behavior,
-# once libnbd is more commonly available with stable API.
-if HAVE_LIBNBD
-nbdkit_nbd_plugin_la_SOURCES += \
- nbd.c \
- $(NULL)
-nbdkit_nbd_plugin_la_CFLAGS += \
- $(LIBNBD_CFLAGS) \
- $(NULL)
-nbdkit_nbd_plugin_la_LIBADD += \
$(LIBNBD_LIBS) \
$(NULL)
-else !HAVE_LIBNBD
-nbdkit_nbd_plugin_la_SOURCES += \
- nbd-standalone.c \
- $(NULL)
-nbdkit_nbd_plugin_la_CPPFLAGS += \
- -I$(top_srcdir)/common/protocol \
- $(NULL)
-nbdkit_nbd_plugin_la_LIBADD += \
- $(top_builddir)/common/protocol/libprotocol.la \
- $(NULL)
-
-endif !HAVE_LIBNBD
-
if HAVE_POD
man_MANS = nbdkit-nbd-plugin.1
@@ -94,4 +73,4 @@ nbdkit-nbd-plugin.1: nbdkit-nbd-plugin.pod
endif HAVE_POD
-endif HAVE_NBD_PLUGIN
+endif HAVE_LIBNBD
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 398ba3bd..254164cb 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -676,7 +676,7 @@ TESTS += \
test-pattern-largest-for-qemu.sh \
$(NULL)
-if HAVE_NBD_PLUGIN
+if HAVE_LIBNBD
# nbd plugin test.
LIBGUESTFS_TESTS += test-nbd
TESTS += \
@@ -688,7 +688,7 @@ TESTS += \
test_nbd_SOURCES = test-nbd.c test.h
test_nbd_CFLAGS = $(WARNINGS_CFLAGS) $(LIBGUESTFS_CFLAGS)
test_nbd_LDADD = libtest.la $(LIBGUESTFS_LIBS)
-endif HAVE_NBD_PLUGIN
+endif HAVE_LIBNBD
# null plugin test.
LIBNBD_TESTS += test-null
@@ -1118,10 +1118,10 @@ TESTS += test-log.sh
# nofilter test.
TESTS += test-nofilter.sh
-if HAVE_NBD_PLUGIN
+if HAVE_LIBNBD
# nozero filter test.
TESTS += test-nozero.sh
-endif HAVE_NBD_PLUGIN
+endif HAVE_LIBNBD
# offset filter test.
LIBGUESTFS_TESTS += test-offset
diff --git a/plugins/nbd/nbd-standalone.c b/plugins/nbd/nbd-standalone.c
deleted file mode 100644
index 4230ecd9..00000000
--- a/plugins/nbd/nbd-standalone.c
+++ /dev/null
@@ -1,1367 +0,0 @@
-/* nbdkit
- * Copyright (C) 2017-2020 Red Hat Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name of Red Hat nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <config.h>
-
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <netdb.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <assert.h>
-#include <pthread.h>
-#include <semaphore.h>
-
-#define NBDKIT_API_VERSION 2
-
-#include <nbdkit-plugin.h>
-#include "nbd-protocol.h"
-#include "protostrings.h"
-#include "byte-swapping.h"
-#include "cleanup.h"
-
-/* The per-transaction details */
-struct transaction {
- uint64_t cookie;
- sem_t sem;
- void *buf;
- uint64_t offset;
- uint32_t count;
- uint32_t err;
- struct nbdkit_extents *extents;
- struct transaction *next;
-};
-
-/* The per-connection handle */
-struct handle {
- /* These fields are read-only once initialized */
- int fd;
- int flags;
- int64_t size;
- bool structured;
- bool extents;
- pthread_t reader;
-
- /* Prevents concurrent threads from interleaving writes to server */
- pthread_mutex_t write_lock;
-
- pthread_mutex_t trans_lock; /* Covers access to all fields below */
- struct transaction *trans;
- uint64_t unique;
- bool dead;
-};
-
-/* Connect to server via absolute name of Unix socket */
-static char *sockname;
-
-/* Connect to server via TCP socket */
-static const char *hostname;
-static const char *port;
-
-/* Human-readable server description */
-static char *servname;
-
-/* Name of export on remote server, default '', ignored for oldstyle */
-static const char *export;
-
-/* Number of retries */
-static unsigned retry;
-
-/* True to share single server connection among all clients */
-static bool shared;
-static struct handle *shared_handle;
-
-static struct handle *nbd_open_handle (int readonly);
-static void nbd_close_handle (struct handle *h);
-
-static void
-nbd_unload (void)
-{
- if (shared)
- nbd_close_handle (shared_handle);
- free (sockname);
- free (servname);
-}
-
-/* Called for each key=value passed on the command line. This plugin
- * accepts socket=<sockname> or hostname=<hostname>/port=<port>
- * (exactly one connection required), and optional parameters
- * export=<name>, retry=<n> and shared=<bool>.
- */
-static int
-nbd_config (const char *key, const char *value)
-{
- int r;
-
- if (strcmp (key, "socket") == 0) {
- /* See FILENAMES AND PATHS in nbdkit-plugin(3) */
- free (sockname);
- sockname = nbdkit_absolute_path (value);
- if (!sockname)
- return -1;
- }
- else if (strcmp (key, "hostname") == 0)
- hostname = value;
- else if (strcmp (key, "port") == 0)
- port = value;
- else if (strcmp (key, "export") == 0)
- export = value;
- else if (strcmp (key, "retry") == 0) {
- if (nbdkit_parse_unsigned ("retry", value, &retry) == -1)
- return -1;
- }
- else if (strcmp (key, "shared") == 0) {
- r = nbdkit_parse_bool (value);
- if (r == -1)
- return -1;
- shared = r;
- }
- else {
- nbdkit_error ("unknown parameter '%s'", key);
- return -1;
- }
-
- return 0;
-}
-
-/* Check the user passed exactly one socket description. */
-static int
-nbd_config_complete (void)
-{
- int r;
-
- if (sockname) {
- struct sockaddr_un sock;
-
- if (hostname || port) {
- nbdkit_error ("cannot mix Unix socket and TCP hostname/port parameters");
- return -1;
- }
- if (strlen (sockname) > sizeof sock.sun_path) {
- nbdkit_error ("socket file name too large");
- return -1;
- }
- servname = strdup (sockname);
- }
- else {
- if (!hostname) {
- nbdkit_error ("must supply socket= or hostname= of external NBD server");
- return -1;
- }
- if (!port)
- port = "10809";
- if (strchr (hostname, ':'))
- r = asprintf (&servname, "[%s]:%s", hostname, port);
- else
- r = asprintf (&servname, "%s:%s", hostname, port);
- if (r < 0) {
- nbdkit_error ("asprintf: %m");
- return -1;
- }
- }
-
- if (!export)
- export = "";
-
- if (shared && (shared_handle = nbd_open_handle (false)) == NULL)
- return -1;
- return 0;
-}
-
-#define nbd_config_help \
- "socket=<SOCKNAME> The Unix socket to connect to.\n" \
- "hostname=<HOST> The hostname for the TCP socket to connect to.\n" \
- "port=<PORT> TCP port or service name to use (default 10809).\n" \
- "export=<NAME> Export name to connect to (default \"\").\n" \
- "retry=<N> Retry connection up to N seconds (default 0).\n" \
- "shared=<BOOL> True to share one server connection among all clients,\n" \
- " rather than a connection per client (default false).\n" \
-
-#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
-
-/* Read an entire buffer, returning 0 on success or -1 with errno set. */
-static int
-read_full (int fd, void *buf, size_t len)
-{
- ssize_t r;
-
- while (len) {
- r = read (fd, buf, len);
- if (r < 0) {
- if (errno == EINTR || errno == EAGAIN)
- continue;
- return -1;
- }
- if (!r) {
- /* Unexpected EOF */
- errno = EBADMSG;
- return -1;
- }
- buf += r;
- len -= r;
- }
- return 0;
-}
-
-/* Write an entire buffer, returning 0 on success or -1 with errno set. */
-static int
-write_full (int fd, const void *buf, size_t len)
-{
- ssize_t r;
-
- while (len) {
- r = write (fd, buf, len);
- if (r < 0) {
- if (errno == EINTR || errno == EAGAIN)
- continue;
- return -1;
- }
- buf += r;
- len -= r;
- }
- return 0;
-}
-
-/* Called during transmission phases when there is no hope of
- * resynchronizing with the server, and all further requests from the
- * client will fail. Returns -1 for convenience. */
-static int
-nbd_mark_dead (struct handle *h)
-{
- int err = errno;
-
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
- if (!h->dead) {
- nbdkit_debug ("permanent failure while talking to server %s: %m",
- servname);
- h->dead = true;
- }
- else if (!err)
- errno = ESHUTDOWN;
- /* NBD only accepts a limited set of errno values over the wire, and
- nbdkit converts all other values to EINVAL. If we died due to an
- errno value that cannot transmit over the wire, translate it to
- ESHUTDOWN instead. */
- if (err == EPIPE || err == EBADMSG)
- nbdkit_set_error (ESHUTDOWN);
- return -1;
-}
-
-/* Find and possibly remove the transaction corresponding to cookie
- from the list. */
-static struct transaction *
-find_trans_by_cookie (struct handle *h, uint64_t cookie, bool remove)
-{
- struct transaction **ptr;
- struct transaction *trans;
-
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
- ptr = &h->trans;
- while ((trans = *ptr) != NULL) {
- if (cookie == trans->cookie)
- break;
- ptr = &trans->next;
- }
- if (trans && remove)
- *ptr = trans->next;
- return trans;
-}
-
-/* Send a request, return 0 on success or -1 on write failure. */
-static int
-nbd_request_raw (struct handle *h, uint16_t flags, uint16_t type,
- uint64_t offset, uint32_t count, uint64_t cookie,
- const void *buf)
-{
- struct nbd_request req = {
- .magic = htobe32 (NBD_REQUEST_MAGIC),
- .flags = htobe16 (flags),
- .type = htobe16 (type),
- .handle = cookie, /* Opaque to server, so endianness doesn't matter */
- .offset = htobe64 (offset),
- .count = htobe32 (count),
- };
- int r;
-
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->write_lock);
- nbdkit_debug ("sending request type %d (%s), flags %#x, offset %#" PRIx64
- ", count %#x, cookie %#" PRIx64, type, name_of_nbd_cmd (type),
- flags, offset, count, cookie);
- r = write_full (h->fd, &req, sizeof req);
- if (buf && !r)
- r = write_full (h->fd, buf, count);
- return r;
-}
-
-/* Perform the request half of a transaction. On success, return the
- transaction; on error return NULL. */
-static struct transaction *
-nbd_request_full (struct handle *h, uint16_t flags, uint16_t type,
- uint64_t offset, uint32_t count, const void *req_buf,
- void *rep_buf, struct nbdkit_extents *extents)
-{
- int err;
- struct transaction *trans;
- uint64_t cookie;
-
- trans = calloc (1, sizeof *trans);
- if (!trans) {
- nbdkit_error ("unable to track transaction: %m");
- /* Still in sync with server, so don't mark connection dead */
- return NULL;
- }
- if (sem_init (&trans->sem, 0, 0)) {
- nbdkit_error ("unable to create semaphore: %m");
- /* Still in sync with server, so don't mark connection dead */
- free (trans);
- return NULL;
- }
- trans->buf = rep_buf;
- trans->count = rep_buf ? count : 0;
- trans->offset = offset;
- trans->extents = extents;
- {
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
- if (h->dead)
- goto err;
- cookie = trans->cookie = h->unique++;
- trans->next = h->trans;
- h->trans = trans;
- }
- if (nbd_request_raw (h, flags, type, offset, count, cookie, req_buf) == 0)
- return trans;
- trans = find_trans_by_cookie (h, cookie, true);
-
- err:
- err = errno;
- if (sem_destroy (&trans->sem))
- abort ();
- free (trans);
- nbd_mark_dead (h);
- errno = err;
- return NULL;
-}
-
-/* Shorthand for nbd_request_full when no extra buffers are involved. */
-static struct transaction *
-nbd_request (struct handle *h, uint16_t flags, uint16_t type, uint64_t offset,
- uint32_t count)
-{
- return nbd_request_full (h, flags, type, offset, count, NULL, NULL, NULL);
-}
-
-/* Read a reply, and look up the corresponding transaction.
- Return the server's non-negative answer (converted to local errno
- value) on success, or -1 on read failure. If structured replies
- were negotiated, trans_out is set to NULL if there are still more replies
- expected. */
-static int
-nbd_reply_raw (struct handle *h, struct transaction **trans_out)
-{
- union {
- struct nbd_simple_reply simple;
- struct nbd_structured_reply structured;
- } rep;
- struct transaction *trans;
- void *buf = NULL;
- CLEANUP_FREE char *payload = NULL;
- uint32_t count;
- uint32_t id;
- struct nbd_block_descriptor *extents = NULL;
- size_t nextents = 0;
- int error = NBD_SUCCESS;
- bool more = false;
- uint32_t len = 0; /* 0 except for structured reads */
- uint64_t offset = 0; /* if len, absolute offset of structured read chunk */
- bool zero = false; /* if len, whether to read or memset */
- uint16_t errlen;
-
- *trans_out = NULL;
- /* magic and handle overlap between simple and structured replies */
- if (read_full (h->fd, &rep, sizeof rep.simple))
- return nbd_mark_dead (h);
- rep.simple.magic = be32toh (rep.simple.magic);
- switch (rep.simple.magic) {
- case NBD_SIMPLE_REPLY_MAGIC:
- nbdkit_debug ("received simple reply for cookie %#" PRIx64 ", status %s",
- rep.simple.handle,
- name_of_nbd_error (be32toh (rep.simple.error)));
- error = be32toh (rep.simple.error);
- break;
- case NBD_STRUCTURED_REPLY_MAGIC:
- if (!h->structured) {
- nbdkit_error ("structured response without negotiation");
- return nbd_mark_dead (h);
- }
- if (read_full (h->fd, sizeof rep.simple + (char *) &rep,
- sizeof rep - sizeof rep.simple))
- return nbd_mark_dead (h);
- rep.structured.flags = be16toh (rep.structured.flags);
- rep.structured.type = be16toh (rep.structured.type);
- rep.structured.length = be32toh (rep.structured.length);
- nbdkit_debug ("received structured reply %s for cookie %#" PRIx64
- ", payload length %" PRId32,
- name_of_nbd_reply_type (rep.structured.type),
- rep.structured.handle, rep.structured.length);
- if (rep.structured.length > 64 * 1024 * 1024) {
- nbdkit_error ("structured reply length is suspiciously large: %" PRId32,
- rep.structured.length);
- return nbd_mark_dead (h);
- }
- if (rep.structured.length) {
- /* Special case for OFFSET_DATA in order to read tail of chunk
- directly into final buffer later on */
- len = (rep.structured.type == NBD_REPLY_TYPE_OFFSET_DATA &&
- rep.structured.length > sizeof offset) ? sizeof offset :
- rep.structured.length;
- payload = malloc (len);
- if (!payload) {
- nbdkit_error ("reading structured reply payload: %m");
- return nbd_mark_dead (h);
- }
- if (read_full (h->fd, payload, len))
- return nbd_mark_dead (h);
- len = 0;
- }
- more = !(rep.structured.flags & NBD_REPLY_FLAG_DONE);
- switch (rep.structured.type) {
- case NBD_REPLY_TYPE_NONE:
- if (rep.structured.length) {
- nbdkit_error ("NBD_REPLY_TYPE_NONE with invalid payload");
- return nbd_mark_dead (h);
- }
- if (more) {
- nbdkit_error ("NBD_REPLY_TYPE_NONE without done flag");
- return nbd_mark_dead (h);
- }
- break;
- case NBD_REPLY_TYPE_OFFSET_DATA:
- if (rep.structured.length <= sizeof offset) {
- nbdkit_error ("structured reply OFFSET_DATA too small");
- return nbd_mark_dead (h);
- }
- memcpy (&offset, payload, sizeof offset);
- offset = be64toh (offset);
- len = rep.structured.length - sizeof offset;
- break;
- case NBD_REPLY_TYPE_OFFSET_HOLE:
- if (rep.structured.length != sizeof offset + sizeof len) {
- nbdkit_error ("structured reply OFFSET_HOLE size incorrect");
- return nbd_mark_dead (h);
- }
- memcpy (&offset, payload, sizeof offset);
- offset = be64toh (offset);
- memcpy (&len, payload, sizeof len);
- len = be32toh (len);
- if (!len) {
- nbdkit_error ("structured reply OFFSET_HOLE length incorrect");
- return nbd_mark_dead (h);
- }
- zero = true;
- break;
- case NBD_REPLY_TYPE_BLOCK_STATUS:
- if (!h->extents) {
- nbdkit_error ("block status response without negotiation");
- return nbd_mark_dead (h);
- }
- if (rep.structured.length < sizeof *extents ||
- rep.structured.length % sizeof *extents != sizeof id) {
- nbdkit_error ("structured reply OFFSET_HOLE size incorrect");
- return nbd_mark_dead (h);
- }
- nextents = rep.structured.length / sizeof *extents;
- extents = (struct nbd_block_descriptor *) &payload[sizeof id];
- memcpy (&id, payload, sizeof id);
- id = be32toh (id);
- nbdkit_debug ("parsing %zu extents for context id %" PRId32,
- nextents, id);
- break;
- default:
- if (!NBD_REPLY_TYPE_IS_ERR (rep.structured.type)) {
- nbdkit_error ("received unexpected structured reply %s",
- name_of_nbd_reply_type (rep.structured.type));
- return nbd_mark_dead (h);
- }
-
- if (rep.structured.length < sizeof error + sizeof errlen) {
- nbdkit_error ("structured reply error size incorrect");
- return nbd_mark_dead (h);
- }
- memcpy (&errlen, payload + sizeof error, sizeof errlen);
- errlen = be16toh (errlen);
- if (errlen > rep.structured.length - sizeof error - sizeof errlen) {
- nbdkit_error ("structured reply error message size incorrect");
- return nbd_mark_dead (h);
- }
- memcpy (&error, payload, sizeof error);
- error = be32toh (error);
- if (errlen)
- nbdkit_debug ("received structured error %s with message: %.*s",
- name_of_nbd_error (error), (int) errlen,
- payload + sizeof error + sizeof errlen);
- else
- nbdkit_debug ("received structured error %s without message",
- name_of_nbd_error (error));
- }
- break;
-
- default:
- nbdkit_error ("received unexpected magic in reply: %#" PRIx32,
- rep.simple.magic);
- return nbd_mark_dead (h);
- }
-
- trans = find_trans_by_cookie (h, rep.simple.handle, !more);
- if (!trans) {
- nbdkit_error ("reply with unexpected cookie %#" PRIx64, rep.simple.handle);
- return nbd_mark_dead (h);
- }
-
- buf = trans->buf;
- count = trans->count;
- if (nextents) {
- if (!trans->extents) {
- nbdkit_error ("block status response to a non-status command");
- return nbd_mark_dead (h);
- }
- offset = trans->offset;
- for (size_t i = 0; i < nextents; i++) {
- /* We rely on the fact that NBDKIT_EXTENT_* match NBD_STATE_* */
- if (nbdkit_add_extent (trans->extents, offset,
- be32toh (extents[i].length),
- be32toh (extents[i].status_flags)) == -1) {
- error = errno;
- break;
- }
- offset += be32toh (extents[i].length);
- }
- }
- if (buf && h->structured && rep.simple.magic == NBD_SIMPLE_REPLY_MAGIC) {
- nbdkit_error ("simple read reply when structured was expected");
- return nbd_mark_dead (h);
- }
- if (len) {
- if (!buf) {
- nbdkit_error ("structured read response to a non-read command");
- return nbd_mark_dead (h);
- }
- if (offset < trans->offset || offset > INT64_MAX ||
- offset + len > trans->offset + count) {
- nbdkit_error ("structured read reply with unexpected offset/length");
- return nbd_mark_dead (h);
- }
- buf = (char *) buf + offset - trans->offset;
- if (zero) {
- memset (buf, 0, len);
- buf = NULL;
- }
- else
- count = len;
- }
-
- /* Thanks to structured replies, we must preserve an error in any
- earlier chunk for replay during the final chunk. */
- if (!more) {
- *trans_out = trans;
- if (!error)
- error = trans->err;
- }
- else if (error && !trans->err)
- trans->err = error;
-
- /* Convert from wire value to local errno, and perform any final read */
- switch (error) {
- case NBD_SUCCESS:
- if (buf && read_full (h->fd, buf, count))
- return nbd_mark_dead (h);
- return 0;
- case NBD_EPERM:
- return EPERM;
- case NBD_EIO:
- return EIO;
- case NBD_ENOMEM:
- return ENOMEM;
- default:
- nbdkit_debug ("unexpected error %d, squashing to EINVAL", error);
- /* fallthrough */
- case NBD_EINVAL:
- return EINVAL;
- case NBD_ENOSPC:
- return ENOSPC;
- case NBD_EOVERFLOW:
- return EOVERFLOW;
- case NBD_ESHUTDOWN:
- return ESHUTDOWN;
- }
-}
-
-/* Reader loop. */
-void *
-nbd_reader (void *handle)
-{
- struct handle *h = handle;
- bool done = false;
- int r;
-
- while (!done) {
- struct transaction *trans;
-
- r = nbd_reply_raw (h, &trans);
- if (r >= 0) {
- if (!trans)
- nbdkit_debug ("partial reply handled, waiting for final reply");
- else {
- trans->err = r;
- if (sem_post (&trans->sem)) {
- nbdkit_error ("failed to post semaphore: %m");
- abort ();
- }
- }
- }
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
- done = h->dead;
- }
-
- /* Clean up any stranded in-flight requests */
- r = ESHUTDOWN;
- while (1) {
- struct transaction *trans;
-
- {
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->trans_lock);
- trans = h->trans;
- h->trans = trans ? trans->next : NULL;
- }
- if (!trans)
- break;
- trans->err = r;
- if (sem_post (&trans->sem)) {
- nbdkit_error ("failed to post semaphore: %m");
- abort ();
- }
- }
- return NULL;
-}
-
-/* Perform the reply half of a transaction. */
-static int
-nbd_reply (struct handle *h, struct transaction *trans)
-{
- int err;
-
- if (!trans) {
- assert (errno);
- return -1;
- }
-
- while ((err = sem_wait (&trans->sem)) == -1 && errno == EINTR)
- /* try again */;
- if (err) {
- nbdkit_debug ("failed to wait on semaphore: %m");
- err = EIO;
- }
- else
- err = trans->err;
- if (sem_destroy (&trans->sem))
- abort ();
- free (trans);
- errno = err;
- return err ? -1 : 0;
-}
-
-/* Receive response to @option into @reply, and consume any
- payload. If @payload is non-NULL, caller must free *payload. Return
- 0 on success, or -1 if communication to server is no longer
- possible. */
-static int
-nbd_newstyle_recv_option_reply (struct handle *h, uint32_t option,
- struct nbd_fixed_new_option_reply *reply,
- void **payload)
-{
- CLEANUP_FREE char *buffer = NULL;
-
- if (payload)
- *payload = NULL;
- if (read_full (h->fd, reply, sizeof *reply)) {
- nbdkit_error ("unable to read option reply: %m");
- return -1;
- }
- reply->magic = be64toh (reply->magic);
- reply->option = be32toh (reply->option);
- reply->reply = be32toh (reply->reply);
- reply->replylen = be32toh (reply->replylen);
- if (reply->magic != NBD_REP_MAGIC || reply->option != option) {
- nbdkit_error ("unexpected option reply");
- return -1;
- }
- if (reply->replylen) {
- if (reply->reply == NBD_REP_ACK) {
- nbdkit_error ("NBD_REP_ACK should not have replylen %" PRId32,
- reply->replylen);
- return -1;
- }
- if (reply->replylen > 16 * 1024 * 1024) {
- nbdkit_error ("option reply length is suspiciously large: %" PRId32,
- reply->replylen);
- return -1;
- }
- /* buffer is a string for NBD_REP_ERR_*; adding a NUL terminator
- makes that string easier to use, without hurting other reply
- types where buffer is not a string */
- buffer = malloc (reply->replylen + 1);
- if (!buffer) {
- nbdkit_error ("malloc: %m");
- return -1;
- }
- if (read_full (h->fd, buffer, reply->replylen)) {
- nbdkit_error ("unable to read option reply payload: %m");
- return -1;
- }
- buffer[reply->replylen] = '\0';
- if (!payload)
- nbdkit_debug ("ignoring option reply payload");
- else {
- *payload = buffer;
- buffer = NULL;
- }
- }
- return 0;
-}
-
-/* Attempt to negotiate structured reads, block status, and NBD_OPT_GO.
- Return 1 if haggling completed, 0 if haggling failed but
- NBD_OPT_EXPORT_NAME is still viable, or -1 on inability to connect. */
-static int
-nbd_newstyle_haggle (struct handle *h)
-{
- const char *const query = "base:allocation";
- struct nbd_new_option opt;
- uint32_t exportnamelen = htobe32 (strlen (export));
- uint32_t nrqueries = htobe32 (1);
- uint32_t querylen = htobe32 (strlen (query));
- /* For now, we make no NBD_INFO_* requests, relying on the server to
- send its defaults. TODO: nbdkit should let plugins report block
- sizes, at which point we should request NBD_INFO_BLOCK_SIZE and
- obey any sizes set by server. */
- uint16_t nrinfos = htobe16 (0);
- struct nbd_fixed_new_option_reply reply;
-
- nbdkit_debug ("trying NBD_OPT_STRUCTURED_REPLY");
- opt.version = htobe64 (NBD_NEW_VERSION);
- opt.option = htobe32 (NBD_OPT_STRUCTURED_REPLY);
- opt.optlen = htobe32 (0);
- if (write_full (h->fd, &opt, sizeof opt)) {
- nbdkit_error ("unable to request NBD_OPT_STRUCTURED_REPLY: %m");
- return -1;
- }
- if (nbd_newstyle_recv_option_reply (h, NBD_OPT_STRUCTURED_REPLY, &reply,
- NULL) < 0)
- return -1;
- if (reply.reply == NBD_REP_ACK) {
- nbdkit_debug ("structured replies enabled, trying NBD_OPT_SET_META_CONTEXT");
- h->structured = true;
-
- opt.version = htobe64 (NBD_NEW_VERSION);
- opt.option = htobe32 (NBD_OPT_SET_META_CONTEXT);
- opt.optlen = htobe32 (sizeof exportnamelen + strlen (export) +
- sizeof nrqueries + sizeof querylen + strlen (query));
- if (write_full (h->fd, &opt, sizeof opt) ||
- write_full (h->fd, &exportnamelen, sizeof exportnamelen) ||
- write_full (h->fd, export, strlen (export)) ||
- write_full (h->fd, &nrqueries, sizeof nrqueries) ||
- write_full (h->fd, &querylen, sizeof querylen) ||
- write_full (h->fd, query, strlen (query))) {
- nbdkit_error ("unable to request NBD_OPT_SET_META_CONTEXT: %m");
- return -1;
- }
- if (nbd_newstyle_recv_option_reply (h, NBD_OPT_SET_META_CONTEXT, &reply,
- NULL) < 0)
- return -1;
- if (reply.reply == NBD_REP_META_CONTEXT) {
- /* Cheat: we asked for exactly one context. We could double
- check that the server is replying with exactly the
- "base:allocation" context, and then remember the id it tells
- us to later confirm that responses to NBD_CMD_BLOCK_STATUS
- match up; but in the absence of multiple contexts, it's
- easier to just assume the server is compliant, and will reuse
- the same id, without bothering to check further. */
- nbdkit_debug ("extents enabled");
- h->extents = true;
- if (nbd_newstyle_recv_option_reply (h, NBD_OPT_SET_META_CONTEXT, &reply,
- NULL) < 0)
- return -1;
- }
- if (reply.reply != NBD_REP_ACK) {
- if (h->extents) {
- nbdkit_error ("unexpected response to set meta context");
- return -1;
- }
- nbdkit_debug ("ignoring meta context response %s",
- name_of_nbd_rep (reply.reply));
- }
- }
- else {
- nbdkit_debug ("structured replies disabled");
- }
-
- /* Try NBD_OPT_GO */
- nbdkit_debug ("trying NBD_OPT_GO");
- opt.version = htobe64 (NBD_NEW_VERSION);
- opt.option = htobe32 (NBD_OPT_GO);
- opt.optlen = htobe32 (sizeof exportnamelen + strlen (export) +
- sizeof nrinfos);
- if (write_full (h->fd, &opt, sizeof opt) ||
- write_full (h->fd, &exportnamelen, sizeof exportnamelen) ||
- write_full (h->fd, export, strlen (export)) ||
- write_full (h->fd, &nrinfos, sizeof nrinfos)) {
- nbdkit_error ("unable to request NBD_OPT_GO: %m");
- return -1;
- }
- while (1) {
- CLEANUP_FREE void *buffer;
- struct nbd_fixed_new_option_reply_info_export *reply_export;
- uint16_t info;
-
- if (nbd_newstyle_recv_option_reply (h, NBD_OPT_GO, &reply, &buffer) < 0)
- return -1;
- switch (reply.reply) {
- case NBD_REP_INFO:
- /* Parse payload, but ignore all except NBD_INFO_EXPORT */
- if (reply.replylen < 2) {
- nbdkit_error ("NBD_REP_INFO reply too short");
- return -1;
- }
- memcpy (&info, buffer, sizeof info);
- info = be16toh (info);
- switch (info) {
- case NBD_INFO_EXPORT:
- if (reply.replylen != sizeof *reply_export) {
- nbdkit_error ("NBD_INFO_EXPORT reply wrong size");
- return -1;
- }
- reply_export = buffer;
- h->size = be64toh (reply_export->exportsize);
- h->flags = be16toh (reply_export->eflags);
- break;
- default:
- nbdkit_debug ("ignoring server info %d", info);
- }
- break;
- case NBD_REP_ACK:
- /* End of replies, valid if server already sent NBD_INFO_EXPORT,
- observable since h->flags must contain NBD_FLAG_HAS_FLAGS */
- assert (!buffer);
- if (!h->flags) {
- nbdkit_error ("server omitted NBD_INFO_EXPORT reply to NBD_OPT_GO");
- return -1;
- }
- nbdkit_debug ("NBD_OPT_GO complete");
- return 1;
- case NBD_REP_ERR_UNSUP:
- /* Special case this failure to fall back to NBD_OPT_EXPORT_NAME */
- nbdkit_debug ("server lacks NBD_OPT_GO support");
- return 0;
- default:
- /* Unexpected. Either the server sent a legitimate error or an
- unexpected reply, but either way, we can't connect. */
- if (NBD_REP_IS_ERR (reply.reply))
- if (reply.replylen)
- nbdkit_error ("server rejected NBD_OPT_GO with %s: %s",
- name_of_nbd_rep (reply.reply), (char *) buffer);
- else
- nbdkit_error ("server rejected NBD_OPT_GO with %s",
- name_of_nbd_rep (reply.reply));
- else
- nbdkit_error ("server used unexpected reply %s to NBD_OPT_GO",
- name_of_nbd_rep (reply.reply));
- return -1;
- }
- }
-}
-
-/* Connect to a Unix socket, returning the fd on success */
-static int
-nbd_connect_unix (void)
-{
- struct sockaddr_un sock = { .sun_family = AF_UNIX };
- int fd;
-
- nbdkit_debug ("connecting to Unix socket name=%s", sockname);
- fd = socket (AF_UNIX, SOCK_STREAM, 0);
- if (fd < 0) {
- nbdkit_error ("socket: %m");
- return -1;
- }
-
- /* We already validated length during nbd_config_complete */
- assert (strlen (sockname) <= sizeof sock.sun_path);
- memcpy (sock.sun_path, sockname, strlen (sockname));
- if (connect (fd, (const struct sockaddr *) &sock, sizeof sock) < 0) {
- nbdkit_error ("connect: %m");
- return -1;
- }
- return fd;
-}
-
-/* Connect to a TCP socket, returning the fd on success */
-static int
-nbd_connect_tcp (void)
-{
- struct addrinfo hints = { .ai_family = AF_UNSPEC,
- .ai_socktype = SOCK_STREAM, };
- struct addrinfo *result, *rp;
- int r;
- const int optval = 1;
- int fd;
-
- nbdkit_debug ("connecting to TCP socket host=%s port=%s", hostname, port);
- r = getaddrinfo (hostname, port, &hints, &result);
- if (r != 0) {
- nbdkit_error ("getaddrinfo: %s", gai_strerror (r));
- return -1;
- }
-
- assert (result != NULL);
-
- for (rp = result; rp; rp = rp->ai_next) {
- fd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
- if (fd == -1)
- continue;
- if (connect (fd, rp->ai_addr, rp->ai_addrlen) != -1)
- break;
- close (fd);
- }
- freeaddrinfo (result);
- if (rp == NULL) {
- nbdkit_error ("connect: %m");
- close (fd);
- return -1;
- }
-
- if (setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &optval,
- sizeof (int)) == -1) {
- nbdkit_error ("cannot set TCP_NODELAY option: %m");
- close (fd);
- return -1;
- }
- return fd;
-}
-
-/* Create the shared or per-connection handle. */
-static struct handle *
-nbd_open_handle (int readonly)
-{
- struct handle *h;
- struct nbd_old_handshake old;
- uint64_t version;
- unsigned long retries = retry;
-
- h = calloc (1, sizeof *h);
- if (h == NULL) {
- nbdkit_error ("malloc: %m");
- return NULL;
- }
-
- retry:
- if (sockname)
- h->fd = nbd_connect_unix ();
- else
- h->fd = nbd_connect_tcp ();
- if (h->fd == -1) {
- if (retries--) {
- sleep (1);
- goto retry;
- }
- goto err;
- }
-
- /* old and new handshake share same meaning of first 16 bytes */
- if (read_full (h->fd, &old,
- offsetof (struct nbd_old_handshake, exportsize))) {
- nbdkit_error ("unable to read magic: %m");
- goto err;
- }
- if (be64toh (old.nbdmagic) != NBD_MAGIC) {
- nbdkit_error ("wrong magic, %s is not an NBD server", servname);
- goto err;
- }
- version = be64toh (old.version);
- if (version == NBD_OLD_VERSION) {
- nbdkit_debug ("trying oldstyle connection");
- if (read_full (h->fd,
- (char *) &old + offsetof (struct nbd_old_handshake, exportsize),
- sizeof old - offsetof (struct nbd_old_handshake, exportsize))) {
- nbdkit_error ("unable to read old handshake: %m");
- goto err;
- }
- h->size = be64toh (old.exportsize);
- h->flags = be16toh (old.eflags);
- }
- else if (version == NBD_NEW_VERSION) {
- uint16_t gflags;
- uint32_t cflags;
- struct nbd_new_option opt;
- struct nbd_export_name_option_reply finish;
- size_t expect;
-
- nbdkit_debug ("trying newstyle connection");
- if (read_full (h->fd, &gflags, sizeof gflags)) {
- nbdkit_error ("unable to read global flags: %m");
- goto err;
- }
- gflags = be16toh (gflags);
- cflags = htobe32 (gflags & (NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES));
- if (write_full (h->fd, &cflags, sizeof cflags)) {
- nbdkit_error ("unable to return global flags: %m");
- goto err;
- }
-
- /* Prefer NBD_OPT_GO if possible */
- if (gflags & NBD_FLAG_FIXED_NEWSTYLE) {
- int rc = nbd_newstyle_haggle (h);
- if (rc < 0)
- goto err;
- if (!rc)
- goto export_name;
- }
- else {
- export_name:
- /* Option haggling untried or failed, use older NBD_OPT_EXPORT_NAME */
- nbdkit_debug ("trying NBD_OPT_EXPORT_NAME");
- opt.version = htobe64 (NBD_NEW_VERSION);
- opt.option = htobe32 (NBD_OPT_EXPORT_NAME);
- opt.optlen = htobe32 (strlen (export));
- if (write_full (h->fd, &opt, sizeof opt) ||
- write_full (h->fd, export, strlen (export))) {
- nbdkit_error ("unable to request export '%s': %m", export);
- goto err;
- }
- expect = sizeof finish;
- if (gflags & NBD_FLAG_NO_ZEROES)
- expect -= sizeof finish.zeroes;
- if (read_full (h->fd, &finish, expect)) {
- nbdkit_error ("unable to read new handshake: %m");
- goto err;
- }
- h->size = be64toh (finish.exportsize);
- h->flags = be16toh (finish.eflags);
- }
- }
- else {
- nbdkit_error ("unexpected version %#" PRIx64, version);
- goto err;
- }
- if (readonly)
- h->flags |= NBD_FLAG_READ_ONLY;
-
- /* Spawn a dedicated reader thread */
- if ((errno = pthread_mutex_init (&h->write_lock, NULL))) {
- nbdkit_error ("failed to initialize write mutex: %m");
- goto err;
- }
- if ((errno = pthread_mutex_init (&h->trans_lock, NULL))) {
- nbdkit_error ("failed to initialize transaction mutex: %m");
- pthread_mutex_destroy (&h->write_lock);
- goto err;
- }
- if ((errno = pthread_create (&h->reader, NULL, nbd_reader, h))) {
- nbdkit_error ("failed to initialize reader thread: %m");
- pthread_mutex_destroy (&h->write_lock);
- pthread_mutex_destroy (&h->trans_lock);
- goto err;
- }
-
- return h;
-
- err:
- if (h->fd >= 0)
- close (h->fd);
- free (h);
- return NULL;
-}
-
-/* Create the per-connection handle. */
-static void *
-nbd_open (int readonly)
-{
- if (shared)
- return shared_handle;
- return nbd_open_handle (readonly);
-}
-
-/* Free up the shared or per-connection handle. */
-static void
-nbd_close_handle (struct handle *h)
-{
- if (!h->dead) {
- nbd_request_raw (h, 0, NBD_CMD_DISC, 0, 0, 0, NULL);
- shutdown (h->fd, SHUT_WR);
- }
- if ((errno = pthread_join (h->reader, NULL)))
- nbdkit_debug ("failed to join reader thread: %m");
- close (h->fd);
- pthread_mutex_destroy (&h->write_lock);
- pthread_mutex_destroy (&h->trans_lock);
- free (h);
-}
-
-/* Free up the per-connection handle. */
-static void
-nbd_close (void *handle)
-{
- struct handle *h = handle;
-
- if (!shared)
- nbd_close_handle (h);
-}
-
-/* Get the file size. */
-static int64_t
-nbd_get_size (void *handle)
-{
- struct handle *h = handle;
-
- return h->size;
-}
-
-static int
-nbd_can_write (void *handle)
-{
- struct handle *h = handle;
-
- return !(h->flags & NBD_FLAG_READ_ONLY);
-}
-
-static int
-nbd_can_flush (void *handle)
-{
- struct handle *h = handle;
-
- return !!(h->flags & NBD_FLAG_SEND_FLUSH);
-}
-
-static int
-nbd_is_rotational (void *handle)
-{
- struct handle *h = handle;
-
- return !!(h->flags & NBD_FLAG_ROTATIONAL);
-}
-
-static int
-nbd_can_trim (void *handle)
-{
- struct handle *h = handle;
-
- return !!(h->flags & NBD_FLAG_SEND_TRIM);
-}
-
-static int
-nbd_can_zero (void *handle)
-{
- struct handle *h = handle;
-
- return !!(h->flags & NBD_FLAG_SEND_WRITE_ZEROES);
-}
-
-static int
-nbd_can_fua (void *handle)
-{
- struct handle *h = handle;
-
- return h->flags & NBD_FLAG_SEND_FUA ? NBDKIT_FUA_NATIVE : NBDKIT_FUA_NONE;
-}
-
-static int
-nbd_can_multi_conn (void *handle)
-{
- struct handle *h = handle;
-
- return !!(h->flags & NBD_FLAG_CAN_MULTI_CONN);
-}
-
-static int
-nbd_can_cache (void *handle)
-{
- struct handle *h = handle;
-
- if (h->flags & NBD_FLAG_SEND_CACHE)
- return NBDKIT_CACHE_NATIVE;
- return NBDKIT_CACHE_NONE;
-}
-
-static int
-nbd_can_extents (void *handle)
-{
- struct handle *h = handle;
-
- return h->extents;
-}
-
-/* Read data from the file. */
-static int
-nbd_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
- uint32_t flags)
-{
- struct handle *h = handle;
- struct transaction *s;
-
- assert (!flags);
- s = nbd_request_full (h, 0, NBD_CMD_READ, offset, count, NULL, buf, NULL);
- return nbd_reply (h, s);
-}
-
-/* Write data to the file. */
-static int
-nbd_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
- uint32_t flags)
-{
- struct handle *h = handle;
- struct transaction *s;
-
- assert (!(flags & ~NBDKIT_FLAG_FUA));
- s = nbd_request_full (h, flags & NBDKIT_FLAG_FUA ? NBD_CMD_FLAG_FUA : 0,
- NBD_CMD_WRITE, offset, count, buf, NULL, NULL);
- return nbd_reply (h, s);
-}
-
-/* Write zeroes to the file. */
-static int
-nbd_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
-{
- struct handle *h = handle;
- struct transaction *s;
- int f = 0;
-
- assert (!(flags & ~(NBDKIT_FLAG_FUA | NBDKIT_FLAG_MAY_TRIM)));
- assert (h->flags & NBD_FLAG_SEND_WRITE_ZEROES);
-
- if (!(flags & NBDKIT_FLAG_MAY_TRIM))
- f |= NBD_CMD_FLAG_NO_HOLE;
- if (flags & NBDKIT_FLAG_FUA)
- f |= NBD_CMD_FLAG_FUA;
- s = nbd_request (h, f, NBD_CMD_WRITE_ZEROES, offset, count);
- return nbd_reply (h, s);
-}
-
-/* Trim a portion of the file. */
-static int
-nbd_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
-{
- struct handle *h = handle;
- struct transaction *s;
-
- assert (!(flags & ~NBDKIT_FLAG_FUA));
- s = nbd_request (h, flags & NBDKIT_FLAG_FUA ? NBD_CMD_FLAG_FUA : 0,
- NBD_CMD_TRIM, offset, count);
- return nbd_reply (h, s);
-}
-
-/* Flush the file to disk. */
-static int
-nbd_flush (void *handle, uint32_t flags)
-{
- struct handle *h = handle;
- struct transaction *s;
-
- assert (!flags);
- s = nbd_request (h, 0, NBD_CMD_FLUSH, 0, 0);
- return nbd_reply (h, s);
-}
-
-/* Read extents of the file. */
-static int
-nbd_extents (void *handle, uint32_t count, uint64_t offset,
- uint32_t flags, struct nbdkit_extents *extents)
-{
- struct handle *h = handle;
- struct transaction *s;
-
- assert (!(flags & ~NBDKIT_FLAG_REQ_ONE) && h->extents);
- s = nbd_request_full (h, flags & NBDKIT_FLAG_REQ_ONE ? NBD_CMD_FLAG_REQ_ONE : 0,
- NBD_CMD_BLOCK_STATUS, offset, count, NULL, NULL,
- extents);
- return nbd_reply (h, s);
-}
-
-/* Cache a portion of the file. */
-static int
-nbd_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
-{
- struct handle *h = handle;
- struct transaction *s;
-
- assert (!flags);
- s = nbd_request (h, 0, NBD_CMD_CACHE, offset, count);
- return nbd_reply (h, s);
-}
-
-static struct nbdkit_plugin plugin = {
- .name = "nbd",
- .longname = "nbdkit nbd plugin",
- .version = PACKAGE_VERSION,
- .unload = nbd_unload,
- .config = nbd_config,
- .config_complete = nbd_config_complete,
- .config_help = nbd_config_help,
- .open = nbd_open,
- .close = nbd_close,
- .get_size = nbd_get_size,
- .can_write = nbd_can_write,
- .can_flush = nbd_can_flush,
- .is_rotational = nbd_is_rotational,
- .can_trim = nbd_can_trim,
- .can_zero = nbd_can_zero,
- .can_fua = nbd_can_fua,
- .can_multi_conn = nbd_can_multi_conn,
- .can_extents = nbd_can_extents,
- .can_cache = nbd_can_cache,
- .pread = nbd_pread,
- .pwrite = nbd_pwrite,
- .zero = nbd_zero,
- .flush = nbd_flush,
- .trim = nbd_trim,
- .extents = nbd_extents,
- .cache = nbd_cache,
- .errno_is_preserved = 1,
-};
-
-NBDKIT_REGISTER_PLUGIN (plugin)
--
2.25.1
4 years, 8 months
[nbdkit PATCH 0/2] More caching of initial setup
by Eric Blake
When I added .can_FOO caching in 1.16, I missed the case that the sh
plugin itself was calling .can_flush twice in some situations (in
order to default .can_fua). Then right after, I regressed it to call
.can_zero twice (in order to default .can_fast_zero). I also missed
that .thread_model could use better caching, because at the time, I
did not add testsuite coverage. Fix that now.
Eric Blake (2):
sh, eval: Cache .can_zero and .can_flush
server: Better caching of .thread_model
server/internal.h | 1 +
server/connections.c | 8 ++-
server/filters.c | 8 +--
server/locks.c | 7 ++-
server/plugins.c | 12 ++---
server/protocol-handshake.c | 5 +-
server/sockets.c | 3 +-
plugins/sh/methods.c | 102 +++++++++++++++++++++++-------------
tests/test-eflags.sh | 42 +++++++++++++--
9 files changed, 123 insertions(+), 65 deletions(-)
--
2.25.1
4 years, 8 months
Re: [Libguestfs] Fuzzing Questions
by Richard W.M. Jones
On Wed, Mar 18, 2020 at 01:46:14PM -0400, habib dan aouta wrote:
> Hello Richard,
>
> Hope you are doing well. My name is Habib and I am a current student
> at the University of North Carolina at Charlotte(U.S). I am
> currently following your Libnbd client fuzzing tutorial from the
> Wordpress articles
[https://rwmj.wordpress.com/2019/10/09/using-american-fuzzy-lop-on-network...]
> and I am having issues compiling the
> Libnbd-wrapper.c file. After some research I am missing the
> Libnbd.so library file that I cannot seem to find anywhere. It will
> be nice if you can provide some help. Thank you for your time and
> consideration.
Not much to go on. What did you try? What error did you see?
Rich.
--
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines. Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v
4 years, 8 months
[PATCH libnbd v2 0/3] Unfinished golang bindings.
by Richard W.M. Jones
These bindings get as far as running very simple connections.
However there are many missing parts still:
* No callbacks.
* No functions which handle buffers (pread/pwrite!)
This is posted just for general early interest, not even for review.
Rich.
4 years, 8 months
Re: [Libguestfs] Go bindings for libnbd
by Richard W.M. Jones
[NB: This is posted to the public libguestfs mailing list]
Fabien,
I just posted a very (very) rough set of patches to the mailing list:
https://www.redhat.com/archives/libguestfs/2020-March/msg00170.html
I think what would help me from here is if there is someone in the
KubeVirt community who could help me with making the golang bindings
more natural. I'm obviously not a regular golang programmer, so I
don't know about what will be a natural way to express certain things.
I could easily bake in mistakes like the wrong type of foo (string,
buffer, etc) which would hurt users in the long term.
Do you know anyone I can talk to about that?
Rich.
--
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines. Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v
4 years, 8 months
[PATCH nbdkit v3] New tmpdisk plugin.
by Richard W.M. Jones
v2 was here:
https://www.redhat.com/archives/libguestfs/2020-March/msg00154.html
v3:
- Micro-optimize tmpdir.
- Quote $disk in default command shell fragment.
- Don't redirect mkfs output to /dev/null. Instead use
exec </dev/null >/dev/null before the shell fragment.
We may want to do this in other places where we run
external shell scripts, or more generally for all
plugins, but this commit does not fix this.
- Improve can_multi_conn comment.
- Use mkostemp if available. If not we have to use a
racy mkstemp + set_cloexec instead.
- I still didn't implement .zero, because the implementation
(see plugins/file/file.c) is really complicated even if you
remove the block device code. I guess it would be nice to
isolate all this complexity into common/ at some point, which
would allow us to implement efficient zero (and trim) in other
file-backed plugins.
- I left WIFSTOPPED, but I guess I could remove it. Are we sure
it can never happen (eg. if the user is running nbdkit in the
foreground and uses ^Z)?
- Rerun the tests & valgrind.
Rich.
4 years, 8 months
[nbdkit PATCH 0/4] Fix testsuite hang with nbd-standalone
by Eric Blake
Either patch 1 or patch 2 in isolation is sufficient to fix the
problem that Rich forwarded on from an archlinux tester (name so I can
credit them?). But both patches should be applied, as well as
backported to appropriate stable branches, to maximize cross-version
interoperability of nbdkit vs. plugins. Patch 3 will let us detect
future similar bugs much faster. I want patch 4 to ensure that we
guarantee our API in the future, but it isn't finished yet.
Then I still need to follow up with my threat to remove nbd-standalone
for 1.20 (relying on libnbd is a lot easier now than it was when we
first added optional libnbd support in 1.14).
Eric Blake (4):
server: Normalize plugin can_* values
nbd: Normalize return values of can_*
tests: Don't let test-parallel-* hang on nbdkit bug
RFC tests: Add test to cover unusual .can_flush return
tests/Makefile.am | 21 +++++++
server/plugins.c | 24 ++++---
plugins/nbd/nbd-standalone.c | 12 ++--
tests/test-flush.sh | 85 +++++++++++++++++++++++++
tests/test-parallel-file.sh | 19 +++---
tests/test-parallel-nbd.sh | 13 ++--
tests/test-parallel-sh.sh | 19 +++---
tests/test-flush-plugin.c | 119 +++++++++++++++++++++++++++++++++++
8 files changed, 275 insertions(+), 37 deletions(-)
create mode 100755 tests/test-flush.sh
create mode 100644 tests/test-flush-plugin.c
--
2.25.1
4 years, 8 months
[PATCH nbdkit v2] New tmpdisk plugin.
by Richard W.M. Jones
This can be used for creating temporary disks for thin clients, as a
kind of "remote tmpfs".
See also:
https://www.redhat.com/archives/libguestfs/2020-March/msg00134.html
---
plugins/data/nbdkit-data-plugin.pod | 1 +
plugins/file/nbdkit-file-plugin.pod | 1 +
plugins/linuxdisk/nbdkit-linuxdisk-plugin.pod | 7 +-
plugins/memory/nbdkit-memory-plugin.pod | 3 +-
plugins/tmpdisk/nbdkit-tmpdisk-plugin.pod | 157 +++++++
configure.ac | 2 +
plugins/tmpdisk/Makefile.am | 66 +++
tests/Makefile.am | 21 +
plugins/tmpdisk/tmpdisk.c | 427 ++++++++++++++++++
tests/test-tmpdisk.c | 157 +++++++
.gitignore | 1 +
11 files changed, 840 insertions(+), 3 deletions(-)
diff --git a/plugins/data/nbdkit-data-plugin.pod b/plugins/data/nbdkit-data-plugin.pod
index 057392d3..ef8d1e08 100644
--- a/plugins/data/nbdkit-data-plugin.pod
+++ b/plugins/data/nbdkit-data-plugin.pod
@@ -269,6 +269,7 @@ L<nbdkit-null-plugin(1)>,
L<nbdkit-partitioning-plugin(1)>,
L<nbdkit-pattern-plugin(1)>,
L<nbdkit-random-plugin(1)>,
+L<nbdkit-tmpdisk-plugin(1)>,
L<nbdkit-zero-plugin(1)>,
L<https://github.com/libguestfs/nbdkit/blob/master/plugins/data/disk2data.pl>,
L<https://en.wikipedia.org/wiki/Base64>.
diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod
index d538b127..0c1cfd57 100644
--- a/plugins/file/nbdkit-file-plugin.pod
+++ b/plugins/file/nbdkit-file-plugin.pod
@@ -111,6 +111,7 @@ L<nbdkit(1)>,
L<nbdkit-plugin(3)>,
L<nbdkit-split-plugin(1)>,
L<nbdkit-partitioning-plugin(1)>,
+L<nbdkit-tmpdisk-plugin(1)>,
L<nbdkit-noextents-filter(1)>.
=head1 AUTHORS
diff --git a/plugins/linuxdisk/nbdkit-linuxdisk-plugin.pod b/plugins/linuxdisk/nbdkit-linuxdisk-plugin.pod
index 3cac883c..53f4d89d 100644
--- a/plugins/linuxdisk/nbdkit-linuxdisk-plugin.pod
+++ b/plugins/linuxdisk/nbdkit-linuxdisk-plugin.pod
@@ -24,7 +24,9 @@ symbolic links, block special devices etc.
To create a FAT-formatted virtual floppy disk, see
L<nbdkit-floppy-plugin(1)>. To create a CD/ISO, see
-L<nbdkit-iso-plugin(1)>.
+L<nbdkit-iso-plugin(1)>. To create an empty filesystem for each
+client that connects (like a "remote tmpfs") use
+L<nbdkit-tmpdisk-plugin(1)>.
=head1 EXAMPLES
@@ -184,7 +186,8 @@ L<nbdkit-file-plugin(1)>,
L<nbdkit-floppy-plugin(1)>,
L<nbdkit-iso-plugin(1)>,
L<nbdkit-partition-filter(1)>,
-L<nbdkit-partitioning-plugin(1)>.
+L<nbdkit-partitioning-plugin(1)>,
+L<nbdkit-tmpdisk-plugin(1)>.
=head1 AUTHORS
diff --git a/plugins/memory/nbdkit-memory-plugin.pod b/plugins/memory/nbdkit-memory-plugin.pod
index ccdc017d..bc565c55 100644
--- a/plugins/memory/nbdkit-memory-plugin.pod
+++ b/plugins/memory/nbdkit-memory-plugin.pod
@@ -103,7 +103,8 @@ L<nbdkit-plugin(3)>,
L<nbdkit-loop(1)>,
L<nbdkit-data-plugin(1)>,
L<nbdkit-file-plugin(1)>,
-L<nbdkit-info-plugin(1)>.
+L<nbdkit-info-plugin(1)>,
+L<nbdkit-tmpdisk-plugin(1)>.
=head1 AUTHORS
diff --git a/plugins/tmpdisk/nbdkit-tmpdisk-plugin.pod b/plugins/tmpdisk/nbdkit-tmpdisk-plugin.pod
new file mode 100644
index 00000000..925a5091
--- /dev/null
+++ b/plugins/tmpdisk/nbdkit-tmpdisk-plugin.pod
@@ -0,0 +1,157 @@
+=head1 NAME
+
+nbdkit-tmpdisk-plugin - create a fresh temporary filesystem for each client
+
+=head1 SYNOPSIS
+
+ nbdkit tmpdisk [size=]SIZE
+ [type=ext4|xfs|vfat|...] [label=LABEL]
+ [command=COMMAND]
+
+=head1 DESCRIPTION
+
+This L<nbdkit(1)> plugin is used for creating temporary filesystems
+for thin clients. Each time a client connects it will see a fresh,
+empty filesystem for its exclusive use. B<The filesystem is deleted>
+when the client disconnects.
+
+When a new client connects, a blank, sparse file of the required size
+is created in C<$TMPDIR> (or F</var/tmp>). L<mkfs(8)> is then run on
+the file to create the empty filesystem, and this filesystem is served
+to the client. Unlike L<nbdkit-linuxdisk-plugin(1)> each client of
+this plugin sees a different disk.
+
+The size of the disk is chosen using the C<size> parameter. The
+filesystem type is C<ext4> but this can be changed using the C<type>
+parameter (controlling the I<-t> option of mkfs).
+
+Instead of running mkfs you can run an arbitrary C<command> to create
+the disk.
+
+=head2 Example
+
+One use for this is to create a kind of "remote L<tmpfs(5)>" for thin
+clients. On the server you would run (see L<nbdkit-service(1)>):
+
+ nbdkit tmpdisk 16G
+
+and each thin client would have a file F</etc/rc.d/rc.local>
+containing:
+
+ nm-online
+ modprobe nbd
+ nbd-client server /dev/nbd0
+ mount /dev/nbd0 /var/scratch
+
+Clients would see a fresh, empty C</var/scratch> directory after boot.
+
+=head2 Security considerations
+
+Because each client gets a new disk, the amount of disk space
+required on the server can be as much as
+S<<< I<number of clients> × I<size parameter> >>>. It is therefore
+best to limit the number of clients using L<nbdkit-limit-filter(1)> or
+take steps to limit where clients can connect from using
+L<nbdkit-ip-filter(1)>, firewalls, or TLS client certificates.
+
+=head1 PARAMETERS
+
+=over 4
+
+=item B<command='>COMMANDB<'>
+
+Instead of running L<mkfs(8)> to create the initial filesystem, run
+C<COMMAND> (which usually must be quoted to protect it from the
+shell). The following shell variables may be used in C<COMMAND>:
+
+=over 4
+
+=item C<$disk>
+
+The absolute path of the file that the command must initialize. This
+file is created by nbdkit before the command runs.
+
+=item C<$label>
+
+The filesystem label (from the C<label> parameter).
+
+=item C<$size>
+
+The virtual size in bytes, which is the same as the file size.
+
+=item C<$type>
+
+The filesystem type (from the C<type> parameter), defaulting to
+C<ext4>. (Commands can ignore this if it is not relevant).
+
+=back
+
+=item B<label=>LABEL
+
+Select the filesystem label. The default is not set.
+
+=item [B<size=>]SIZE
+
+Specify the virtual size of the disk image.
+
+This parameter is required.
+
+C<size=> is a magic config key and may be omitted in most cases.
+See L<nbdkit(1)/Magic parameters>.
+
+=item B<type=>FS
+
+Select the filesystem type. The default is C<ext4>. Most
+non-networked, non-cluster filesystem types supported by the
+L<mkfs(8)> command can be used here.
+
+=back
+
+=head1 ENVIRONMENT VARIABLES
+
+=over 4
+
+=item C<TMPDIR>
+
+The temporary disks for this plugin are created in this directory, one
+per connected client. If not set this defaults to F</var/tmp>.
+
+=back
+
+=head1 FILES
+
+=over 4
+
+=item F<$plugindir/nbdkit-tmpdisk-plugin.so>
+
+The plugin.
+
+Use C<nbdkit --dump-config> to find the location of C<$plugindir>.
+
+=back
+
+=head1 VERSION
+
+C<nbdkit-tmpdisk-plugin> first appeared in nbdkit 1.20.
+
+=head1 SEE ALSO
+
+L<nbdkit(1)>,
+L<nbdkit-plugin(3)>,
+L<nbdkit-data-plugin(1)>,
+L<nbdkit-file-plugin(1)>,
+L<nbdkit-ip-filter(1)>,
+L<nbdkit-limit-filter(1)>,
+L<nbdkit-linuxdisk-plugin(1)>,
+L<nbdkit-memory-plugin(1)>,
+L<nbdkit-loop(1)>,
+L<nbdkit-tls(1)>,
+L<mkfs(8)>.
+
+=head1 AUTHORS
+
+Richard W.M. Jones
+
+=head1 COPYRIGHT
+
+Copyright (C) 2018-2020 Red Hat Inc.
diff --git a/configure.ac b/configure.ac
index dd9ca765..7b5e61e3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,7 @@ non_lang_plugins="\
ssh \
streaming \
tar \
+ tmpdisk \
vddk \
zero \
"
@@ -1016,6 +1017,7 @@ AC_CONFIG_FILES([Makefile
plugins/streaming/Makefile
plugins/tar/Makefile
plugins/tcl/Makefile
+ plugins/tmpdisk/Makefile
plugins/vddk/Makefile
plugins/zero/Makefile
filters/Makefile
diff --git a/plugins/tmpdisk/Makefile.am b/plugins/tmpdisk/Makefile.am
new file mode 100644
index 00000000..2e487e92
--- /dev/null
+++ b/plugins/tmpdisk/Makefile.am
@@ -0,0 +1,66 @@
+# nbdkit
+# Copyright (C) 2017-2020 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+include $(top_srcdir)/common-rules.mk
+
+EXTRA_DIST = nbdkit-tmpdisk-plugin.pod
+
+plugin_LTLIBRARIES = nbdkit-tmpdisk-plugin.la
+
+nbdkit_tmpdisk_plugin_la_SOURCES = \
+ tmpdisk.c \
+ $(top_srcdir)/include/nbdkit-plugin.h \
+ $(NULL)
+
+nbdkit_tmpdisk_plugin_la_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/common/utils \
+ $(NULL)
+nbdkit_tmpdisk_plugin_la_CFLAGS = $(WARNINGS_CFLAGS)
+nbdkit_tmpdisk_plugin_la_LDFLAGS = \
+ -module -avoid-version -shared \
+ -Wl,--version-script=$(top_srcdir)/plugins/plugins.syms \
+ $(NULL)
+nbdkit_tmpdisk_plugin_la_LIBADD = \
+ $(top_builddir)/common/utils/libutils.la \
+ $(NULL)
+
+if HAVE_POD
+
+man_MANS = nbdkit-tmpdisk-plugin.1
+CLEANFILES += $(man_MANS)
+
+nbdkit-tmpdisk-plugin.1: nbdkit-tmpdisk-plugin.pod
+ $(PODWRAPPER) --section=1 --man $@ \
+ --html $(top_builddir)/html/$@.html \
+ $<
+
+endif HAVE_POD
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 65dd148d..17f2c8da 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -766,6 +766,27 @@ test_streaming_SOURCES = test-streaming.c
test_streaming_CFLAGS = $(WARNINGS_CFLAGS) $(LIBNBD_CFLAGS)
test_streaming_LDADD = $(LIBNBD_LIBS)
+# tmpdisk plugin test.
+LIBGUESTFS_TESTS += test-tmpdisk
+
+test_tmpdisk_SOURCES = \
+ test-tmpdisk.c \
+ test.h \
+ $(NULL)
+test_tmpdisk_CPPFLAGS = \
+ -I$(top_srcdir)/common/utils
+test_tmpdisk_CFLAGS = \
+ $(WARNINGS_CFLAGS) \
+ $(LIBGUESTFS_CFLAGS) \
+ $(NULL)
+test_tmpdisk_LDFLAGS = \
+ $(top_builddir)/common/utils/libutils.la \
+ $(NULL)
+test_tmpdisk_LDADD = \
+ libtest.la \
+ $(LIBGUESTFS_LIBS) \
+ $(NULL)
+
if HAVE_VDDK
# VDDK plugin test.
# This only tests that the plugin can be loaded against a
diff --git a/plugins/tmpdisk/tmpdisk.c b/plugins/tmpdisk/tmpdisk.c
new file mode 100644
index 00000000..8fba3359
--- /dev/null
+++ b/plugins/tmpdisk/tmpdisk.c
@@ -0,0 +1,427 @@
+/* nbdkit
+ * Copyright (C) 2017-2020 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#define NBDKIT_API_VERSION 2
+#include <nbdkit-plugin.h>
+
+#include "cleanup.h"
+#include "utils.h"
+
+/* Size of every temporary disk, in bytes.  Stays -1 until the "size"
+ * parameter has been parsed; tmpdisk_config_complete rejects -1.
+ */
+static int64_t size = -1;
+
+/* Shell script appended after the variable assignments written by
+ * run_command ($disk, $label when set, $size and $type).  The default
+ * picks mkfs options according to $type and formats $disk, adding the
+ * label option only when $label is non-empty.  The "command"
+ * parameter replaces this script.
+ */
+static const char *command =
+ "labelopt='-L'\n"
+ "case \"$type\" in\n"
+ " ext?)\n"
+ " extra='-F' ;;\n"
+ " *fat|msdos)\n"
+ " extra='-I' ;;\n"
+ " ntfs)\n"
+ " extra='-Q -F'\n"
+ " labelopt='-n' ;;\n"
+ " xfs)\n"
+ " extra='-f' ;;\n"
+ "esac\n"
+ "if [ \"x$label\" = \"x\" ]; then\n"
+ " mkfs -t \"$type\" $extra $disk >/dev/null\n"
+ "else\n"
+ " mkfs -t \"$type\" $extra $labelopt \"$label\" $disk >/dev/null\n"
+ "fi\n";
+/* Filesystem label from the "label" parameter; NULL means no label. */
+static const char *label = NULL;
+/* Filesystem type from the "type" parameter; defaults to ext4. */
+static const char *type = "ext4";
+
+/* Accept one key=value parameter.
+ *
+ * Recognised keys are "size", "type", "label" and "command".  An empty
+ * label is stored as NULL (no label).  Returns 0 when the parameter is
+ * accepted and -1 when the size cannot be parsed or the key is unknown
+ * (the latter after reporting an error).
+ */
+static int
+tmpdisk_config (const char *key, const char *value)
+{
+  if (strcmp (key, "size") == 0) {
+    size = nbdkit_parse_size (value);
+    return size == -1 ? -1 : 0;
+  }
+
+  if (strcmp (key, "type") == 0) {
+    type = value;
+    return 0;
+  }
+
+  if (strcmp (key, "label") == 0) {
+    label = strcmp (value, "") == 0 ? NULL : value;
+    return 0;
+  }
+
+  if (strcmp (key, "command") == 0) {
+    command = value;
+    return 0;
+  }
+
+  nbdkit_error ("unknown parameter '%s'", key);
+  return -1;
+}
+
+/* Final check once all parameters have been seen: the size must have
+ * been supplied.  Returns 0 if it was, otherwise reports an error and
+ * returns -1.
+ */
+static int
+tmpdisk_config_complete (void)
+{
+  if (size != -1)
+    return 0;
+
+  nbdkit_error ("size parameter is required");
+  return -1;
+}
+
+#define tmpdisk_config_help \
+ "size=<SIZE> (required) Virtual filesystem size.\n" \
+ "label=<LABEL> The filesystem label.\n" \
+ "type=ext4|... The filesystem type.\n" \
+ "command=<COMMAND> Alternate command instead of mkfs."
+
+/* Per-connection state, allocated in tmpdisk_open and freed in
+ * tmpdisk_close.
+ */
+struct handle {
+ /* File descriptor of this connection's temporary disk image. */
+ int fd;
+ /* Starts true; tmpdisk_trim clears it to stop calling fallocate. */
+ bool can_punch_hole;
+};
+
+/* Multi-conn is absolutely unsafe here: tmpdisk_open creates a
+ * separate temporary disk for every connection, so two connections
+ * never see the same data.  Although returning 0 is simply the
+ * default value, provide this callback to make that explicit.
+ */
+static int
+tmpdisk_can_multi_conn (void *handle)
+{
+ return 0;
+}
+
+/* Advertise trim only when the build platform defines
+ * FALLOC_FL_PUNCH_HOLE, which tmpdisk_trim needs to punch holes.
+ */
+static int
+tmpdisk_can_trim (void *handle)
+{
+  int supported = 0;
+
+#ifdef FALLOC_FL_PUNCH_HOLE
+  supported = 1;
+#endif
+
+  return supported;
+}
+
+/* Pretend we have native FUA support, but actually because all disks
+ * are temporary we will deliberately ignore flush/FUA operations.
+ * The FUA flag is therefore accepted and dropped by tmpdisk_pwrite
+ * and tmpdisk_trim, and tmpdisk_flush does nothing.
+ */
+static int
+tmpdisk_can_fua (void *handle)
+{
+ return NBDKIT_FUA_NATIVE;
+}
+
+/* Every connection reports the same size: the value of the "size"
+ * parameter, which is also the length the temporary file is
+ * truncated to in tmpdisk_open.
+ */
+static int64_t
+tmpdisk_get_size (void *handle)
+{
+ return size;
+}
+
+/* This creates and runs the full "mkfs" (or whatever) command.
+ *
+ * The script passed to the shell is a set of variable assignments
+ * (disk, label if one was configured, size, type), each value passed
+ * through shell_quote, followed by the text of the global "command".
+ *
+ * Returns 0 if the command exited with status 0.  Returns -1, after
+ * reporting an error, if the script could not be built or started, or
+ * if the command exited non-zero, was killed, or was stopped.
+ */
+static int
+run_command (const char *disk)
+{
+ FILE *fp;
+ CLEANUP_FREE char *cmd = NULL;
+ size_t len = 0;
+ int r;
+
+ /* Assemble the script in a memory stream backed by cmd. */
+ fp = open_memstream (&cmd, &len);
+ if (fp == NULL) {
+ nbdkit_error ("open_memstream: %m");
+ return -1;
+ }
+
+ /* Set the shell variables. */
+ fprintf (fp, "disk=");
+ shell_quote (disk, fp);
+ putc ('\n', fp);
+ /* $label is left unset when no label was configured. */
+ if (label) {
+ fprintf (fp, "label=");
+ shell_quote (label, fp);
+ putc ('\n', fp);
+ }
+ fprintf (fp, "size=%" PRIi64 "\n", size);
+ fprintf (fp, "type=");
+ shell_quote (type, fp);
+ putc ('\n', fp);
+
+ /* Blank line, then the user-supplied or default script. */
+ putc ('\n', fp);
+ fprintf (fp, "%s", command);
+
+ /* Closing the stream finalizes cmd; the writes above are not checked
+ * individually, so a failure is only detected here.
+ */
+ if (fclose (fp) == EOF) {
+ nbdkit_error ("memstream failed");
+ return -1;
+ }
+
+ /* Run the script through the shell and decode the wait status. */
+ r = system (cmd);
+ if (r == -1) {
+ nbdkit_error ("failed to execute command: %m");
+ return -1;
+ }
+ if (WIFEXITED (r) && WEXITSTATUS (r) != 0) {
+ nbdkit_error ("command exited with code %d", WEXITSTATUS (r));
+ return -1;
+ }
+ else if (WIFSIGNALED (r)) {
+ nbdkit_error ("command killed by signal %d", WTERMSIG (r));
+ return -1;
+ }
+ else if (WIFSTOPPED (r)) {
+ nbdkit_error ("command stopped by signal %d", WSTOPSIG (r));
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Open a connection by creating a brand new temporary disk for it.
+ *
+ * A file named tmpdiskXXXXXX is created with mkstemp in $TMPDIR (or
+ * /var/tmp when TMPDIR is unset), extended to the configured size,
+ * populated by run_command and then unlinked so that only the file
+ * descriptor in the handle refers to it.  The readonly argument is
+ * not consulted.
+ *
+ * Returns the new handle, or NULL after reporting an error.
+ */
+static void *
+tmpdisk_open (int readonly)
+{
+ struct handle *h;
+ CLEANUP_FREE char *disk = NULL;
+ const char *tmpdir;
+
+ tmpdir = getenv ("TMPDIR");
+ if (!tmpdir)
+ tmpdir = "/var/tmp";
+
+ h = malloc (sizeof *h);
+ if (h == NULL) {
+ nbdkit_error ("malloc: %m");
+ goto error;
+ }
+ /* fd == -1 tells the error path that no file has been created yet. */
+ h->fd = -1;
+ h->can_punch_hole = true;
+
+ /* Create the new disk image for this connection. */
+ if (asprintf (&disk, "%s/tmpdiskXXXXXX", tmpdir) == -1) {
+ nbdkit_error ("asprintf: %m");
+ goto error;
+ }
+
+ /* mkstemp replaces the XXXXXX in disk with the name it created. */
+ h->fd = mkstemp (disk);
+ if (h->fd == -1) {
+ nbdkit_error ("mkstemp: %m");
+ goto error;
+ }
+
+ /* Truncate the disk to a sparse file of the right size. */
+ if (ftruncate (h->fd, size) == -1) {
+ nbdkit_error ("ftruncate: %s: %m", disk);
+ goto error;
+ }
+
+ /* Now run the mkfs command. */
+ if (run_command (disk) == -1)
+ goto error;
+
+ /* We don't need the disk to appear in the filesystem since we hold
+ * a file descriptor and access it through that, so unlink the disk.
+ * This also ensures it is always cleaned up.
+ */
+ unlink (disk);
+
+ /* Return the handle. */
+ return h;
+
+ error:
+ /* h is NULL only when malloc failed.  The file is closed and removed
+ * only if mkstemp got as far as creating it.
+ */
+ if (h) {
+ if (h->fd >= 0) {
+ close (h->fd);
+ unlink (disk);
+ }
+ free (h);
+ }
+ return NULL;
+}
+
+/* End a connection: close the descriptor of its temporary disk (the
+ * file itself was already unlinked by tmpdisk_open) and free the
+ * handle.
+ */
+static void
+tmpdisk_close (void *handle)
+{
+ struct handle *h = handle;
+
+ close (h->fd);
+ free (h);
+}
+
+/* Fill buf with count bytes read from the temporary disk starting at
+ * offset, repeating short reads until the request is satisfied.
+ * Returns 0 on success; returns -1 after reporting an error if pread
+ * fails or hits end of file before count bytes were read.
+ */
+static int
+tmpdisk_pread (void *handle, void *buf,
+               uint32_t count, uint64_t offset,
+               uint32_t flags)
+{
+  struct handle *h = handle;
+  char *dest = buf;
+  ssize_t n;
+
+  for (; count > 0; dest += n, count -= n, offset += n) {
+    n = pread (h->fd, dest, count, offset);
+    if (n == -1) {
+      nbdkit_error ("pread: %m");
+      return -1;
+    }
+    if (n == 0) {
+      nbdkit_error ("pread: unexpected end of file");
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+/* Store count bytes from buf into the temporary disk starting at
+ * offset, repeating short writes until everything has been written.
+ * Returns 0 on success, or -1 after reporting an error if pwrite
+ * fails.  A FUA flag, if present, is deliberately ignored.
+ */
+static int
+tmpdisk_pwrite (void *handle, const void *buf,
+                uint32_t count, uint64_t offset,
+                uint32_t flags)
+{
+  struct handle *h = handle;
+  const char *src = buf;
+  ssize_t n;
+
+  for (; count > 0; src += n, count -= n, offset += n) {
+    n = pwrite (h->fd, src, count, offset);
+    if (n == -1) {
+      nbdkit_error ("pwrite: %m");
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+/* This plugin deliberately provides a null flush operation, because
+ * all of the disks created are temporary.  Always returns 0 without
+ * touching the file.
+ */
+static int
+tmpdisk_flush (void *handle, uint32_t flags)
+{
+ return 0;
+}
+
+#if defined (FALLOC_FL_PUNCH_HOLE)
+/* Thin wrapper around fallocate that returns its result unchanged but
+ * reports ENODEV as EOPNOTSUPP, so callers only have to recognise one
+ * "not supported" error.
+ */
+static int
+do_fallocate (int fd, int mode, off_t offset, off_t len)
+{
+  int ret = fallocate (fd, mode, offset, len);
+
+  if (ret != -1)
+    return ret;
+
+  /* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
+   * with EOPNOTSUPP in this case. Normalize errno to simplify callers.
+   */
+  if (errno == ENODEV)
+    errno = EOPNOTSUPP;
+
+  return ret;
+}
+
+/* True if err is one of the two "operation not supported" codes. */
+static bool
+is_enotsup (int err)
+{
+  if (err == EOPNOTSUPP)
+    return true;
+  return err == ENOTSUP;
+}
+#endif
+
+/* Punch a hole in the file.
+ *
+ * Trim is advisory, so most fallocate failures are only logged and the
+ * request still succeeds; only EPERM and EIO are reported to the
+ * client by returning -1.  If the failure says that hole punching is
+ * not supported, that is remembered in the handle so later trims on
+ * this connection skip the fallocate call.  When FALLOC_FL_PUNCH_HOLE
+ * is not defined at build time this does nothing and returns 0.
+ */
+static int
+tmpdisk_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+#ifdef FALLOC_FL_PUNCH_HOLE
+  struct handle *h = handle;
+  int r;
+
+  if (h->can_punch_hole) {
+    r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == -1) {
+      if (errno == EPERM || errno == EIO) {
+        nbdkit_error ("fallocate: %m");
+        return -1;
+      }
+
+      /* Test the errno of this call, not a constant: only a "not
+       * supported" failure may turn off future attempts.
+       */
+      if (is_enotsup (errno))
+        h->can_punch_hole = false;
+
+      nbdkit_debug ("ignoring failed fallocate during trim: %m");
+    }
+  }
+#endif
+
+  /* Deliberately ignore FUA if present in flags. */
+
+  return 0;
+}
+
+/* Request the parallel thread model from nbdkit. */
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
+
+/* Callback table that NBDKIT_REGISTER_PLUGIN below hands to nbdkit. */
+static struct nbdkit_plugin plugin = {
+ .name = "tmpdisk",
+ .version = PACKAGE_VERSION,
+
+ .config = tmpdisk_config,
+ .config_complete = tmpdisk_config_complete,
+ .config_help = tmpdisk_config_help,
+ /* NOTE(review): presumably lets a bare first argument such as "1G"
+  * be taken as size=1G, as the test does - confirm in nbdkit-plugin(3).
+  */
+ .magic_config_key = "size",
+
+ .can_multi_conn = tmpdisk_can_multi_conn,
+ .can_trim = tmpdisk_can_trim,
+ .can_fua = tmpdisk_can_fua,
+ .get_size = tmpdisk_get_size,
+
+ .open = tmpdisk_open,
+ .close = tmpdisk_close,
+ .pread = tmpdisk_pread,
+ .pwrite = tmpdisk_pwrite,
+ .flush = tmpdisk_flush,
+ .trim = tmpdisk_trim,
+
+ /* NOTE(review): presumably tells nbdkit that errno is still valid
+  * when a callback fails - confirm in nbdkit-plugin(3).
+  */
+ .errno_is_preserved = 1,
+};
+
+NBDKIT_REGISTER_PLUGIN(plugin)
diff --git a/tests/test-tmpdisk.c b/tests/test-tmpdisk.c
new file mode 100644
index 00000000..e96f1b82
--- /dev/null
+++ b/tests/test-tmpdisk.c
@@ -0,0 +1,157 @@
+/* nbdkit
+ * Copyright (C) 2013-2020 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <guestfs.h>
+
+#include "test.h"
+
+/* Create a libguestfs handle tagged with the given identifier, add
+ * the nbdkit server to it as a raw NBD drive and launch it.  Exits
+ * the test on any failure.
+ */
+static guestfs_h *
+launch_guest (const char *id)
+{
+  guestfs_h *g = guestfs_create ();
+
+  if (g == NULL) {
+    perror ("guestfs_create");
+    exit (EXIT_FAILURE);
+  }
+  guestfs_set_identifier (g, id);
+
+  if (guestfs_add_drive_opts (g, "",
+                              GUESTFS_ADD_DRIVE_OPTS_FORMAT, "raw",
+                              GUESTFS_ADD_DRIVE_OPTS_PROTOCOL, "nbd",
+                              GUESTFS_ADD_DRIVE_OPTS_SERVER, server,
+                              -1) == -1)
+    exit (EXIT_FAILURE);
+
+  if (guestfs_launch (g) == -1)
+    exit (EXIT_FAILURE);
+
+  return g;
+}
+
+/* Exit the test unless the filesystem on /dev/sda is labelled "TEST". */
+static void
+expect_test_label (guestfs_h *g)
+{
+  char *found = guestfs_vfs_label (g, "/dev/sda");
+
+  if (!found)
+    exit (EXIT_FAILURE);
+  if (strcmp (found, "TEST") != 0) {
+    fprintf (stderr, "%s FAILED: unexpected label: %s\n",
+             program_name, found);
+    exit (EXIT_FAILURE);
+  }
+  free (found);
+}
+
+/* Start nbdkit with the tmpdisk plugin (1G disk labelled TEST), open
+ * two connections and check that each one gets a separate filesystem
+ * with that label which can be mounted and written to.
+ */
+int
+main (int argc, char *argv[])
+{
+  guestfs_h *g1, *g2;
+
+  /* Start nbdkit. */
+  if (test_start_nbdkit ("tmpdisk", "1G", "label=TEST", NULL) == -1)
+    exit (EXIT_FAILURE);
+
+  /* We can open multiple connections and they should see different
+   * disks.
+   */
+  g1 = launch_guest ("g1");
+  g2 = launch_guest ("g2");
+
+  /* But they should both see the same filesystem label. */
+  expect_test_label (g1);
+  expect_test_label (g2);
+
+  /* Mount both disks. */
+  if (guestfs_mount (g1, "/dev/sda", "/") == -1)
+    exit (EXIT_FAILURE);
+  if (guestfs_mount (g2, "/dev/sda", "/") == -1)
+    exit (EXIT_FAILURE);
+
+  /* Create some files and directories on each. */
+  if (guestfs_mkdir (g1, "/test1") == -1)
+    exit (EXIT_FAILURE);
+  if (guestfs_touch (g1, "/test1/file1") == -1)
+    exit (EXIT_FAILURE);
+  if (guestfs_mkdir (g2, "/test2") == -1)
+    exit (EXIT_FAILURE);
+  if (guestfs_touch (g2, "/test2/file2") == -1)
+    exit (EXIT_FAILURE);
+
+  if (guestfs_sync (g1) == -1)
+    exit (EXIT_FAILURE);
+  if (guestfs_sync (g2) == -1)
+    exit (EXIT_FAILURE);
+
+  if (guestfs_is_file (g1, "/test1/file1") != 1) {
+    fprintf (stderr, "%s FAILED: /test1/file1 is not a file\n",
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+  if (guestfs_is_file (g2, "/test2/file2") != 1) {
+    fprintf (stderr, "%s FAILED: /test2/file2 is not a file\n",
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+
+  /* Shut down the connection. */
+  if (guestfs_shutdown (g1) == -1)
+    exit (EXIT_FAILURE);
+  if (guestfs_shutdown (g2) == -1)
+    exit (EXIT_FAILURE);
+  guestfs_close (g1);
+  guestfs_close (g2);
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/.gitignore b/.gitignore
index ae4aaf3c..ae4e5061 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,6 +134,7 @@ plugins/*/*.3
/tests/test-split
/tests/test-streaming
/tests/test-tcl
+/tests/test-tmpdisk
/tests/test-xz
/tests/test-xz-curl
/test-driver
--
2.25.0
4 years, 8 months