Writing "file-like" plugins is hard because you have to implement your
own .zero, .trim, .extents, etc., which is very complicated.
However, implementations of these callbacks already exist in the file
plugin. By factoring them out of the file plugin into a separate
"fileops" mini-library, we can reuse them in other plugins.
This refactoring commit creates a new mini-library called fileops, and
uses it to implement the file plugin.
Note that the name or prefix "file" leaks into fileops in a few
places: the debug option is still called ‘-D file.zero=1’ and the
nbdkit --dump-plugin output will still contain ‘file_blksszget’ etc.
However I think that is fine as it should make usage more consistent
across future file-like plugins.
---
configure.ac | 1 +
Makefile.am | 1 +
common/fileops/Makefile.am | 45 +++
plugins/file/Makefile.am | 2 +
common/fileops/fileops.h | 141 ++++++++++
common/fileops/fileops.c | 547 +++++++++++++++++++++++++++++++++++++
plugins/file/file.c | 540 ++----------------------------------
7 files changed, 755 insertions(+), 522 deletions(-)
diff --git a/configure.ac b/configure.ac
index c5db962c..7a859bca 100644
--- a/configure.ac
+++ b/configure.ac
@@ -981,6 +981,7 @@ AC_CONFIG_FILES([common/protocol/generate-protostrings.sh],
AC_CONFIG_FILES([Makefile
bash/Makefile
common/bitmap/Makefile
+ common/fileops/Makefile
common/gpt/Makefile
common/include/Makefile
common/protocol/Makefile
diff --git a/Makefile.am b/Makefile.am
index ec8ae05d..6c6e5053 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -81,6 +81,7 @@ SUBDIRS = \
if HAVE_PLUGINS
SUBDIRS += \
common/bitmap \
+ common/fileops \
common/gpt \
common/regions \
common/sparse \
diff --git a/common/fileops/Makefile.am b/common/fileops/Makefile.am
new file mode 100644
index 00000000..1fce2d81
--- /dev/null
+++ b/common/fileops/Makefile.am
@@ -0,0 +1,45 @@
+# nbdkit
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+include $(top_srcdir)/common-rules.mk
+
+noinst_LTLIBRARIES = libfileops.la
+
+libfileops_la_SOURCES = \
+ fileops.c \
+ fileops.h \
+ $(NULL)
+libfileops_la_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/common/include \
+ -I$(top_srcdir)/common/utils \
+ $(NULL)
+libfileops_la_CFLAGS = $(WARNINGS_CFLAGS)
diff --git a/plugins/file/Makefile.am b/plugins/file/Makefile.am
index 7fc40cf4..f3bd86c0 100644
--- a/plugins/file/Makefile.am
+++ b/plugins/file/Makefile.am
@@ -42,6 +42,7 @@ nbdkit_file_plugin_la_SOURCES = \
nbdkit_file_plugin_la_CPPFLAGS = \
-I$(top_srcdir)/include \
+ -I$(top_srcdir)/common/fileops \
-I$(top_srcdir)/common/include \
-I$(top_srcdir)/common/utils \
$(NULL)
@@ -51,6 +52,7 @@ nbdkit_file_plugin_la_LDFLAGS = \
-Wl,--version-script=$(top_srcdir)/plugins/plugins.syms \
$(NULL)
nbdkit_file_plugin_la_LIBADD = \
+ $(top_builddir)/common/fileops/libfileops.la \
$(top_builddir)/common/utils/libutils.la \
$(NULL)
diff --git a/common/fileops/fileops.h b/common/fileops/fileops.h
new file mode 100644
index 00000000..1aecf785
--- /dev/null
+++ b/common/fileops/fileops.h
@@ -0,0 +1,141 @@
+/* nbdkit
+ * Copyright (C) 2020 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* This mini-library helps when writing plugins which are like
+ * nbdkit-file-plugin. It is also used to implement
+ * nbdkit-file-plugin itself. What this means is if your plugin
+ * (after perhaps some custom setup) serves a local file or local
+ * block device then you can implement most of the data serving part
+ * of the plugin using these generic fileops. The advantage is that
+ * this mini-library deals with the complexity of implementing
+ * callbacks such as .zero and .trim efficiently.
+ *
+ * To use it, your plugin per-connection handle must either have type
+ * ‘struct fileops *’, or include ‘struct fileops’ as the first
+ * member. Then add FILEOPS_CALLBACKS to your ‘struct nbdkit_plugin’.
+ * The mini-library is linked statically into the plugin.
+ *
+ * Your plugin must define and use NBDKIT_API_VERSION == 2. This
+ * library is safe to use from PARALLEL plugins (in fact, that is the
+ * recommended thread model).
+ *
+ * See plugins/file/file.c for an example.
+ */
+
+#ifndef NBDKIT_FILEOPS_H
+#define NBDKIT_FILEOPS_H
+
+#include <config.h>
+
+#include <stdbool.h>
+
+/* For SEEK_HOLE. */
+#include <unistd.h>
+#include <sys/types.h>
+
+/* Either use this as the per-connection handle, or it must appear as
+ * the first member of the per-connection handle. Don’t access these
+ * fields directly from your plugin.
+ *
+ * The can_* flags record which zeroing/trimming methods are still
+ * worth trying on this handle; the library clears a flag once the
+ * corresponding method has reported that it is not supported.
+ */
+struct fileops {
+ int fd; /* descriptor given to init_fileops; closed by close_fileops */
+ int sector_size; /* 4096 unless BLKSSZGET on a block device says otherwise */
+ bool is_block_device; /* S_ISBLK of the fstat mode taken at init time */
+ bool can_punch_hole; /* FALLOC_FL_PUNCH_HOLE compiled in and not yet refused */
+ bool can_zero_range; /* FALLOC_FL_ZERO_RANGE compiled in and not yet refused */
+ bool can_fallocate; /* plain fallocate (mode 0) not yet refused */
+ bool can_zeroout; /* starts as is_block_device; cleared if BLKZEROOUT gives ENOTTY */
+};
+
+/* Initialize the fileops struct. ‘fd’ is a file descriptor opened on
+ * the local file or block device that you want to serve. Call this
+ * from your .open callback after allocating the handle and setting up
+ * the file descriptor.
+ *
+ * Returns 0 on success. On failure it calls nbdkit_error and returns
+ * -1; ‘fd’ is not closed in that case, so the caller must close it.
+ */
+extern int init_fileops (int fd, struct fileops *fops);
+
+/* Close the file descriptor and perform any other cleanup (but it
+ * doesn’t free the struct or handle). Use this in your .close
+ * callback.
+ */
+extern void close_fileops (struct fileops *fops);
+
+/* You may optionally define a .dump_plugin callback which calls this. */
+extern void fileops_dump_plugin (void);
+
+/* Use the remaining callbacks by adding FILEOPS_CALLBACKS to your
+ * ‘struct nbdkit_plugin’.
+ */
+#define FILEOPS_CALLBACKS \
+ .get_size = fileops_get_size, \
+ .can_trim = fileops_can_trim, \
+ .can_fua = fileops_can_fua, \
+ .can_cache = fileops_can_cache, \
+ .pread = fileops_pread, \
+ .pwrite = fileops_pwrite, \
+ .flush = fileops_flush, \
+ .trim = fileops_trim, \
+ .zero = fileops_zero, \
+ .errno_is_preserved = 1, \
+ FILEOPS_MAYBE_EXTENTS \
+ FILEOPS_MAYBE_CACHE
+
+#ifdef SEEK_HOLE
+#define FILEOPS_MAYBE_EXTENTS \
+ .can_extents = fileops_can_extents, \
+ .extents = fileops_extents,
+#else
+#define FILEOPS_MAYBE_EXTENTS /* nothing */
+#endif
+
+#ifdef HAVE_POSIX_FADVISE
+#define FILEOPS_MAYBE_CACHE \
+ .cache = fileops_cache,
+#else
+#define FILEOPS_MAYBE_CACHE /* nothing */
+#endif
+
+/* Implementations behind FILEOPS_CALLBACKS.  Don’t call these directly. */
+extern int64_t fileops_get_size (void *handle);
+extern int fileops_can_trim (void *handle);
+extern int fileops_can_fua (void *handle);
+extern int fileops_can_cache (void *handle);
+extern int fileops_flush (void *handle, uint32_t flags);
+extern int fileops_pread (void *handle, void *buf, uint32_t count, uint64_t offset, uint32_t flags);
+extern int fileops_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset, uint32_t flags);
+extern int fileops_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags);
+extern int fileops_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags);
+extern int fileops_can_extents (void *handle);
+extern int fileops_extents (void *handle, uint32_t count, uint64_t offset, uint32_t flags, struct nbdkit_extents *extents);
+extern int fileops_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags);
+
+#endif /* NBDKIT_FILEOPS_H */
diff --git a/common/fileops/fileops.c b/common/fileops/fileops.c
new file mode 100644
index 00000000..6eb22aab
--- /dev/null
+++ b/common/fileops/fileops.c
@@ -0,0 +1,547 @@
+/* nbdkit
+ * Copyright (C) 2020 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+
+#include <pthread.h>
+
+#if defined (__linux__) && !defined (FALLOC_FL_PUNCH_HOLE)
+#include <linux/falloc.h> /* For FALLOC_FL_*, glibc < 2.18 */
+#endif
+
+#if defined (__linux__)
+#include <linux/fs.h> /* For BLKZEROOUT */
+#endif
+
+#define NBDKIT_API_VERSION 2
+#include <nbdkit-plugin.h>
+
+#include "cleanup.h"
+#include "fileops.h"
+#include "isaligned.h"
+
+#ifndef HAVE_FDATASYNC
+#define fdatasync fsync
+#endif
+
+/* Any callbacks using lseek must be protected by this lock. */
+static pthread_mutex_t lseek_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* To enable: -D file.zero=1 */
+int file_debug_zero;
+
+/* True when 'err' is one of the two errno values that mean
+ * "operation not supported" (ENOTSUP or EOPNOTSUPP).
+ */
+static bool
+is_enotsup (int err)
+{
+  if (err == ENOTSUP)
+    return true;
+  return err == EOPNOTSUPP;
+}
+
+/* Emit one "key=yes" line on stdout for each optional kernel
+ * interface that was available when this library was compiled.
+ */
+void
+fileops_dump_plugin (void)
+{
+#if defined (BLKSSZGET)
+  fputs ("file_blksszget=yes\n", stdout);
+#endif
+#if defined (BLKZEROOUT)
+  fputs ("file_blkzeroout=yes\n", stdout);
+#endif
+#if defined (FALLOC_FL_PUNCH_HOLE)
+  fputs ("file_falloc_fl_punch_hole=yes\n", stdout);
+#endif
+#if defined (FALLOC_FL_ZERO_RANGE)
+  fputs ("file_falloc_fl_zero_range=yes\n", stdout);
+#endif
+}
+
+/* Fill in 'fops' for the already-open descriptor 'fd'.  Returns 0 on
+ * success, or -1 (after nbdkit_error) if the descriptor cannot be
+ * stat'ed; the descriptor is left open either way.
+ */
+int
+init_fileops (int fd, struct fileops *fops)
+{
+  struct stat st;
+
+  if (fstat (fd, &st) == -1) {
+    nbdkit_error ("fstat: %m");
+    return -1;
+  }
+
+  fops->fd = fd;
+  fops->is_block_device = S_ISBLK (st.st_mode);
+
+  /* Start optimistic: each method is dropped later if it turns out
+   * not to be supported.
+   */
+  fops->can_fallocate = true;
+  fops->can_zeroout = fops->is_block_device;
+#if defined (FALLOC_FL_PUNCH_HOLE)
+  fops->can_punch_hole = true;
+#else
+  fops->can_punch_hole = false;
+#endif
+#if defined (FALLOC_FL_ZERO_RANGE)
+  fops->can_zero_range = true;
+#else
+  fops->can_zero_range = false;
+#endif
+
+  /* Start with a safe guess, then ask the device if we can. */
+  fops->sector_size = 4096;
+#if defined (BLKSSZGET)
+  if (fops->is_block_device &&
+      ioctl (fops->fd, BLKSSZGET, &fops->sector_size) != 0)
+    nbdkit_debug ("cannot get sector size: %m");
+#endif
+
+  return 0;
+}
+
+/* Close the descriptor held in 'fops'.  The struct itself is not
+ * freed here.
+ */
+void
+close_fileops (struct fileops *fops)
+{
+  const int fd = fops->fd;
+
+  close (fd);
+}
+
+/* Size of the block device open on 'fd', found by seeking to its end
+ * (for block devices stat->st_size is not the true size).  The seek
+ * moves the file offset, so the caller grabs the lseek_lock.
+ * Returns -1 after nbdkit_error if the seek fails.
+ */
+static int64_t
+block_device_size (int fd)
+{
+  const off_t end = lseek (fd, 0, SEEK_END);
+
+  if (end != -1)
+    return end;
+
+  nbdkit_error ("lseek (to find device size): %m");
+  return -1;
+}
+
+/* .get_size callback: size in bytes of the file or block device, or
+ * -1 on error.
+ */
+int64_t
+fileops_get_size (void *handle)
+{
+  struct fileops *fops = handle;
+  struct stat st;
+
+  if (fops->is_block_device) {
+    /* The size is found with lseek, so hold the lock while seeking. */
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock);
+    return block_device_size (fops->fd);
+  }
+
+  /* Regular file. */
+  if (fstat (fops->fd, &st) == -1) {
+    nbdkit_error ("fstat: %m");
+    return -1;
+  }
+  return st.st_size;
+}
+
+/* .can_trim callback.  Trim is advisory, but it is only advertised
+ * when this build can actually (attempt to) punch holes.  Not all
+ * filesystems support all fallocate modes and there is no
+ * fpathconf()-style query to learn what will work on a given fd, so
+ * the answer depends on compile-time support alone.
+ */
+int
+fileops_can_trim (void *handle)
+{
+#if defined (FALLOC_FL_PUNCH_HOLE)
+  const int advertise = 1;
+#else
+  const int advertise = 0;
+#endif
+
+  return advertise;
+}
+
+/* .can_fua callback.  Always reports NBDKIT_FUA_NATIVE, whatever the
+ * handle: the pwrite, zero and trim callbacks in this file act on
+ * NBDKIT_FLAG_FUA themselves by calling fileops_flush.
+ */
+int
+fileops_can_fua (void *handle)
+{
+ return NBDKIT_FUA_NATIVE;
+}
+
+/* .can_cache callback.  Report NBDKIT_CACHE_NATIVE when this build
+ * provides .cache through posix_fadvise(); otherwise report
+ * NBDKIT_CACHE_EMULATE, because letting nbdkit call .pread on our
+ * behalf also tends to work well for the local file system cache.
+ *
+ * The result must be one of the NBDKIT_CACHE_* constants; the
+ * NBDKIT_FUA_* constants belong to .can_fua.
+ */
+int
+fileops_can_cache (void *handle)
+{
+#if HAVE_POSIX_FADVISE
+  return NBDKIT_CACHE_NATIVE;
+#else
+  return NBDKIT_CACHE_EMULATE;
+#endif
+}
+
+/* .flush callback: flush the file to disk with fdatasync (which is
+ * defined to fsync above when fdatasync is unavailable).  'flags' is
+ * not used.
+ */
+int
+fileops_flush (void *handle, uint32_t flags)
+{
+  const struct fileops *fops = handle;
+  const int r = fdatasync (fops->fd);
+
+  if (r == -1)
+    nbdkit_error ("fdatasync: %m");
+
+  return r == -1 ? -1 : 0;
+}
+
+/* .pread callback: fill 'buf' with 'count' bytes starting at
+ * 'offset', repeating short reads until done.  Hitting end of file
+ * before the request is satisfied is an error.
+ */
+int
+fileops_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
+               uint32_t flags)
+{
+  struct fileops *fops = handle;
+  char *dst = buf;
+  ssize_t n;
+
+  for (; count > 0; dst += n, count -= n, offset += n) {
+    n = pread (fops->fd, dst, count, offset);
+    if (n == -1) {
+      nbdkit_error ("pread: %m");
+      return -1;
+    }
+    if (n == 0) {
+      nbdkit_error ("pread: unexpected end of file");
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+/* .pwrite callback: write 'count' bytes from 'buf' at 'offset',
+ * repeating short writes until done, then flush if the request
+ * carries NBDKIT_FLAG_FUA.
+ */
+int
+fileops_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
+                uint32_t flags)
+{
+  struct fileops *fops = handle;
+  const char *src = buf;
+  ssize_t n;
+
+  for (; count > 0; src += n, count -= n, offset += n) {
+    n = pwrite (fops->fd, src, count, offset);
+    if (n == -1) {
+      nbdkit_error ("pwrite: %m");
+      return -1;
+    }
+  }
+
+  if (!(flags & NBDKIT_FLAG_FUA))
+    return 0;
+
+  return fileops_flush (handle, 0) == -1 ? -1 : 0;
+}
+
+#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE)
+/* fallocate wrapper with a uniform "not supported" errno.  Kernel
+ * 3.10 fails with ENODEV for a block device whereas kernel >= 4.9
+ * fails with EOPNOTSUPP in this case, so ENODEV is normalized to
+ * EOPNOTSUPP to simplify callers.
+ */
+static int
+do_fallocate (int fd, int mode, off_t offset, off_t len)
+{
+  const int r = fallocate (fd, mode, offset, len);
+
+  if (r != -1)
+    return r;
+
+  if (errno == ENODEV)
+    errno = EOPNOTSUPP;
+  return r;
+}
+#endif
+
+/* .zero callback: write zeroes to the file, using the cheapest
+ * method that works.  The methods are tried in this order:
+ *
+ *  1. punch a hole, only if the request allows trimming
+ *     (NBDKIT_FLAG_MAY_TRIM);
+ *  2. fallocate with FALLOC_FL_ZERO_RANGE;
+ *  3. punch a hole and then fallocate the range again;
+ *  4. BLKZEROOUT, for sector-aligned requests on block devices.
+ *
+ * When a method reports that it is not supported (ENOTSUP/EOPNOTSUPP
+ * for fallocate, ENOTTY for BLKZEROOUT) its can_* flag is cleared in
+ * the handle so it is not tried again.  Any other failure is reported
+ * with nbdkit_error and returns -1.  If no method is usable, errno is
+ * set to EOPNOTSUPP and -1 is returned to trigger a fall back to
+ * writing.  On success the file is flushed if NBDKIT_FLAG_FUA is set.
+ */
+int
+fileops_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+  struct fileops *fops = handle;
+  int r;
+
+#ifdef FALLOC_FL_PUNCH_HOLE
+  if (fops->can_punch_hole && (flags & NBDKIT_FLAG_MAY_TRIM)) {
+    r = do_fallocate (fops->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == 0) {
+      if (file_debug_zero)
+        nbdkit_debug ("fops->can_punch_hole && may_trim: "
+                      "zero succeeded using fallocate");
+      goto out;
+    }
+
+    if (!is_enotsup (errno)) {
+      nbdkit_error ("zero: %m");
+      return -1;
+    }
+
+    fops->can_punch_hole = false;
+  }
+#endif
+
+#ifdef FALLOC_FL_ZERO_RANGE
+  if (fops->can_zero_range) {
+    r = do_fallocate (fops->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+    if (r == 0) {
+      if (file_debug_zero)
+        nbdkit_debug ("fops->can_zero-range: "
+                      "zero succeeded using fallocate");
+      goto out;
+    }
+
+    if (!is_enotsup (errno)) {
+      nbdkit_error ("zero: %m");
+      return -1;
+    }
+
+    fops->can_zero_range = false;
+  }
+#endif
+
+#ifdef FALLOC_FL_PUNCH_HOLE
+  /* If we can punch hole but may not trim, we can combine punching hole and
+   * fallocate to zero a range. This is expected to be more efficient than
+   * writing zeroes manually. */
+  if (fops->can_punch_hole && fops->can_fallocate) {
+    r = do_fallocate (fops->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == 0) {
+      r = do_fallocate (fops->fd, 0, offset, count);
+      if (r == 0) {
+        if (file_debug_zero)
+          nbdkit_debug ("fops->can_punch_hole && fops->can_fallocate: "
+                        "zero succeeded using fallocate");
+        goto out;
+      }
+
+      if (!is_enotsup (errno)) {
+        nbdkit_error ("zero: %m");
+        return -1;
+      }
+
+      fops->can_fallocate = false;
+    } else {
+      if (!is_enotsup (errno)) {
+        nbdkit_error ("zero: %m");
+        return -1;
+      }
+
+      fops->can_punch_hole = false;
+    }
+  }
+#endif
+
+#ifdef BLKZEROOUT
+  /* For aligned range and block device, we can use BLKZEROOUT. */
+  if (fops->can_zeroout && IS_ALIGNED (offset | count, fops->sector_size)) {
+    uint64_t range[2] = {offset, count};
+
+    r = ioctl (fops->fd, BLKZEROOUT, &range);
+    if (r == 0) {
+      if (file_debug_zero)
+        nbdkit_debug ("fops->can_zeroout && IS_ALIGNED: "
+                      "zero succeeded using BLKZEROOUT");
+      goto out;
+    }
+
+    if (errno != ENOTTY) {
+      nbdkit_error ("zero: %m");
+      return -1;
+    }
+
+    fops->can_zeroout = false;
+  }
+#endif
+
+  /* Trigger a fall back to writing */
+  if (file_debug_zero)
+    nbdkit_debug ("zero falling back to writing");
+  errno = EOPNOTSUPP;
+  return -1;
+
+ out:
+  if ((flags & NBDKIT_FLAG_FUA) && fileops_flush (handle, 0) == -1)
+    return -1;
+  return 0;
+}
+
+/* .trim callback: punch a hole in the file.
+ *
+ * Trim is advisory, so a failing fallocate is ignored (with a debug
+ * message) unless it failed with EIO or EPERM, which are reported as
+ * errors.  If the failure was "not supported", hole punching is
+ * switched off for this handle so it is not attempted again.  When
+ * the request carries NBDKIT_FLAG_FUA the file is flushed before
+ * returning.  Returns 0 on success, -1 on error.
+ */
+int
+fileops_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+#ifdef FALLOC_FL_PUNCH_HOLE
+  struct fileops *fops = handle;
+  int r;
+
+  if (fops->can_punch_hole) {
+    r = do_fallocate (fops->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == -1) {
+      /* Trim is advisory; we don't care if it fails for anything other
+       * than EIO or EPERM. */
+      if (errno == EPERM || errno == EIO) {
+        nbdkit_error ("fallocate: %m");
+        return -1;
+      }
+
+      /* Test the actual failure code: only a "not supported" result
+       * means that retrying on later requests is pointless.
+       */
+      if (is_enotsup (errno))
+        fops->can_punch_hole = false;
+
+      nbdkit_debug ("ignoring failed fallocate during trim: %m");
+    }
+  }
+#endif
+
+  if ((flags & NBDKIT_FLAG_FUA) && fileops_flush (handle, 0) == -1)
+    return -1;
+
+  return 0;
+}
+
+#ifdef SEEK_HOLE
+/* Extents. */
+
+/* .can_extents callback.  Probe once with SEEK_HOLE from offset 0 as
+ * a simple test of whether SEEK_HOLE etc is likely to work on the
+ * current filesystem.  Returns 1 if the probe succeeds, otherwise 0
+ * (extents disabled).
+ */
+int
+fileops_can_extents (void *handle)
+{
+  struct fileops *fops = handle;
+
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock);
+  if (lseek (fops->fd, 0, SEEK_HOLE) != -1)
+    return 1;
+
+  nbdkit_debug ("extents disabled: lseek: SEEK_HOLE: %m");
+  return 0;
+}
+
+/* Describe the range [offset, offset+count) as extents by seeking
+ * alternately with SEEK_DATA and SEEK_HOLE.  Each gap found before
+ * data is added as a hole that reads as zero, and each run of data is
+ * added as an allocated extent.  With NBDKIT_FLAG_REQ_ONE the loop
+ * stops after the first extent has been added.
+ *
+ * This moves the file offset; fileops_extents calls it with
+ * lseek_lock held.  Returns 0 on success, or -1 if a seek or
+ * nbdkit_add_extent fails.
+ */
+static int
+do_extents (void *handle, uint32_t count, uint64_t offset,
+ uint32_t flags, struct nbdkit_extents *extents)
+{
+ struct fileops *fops = handle;
+ const bool req_one = flags & NBDKIT_FLAG_REQ_ONE;
+ uint64_t end = offset + count;
+
+ do {
+ off_t pos;
+
+ pos = lseek (fops->fd, offset, SEEK_DATA);
+ if (pos == -1) {
+ if (errno == ENXIO) {
+ /* The current man page does not describe this situation well,
+ * but a proposed change to POSIX adds these words for ENXIO:
+ * "or the whence argument is SEEK_DATA and the offset falls
+ * within the final hole of the file."
+ */
+ pos = end;
+ }
+ else {
+ nbdkit_error ("lseek: SEEK_DATA: %" PRIu64 ": %m", offset);
+ return -1;
+ }
+ }
+
+ /* We know there is a hole from offset to pos-1. */
+ if (pos > offset) {
+ if (nbdkit_add_extent (extents, offset, pos - offset,
+ NBDKIT_EXTENT_HOLE | NBDKIT_EXTENT_ZERO) == -1)
+ return -1;
+ if (req_one)
+ break;
+ }
+
+ /* Continue from the start of the data; stop if that is past the
+ * requested range.
+ */
+ offset = pos;
+ if (offset >= end)
+ break;
+
+ pos = lseek (fops->fd, offset, SEEK_HOLE);
+ if (pos == -1) {
+ nbdkit_error ("lseek: SEEK_HOLE: %" PRIu64 ": %m", offset);
+ return -1;
+ }
+
+ /* We know there is data from offset to pos-1. */
+ if (pos > offset) {
+ if (nbdkit_add_extent (extents, offset, pos - offset,
+ 0 /* allocated data */) == -1)
+ return -1;
+ if (req_one)
+ break;
+ }
+
+ offset = pos;
+ } while (offset < end);
+
+ return 0;
+}
+
+/* .extents callback.  The work is done by do_extents; this wrapper
+ * holds lseek_lock around it because do_extents uses lseek.
+ */
+int
+fileops_extents (void *handle, uint32_t count, uint64_t offset,
+                 uint32_t flags, struct nbdkit_extents *extents)
+{
+  int r;
+
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock);
+  r = do_extents (handle, count, offset, flags, extents);
+  return r;
+}
+#endif /* SEEK_HOLE */
+
+#if HAVE_POSIX_FADVISE
+/* .cache callback: ask the kernel, through posix_fadvise with
+ * POSIX_FADV_WILLNEED, to prefetch the given range.  Returns 0 on
+ * success, or -1 after nbdkit_error if posix_fadvise reports failure.
+ */
+int
+fileops_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
+{
+ struct fileops *fops = handle;
+ int r;
+
+ /* NOTE(review): caching is advisory, yet a posix_fadvise failure is
+ * still returned as an error below - confirm that is intended.
+ */
+ r = posix_fadvise (fops->fd, offset, count, POSIX_FADV_WILLNEED);
+ if (r) {
+ /* The non-zero result is used as the error number so %m can print it. */
+ errno = r;
+ nbdkit_error ("posix_fadvise: %m");
+ return -1;
+ }
+ return 0;
+}
+#endif /* HAVE_POSIX_FADVISE */
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 8c2ea077..b3446fc3 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -34,51 +34,18 @@
#include <stdio.h>
#include <stdlib.h>
-#include <stdbool.h>
+#include <stdint.h>
#include <string.h>
-#include <inttypes.h>
#include <fcntl.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
#include <errno.h>
-#include <pthread.h>
-
-#if defined (__linux__) && !defined (FALLOC_FL_PUNCH_HOLE)
-#include <linux/falloc.h> /* For FALLOC_FL_*, glibc < 2.18 */
-#endif
-
-#if defined (__linux__)
-#include <linux/fs.h> /* For BLKZEROOUT */
-#endif
-
#define NBDKIT_API_VERSION 2
-
#include <nbdkit-plugin.h>
-#include "cleanup.h"
-#include "isaligned.h"
-
-#ifndef HAVE_FDATASYNC
-#define fdatasync fsync
-#endif
+#include "fileops.h"
static char *filename = NULL;
-/* Any callbacks using lseek must be protected by this lock. */
-static pthread_mutex_t lseek_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/* to enable: -D file.zero=1 */
-int file_debug_zero;
-
-static bool
-is_enotsup (int err)
-{
- return err == ENOTSUP || err == EOPNOTSUPP;
-}
-
static void
file_unload (void)
{
@@ -131,41 +98,18 @@ file_config_complete (void)
static void
file_dump_plugin (void)
{
-#ifdef BLKSSZGET
- printf ("file_blksszget=yes\n");
-#endif
-#ifdef BLKZEROOUT
- printf ("file_blkzeroout=yes\n");
-#endif
-#ifdef FALLOC_FL_PUNCH_HOLE
- printf ("file_falloc_fl_punch_hole=yes\n");
-#endif
-#ifdef FALLOC_FL_ZERO_RANGE
- printf ("file_falloc_fl_zero_range=yes\n");
-#endif
+ fileops_dump_plugin ();
}
-/* The per-connection handle. */
-struct handle {
- int fd;
- bool is_block_device;
- int sector_size;
- bool can_punch_hole;
- bool can_zero_range;
- bool can_fallocate;
- bool can_zeroout;
-};
-
/* Create the per-connection handle. */
static void *
file_open (int readonly)
{
- struct handle *h;
- struct stat statbuf;
- int flags;
+ struct fileops *fops;
+ int fd, flags;
- h = malloc (sizeof *h);
- if (h == NULL) {
+ fops = malloc (sizeof *fops);
+ if (fops == NULL) {
nbdkit_error ("malloc: %m");
return NULL;
}
@@ -176,98 +120,35 @@ file_open (int readonly)
else
flags |= O_RDWR;
- h->fd = open (filename, flags);
- if (h->fd == -1) {
+ fd = open (filename, flags);
+ if (fd == -1) {
nbdkit_error ("open: %s: %m", filename);
- free (h);
+ free (fops);
return NULL;
}
- if (fstat (h->fd, &statbuf) == -1) {
- nbdkit_error ("fstat: %s: %m", filename);
- free (h);
+ if (init_fileops (fd, fops) == -1) {
+ /* init_fileops leaves fd open on failure, so close it here. */
+ close (fd);
+ free (fops);
return NULL;
}
- h->is_block_device = S_ISBLK (statbuf.st_mode);
- h->sector_size = 4096; /* Start with safe guess */
-
-#ifdef BLKSSZGET
- if (h->is_block_device) {
- if (ioctl (h->fd, BLKSSZGET, &h->sector_size))
- nbdkit_debug ("cannot get sector size: %s: %m", filename);
- }
-#endif
-
-#ifdef FALLOC_FL_PUNCH_HOLE
- h->can_punch_hole = true;
-#else
- h->can_punch_hole = false;
-#endif
-
-#ifdef FALLOC_FL_ZERO_RANGE
- h->can_zero_range = true;
-#else
- h->can_zero_range = false;
-#endif
-
- h->can_fallocate = true;
- h->can_zeroout = h->is_block_device;
-
- return h;
+ return fops;
}
/* Free up the per-connection handle. */
static void
file_close (void *handle)
{
- struct handle *h = handle;
+ struct fileops *fops = handle;
- close (h->fd);
- free (h);
+ close_fileops (fops);
+ free (fops);
}
#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
-/* For block devices, stat->st_size is not the true size. The caller
- * grabs the lseek_lock.
- */
-static int64_t
-block_device_size (int fd)
-{
- off_t size;
-
- size = lseek (fd, 0, SEEK_END);
- if (size == -1) {
- nbdkit_error ("lseek (to find device size): %m");
- return -1;
- }
-
- return size;
-}
-
-/* Get the file size. */
-static int64_t
-file_get_size (void *handle)
-{
- struct handle *h = handle;
-
- if (h->is_block_device) {
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock);
- return block_device_size (h->fd);
- } else {
- /* Regular file. */
- struct stat statbuf;
-
- if (fstat (h->fd, &statbuf) == -1) {
- nbdkit_error ("fstat: %m");
- return -1;
- }
-
- return statbuf.st_size;
- }
-}
-
/* Allow multiple parallel connections from a single client. */
static int
file_can_multi_conn (void *handle)
@@ -275,374 +154,6 @@ file_can_multi_conn (void *handle)
return 1;
}
-static int
-file_can_trim (void *handle)
-{
- /* Trim is advisory, but we prefer to advertise it only when we can
- * actually (attempt to) punch holes. Since not all filesystems
- * support all fallocate modes, it would be nice if we had a way
- * from fpathconf() to definitively learn what will work on a given
- * fd for a more precise answer; oh well. */
-#ifdef FALLOC_FL_PUNCH_HOLE
- return 1;
-#else
- return 0;
-#endif
-}
-
-static int
-file_can_fua (void *handle)
-{
- return NBDKIT_FUA_NATIVE;
-}
-
-static int
-file_can_cache (void *handle)
-{
- /* Prefer posix_fadvise(), but letting nbdkit call .pread on our
- * behalf also tends to work well for the local file system
- * cache.
- */
-#if HAVE_POSIX_FADVISE
- return NBDKIT_FUA_NATIVE;
-#else
- return NBDKIT_FUA_EMULATE;
-#endif
-}
-
-/* Flush the file to disk. */
-static int
-file_flush (void *handle, uint32_t flags)
-{
- struct handle *h = handle;
-
- if (fdatasync (h->fd) == -1) {
- nbdkit_error ("fdatasync: %m");
- return -1;
- }
-
- return 0;
-}
-
-/* Read data from the file. */
-static int
-file_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
- uint32_t flags)
-{
- struct handle *h = handle;
-
- while (count > 0) {
- ssize_t r = pread (h->fd, buf, count, offset);
- if (r == -1) {
- nbdkit_error ("pread: %m");
- return -1;
- }
- if (r == 0) {
- nbdkit_error ("pread: unexpected end of file");
- return -1;
- }
- buf += r;
- count -= r;
- offset += r;
- }
-
- return 0;
-}
-
-/* Write data to the file. */
-static int
-file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
- uint32_t flags)
-{
- struct handle *h = handle;
-
- while (count > 0) {
- ssize_t r = pwrite (h->fd, buf, count, offset);
- if (r == -1) {
- nbdkit_error ("pwrite: %m");
- return -1;
- }
- buf += r;
- count -= r;
- offset += r;
- }
-
- if ((flags & NBDKIT_FLAG_FUA) && file_flush (handle, 0) == -1)
- return -1;
-
- return 0;
-}
-
-#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE)
-static int
-do_fallocate (int fd, int mode, off_t offset, off_t len)
-{
- int r = fallocate (fd, mode, offset, len);
- if (r == -1 && errno == ENODEV) {
- /* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
- with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
- errno = EOPNOTSUPP;
- }
- return r;
-}
-#endif
-
-/* Write zeroes to the file. */
-static int
-file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
-{
- struct handle *h = handle;
- int r;
-
-#ifdef FALLOC_FL_PUNCH_HOLE
- if (h->can_punch_hole && (flags & NBDKIT_FLAG_MAY_TRIM)) {
- r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- offset, count);
- if (r == 0) {
- if (file_debug_zero)
- nbdkit_debug ("h->can_punch_hole && may_trim: "
- "zero succeeded using fallocate");
- goto out;
- }
-
- if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
- return -1;
- }
-
- h->can_punch_hole = false;
- }
-#endif
-
-#ifdef FALLOC_FL_ZERO_RANGE
- if (h->can_zero_range) {
- r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
- if (r == 0) {
- if (file_debug_zero)
- nbdkit_debug ("h->can_zero-range: "
- "zero succeeded using fallocate");
- goto out;
- }
-
- if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
- return -1;
- }
-
- h->can_zero_range = false;
- }
-#endif
-
-#ifdef FALLOC_FL_PUNCH_HOLE
- /* If we can punch hole but may not trim, we can combine punching hole and
- * fallocate to zero a range. This is expected to be more efficient than
- * writing zeroes manually. */
- if (h->can_punch_hole && h->can_fallocate) {
- r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- offset, count);
- if (r == 0) {
- r = do_fallocate (h->fd, 0, offset, count);
- if (r == 0) {
- if (file_debug_zero)
- nbdkit_debug ("h->can_punch_hole && h->can_fallocate:
"
- "zero succeeded using fallocate");
- goto out;
- }
-
- if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
- return -1;
- }
-
- h->can_fallocate = false;
- } else {
- if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
- return -1;
- }
-
- h->can_punch_hole = false;
- }
- }
-#endif
-
-#ifdef BLKZEROOUT
- /* For aligned range and block device, we can use BLKZEROOUT. */
- if (h->can_zeroout && IS_ALIGNED (offset | count, h->sector_size)) {
- uint64_t range[2] = {offset, count};
-
- r = ioctl (h->fd, BLKZEROOUT, &range);
- if (r == 0) {
- if (file_debug_zero)
- nbdkit_debug ("h->can_zeroout && IS_ALIGNED: "
- "zero succeeded using BLKZEROOUT");
- goto out;
- }
-
- if (errno != ENOTTY) {
- nbdkit_error ("zero: %m");
- return -1;
- }
-
- h->can_zeroout = false;
- }
-#endif
-
- /* Trigger a fall back to writing */
- if (file_debug_zero)
- nbdkit_debug ("zero falling back to writing");
- errno = EOPNOTSUPP;
- return -1;
-
- out:
- if ((flags & NBDKIT_FLAG_FUA) && file_flush (handle, 0) == -1)
- return -1;
- return 0;
-}
-
-/* Punch a hole in the file. */
-static int
-file_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
-{
-#ifdef FALLOC_FL_PUNCH_HOLE
- struct handle *h = handle;
- int r;
-
- if (h->can_punch_hole) {
- r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- offset, count);
- if (r == -1) {
- /* Trim is advisory; we don't care if it fails for anything other
- * than EIO or EPERM. */
- if (errno == EPERM || errno == EIO) {
- nbdkit_error ("fallocate: %m");
- return -1;
- }
-
- if (is_enotsup (EOPNOTSUPP))
- h->can_punch_hole = false;
-
- nbdkit_debug ("ignoring failed fallocate during trim: %m");
- }
- }
-#endif
-
- if ((flags & NBDKIT_FLAG_FUA) && file_flush (handle, 0) == -1)
- return -1;
-
- return 0;
-}
-
-#ifdef SEEK_HOLE
-/* Extents. */
-
-static int
-file_can_extents (void *handle)
-{
- struct handle *h = handle;
- off_t r;
-
- /* A simple test to see whether SEEK_HOLE etc is likely to work on
- * the current filesystem.
- */
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock);
- r = lseek (h->fd, 0, SEEK_HOLE);
- if (r == -1) {
- nbdkit_debug ("extents disabled: lseek: SEEK_HOLE: %m");
- return 0;
- }
- return 1;
-}
-
-static int
-do_extents (void *handle, uint32_t count, uint64_t offset,
- uint32_t flags, struct nbdkit_extents *extents)
-{
- struct handle *h = handle;
- const bool req_one = flags & NBDKIT_FLAG_REQ_ONE;
- uint64_t end = offset + count;
-
- do {
- off_t pos;
-
- pos = lseek (h->fd, offset, SEEK_DATA);
- if (pos == -1) {
- if (errno == ENXIO) {
- /* The current man page does not describe this situation well,
- * but a proposed change to POSIX adds these words for ENXIO:
- * "or the whence argument is SEEK_DATA and the offset falls
- * within the final hole of the file."
- */
- pos = end;
- }
- else {
- nbdkit_error ("lseek: SEEK_DATA: %" PRIu64 ": %m", offset);
- return -1;
- }
- }
-
- /* We know there is a hole from offset to pos-1. */
- if (pos > offset) {
- if (nbdkit_add_extent (extents, offset, pos - offset,
- NBDKIT_EXTENT_HOLE | NBDKIT_EXTENT_ZERO) == -1)
- return -1;
- if (req_one)
- break;
- }
-
- offset = pos;
- if (offset >= end)
- break;
-
- pos = lseek (h->fd, offset, SEEK_HOLE);
- if (pos == -1) {
- nbdkit_error ("lseek: SEEK_HOLE: %" PRIu64 ": %m", offset);
- return -1;
- }
-
- /* We know there is data from offset to pos-1. */
- if (pos > offset) {
- if (nbdkit_add_extent (extents, offset, pos - offset,
- 0 /* allocated data */) == -1)
- return -1;
- if (req_one)
- break;
- }
-
- offset = pos;
- } while (offset < end);
-
- return 0;
-}
-
-static int
-file_extents (void *handle, uint32_t count, uint64_t offset,
- uint32_t flags, struct nbdkit_extents *extents)
-{
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock);
- return do_extents (handle, count, offset, flags, extents);
-}
-#endif /* SEEK_HOLE */
-
-#if HAVE_POSIX_FADVISE
-/* Caching. */
-static int
-file_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
-{
- struct handle *h = handle;
- int r;
-
- /* Cache is advisory, we don't care if this fails */
- r = posix_fadvise (h->fd, offset, count, POSIX_FADV_WILLNEED);
- if (r) {
- errno = r;
- nbdkit_error ("posix_fadvise: %m");
- return -1;
- }
- return 0;
-}
-#endif /* HAVE_POSIX_FADVISE */
-
static struct nbdkit_plugin plugin = {
.name = "file",
.longname = "nbdkit file plugin",
@@ -655,24 +166,9 @@ static struct nbdkit_plugin plugin = {
.dump_plugin = file_dump_plugin,
.open = file_open,
.close = file_close,
- .get_size = file_get_size,
.can_multi_conn = file_can_multi_conn,
- .can_trim = file_can_trim,
- .can_fua = file_can_fua,
- .can_cache = file_can_cache,
- .pread = file_pread,
- .pwrite = file_pwrite,
- .flush = file_flush,
- .trim = file_trim,
- .zero = file_zero,
-#ifdef SEEK_HOLE
- .can_extents = file_can_extents,
- .extents = file_extents,
-#endif
-#if HAVE_POSIX_FADVISE
- .cache = file_cache,
-#endif
- .errno_is_preserved = 1,
+
+ FILEOPS_CALLBACKS
};
NBDKIT_REGISTER_PLUGIN(plugin)
--
2.25.0