[still a work in progress, as I finish rebasing to capture the
ideas raised on the list, but posting now for initial feedback]
The NBD protocol supports Forced Unit Access (FUA) as a more efficient
way to wait for just one write to land in persistent storage, rather
than all outstanding writes at the time of a flush; it is modeled after
the kernel's block I/O flag of the same name. While we can emulate
the proper semantics with a full-blown flush, there are some plugins
that can properly pass the FUA flag on to the end storage and thereby
avoid some overhead.
This patch introduces new callbacks and documentation for those
callbacks, although the actual implementation to take advantage of
the new callbacks will be in later patches. The biggest thing to
note is that we now support 2 API versions for the plugin, where
the plugin author chooses whether to keep version 1 (default, no
FUA support) or opt in to version 2 (FUA support).
Signed-off-by: Eric Blake <eblake@redhat.com>
---
docs/nbdkit-plugin.pod | 89 +++++++++++++++++++++++++++++++++++++++++++++++--
docs/nbdkit.pod | 7 +++-
include/nbdkit-plugin.h | 31 ++++++++++++++++-
src/internal.h | 4 +--
src/plugins.c | 2 +-
5 files changed, 125 insertions(+), 8 deletions(-)
diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod
index 3cafc42..d982e65 100644
--- a/docs/nbdkit-plugin.pod
+++ b/docs/nbdkit-plugin.pod
@@ -6,6 +6,8 @@ nbdkit-plugin - How to write nbdkit plugins
=head1 SYNOPSIS
+ #define NBDKIT_API_VERSION 2
+
#include <nbdkit-plugin.h>
#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS
@@ -51,9 +53,21 @@ L<nbdkit-perl-plugin(3)>,
L<nbdkit-python-plugin(3)>,
L<nbdkit-ruby-plugin(3)>.
+=head1 C<#define NBDKIT_API_VERSION>
+
+Plugins must choose which API version they want to use. The default
+version is 1; but if a plugin defines NBDKIT_API_VERSION to a positive
+integer prior to including C<nbdkit-plugin.h>, the signatures of
+several callbacks are enhanced. A newer nbdkit will always support
+plugins compiled against an older API version, but plugins that opt in
+to newer versions require a new enough nbdkit. For now, the maximum
+version is 2, which enables fine-tuned response to client flags
+including efficient Forced Unit Access (FUA) on writes.
+
=head1 C<nbdkit-plugin.h>
-All plugins should start by including this header file:
+All plugins should start by including this header file, after
+optionally choosing an API version:
#include <nbdkit-plugin.h>
@@ -400,7 +414,28 @@ If there is an error, C<.can_trim> should call C<nbdkit_error> with an
error message and return C<-1>.
This callback is not required. If omitted, then we return true iff a
-C<.trim> callback has been defined.
+C<.trim> or C<.trim_fua> callback has been defined.
+
+=head2 C<.can_fua>
+
+ int can_fua (void *handle);
+
+This is called during the option negotiation phase to find out if the
+plugin supports the Forced Unit Access (FUA) flag on write and trim
+requests.
+
+If there is an error, C<.can_fua> should call C<nbdkit_error> with an
+error message and return C<-1>.
+
+This callback is not required. If omitted, then we return true iff
+either the C<.pwrite_fua> callback has been defined, or C<.can_flush>
+returns true (in the latter case, FUA semantics are emulated by nbdkit
+calling C<.flush> before completing any write or trim operation with
+the FUA flag set).
+
+Note that if this defaults to true and C<.can_trim> also returns true,
+the plugin must provide either C<.flush> or C<.trim_fua> for correct
+FUA semantics.
=head2 C<.pread>
@@ -442,6 +477,21 @@ recovered from), C<.pwrite> should call C<nbdkit_error> with an error
message, and C<nbdkit_set_error> to record an appropriate error
(unless C<errno> is sufficient), then return C<-1>.
+If the plugin can provide efficient Forced Unit Access (FUA) semantics,
+it should define C<.pwrite_fua> instead.
+
+=head2 C<.pwrite_fua>
+
+ int pwrite_fua (void *handle, const void *buf, uint32_t count, uint64_t offset, int fua);
+
+This callback has the same requirements as C<.pwrite>, with the
+additional parameter C<fua> set to a non-zero value if the client
+wants FUA semantics (where the command must not return until the
+actions of the write have landed in persistent storage). If the
+plugin cannot provide efficient FUA, but C<.can_flush> returns true
+and C<.can_fua> does not return false, then client requests for FUA
+semantics are emulated by nbdkit calling C<.flush>.
+
=head2 C<.flush>
int flush (void *handle);
@@ -455,6 +505,11 @@ If there is an error, C<.flush> should call C<nbdkit_error> with an
error message, and C<nbdkit_set_error> to record an appropriate error
(unless C<errno> is sufficient), then return C<-1>.
+Note that C<.flush> can be called both by the client doing an explicit
+flush request, and by nbdkit when emulating Forced Unit Access (FUA)
+semantics after a write or trim where the plugin did not provide FUA
+callbacks (C<.pwrite_fua>, C<.zero_fua>, and C<.trim_fua>).
+
=head2 C<.trim>
int trim (void *handle, uint32_t count, uint64_t offset);
@@ -467,6 +522,21 @@ If there is an error, C<.trim> should call C<nbdkit_error> with an
error message, and C<nbdkit_set_error> to record an appropriate error
(unless C<errno> is sufficient), then return C<-1>.
+If the plugin can provide efficient Forced Unit Access (FUA) semantics,
+it should define C<.trim_fua> instead.
+
+=head2 C<.trim_fua>
+
+ int trim_fua (void *handle, uint32_t count, uint64_t offset, int fua);
+
+This callback has the same requirements as C<.trim>, with the
+additional parameter C<fua> set to a non-zero value if the client
+wants FUA semantics (where the command must not return until the
+actions of the trim have landed in persistent storage). If the plugin
+cannot provide efficient FUA, but C<.can_flush> returns true and
+C<.can_fua> does not return false, then client requests for FUA
+semantics are emulated by nbdkit calling C<.flush>.
+
=head2 C<.zero>
int zero (void *handle, uint32_t count, uint64_t offset, int may_trim);
@@ -488,6 +558,21 @@ If there is an error, C<.zero> should call C<nbdkit_error> with an
error message, and C<nbdkit_set_error> to record an appropriate error
(unless C<errno> is sufficient), then return C<-1>.
+If the plugin can provide efficient Forced Unit Access (FUA) semantics,
+it should define C<.zero_fua> instead.
+
+=head2 C<.zero_fua>
+
+ int zero_fua (void *handle, uint32_t count, uint64_t offset, int may_trim, int fua);
+
+This callback has the same requirements as C<.zero>, with the
+additional parameter C<fua> set to a non-zero value if the client
+wants FUA semantics (where the command must not return until the
+actions of the write have landed in persistent storage). If the
+plugin cannot provide efficient FUA, but C<.can_flush> returns true
+and C<.can_fua> does not return false, then client requests for FUA
+semantics are emulated by nbdkit calling C<.flush>.
+
=head1 THREADS
Each nbdkit plugin must declare its thread safety model by defining
diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod
index 636eedc..eaa638b 100644
--- a/docs/nbdkit.pod
+++ b/docs/nbdkit.pod
@@ -804,7 +804,12 @@ information about that plugin, eg:
[etc]
Plugins which ship with nbdkit usually have the same version as the
-corresponding nbdkit binary.
+corresponding nbdkit binary. The nbdkit binary will always be able
+to utilize plugins compiled against an older version of the header;
+however, there are cases where a newer plugin may not be fully
+supported by an older nbdkit binary (for example, a plugin that
+supplies C<.pwrite_fua> but not C<.pwrite> may not support writes
+when loaded by the older nbdkit).
=head2 Detect if a plugin is installed
diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h
index 13541e5..b67d343 100644
--- a/include/nbdkit-plugin.h
+++ b/include/nbdkit-plugin.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2017 Red Hat Inc.
+ * Copyright (C) 2013-2018 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,16 @@ extern "C" {
#define NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS 2
#define NBDKIT_THREAD_MODEL_PARALLEL 3
+#define NBDKIT_FLAG_MAY_TRIM (1<<0)
+#define NBDKIT_FLAG_FUA (1<<1)
+
+/* By default, a plugin gets API version 1; define NBDKIT_API_VERSION
+ * to 2 prior to including this header to request version 2. */
+#ifndef NBDKIT_API_VERSION
#define NBDKIT_API_VERSION 1
+#elif (NBDKIT_API_VERSION - 0) < 1 || (NBDKIT_API_VERSION - 0) > 2
+#error Unsupported API version
+#endif
struct nbdkit_plugin {
/* Do not set these fields directly; use NBDKIT_REGISTER_PLUGIN.
@@ -87,15 +96,35 @@ struct nbdkit_plugin {
int (*can_trim) (void *handle);
int (*pread) (void *handle, void *buf, uint32_t count, uint64_t offset);
+#if NBDKIT_API_VERSION == 1
int (*pwrite) (void *handle, const void *buf, uint32_t count, uint64_t offset);
+#else
+ int (*pwrite_old) (void *handle, const void *buf, uint32_t count, uint64_t offset);
+#endif
int (*flush) (void *handle);
+#if NBDKIT_API_VERSION == 1
int (*trim) (void *handle, uint32_t count, uint64_t offset);
int (*zero) (void *handle, uint32_t count, uint64_t offset, int may_trim);
+#else
+ int (*trim_old) (void *handle, uint32_t count, uint64_t offset);
+ int (*zero_old) (void *handle, uint32_t count, uint64_t offset, int may_trim);
+#endif
int errno_is_preserved;
void (*dump_plugin) (void);
+ int (*can_fua) (void *handle);
+#if NBDKIT_API_VERSION == 1
+ int (*_unused1) (void *, const void *, uint32_t, uint64_t, uint32_t);
+ int (*_unused2) (void *, uint32_t, uint64_t, uint32_t);
+ int (*_unused3) (void *, uint32_t, uint64_t, uint32_t);
+#else
+ int (*pwrite) (void *handle, const void *buf, uint32_t count,
+ uint64_t offset, uint32_t flags);
+ int (*zero) (void *handle, uint32_t count, uint64_t offset, uint32_t flags);
+ int (*trim) (void *handle, uint32_t count, uint64_t offset, uint32_t flags);
+#endif
/* int (*set_exportname) (void *handle, const char *exportname); */
};
diff --git a/src/internal.h b/src/internal.h
index 7fd52a2..c76c0d3 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -40,6 +40,7 @@
#include <sys/socket.h>
#include <pthread.h>
+#define NBDKIT_API_VERSION 2
#include "nbdkit-plugin.h"
#include "nbdkit-filter.h"
@@ -98,9 +99,6 @@
(type *) ((char *) __mptr - offsetof(type, member)); \
})
-#define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */
-#define NBDKIT_FLAG_FUA (1<<1) /* Maps to NBD_CMD_FLAG_FUA */
-
/* main.c */
extern const char *exportname;
extern const char *ipaddr;
diff --git a/src/plugins.c b/src/plugins.c
index 1de2ba2..fd5e843 100644
--- a/src/plugins.c
+++ b/src/plugins.c
@@ -553,7 +553,7 @@ plugin_register (size_t index, const char *filename,
}
/* Check for incompatible future versions. */
- if (plugin->_api_version != 1) {
+ if (plugin->_api_version < 0 || plugin->_api_version > 2) {
fprintf (stderr, "%s: %s: plugin is incompatible with this version of nbdkit (_api_version = %d)\n",
program_name, p->filename, plugin->_api_version);
exit (EXIT_FAILURE);
--
2.14.3
_______________________________________________
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs