Similar to .trim, except that it guarantees that zeroes are read
back, and also clients must obey the may_trim argument with
regards to whether a hole may be used or whether the file must
remain allocated with actual zeroes written. If the callback is
not implemented, or if the callback fails with EOPNOTSUPP, fall
back to fragmenting the request and calling .pwrite with a
known-zero buffer.
The handling of EOPNOTSUPP allows callbacks to avoid the need
to reimplement the work of allocating an all-zero buffer; at least
the file driver on Linux will benefit from these semantics as it
means we can try to use fallocate(), then gracefully use normal
writes if the underlying file system doesn't support what we need.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/nbdkit-plugin.pod | 19 +++++++++++++++++++
include/nbdkit-plugin.h | 1 +
src/internal.h | 1 +
src/plugins.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 71 insertions(+)
diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod
index 30e1f86..ee6bbd5 100644
--- a/docs/nbdkit-plugin.pod
+++ b/docs/nbdkit-plugin.pod
@@ -432,6 +432,25 @@ callback.
If there is an error, C<.trim> should call C<nbdkit_error> with an
error message and return C<-1>.
+=head2 C<.zero>
+
+ int zero (void *handle, uint32_t count, uint64_t offset, int may_trim);
+
+During the data serving phase, this callback is used to write C<count>
+bytes of zeroes at C<offset> in the backing store. If C<may_trim> is
+non-zero, the operation can punch a hole instead of writing actual
+zero bytes, but only if subsequent reads from the hole read as zeroes.
+If this callback is omitted, or if it fails with errno set to
+EOPNOTSUPP, then C<.pwrite> will be used instead.
+
+The callback must write the whole C<count> bytes if it can. The NBD
+protocol doesn't allow partial writes (instead, these would be
+errors). If the whole C<count> bytes was written successfully, the
+callback should return C<0> to indicate there was I<no> error.
+
+If there is an error, C<.zero> should call C<nbdkit_error> with an
+error message and return C<-1>.
+
=head1 THREADS
Each nbdkit plugin must declare its thread safety model by defining
diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h
index bc9794e..3d25642 100644
--- a/include/nbdkit-plugin.h
+++ b/include/nbdkit-plugin.h
@@ -77,6 +77,7 @@ struct nbdkit_plugin {
int (*pwrite) (void *handle, const void *buf, uint32_t count, uint64_t offset);
int (*flush) (void *handle);
int (*trim) (void *handle, uint32_t count, uint64_t offset);
+ int (*zero) (void *handle, uint32_t count, uint64_t offset, int may_trim);
/* int (*set_exportname) (void *handle, const char *exportname); */
};
diff --git a/src/internal.h b/src/internal.h
index bc4fa12..cb15bab 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -107,6 +107,7 @@ extern int plugin_pread (struct connection *conn, void *buf, uint32_t count, uin
extern int plugin_pwrite (struct connection *conn, void *buf, uint32_t count, uint64_t offset);
extern int plugin_flush (struct connection *conn);
extern int plugin_trim (struct connection *conn, uint32_t count, uint64_t offset);
+extern int plugin_zero (struct connection *conn, uint32_t count, uint64_t offset, int may_trim);
/* sockets.c */
extern int *bind_unix_socket (size_t *);
diff --git a/src/plugins.c b/src/plugins.c
index 8f38761..574be03 100644
--- a/src/plugins.c
+++ b/src/plugins.c
@@ -48,6 +48,9 @@
static pthread_mutex_t connection_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t all_requests_lock = PTHREAD_MUTEX_INITIALIZER;
+/* Maximum read or write request that we will handle. */
+#define MAX_REQUEST_SIZE (64 * 1024 * 1024)
+
/* Currently the server can only load one plugin (see TODO). Hence we
* can just use globals to store these.
*/
@@ -252,6 +255,7 @@ plugin_dump_fields (void)
HAS (pwrite);
HAS (flush);
HAS (trim);
+ HAS (zero);
#undef HAS
}
@@ -506,3 +510,49 @@ plugin_trim (struct connection *conn, uint32_t count, uint64_t offset)
return -1;
}
}
+
+/* Write count bytes of zeroes at offset on behalf of a connection.
+ *
+ * The plugin's .zero callback is tried first when it is defined.  If
+ * it is not defined, or if it fails with errno set to EOPNOTSUPP, the
+ * request is emulated by calling .pwrite with a zero-filled buffer,
+ * split into chunks of at most MAX_REQUEST_SIZE bytes.
+ *
+ * A zero-length request succeeds immediately.  Otherwise the return
+ * value is whatever the last callback invoked returned, or -1 with
+ * errno set to ENOMEM if the zero buffer cannot be allocated.
+ */
+int
+plugin_zero (struct connection *conn,
+             uint32_t count, uint64_t offset, int may_trim)
+{
+  char *buf;
+  uint32_t limit;
+  int result;
+  int err;
+
+  assert (dl);
+  assert (conn->handle);
+
+  debug ("zero count=%" PRIu32 " offset=%" PRIu64 " may_trim=%d",
+         count, offset, may_trim);
+
+  if (!count)
+    return 0;
+  if (plugin.zero) {
+    /* Clear errno so that a stale EOPNOTSUPP from an earlier call is
+     * not mistaken for a request to fall back to .pwrite.
+     */
+    errno = 0;
+    result = plugin.zero (conn->handle, count, offset, may_trim);
+    if (result == 0 || errno != EOPNOTSUPP)
+      return result;
+  }
+
+  assert (plugin.pwrite);
+  limit = count < MAX_REQUEST_SIZE ? count : MAX_REQUEST_SIZE;
+  buf = calloc (limit, 1);
+  if (!buf) {
+    errno = ENOMEM;
+    return -1;
+  }
+
+  while (count) {
+    result = plugin.pwrite (conn->handle, buf, limit, offset);
+    if (result < 0)
+      break;
+    count -= limit;
+    /* Advance to the next chunk; without this every chunk would be
+     * written over the first one and the rest left unzeroed.
+     */
+    offset += limit;
+    if (count < limit)
+      limit = count;
+  }
+
+  /* free() may clobber errno; preserve the value left by .pwrite. */
+  err = errno;
+  free (buf);
+  errno = err;
+  return result;
+}
--
2.9.3