This pair of calls allows plugins to describe which extents in the
virtual disk are allocated, holes or zeroes.
---
docs/nbdkit-filter.pod | 22 +++++++++++
docs/nbdkit-plugin.pod | 86 +++++++++++++++++++++++++++++++++++++++++
include/nbdkit-common.h | 14 ++++++-
include/nbdkit-filter.h | 12 +++++-
include/nbdkit-plugin.h | 6 ++-
server/internal.h | 7 +++-
server/filters.c | 61 +++++++++++++++++++++++++++++
server/plugins.c | 72 +++++++++++++++++++++++++++++++++-
8 files changed, 275 insertions(+), 5 deletions(-)
diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index dc9a262..ada49dd 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -350,6 +350,8 @@ calls.
=head2 C<.can_zero>
+=head2 C<.can_extents>
+
=head2 C<.can_fua>
=head2 C<.can_multi_conn>
@@ -365,6 +367,8 @@ calls.
void *handle);
int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
+ int (*can_extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
int (*can_multi_conn) (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -513,6 +517,24 @@ value to return to the client. The filter should never fail with
C<EOPNOTSUPP> (while plugins have automatic fallback to C<.pwrite>,
filters do not).
+=head2 C<.extents>
+
+ int (*extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err);
+
+This intercepts the plugin C<.extents> method and can be used to
+modify extent requests.
+
+This function will not be called if C<.can_extents> returned false; in
+turn, the filter should not call C<next_ops-E<gt>extents> if
+C<next_ops-E<gt>can_extents> did not return true.
+
+If there is an error, C<.extents> should call C<nbdkit_error> with an
+error message B<and> return -1 with C<err> set to the positive errno
+value to return to the client.
+
=head1 ERROR HANDLING
If there is an error in the filter itself, the filter should call
diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod
index 47545f3..9cc62b6 100644
--- a/docs/nbdkit-plugin.pod
+++ b/docs/nbdkit-plugin.pod
@@ -555,6 +555,20 @@ This callback is not required. If omitted, then nbdkit always tries
C<.zero> first if it is present, and gracefully falls back to
C<.pwrite> if C<.zero> was absent or failed with C<EOPNOTSUPP>.
+=head2 C<.can_extents>
+
+ int can_extents (void *handle);
+
+This is called during the option negotiation phase to find out if the
+plugin supports detecting allocated (non-sparse) regions of the disk
+with the C<.extents> callback.
+
+If there is an error, C<.can_extents> should call C<nbdkit_error> with
+an error message and return C<-1>.
+
+This callback is not required. If omitted, then we return true iff a
+C<.extents> callback has been defined.
+
=head2 C<.can_fua>
int can_fua (void *handle);
@@ -717,6 +731,78 @@ If there is an error, C<.zero> should call
C<nbdkit_error> with an
error message, and C<nbdkit_set_error> to record an appropriate error
(unless C<errno> is sufficient), then return C<-1>.
+=head2 C<.extents>
+
+ int zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents);
+
+During the data serving phase, this callback is used to
+detect allocated (non-sparse) regions of the disk.
+
+This function will not be called if C<.can_extents> returned false.
+
+The callback should allocate and return the list of extents
+overlapping the range C<[offset...offset+count-1]>.
+
+The callback should set C<*extents> to an array of
+S<C<struct nbdkit_extent>>, and set C<*nr_extents> to the number of
+extents in the array. The returned extents array is freed by nbdkit
+by calling L<free(3)> (so it must not be statically allocated).
+
+ struct nbdkit_extent {
+ uint64_t offset;
+ uint64_t length;
+ uint32_t type; /* NBDKIT_EXTENT_TYPE_* */
+ };
+
+The C<offset> and C<length> describe the extent, and the C<type>
+should be one of the extent types:
+
+=over 4
+
+=item C<NBDKIT_EXTENT_TYPE_HOLE>
+
+This extent represents an unallocated hole in the disk.
+
+=item C<NBDKIT_EXTENT_TYPE_DATA>
+
+This extent represents normal allocated data.
+
+=item C<NBDKIT_EXTENT_TYPE_ZERO>
+
+This extent represents zeroes.
+
+=item C<NBDKIT_EXTENT_TYPE_NONE>
+
+This is a special type which means the extent record is ignored by
+nbdkit. It is used by some filters to delete extents from the array
+without having to move later entries, but plugins can also create
+extents of this type if it makes creating the array more convenient.
+
+=back
+
+As a convenience for plugin writers, extents do not need to be
+returned in order. nbdkit will reorder them for the client if
+necessary. Also nbdkit will coalesce adjacent extents with the same
+type automatically. Any extents which are outside the requested range
+will be ignored by nbdkit, so plugins may (if it is easier) return all
+extent information and nbdkit will filter as necessary. Parts of the
+range C<[offset...offset+count-1]> which are not covered by any extent
+information are assumed to be holes, so plugins may if they wish only
+return information about allocated extents.
+
+However no two extents in the returned array may overlap. This is
+treated as an error by nbdkit.
+
+The C<flags> parameter may contain the flag C<NBDKIT_FLAG_REQ_ONE>
+which means that the client is only requesting information about the
+extent overlapping C<offset>. The plugin may ignore this flag, or as
+an optimization it may return just a single entry for C<offset>.
+
+If there is an error, C<.extents> should call C<nbdkit_error> with an
+error message, and C<nbdkit_set_error> to record an appropriate error
+(unless C<errno> is sufficient), then return C<-1>.
+
=head1 THREADS
Each nbdkit plugin must declare its thread safety model by defining
diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h
index cb9954e..4a64954 100644
--- a/include/nbdkit-common.h
+++ b/include/nbdkit-common.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -60,11 +60,23 @@ extern "C" {
#define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */
#define NBDKIT_FLAG_FUA (1<<1) /* Maps to NBD_CMD_FLAG_FUA */
+#define NBDKIT_FLAG_REQ_ONE (1<<2) /* Maps to NBD_CMD_FLAG_REQ_ONE */
#define NBDKIT_FUA_NONE 0
#define NBDKIT_FUA_EMULATE 1
#define NBDKIT_FUA_NATIVE 2
+#define NBDKIT_EXTENT_TYPE_NONE 0
+#define NBDKIT_EXTENT_TYPE_HOLE 1
+#define NBDKIT_EXTENT_TYPE_DATA 2
+#define NBDKIT_EXTENT_TYPE_ZERO 3
+
+struct nbdkit_extent {
+ uint64_t offset;
+ uint64_t length;
+ uint32_t type; /* NBDKIT_EXTENT_TYPE_* */
+};
+
extern void nbdkit_error (const char *msg, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
extern void nbdkit_verror (const char *msg, va_list args);
extern void nbdkit_debug (const char *msg, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index 71c06c8..1049447 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,6 +58,7 @@ struct nbdkit_next_ops {
int (*is_rotational) (void *nxdata);
int (*can_trim) (void *nxdata);
int (*can_zero) (void *nxdata);
+ int (*can_extents) (void *nxdata);
int (*can_fua) (void *nxdata);
int (*can_multi_conn) (void *nxdata);
@@ -71,6 +72,9 @@ struct nbdkit_next_ops {
int *err);
int (*zero) (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
int *err);
+ int (*extents) (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err);
};
struct nbdkit_filter {
@@ -120,6 +124,8 @@ struct nbdkit_filter {
void *handle);
int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
+ int (*can_extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
int (*can_multi_conn) (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -140,6 +146,10 @@ struct nbdkit_filter {
int (*zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle, uint32_t count, uint64_t offset, uint32_t flags,
int *err);
+ int (*extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err);
};
#define NBDKIT_REGISTER_FILTER(filter) \
diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h
index d43b2f5..b53fecf 100644
--- a/include/nbdkit-plugin.h
+++ b/include/nbdkit-plugin.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -125,6 +125,10 @@ struct nbdkit_plugin {
const char *magic_config_key;
int (*can_multi_conn) (void *handle);
+
+ int (*can_extents) (void *handle);
+ int (*extents) (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents);
};
extern void nbdkit_set_error (int err);
diff --git a/server/internal.h b/server/internal.h
index 506882b..0c6da77 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -235,6 +235,7 @@ struct backend {
int (*is_rotational) (struct backend *, struct connection *conn);
int (*can_trim) (struct backend *, struct connection *conn);
int (*can_zero) (struct backend *, struct connection *conn);
+ int (*can_extents) (struct backend *, struct connection *conn);
int (*can_fua) (struct backend *, struct connection *conn);
int (*can_multi_conn) (struct backend *, struct connection *conn);
@@ -248,6 +249,10 @@ struct backend {
uint64_t offset, uint32_t flags, int *err);
int (*zero) (struct backend *, struct connection *conn, uint32_t count,
uint64_t offset, uint32_t flags, int *err);
+ int (*extents) (struct backend *, struct connection *conn, uint32_t count,
+ uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err);
};
/* plugins.c */
diff --git a/server/filters.c b/server/filters.c
index 5b7abc4..d6cc00f 100644
--- a/server/filters.c
+++ b/server/filters.c
@@ -309,6 +309,13 @@ next_can_zero (void *nxdata)
return b_conn->b->can_zero (b_conn->b, b_conn->conn);
}
+static int
+next_can_extents (void *nxdata)
+{
+ struct b_conn *b_conn = nxdata;
+ return b_conn->b->can_extents (b_conn->b, b_conn->conn);
+}
+
static int
next_can_fua (void *nxdata)
{
@@ -364,6 +371,16 @@ next_zero (void *nxdata, uint32_t count, uint64_t offset, uint32_t
flags,
return b_conn->b->zero (b_conn->b, b_conn->conn, count, offset, flags,
err);
}
+static int
+next_extents (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err)
+{
+ struct b_conn *b_conn = nxdata;
+ return b_conn->b->extents (b_conn->b, b_conn->conn, count, offset, flags,
+ nr_extents, extents, err);
+}
+
static struct nbdkit_next_ops next_ops = {
.get_size = next_get_size,
.can_write = next_can_write,
@@ -371,6 +388,7 @@ static struct nbdkit_next_ops next_ops = {
.is_rotational = next_is_rotational,
.can_trim = next_can_trim,
.can_zero = next_can_zero,
+ .can_extents = next_can_extents,
.can_fua = next_can_fua,
.can_multi_conn = next_can_multi_conn,
.pread = next_pread,
@@ -378,6 +396,7 @@ static struct nbdkit_next_ops next_ops = {
.flush = next_flush,
.trim = next_trim,
.zero = next_zero,
+ .extents = next_extents,
};
static int
@@ -511,6 +530,21 @@ filter_can_zero (struct backend *b, struct connection *conn)
return f->backend.next->can_zero (f->backend.next, conn);
}
+static int
+filter_can_extents (struct backend *b, struct connection *conn)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ void *handle = connection_get_handle (conn, f->backend.i);
+ struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+ debug ("%s: can_extents", f->name);
+
+ if (f->filter.can_extents)
+ return f->filter.can_extents (&next_ops, &nxdata, handle);
+ else
+ return f->backend.next->can_extents (f->backend.next, conn);
+}
+
static int
filter_can_fua (struct backend *b, struct connection *conn)
{
@@ -646,6 +680,31 @@ filter_zero (struct backend *b, struct connection *conn,
count, offset, flags, err);
}
+static int
+filter_extents (struct backend *b, struct connection *conn,
+ uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err)
+{
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ void *handle = connection_get_handle (conn, f->backend.i);
+ struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+ assert (!(flags & ~NBDKIT_FLAG_REQ_ONE));
+
+ debug ("%s: extents count=%" PRIu32 " offset=%" PRIu64 "
flags=0x%" PRIx32,
+ f->name, count, offset, flags);
+
+ if (f->filter.extents)
+ return f->filter.extents (&next_ops, &nxdata, handle,
+ count, offset, flags,
+ nr_extents, extents, err);
+ else
+ return f->backend.next->extents (f->backend.next, conn,
+ count, offset, flags,
+ nr_extents, extents, err);
+}
+
static struct backend filter_functions = {
.free = filter_free,
.thread_model = filter_thread_model,
@@ -667,6 +726,7 @@ static struct backend filter_functions = {
.is_rotational = filter_is_rotational,
.can_trim = filter_can_trim,
.can_zero = filter_can_zero,
+ .can_extents = filter_can_extents,
.can_fua = filter_can_fua,
.can_multi_conn = filter_can_multi_conn,
.pread = filter_pread,
@@ -674,6 +734,7 @@ static struct backend filter_functions = {
.flush = filter_flush,
.trim = filter_trim,
.zero = filter_zero,
+ .extents = filter_extents,
};
/* Register and load a filter. */
diff --git a/server/plugins.c b/server/plugins.c
index 28b96ad..71e4f45 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -199,6 +199,8 @@ plugin_dump_fields (struct backend *b)
HAS (trim);
HAS (zero);
HAS (can_multi_conn);
+ HAS (can_extents);
+ HAS (extents);
#undef HAS
/* Custom fields. */
@@ -392,6 +394,21 @@ plugin_can_zero (struct backend *b, struct connection *conn)
return plugin_can_write (b, conn);
}
+static int
+plugin_can_extents (struct backend *b, struct connection *conn)
+{
+ struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+
+ assert (connection_get_handle (conn, 0));
+
+ debug ("can_extents");
+
+ if (p->plugin.can_extents)
+ return p->plugin.can_extents (connection_get_handle (conn, 0));
+ else
+ return p->plugin.extents != NULL;
+}
+
static int
plugin_can_fua (struct backend *b, struct connection *conn)
{
@@ -651,6 +668,57 @@ plugin_zero (struct backend *b, struct connection *conn,
return r;
}
+static int
+plugin_extents (struct backend *b, struct connection *conn,
+ uint32_t count, uint64_t offset, uint32_t flags,
+ size_t *nr_extents, struct nbdkit_extent **extents,
+ int *err)
+{
+ struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+ bool req_one = flags & NBDKIT_FLAG_REQ_ONE;
+ int r;
+ int can_extents = 0; /* XXX probably should be cached per connection */
+
+ assert (connection_get_handle (conn, 0));
+ assert (!(flags & ~NBDKIT_FLAG_REQ_ONE));
+
+ debug ("extents count=%" PRIu32 " offset=%" PRIu64 "
req_one=%d",
+ count, offset, req_one);
+
+ if (!count)
+ return 0;
+
+ if (p->plugin.can_extents) {
+ can_extents = p->plugin.can_extents (connection_get_handle (conn, 0));
+ if (can_extents == -1) {
+ *err = get_error (p);
+ return -1;
+ }
+ }
+
+ if (can_extents) {
+ r = p->plugin.extents (connection_get_handle (conn, 0), count, offset,
+ flags, nr_extents, extents);
+ if (r == -1)
+ *err = get_error (p);
+ return r;
+ }
+ else {
+ /* By default we assume that everything in the disk is allocated. */
+ *extents = calloc (1, sizeof (struct nbdkit_extent));
+ if (*extents == NULL) {
+ *err = errno;
+ nbdkit_error ("calloc");
+ return -1;
+ }
+ (*extents)[0].offset = offset;
+ (*extents)[0].length = count;
+ (*extents)[0].type = NBDKIT_EXTENT_TYPE_DATA;
+ *nr_extents = 1;
+ return 0;
+ }
+}
+
static struct backend plugin_functions = {
.free = plugin_free,
.thread_model = plugin_thread_model,
@@ -672,6 +740,7 @@ static struct backend plugin_functions = {
.is_rotational = plugin_is_rotational,
.can_trim = plugin_can_trim,
.can_zero = plugin_can_zero,
+ .can_extents = plugin_can_extents,
.can_fua = plugin_can_fua,
.can_multi_conn = plugin_can_multi_conn,
.pread = plugin_pread,
@@ -679,6 +748,7 @@ static struct backend plugin_functions = {
.flush = plugin_flush,
.trim = plugin_trim,
.zero = plugin_zero,
+ .extents = plugin_extents,
};
/* Register and load a plugin. */
--
2.20.1