The NBD protocol is adding an extension to let servers advertise
initialization state to the client: whether the image contains holes,
and whether it is known to read as all zeroes. Most filters just pass
through the plugin's result (as it is only checked once when first
connecting), but we can do useful things in log (mention the setting),
extentlist (answer based on the extents we are advertising), noextents
(suppress the advertisement), and truncate (advertise sparse support
for our added tail hole). Expand a comment in the cache filter about
potential future enchnancements.
Initial read-only zero status is easy to add through the extentlist
filter (supply a filter list with no entries and the entire image is
sparse); supplying startup zero status without overriding the
plugin's runtime extents would require either a new filter or a new
knob to an existing one.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
docs/nbdkit-filter.pod | 10 +++++-
filters/cache/blk.c | 7 ++--
filters/extentlist/extentlist.c | 28 +++++++++++++++
filters/log/log.c | 9 +++--
filters/log/nbdkit-log-filter.pod | 2 +-
filters/noextents/nbdkit-noextents-filter.pod | 11 +++---
filters/noextents/noextents.c | 18 +++++++++-
filters/truncate/truncate.c | 15 +++++++-
include/nbdkit-filter.h | 8 ++++-
server/filters.c | 34 ++++++++++++++++---
10 files changed, 123 insertions(+), 19 deletions(-)
diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index 55dfab1..0f81684 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -413,6 +413,10 @@ cached value.
=head2 C<.can_cache>
+=head2 C<.init_sparse>
+
+=head2 C<.init_zero>
+
int (*can_write) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
int (*can_flush) (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -434,6 +438,10 @@ cached value.
void *handle);
int (*can_cache) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
+ int (*init_sparse) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
+ int (*init_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
These intercept the corresponding plugin methods, and control feature
bits advertised to the client.
@@ -821,4 +829,4 @@ Richard W.M. Jones
=head1 COPYRIGHT
-Copyright (C) 2013-2019 Red Hat Inc.
+Copyright (C) 2013-2020 Red Hat Inc.
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index da27318..24d7310 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018-2019 Red Hat Inc.
+ * Copyright (C) 2018-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -68,7 +68,7 @@ static int fd = -1;
* 10 = <unused>
* 11 = block cached and dirty
*
- * Future enhancement:
+ * Future enhancements:
*
* We need to cache information about holes, ie. blocks which read as
* zeroes but are not explicitly stored in the cache. This
@@ -79,6 +79,9 @@ static int fd = -1;
* not have to query the plugin a second time (especially useful for
* VDDK where querying extents is slow, and for qemu which [in 2019]
* repeatedly requests the same information with REQ_ONE set).
+ * Another potential interaction is when plugin->init_zero returns 1,
+ * in which case we know any uncached read will see zero up until the
+ * cache undergoes its first eviction.
*/
static struct bitmap bm;
diff --git a/filters/extentlist/extentlist.c b/filters/extentlist/extentlist.c
index 5f4990b..ea0e347 100644
--- a/filters/extentlist/extentlist.c
+++ b/filters/extentlist/extentlist.c
@@ -271,6 +271,32 @@ extentlist_can_extents (struct nbdkit_next_ops *next_ops, void
*nxdata,
return 1;
}
+static int
+extentlist_init_sparse (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ /* True if at least one extent is a hole */
+ int i;
+
+ for (i = 0; i < nr_extents; i++)
+ if (extents[i].type & NBDKIT_EXTENT_HOLE)
+ return 1;
+ return 0;
+}
+
+static int
+extentlist_init_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ /* True if all extents read as zero */
+ int i;
+
+ for (i = 0; i < nr_extents; i++)
+ if (!(extents[i].type & NBDKIT_EXTENT_ZERO))
+ return 0;
+ return 1;
+}
+
/* Use ‘-D extentlist.lookup=1’ to debug the function below. */
int extentlist_debug_lookup = 0;
@@ -320,6 +346,8 @@ static struct nbdkit_filter filter = {
.config = extentlist_config,
.config_complete = extentlist_config_complete,
.can_extents = extentlist_can_extents,
+ .init_sparse = extentlist_init_sparse,
+ .init_zero = extentlist_init_zero,
.extents = extentlist_extents,
};
diff --git a/filters/log/log.c b/filters/log/log.c
index 7eb608c..3eeb033 100644
--- a/filters/log/log.c
+++ b/filters/log/log.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018-2019 Red Hat Inc.
+ * Copyright (C) 2018-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -262,14 +262,17 @@ log_prepare (struct nbdkit_next_ops *next_ops, void *nxdata, void
*handle,
int e = next_ops->can_extents (nxdata);
int c = next_ops->can_cache (nxdata);
int Z = next_ops->can_fast_zero (nxdata);
+ int is = next_ops->init_sparse (nxdata);
+ int iz = next_ops->init_zero (nxdata);
if (size < 0 || w < 0 || f < 0 || r < 0 || t < 0 || z < 0 || F < 0
||
- e < 0 || c < 0 || Z < 0)
+ e < 0 || c < 0 || Z < 0 || is < 0 || iz < 0)
return -1;
output (h, "Connect", 0, "size=0x%" PRIx64 " write=%d flush=%d
"
"rotational=%d trim=%d zero=%d fua=%d extents=%d cache=%d "
- "fast_zero=%d", size, w, f, r, t, z, F, e, c, Z);
+ "fast_zero=%d init_sparse=%d init_zero=%d", size, w, f, r, t, z,
+ F, e, c, Z, is, iz);
return 0;
}
diff --git a/filters/log/nbdkit-log-filter.pod b/filters/log/nbdkit-log-filter.pod
index b412e7e..2a5f8a2 100644
--- a/filters/log/nbdkit-log-filter.pod
+++ b/filters/log/nbdkit-log-filter.pod
@@ -59,7 +59,7 @@ on the connection).
An example logging session of a client that performs a single
successful read is:
- 2018-01-27 20:38:22.959984 connection=1 Connect size=0x400 write=1 flush=1 rotational=0
trim=0 zero=1 fua=1 extents=1 cache=0 fast_zero=0
+ 2018-01-27 20:38:22.959984 connection=1 Connect size=0x400 write=1 flush=1 rotational=0
trim=0 zero=1 fua=1 extents=1 cache=0 fast_zero=0 init_sparse=0 init_zero=0
2018-01-27 20:38:23.001720 connection=1 Read id=1 offset=0x0 count=0x100 ...
2018-01-27 20:38:23.001995 connection=1 ...Read id=1 return=0 (Success)
2018-01-27 20:38:23.044259 connection=1 Disconnect transactions=1
diff --git a/filters/noextents/nbdkit-noextents-filter.pod
b/filters/noextents/nbdkit-noextents-filter.pod
index 0260a5c..1c69d6e 100644
--- a/filters/noextents/nbdkit-noextents-filter.pod
+++ b/filters/noextents/nbdkit-noextents-filter.pod
@@ -12,10 +12,11 @@ nbdkit-noextents-filter - disable extents in the underlying plugin
client to detect sparse regions of the underlying disk.
C<nbdkit-noextents-filter> disables this so that the plugin appears to
be fully allocated, at least to a client that requests structured
-replies. It is also possible to use the I<--no-sr> option to nbdkit
-to prevent the client from using structured replies, which among its
-other side effects will prevent the client from querying extents at
-all.
+replies. This filter also disables any server advertisement on
+whether the image begins life with holes or zero content. It is also
+possible to use the I<--no-sr> option to nbdkit to prevent the client
+from using structured replies, which among its other side effects will
+prevent the client from querying extents at all.
This filter can be useful when combined with L<nbdkit-file-plugin(1)>
serving a file from a file system known to have poor C<lseek(2)>
@@ -60,4 +61,4 @@ Richard W.M. Jones
=head1 COPYRIGHT
-Copyright (C) 2019 Red Hat Inc.
+Copyright (C) 2019-2020 Red Hat Inc.
diff --git a/filters/noextents/noextents.c b/filters/noextents/noextents.c
index e6ac33b..091f30b 100644
--- a/filters/noextents/noextents.c
+++ b/filters/noextents/noextents.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2019 Red Hat Inc.
+ * Copyright (C) 2019-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -41,10 +41,26 @@ noextents_can_extents (struct nbdkit_next_ops *next_ops, void
*nxdata,
return 0;
}
+static int
+noextents_init_sparse (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ return 0;
+}
+
+static int
+noextents_init_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ return 0;
+}
+
static struct nbdkit_filter filter = {
.name = "noextents",
.longname = "nbdkit noextents filter",
.can_extents = noextents_can_extents,
+ .init_sparse = noextents_init_sparse,
+ .init_zero = noextents_init_zero,
};
NBDKIT_REGISTER_FILTER(filter)
diff --git a/filters/truncate/truncate.c b/filters/truncate/truncate.c
index 8317449..efd1f03 100644
--- a/filters/truncate/truncate.c
+++ b/filters/truncate/truncate.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018-2019 Red Hat Inc.
+ * Copyright (C) 2018-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -229,6 +229,18 @@ truncate_can_fast_zero (struct nbdkit_next_ops *next_ops, void
*nxdata,
return 1;
}
+/* Override the plugin's .init_sparse, because any tail is sparse. */
+static int
+truncate_init_sparse (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ struct handle *h = handle;
+
+ if (h->real_size < h->size)
+ return 1;
+ return next_ops->init_sparse (nxdata);
+}
+
/* Read data. */
static int
truncate_pread (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -426,6 +438,7 @@ static struct nbdkit_filter filter = {
.prepare = truncate_prepare,
.get_size = truncate_get_size,
.can_fast_zero = truncate_can_fast_zero,
+ .init_sparse = truncate_init_sparse,
.pread = truncate_pread,
.pwrite = truncate_pwrite,
.trim = truncate_trim,
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index 3500317..d327bf8 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2019 Red Hat Inc.
+ * Copyright (C) 2013-2020 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -86,6 +86,8 @@ struct nbdkit_next_ops {
int (*can_fua) (void *nxdata);
int (*can_multi_conn) (void *nxdata);
int (*can_cache) (void *nxdata);
+ int (*init_sparse) (void *nxdata);
+ int (*init_zero) (void *nxdata);
int (*pread) (void *nxdata, void *buf, uint32_t count, uint64_t offset,
uint32_t flags, int *err);
@@ -164,6 +166,10 @@ struct nbdkit_filter {
void *handle);
int (*can_cache) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle);
+ int (*init_sparse) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
+ int (*init_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle);
int (*pread) (struct nbdkit_next_ops *next_ops, void *nxdata,
void *handle, void *buf, uint32_t count, uint64_t offset,
diff --git a/server/filters.c b/server/filters.c
index 6b9d0b8..a654656 100644
--- a/server/filters.c
+++ b/server/filters.c
@@ -336,6 +336,20 @@ next_can_cache (void *nxdata)
return backend_can_cache (b_conn->b, b_conn->conn);
}
+static int
+next_init_sparse (void *nxdata)
+{
+ struct b_conn *b_conn = nxdata;
+ return backend_init_sparse (b_conn->b, b_conn->conn);
+}
+
+static int
+next_init_zero (void *nxdata)
+{
+ struct b_conn *b_conn = nxdata;
+ return backend_init_zero (b_conn->b, b_conn->conn);
+}
+
static int
next_pread (void *nxdata, void *buf, uint32_t count, uint64_t offset,
uint32_t flags, int *err)
@@ -407,6 +421,8 @@ static struct nbdkit_next_ops next_ops = {
.can_fua = next_can_fua,
.can_multi_conn = next_can_multi_conn,
.can_cache = next_can_cache,
+ .init_sparse = next_init_sparse,
+ .init_zero = next_init_zero,
.pread = next_pread,
.pwrite = next_pwrite,
.flush = next_flush,
@@ -577,15 +593,25 @@ filter_can_cache (struct backend *b, struct connection *conn, void
*handle)
static int
filter_init_sparse (struct backend *b, struct connection *conn, void *handle)
{
- /* TODO Allow filter to control this. */
- return 0;
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ struct b_conn nxdata = { .b = b->next, .conn = conn };
+
+ if (f->filter.init_sparse)
+ return f->filter.init_sparse (&next_ops, &nxdata, handle);
+ else
+ return backend_init_sparse (b->next, conn);
}
static int
filter_init_zero (struct backend *b, struct connection *conn, void *handle)
{
- /* TODO Allow filter to control this. */
- return 0;
+ struct backend_filter *f = container_of (b, struct backend_filter, backend);
+ struct b_conn nxdata = { .b = b->next, .conn = conn };
+
+ if (f->filter.init_zero)
+ return f->filter.init_zero (&next_ops, &nxdata, handle);
+ else
+ return backend_init_zero (b->next, conn);
}
static int
--
2.24.1