[PATCH libnbd 0/2] Fix abort with --sparse=
by Nir Soffer
Trying to play with --sparse= I found that we abort when converting standard
images (e.g. a fedora-32 image created with virt-builder). Add temporary
debugging messages to diagnose the issue (these should not be merged as is)
and fix it.
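As a rough illustration of the alignment adjustment hinted at by the second
patch below (a sketch only, not the actual fix; the real change lives in
copy/multi-thread-copying.c):

/* Sketch: when the range to be sparsified does not start on a request
 * boundary, round the start up to the next aligned offset and treat the
 * unaligned head separately.  'align' must be a power of two.
 */
#include <stdint.h>

static inline uint64_t
round_up_sketch (uint64_t offset, uint64_t align)
{
  return (offset + align - 1) & ~(align - 1);
}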
Nir Soffer (2):
copy: Add temporary debugging messages
copy: Fix sparsifying if start is not aligned
copy/multi-thread-copying.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
--
2.26.2
[PATCH libnbd v3 0/2] Improve zero correctness, reliability and compatibility
by Nir Soffer
Don't use trim() for zeroing. NBD_CMD_TRIM does not guarantee anything
about the content of the trimmed range. Add an allocate flag to zero(), so
it can be used for both sparse and allocated copy.
The zero strategy was less advanced than what we have in the nbdkit file
plugin. Port the zero strategy from the file plugin, improving
reliability and compatibility.
Tested with loop devices, local files, and qemu-nbd.
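As a rough illustration of the allocate idea (a sketch only; the names and
signature are assumptions, not the series' actual API):

/* Sketch: a zero operation that either may punch a hole (sparse copy)
 * or must leave the range allocated, depending on the caller's request.
 */
#include <stdbool.h>
#include <stdint.h>

struct ops_sketch {
  /* Write 'count' zero bytes at 'offset'.  If 'allocate' is true the
   * destination must end up allocated; if false the implementation may
   * deallocate (sparsify) the range instead.
   */
  void (*zero) (void *handle, uint64_t count, uint64_t offset,
                bool allocate);
};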
Changes in v2:
- Rebase after struct rw refactoring
- Fix block device checks
- Improve formatting (Rich)
Changes in v3:
- Remove trim support, which also fixes the compilation (v1 and v2 were
broken; I forgot to add the allocate flag to pipe-ops.c).
- Remove block device fix (already in master)
Nir Soffer (2):
copy: Do not use trim for zeroing
copy/file-ops.c: Port zero strategy from nbdkit
copy/copy-sparse.sh | 66 +++++++-------
copy/file-ops.c | 168 ++++++++++++++++++++++++++----------
copy/multi-thread-copying.c | 24 ++----
copy/nbd-ops.c | 51 ++---------
copy/nbdcopy.h | 20 ++---
copy/null-ops.c | 28 +-----
copy/pipe-ops.c | 12 ++-
copy/synch-copying.c | 8 +-
8 files changed, 190 insertions(+), 187 deletions(-)
--
2.26.2
[PATCH libnbd v2 0/3] Improve zero correctness, reliability and compatibility
by Nir Soffer
Don't use trim() for zeroing. NBD_CMD_TRIM does not guarantee anything about
the content of the trimmed range. Add an allocate flag to zero(), so it can
be used for both sparse and allocated copy.
The zero strategy was less advanced than what we have in the nbdkit file plugin.
Port the zero strategy from the file plugin, improving reliability and
compatibility.
Tested with loop devices, local files, and qemu-nbd.
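For readers unfamiliar with the strategy being ported, a rough sketch of the
approach (assuming Linux fallocate(2) with _GNU_SOURCE; this is not the
series' exact code):

/* Sketch: try the cheap kernel mechanisms first, then fall back to
 * writing zeroes by hand.
 */
#define _GNU_SOURCE
#include <fcntl.h>      /* fallocate, FALLOC_FL_* */
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>     /* pwrite */

static void
zero_range_sketch (int fd, uint64_t offset, uint64_t count, bool allocate)
{
  /* Cheapest: punch a hole, if the caller allows deallocation. */
  if (!allocate &&
      fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                 offset, count) == 0)
    return;

  /* Next: zero the range while keeping it allocated. */
  if (fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count) == 0)
    return;

  /* Fallback: write zeroes manually, stopping on error or short write. */
  static const char zeroes[64 * 1024];
  while (count > 0) {
    size_t n = count < sizeof zeroes ? count : sizeof zeroes;
    if (pwrite (fd, zeroes, n, offset) != (ssize_t) n)
      return;
    offset += n;
    count -= n;
  }
}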
Changes since v1:
- Rebase after struct rw refactoring
- Fix block device checks
- Improve formatting (Rich)
Nir Soffer (3):
copy/file-ops.c: Fix copy for block device
copy: Do not use trim for zeroing
copy/file-ops.c: Port zero strategy from nbdkit
copy/copy-sparse.sh | 66 +++++++-------
copy/file-ops.c | 168 +++++++++++++++++++++++++++++-------
copy/multi-thread-copying.c | 20 ++---
copy/nbd-ops.c | 11 +--
copy/nbdcopy.h | 5 +-
copy/null-ops.c | 4 +-
copy/synch-copying.c | 8 +-
7 files changed, 193 insertions(+), 89 deletions(-)
--
2.26.2
[PATCH libnbd] copy: Refactor struct rw
by Richard W.M. Jones
This is a large refactoring change which tries to clean up the
internals of nbdcopy by making the various "ops" truly encapsulated.
It obviously will create massive conflicts with Nir's patch series
(although almost completely mechanical to fix). I promise to resolve
those conflicts for you if you get me an updated version of your patch
series.
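As a loose illustration of what "truly encapsulated ops" means here (a
sketch with hypothetical names, not the actual refactoring):

/* Sketch: each source/destination type supplies a vtable of function
 * pointers, and callers only ever go through the vtable instead of
 * poking at type-specific fields of struct rw directly.
 */
#include <stddef.h>
#include <stdint.h>

struct rw_sketch;

struct rw_ops_sketch {
  void (*close) (struct rw_sketch *rw);
  size_t (*read) (struct rw_sketch *rw, void *buf, size_t len,
                  uint64_t offset);
  void (*write) (struct rw_sketch *rw, const void *buf, size_t len,
                 uint64_t offset);
};

struct rw_sketch {
  const struct rw_ops_sketch *ops;   /* all behaviour goes through this */
  const char *name;
  int64_t size;
};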
Rich.
[nbdkit PATCH] gzip/xz: Advertise multi-conn
by Eric Blake
Since we aren't modifying the data when decompressing, we are
inherently consistent between connections.
---
filters/gzip/gzip.c | 11 ++++++++++-
filters/xz/xz.c | 11 ++++++++++-
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/filters/gzip/gzip.c b/filters/gzip/gzip.c
index 20b533b9..03e023e5 100644
--- a/filters/gzip/gzip.c
+++ b/filters/gzip/gzip.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018-2020 Red Hat Inc.
+ * Copyright (C) 2018-2021 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -266,6 +266,14 @@ gzip_can_write (struct nbdkit_next_ops *next_ops, void *nxdata,
return 0;
}
+/* Whatever the plugin says, this filter is consistent across connections. */
+static int
+gzip_can_multi_conn (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ return 1;
+}
+
/* Similar to above, whatever the plugin says, extents are not
* supported.
*/
@@ -360,6 +368,7 @@ static struct nbdkit_filter filter = {
.can_write = gzip_can_write,
.can_extents = gzip_can_extents,
.can_cache = gzip_can_cache,
+ .can_multi_conn = gzip_can_multi_conn,
.prepare = gzip_prepare,
.export_description = gzip_export_description,
.get_size = gzip_get_size,
diff --git a/filters/xz/xz.c b/filters/xz/xz.c
index 72801536..63adb37e 100644
--- a/filters/xz/xz.c
+++ b/filters/xz/xz.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2020 Red Hat Inc.
+ * Copyright (C) 2013-2021 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -190,6 +190,14 @@ xz_can_write (struct nbdkit_next_ops *next_ops, void *nxdata,
return 0;
}
+/* Whatever the plugin says, this filter is consistent across connections. */
+static int
+xz_can_multi_conn (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ return 1;
+}
+
/* Similar to above. However xz files themselves do support
* sparseness so in future we should generate extents information. XXX
*/
@@ -271,6 +279,7 @@ static struct nbdkit_filter filter = {
.can_write = xz_can_write,
.can_extents = xz_can_extents,
.can_cache = xz_can_cache,
+ .can_multi_conn = xz_can_multi_conn,
.pread = xz_pread,
};
--
2.30.1
[nbdkit PATCH] readahead: Fix multi-conn inconsistency
by Eric Blake
The readahead filter blindly passes through the plugin's .can_multi_conn,
but does not kill the readahead buffer on a flush.
This is a bug in the following scenario, when the plugin advertised
multi-conn:
connA                               connB
                                    write
wipes our buffer
                                    plugin caches the write
read
reads stale data into buffer
                                    flush
                                    plugin finally flushes the write
read
data in buffer is still stale
In order to preserve the plugin's multi-conn status, we MUST drop our
cache any time we flush, so that our next read picks up whatever got
flushed from another connection.
But when the server lacks multi-conn, we don't have to kill our
readahead buffer on flush, because the client already has no guarantee
that a flush from one connection will affect the read seen by another
(killing on write was already good enough to maintain semantics).
---
filters/readahead/readahead.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/filters/readahead/readahead.c b/filters/readahead/readahead.c
index 8163c4fb..a97c5da5 100644
--- a/filters/readahead/readahead.c
+++ b/filters/readahead/readahead.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2019 Red Hat Inc.
+ * Copyright (C) 2019-2021 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -244,6 +244,15 @@ readahead_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
return next_ops->zero (nxdata, count, offset, flags, err);
}
+static int
+readahead_flush (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle, uint32_t flags, int *err)
+{
+ if (next_ops->can_multi_conn(nxdata) != 1)
+ kill_readahead ();
+ return next_ops->flush (nxdata, flags, err);
+}
+
static struct nbdkit_filter filter = {
.name = "readahead",
.longname = "nbdkit readahead filter",
@@ -255,6 +264,7 @@ static struct nbdkit_filter filter = {
.pwrite = readahead_pwrite,
.trim = readahead_trim,
.zero = readahead_zero,
+ .flush = readahead_flush,
};
NBDKIT_REGISTER_FILTER(filter)
--
2.30.1
[PATCH libnbd 0/2] Improve zero correctness, reliability and compatibility
by Nir Soffer
Don't use trim() for zeroing. NBD_CMD_TRIM does not guarantee anything about
the content of the trimmed range. Add an allocate flag to zero(), so it can
be used for both sparse and allocated copy.
The zero strategy was less advanced than what we have in the nbdkit file plugin.
Port the zero strategy from the file plugin, improving reliability and
compatibility.
Tested with loop devices, local files, and qemu-nbd.
Nir Soffer (2):
nbdcopy: Do not use trim for zeroing
nbdcopy: Port zero strategy from nbdkit
copy/copy-sparse.sh | 66 ++++++++---------
copy/file-ops.c | 141 +++++++++++++++++++++++++++++-------
copy/main.c | 19 +++++
copy/multi-thread-copying.c | 20 ++---
copy/nbd-ops.c | 8 +-
copy/nbdcopy.h | 11 ++-
copy/null-ops.c | 4 +-
copy/synch-copying.c | 8 +-
8 files changed, 190 insertions(+), 87 deletions(-)
--
2.26.2
[PATCH libnbd] copy: Set POSIX_FADV_SEQUENTIAL and POSIX_FADV_DONTNEED
by Richard W.M. Jones
I'm trying to emulate what we do in nbdkit-file-plugin to avoid the
page cache getting trashed when copying to/from local files:
https://gitlab.com/nbdkit/nbdkit/-/commit/aa5a2183a6d16afd919174b5ba8222b...
This patch works well when reading from a local file, as you can see
from the results in the commit message. However, when writing to a
local file it has absolutely no effect, and I've no idea why. (I even
checked with strace, and it's making the correct fadvise64 system
calls.)
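For reference, a minimal sketch of the kind of fadvise calls being discussed
(illustrative only; return values are ignored, and this is not the patch
itself):

/* Sketch: hint sequential access up front, then drop pages we have
 * already consumed so they do not evict more useful cache entries.
 */
#include <fcntl.h>      /* posix_fadvise */
#include <stdint.h>

static void
advise_sequential_sketch (int fd)
{
  /* A length of 0 means "to the end of the file". */
  posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
}

static void
drop_consumed_pages_sketch (int fd, uint64_t offset, uint64_t count)
{
  /* We will not read this range again; let the kernel reclaim it. */
  posix_fadvise (fd, offset, count, POSIX_FADV_DONTNEED);
}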
For the reading case it has a slight performance impact which could be
problematic. Should we only enable it with a flag? I would prefer to
fix it so it's always beneficial, but it's not too easy to do that
without adding more threads and synchronization.
Rich.
[nbdkit PATCH] ext2: Disable .can_multi_conn
by Eric Blake
Although passing through the plugin's multi-conn status happens to be
safe due to our current .thread_model forbidding parallel connections,
explicitly overriding it now will help us remember to think about the
issue if we later improve the filter to allow parallelism.
---
filters/ext2/ext2.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/filters/ext2/ext2.c b/filters/ext2/ext2.c
index 549ef5ed..05ff634e 100644
--- a/filters/ext2/ext2.c
+++ b/filters/ext2/ext2.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2017-2020 Red Hat Inc.
+ * Copyright (C) 2017-2021 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -305,6 +305,20 @@ ext2_can_cache (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle)
return NBDKIT_CACHE_EMULATE;
}
+static int
+ext2_can_multi_conn (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ /* Since we do not permit parallel connections, it does not matter
+ * what we advertise here, and we could just as easily inherit the
+ * plugin's .can_multi_conn for now. But realistically, if we
+ * adjust .thread_model, we cannot advertise support unless .flush
+ * is consistent, and that would require inspecting the ext2 source
+ * code, so for now, we hard-code a safe answer.
+ */
+ return 0;
+}
+
/* It might be possible to relax this, but it's complicated.
*
* It's desirable for ‘nbdkit -r’ to behave the same way as
@@ -470,6 +484,7 @@ static struct nbdkit_filter filter = {
.close = ext2_close,
.can_fua = ext2_can_fua,
.can_cache = ext2_can_cache,
+ .can_multi_conn = ext2_can_multi_conn,
.export_description = ext2_export_description,
.get_size = ext2_get_size,
.pread = ext2_pread,
--
2.30.1
[nbdkit PATCH] checkwrite: Implement .can_fast_zero, .can_multi_conn
by Eric Blake
Blindly passing through the plugin's .can_fast_zero when our own .zero
never calls the plugin is fishy; rather, we can use the heuristic that
if .extents shows all zeroes, we were fast; otherwise we fail fast
rather than calling the plugin's .pread. And since we never modify
any of the plugin's data, we can advertise multi-conn (note that it is
already unwise to use this filter with a plugin that reports different
data on read without any intervening writes, such as the info plugin).
---
filters/checkwrite/checkwrite.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/filters/checkwrite/checkwrite.c b/filters/checkwrite/checkwrite.c
index 2792d4d8..bbd7f1cd 100644
--- a/filters/checkwrite/checkwrite.c
+++ b/filters/checkwrite/checkwrite.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2019-2020 Red Hat Inc.
+ * Copyright (C) 2019-2021 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -91,6 +91,23 @@ checkwrite_can_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
return NBDKIT_ZERO_NATIVE;
}
+static int
+checkwrite_can_fast_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ /* It is better to advertise support, even if we always reject fast
+ * zero attempts when the plugin lacks .can_extents.
+ */
+ return 1;
+}
+
+static int
+checkwrite_can_multi_conn (struct nbdkit_next_ops *next_ops, void *nxdata,
+ void *handle)
+{
+ return 1;
+}
+
static inline int
data_does_not_match (int *err)
{
@@ -171,6 +188,10 @@ checkwrite_trim_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
}
/* Otherwise we have to read the underlying data and check. */
+ if (flags & NBDKIT_FLAG_FAST_ZERO) {
+ *err = ENOTSUP;
+ return -1;
+ }
while (count > 0) {
size_t buflen = MIN (MAX_REQUEST_SIZE, count);
buflen = MIN (buflen, next_extent_offset - offset);
@@ -199,6 +220,10 @@ checkwrite_trim_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
else {
CLEANUP_FREE char *buf;
+ if (flags & NBDKIT_FLAG_FAST_ZERO) {
+ *err = ENOTSUP;
+ return -1;
+ }
buf = malloc (MIN (MAX_REQUEST_SIZE, count));
if (buf == NULL) {
*err = errno;
@@ -231,6 +256,8 @@ static struct nbdkit_filter filter = {
.can_fua = checkwrite_can_fua,
.can_trim = checkwrite_can_trim,
.can_zero = checkwrite_can_zero,
+ .can_fast_zero = checkwrite_can_fast_zero,
+ .can_multi_conn = checkwrite_can_multi_conn,
.pwrite = checkwrite_pwrite,
.flush = checkwrite_flush,
--
2.30.1