When exporting a sparse raw image, qemu-nbd reports unallocated areas as
zero:
$ qemu-nbd --persistent --socket /tmp/nbd.sock --read-only --shared=10 \
--format raw empty-6g.raw --cache=none --aio=native
$ nbdinfo --map nbd+unix:///?socket=/tmp/nbd.sock
0 6442450944 2 zero
When using a qcow2 image, it reports unallocated areas as holes:
$ qemu-nbd --persistent --socket /tmp/nbd.sock --read-only --shared=10 \
--format qcow2 empty-6g.qcow2 --cache=none --aio=native
$ nbdinfo --map nbd+unix:///?socket=/tmp/nbd.sock
0 6442450944 3 hole,zero
Since nbdcopy is ignoring the ZERO flag and using only the HOLE flag,
copying raw images is extremely slow:
$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
Time (mean ± σ): 1.595 s ± 0.034 s [User: 2.284 s, System: 3.565 s]
Range (min … max): 1.522 s … 1.630 s 10 runs
This is 69 times slower than qemu-img:
$ hyperfine -w3 "qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock \
'json:{\"file.driver\":\"null-co\",\"file.size\":\"6g\"}'"
Benchmark #1: qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock
'json:{"file.driver":"null-co","file.size":"6g"}'
Time (mean ± σ): 23.1 ms ± 0.5 ms [User: 6.3 ms, System: 16.5 ms]
Range (min … max): 22.6 ms … 25.5 ms 124 runs
Using ZERO instead of HOLE, nbdcopy does not read zero extents from the
server so it can copy this image 165 times faster:
$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
Time (mean ± σ): 9.8 ms ± 0.8 ms [User: 6.7 ms, System: 5.2 ms]
Range (min … max): 9.2 ms … 15.4 ms 287 runs
Real images show a smaller speedup, only 2 times faster:
$ qemu-nbd --persistent --socket /tmp/nbd.sock --read-only --shared=10 \
--format raw fedora-32.raw --cache=none --aio=native
Before:
$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
Time (mean ± σ): 1.613 s ± 0.181 s [User: 1.843 s, System: 2.820 s]
Range (min … max): 1.407 s … 1.829 s 10 runs
After:
$ hyperfine -w3 "./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:"
Benchmark #1: ./nbdcopy nbd+unix:///?socket=/tmp/nbd.sock null:
Time (mean ± σ): 795.5 ms ± 78.7 ms [User: 198.3 ms, System: 743.1 ms]
Range (min … max): 743.3 ms … 1012.0 ms 10 runs
For reference, copying the same image with qemu-img:
$ hyperfine -w3 "qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock \
'json:{\"file.driver\":\"null-co\",\"file.size\":\"6g\"}'"
Benchmark #1: qemu-img convert -n nbd+unix:///?socket=/tmp/nbd.sock
'json:{"file.driver":"null-co","file.size":"6g"}'
Time (mean ± σ): 1.046 s ± 0.028 s [User: 122.3 ms, System: 354.5 ms]
Range (min … max): 1.026 s … 1.121 s 10 runs
This issue does not exist when copying from a file, since in this case we
detect unallocated areas as holes.
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
copy/file-ops.c | 4 ++--
copy/main.c | 2 +-
copy/multi-thread-copying.c | 4 ++--
copy/nbd-ops.c | 10 ++++++++--
copy/nbdcopy.h | 2 +-
copy/synch-copying.c | 2 +-
6 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/copy/file-ops.c b/copy/file-ops.c
index f61b67e..2a239d0 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -250,7 +250,7 @@ file_get_extents (struct rw *rw, uintptr_t index,
if (pos > offset) {
e.offset = offset;
e.length = pos - offset;
- e.hole = true;
+ e.zero = true;
if (extent_list_append (ret, e) == -1) {
perror ("realloc");
exit (EXIT_FAILURE);
@@ -271,7 +271,7 @@ file_get_extents (struct rw *rw, uintptr_t index,
if (pos > offset) {
e.offset = offset;
e.length = pos - offset;
- e.hole = false;
+ e.zero = false;
if (extent_list_append (ret, e) == -1) {
perror ("realloc");
exit (EXIT_FAILURE);
diff --git a/copy/main.c b/copy/main.c
index cfecb32..68a6030 100644
--- a/copy/main.c
+++ b/copy/main.c
@@ -667,7 +667,7 @@ default_get_extents (struct rw *rw, uintptr_t index,
e.offset = offset;
e.length = count;
- e.hole = false;
+ e.zero = false;
if (extent_list_append (ret, e) == -1) {
perror ("realloc");
exit (EXIT_FAILURE);
diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c
index 4576119..98b4056 100644
--- a/copy/multi-thread-copying.c
+++ b/copy/multi-thread-copying.c
@@ -157,8 +157,8 @@ worker_thread (void *indexp)
char *data;
size_t len;
- if (exts.ptr[i].hole) {
- /* The source is a hole so we can proceed directly to
+ if (exts.ptr[i].zero) {
+ /* The source is zero so we can proceed directly to
* skipping, trimming or writing zeroes at the destination.
*/
command = calloc (1, sizeof *command);
diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
index f7dc37c..0bcf29b 100644
--- a/copy/nbd-ops.c
+++ b/copy/nbd-ops.c
@@ -190,8 +190,14 @@ add_extent (void *vp, const char *metacontext,
e.offset = offset;
e.length = entries[i];
- /* Note we deliberately don't care about the ZERO flag. */
- e.hole = (entries[i+1] & LIBNBD_STATE_HOLE) != 0;
+
+ /*
+ * Note we deliberately don't care about the HOLE flag. There is no need to
+ * read an extent that reads as zeroes. We will convert it to a hole or an
+ * allocated extent based on the command line arguments.
+ */
+ e.zero = (entries[i+1] & LIBNBD_STATE_ZERO) != 0;
+
if (extent_list_append (ret, e) == -1) {
perror ("realloc");
exit (EXIT_FAILURE);
diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
index f586fc5..69fac2a 100644
--- a/copy/nbdcopy.h
+++ b/copy/nbdcopy.h
@@ -100,7 +100,7 @@ struct command {
struct extent {
uint64_t offset;
uint64_t length;
- bool hole;
+ bool zero;
};
DEFINE_VECTOR_TYPE(extent_list, struct extent);
diff --git a/copy/synch-copying.c b/copy/synch-copying.c
index 043893f..2712c10 100644
--- a/copy/synch-copying.c
+++ b/copy/synch-copying.c
@@ -68,7 +68,7 @@ synch_copying (void)
for (i = 0; i < exts.size; ++i) {
assert (exts.ptr[i].length <= count);
- if (exts.ptr[i].hole) {
+ if (exts.ptr[i].zero) {
if (!dst.ops->synch_trim (&dst, offset, exts.ptr[i].length) &&
!dst.ops->synch_zero (&dst, offset, exts.ptr[i].length)) {
/* If neither trimming nor efficient zeroing are possible,
--
2.26.2