When we turned the result of .can_zero into a tri-state for filters
back in v1.16, the intention was that the backend would emulate by
calling into .pwrite instead of .zero.  The nozero filter already had
an implementation of that algorithm, but it needs to live in backend.c
to be used by all filters, rather than repeatedly recoded in each
filter that wants .zero support by .pwrite emulation.
Discovered because the luks filter asked for .pwrite emulation in
.can_zero without providing a .zero override, resulting in:
$ qemu-img create -f luks --object secret,data=LETMEPASS,id=sec0 -o key-secret=sec0
encrypted.img 100M
Formatting 'encrypted.img', fmt=luks size=104857600 key-secret=sec0
$ rm -f data.img
$ truncate -s 100M data.img
$ nbdkit file encrypted.img --filter=luks passphrase=LETMEPASS --run 'nbdcopy data.img
$nbd'
nbdkit: backend.c:718: backend_zero: Assertion `c->can_zero > NBDKIT_ZERO_NONE'
failed.
write at offset 0 failed: Transport endpoint is not connected
nbdkit: nbdkit command was killed by signal 6
As a result of moving it into the backend, the nozero filter is
simplified, but the corresponding test has to change.  Since
--filter=log does not alter .can_zero, the emulation actually occurs
earlier in the stack (prior to calling into any filter, regardless of
whether log is placed before or after nozero, rather than at the point
of the nozero filter), so there is now no discernable difference
between sock3 and sock4 (we can eliminate the duplicate), and the old
sock5a no longer shows a Zero request.  [Historically, when the nozero
filter was first written, we were relying on qemu to send packets, and
the use of the log filter was essential to ensure that we were getting
the desired NBD_CMD_WRITE_ZEROES across various versions of qemu; but
now that the test uses nbdkit, we know that h.zero() is doing exactly
that, so the reduced length of the testsuite is no longer risky.]
Reported-by: Richard W.M. Jones <rjones(a)redhat.com>
Fixes: fd361554 ("filters: Cache and change semantics of can_zero", v1.15.1)
---
 docs/nbdkit-filter.pod  |  4 +--
 server/backend.c        | 37 ++++++++++++++++++-
 filters/nozero/nozero.c | 39 +++------------------
 tests/test-nozero.sh    | 78 +++++++++++++++++------------------------
 4 files changed, 75 insertions(+), 83 deletions(-)
diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index 4ce5bda0..c4ec1004 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -898,8 +898,8 @@ This intercepts the plugin C<.zero> method and can be used to
modify
 zero requests.
 This function will not be called if C<.can_zero> returned
-C<NBDKIT_ZERO_NONE>; in turn, the filter should not call
-C<next-E<gt>zero> if C<next-E<gt>can_zero> returned
+C<NBDKIT_ZERO_NONE> or C<NBDKIT_ZERO_EMULATE>; in turn, the filter
+should not call C<next-E<gt>zero> if C<next-E<gt>can_zero>
returned
 C<NBDKIT_ZERO_NONE>.
 On input, the parameter C<flags> may include C<NBDKIT_FLAG_MAY_TRIM>
diff --git a/server/backend.c b/server/backend.c
index 7d46b99d..30ec4c24 100644
--- a/server/backend.c
+++ b/server/backend.c
@@ -728,7 +728,42 @@ backend_zero (struct context *c,
                   b->name, count, offset,
                   !!(flags & NBDKIT_FLAG_MAY_TRIM), fua, fast);
-  r = b->zero (c, count, offset, flags, err);
+  if (c->can_zero == NBDKIT_ZERO_NATIVE)
+    r = b->zero (c, count, offset, flags, err);
+  else { /* NBDKIT_ZERO_EMULATE */
+    int writeflags = 0;
+    bool need_flush = false;
+
+    if (fast) {
+      *err = ENOTSUP;
+      return -1;
+    }
+
+    if (fua) {
+      if (c->can_fua == NBDKIT_FUA_EMULATE)
+        need_flush = true;
+      else
+        writeflags = NBDKIT_FLAG_FUA;
+    }
+
+    while (count) {
+      /* Always contains zeroes, but we can't use const or else gcc 9
+       * will use .rodata instead of .bss and inflate the binary size.
+       */
+      static /* const */ char buffer[MAX_REQUEST_SIZE];
+      uint32_t limit = MIN (count, MAX_REQUEST_SIZE);
+
+      if (limit == count && need_flush)
+        writeflags = NBDKIT_FLAG_FUA;
+
+      if (backend_pwrite (c, buffer, limit, offset, writeflags, err) == -1)
+        return -1;
+      offset += limit;
+      count -= limit;
+    }
+    r = 0;
+  }
+
   if (r == -1) {
     assert (*err);
     if (!fast)
diff --git a/filters/nozero/nozero.c b/filters/nozero/nozero.c
index 7c5d67c4..5c04975b 100644
--- a/filters/nozero/nozero.c
+++ b/filters/nozero/nozero.c
@@ -1,5 +1,5 @@
 /* nbdkit
- * Copyright (C) 2018-2020 Red Hat Inc.
+ * Copyright (C) 2018-2022 Red Hat Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -49,8 +49,6 @@
 #undef IGNORE
 #endif
-#define MAX_WRITE (64 * 1024 * 1024)
-
 static enum ZeroMode {
   NONE,
   EMULATE,
@@ -159,14 +157,10 @@ nozero_zero (nbdkit_next *next,
              void *handle, uint32_t count, uint64_t offs, uint32_t flags,
              int *err)
 {
-  int writeflags = 0;
-  bool need_flush = false;
-
-  assert (zeromode != NONE);
+  assert (zeromode != NONE && zeromode != EMULATE);
   if (flags & NBDKIT_FLAG_FAST_ZERO) {
     assert (fastzeromode != NOFAST);
-    if (fastzeromode == SLOW ||
-        (fastzeromode == DEFAULT && zeromode == EMULATE)) {
+    if (fastzeromode == SLOW) {
       *err = ENOTSUP;
       return -1;
     }
@@ -177,32 +171,7 @@ nozero_zero (nbdkit_next *next,
   if (zeromode == NOTRIM)
     flags &= ~NBDKIT_FLAG_MAY_TRIM;
-  if (zeromode != EMULATE)
-    return next->zero (next, count, offs, flags, err);
-
-  if (flags & NBDKIT_FLAG_FUA) {
-    if (next->can_fua (next) == NBDKIT_FUA_EMULATE)
-      need_flush = true;
-    else
-      writeflags = NBDKIT_FLAG_FUA;
-  }
-
-  while (count) {
-    /* Always contains zeroes, but we can't use const or else gcc 9
-     * will use .rodata instead of .bss and inflate the binary size.
-     */
-    static /* const */ char buffer[MAX_WRITE];
-    uint32_t size = MIN (count, MAX_WRITE);
-
-    if (size == count && need_flush)
-      writeflags = NBDKIT_FLAG_FUA;
-
-    if (next->pwrite (next, buffer, size, offs, writeflags, err) == -1)
-      return -1;
-    offs += size;
-    count -= size;
-  }
-  return 0;
+  return next->zero (next, count, offs, flags, err);
 }
 static struct nbdkit_filter filter = {
diff --git a/tests/test-nozero.sh b/tests/test-nozero.sh
index 61911a42..5940ad33 100755
--- a/tests/test-nozero.sh
+++ b/tests/test-nozero.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # nbdkit
-# Copyright (C) 2018-2020 Red Hat Inc.
+# Copyright (C) 2018-2022 Red Hat Inc.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
@@ -36,17 +36,15 @@ set -x
 sock2=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
 sock3=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
-sock4=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
-sock5a=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
-sock5b=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
-sock6=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+sock4a=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+sock4b=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+sock5=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
 files="nozero1.img nozero1.log
        nozero2.img nozero2.log $sock2 nozero2.pid
        nozero3.img nozero3.log $sock3 nozero3.pid
-       nozero4.img nozero4.log $sock4 nozero4.pid
-       nozero5.img nozero5a.log nozero5b.log $sock5a $sock5b
-       nozero5a.pid nozero5b.pid
-       nozero6.img nozero6.log $sock6 nozero6.pid"
+       nozero4.img nozero4a.log nozero4b.log $sock4a $sock4b
+       nozero4a.pid nozero4b.pid
+       nozero5.img nozero5.log $sock5 nozero5.pid"
 rm -f $files
 fail=0
@@ -60,14 +58,12 @@ cleanup ()
     cat nozero2.log || :
     echo "Log 3 file contents:"
     cat nozero3.log || :
-    echo "Log 4 file contents:"
-    cat nozero4.log || :
-    echo "Log 5a file contents:"
-    cat nozero5a.log || :
-    echo "Log 5b file contents:"
-    cat nozero5b.log || :
-    echo "Log 6 file contents:"
-    cat nozero6.log || :
+    echo "Log 4a file contents:"
+    cat nozero4a.log || :
+    echo "Log 4b file contents:"
+    cat nozero4b.log || :
+    echo "Log 5 file contents:"
+    cat nozero5.log || :
     rm -f $files
 }
 cleanup_fn cleanup
@@ -79,10 +75,9 @@ cp nozero1.img nozero2.img
 cp nozero1.img nozero3.img
 cp nozero1.img nozero4.img
 cp nozero1.img nozero5.img
-cp nozero1.img nozero6.img
 # Debug number of blocks and block size in the images.
-for f in {1..6}; do
+for f in {1..5}; do
     stat -c "%n: %b allocated blocks of size %B bytes, total size %s" \
          nozero$f.img
     sizes[$f]=$(stat -c %b nozero$f.img)
@@ -99,24 +94,21 @@ fi
 # Run several parallel nbdkit; to compare the logs and see what changes.
 # 1: unfiltered (above), check that nbdsh sends ZERO request and plugin trims
 # 2: log before filter with zeromode=none (default), to ensure no ZERO request
-# 3: log before filter with zeromode=emulate, to ensure ZERO from client
-# 4: log after filter with zeromode=emulate, to ensure no ZERO to plugin
-# 5a/b: both sides of nbd plugin: even though server side does not advertise
+# 3: log after filter with zeromode=emulate, to ensure no ZERO to plugin
+# 4a/b: both sides of nbd plugin: even though server side does not advertise
 # ZERO, the client side still exposes it, and just skips calling nbd's .zero
-# 6: log after filter with zeromode=notrim, to ensure plugin does not trim
+# 5: log after filter with zeromode=notrim, to ensure plugin does not trim
 start_nbdkit -P nozero2.pid -U $sock2 --filter=log --filter=nozero \
        file logfile=nozero2.log nozero2.img
-start_nbdkit -P nozero3.pid -U $sock3 --filter=log --filter=nozero \
+start_nbdkit -P nozero3.pid -U $sock3 --filter=nozero --filter=log \
        file logfile=nozero3.log nozero3.img zeromode=emulate
-start_nbdkit -P nozero4.pid -U $sock4 --filter=nozero --filter=log \
-       file logfile=nozero4.log nozero4.img zeromode=emulate
-# Start 5b before 5a so that cleanup visits the client before the server
-start_nbdkit -P nozero5b.pid -U $sock5b --filter=log \
-       nbd logfile=nozero5b.log socket=$sock5a
-start_nbdkit -P nozero5a.pid -U $sock5a --filter=log --filter=nozero \
-       file logfile=nozero5a.log nozero5.img
-start_nbdkit -P nozero6.pid -U $sock6 --filter=nozero --filter=log \
-       file logfile=nozero6.log nozero6.img zeromode=notrim
+# Start 4b before 4a so that cleanup visits the client before the server
+start_nbdkit -P nozero4b.pid -U $sock4b --filter=log \
+       nbd logfile=nozero4b.log socket=$sock4a
+start_nbdkit -P nozero4a.pid -U $sock4a --filter=log --filter=nozero \
+       file logfile=nozero4a.log nozero4.img
+start_nbdkit -P nozero5.pid -U $sock5 --filter=nozero --filter=log \
+       file logfile=nozero5.log nozero5.img zeromode=notrim
 # Perform the zero write.
 nbdsh -u "nbd+unix://?socket=$sock2" -c '
@@ -124,38 +116,34 @@ assert not h.can_zero()
 h.pwrite (bytearray(1024*1024), 0)
 '
 nbdsh -u "nbd+unix://?socket=$sock3" -c 'h.zero(1024*1024, 0)'
-nbdsh -u "nbd+unix://?socket=$sock4" -c 'h.zero(1024*1024, 0)'
-nbdsh -u "nbd+unix://?socket=$sock5b" -c 'h.zero(1024*1024, 0)'
-nbdsh -u "nbd+unix://?socket=$sock6" -c 'h.zero(1024*1024, 0)'
+nbdsh -u "nbd+unix://?socket=$sock4b" -c 'h.zero(1024*1024, 0)'
+nbdsh -u "nbd+unix://?socket=$sock5" -c 'h.zero(1024*1024, 0)'
 # Check for expected ZERO vs. WRITE results
-grep 'connection=1 Zero' nozero1.log
+grep 'connection=1 Zero' nozero1.log || fail=1
 if grep 'connection=1 Zero' nozero2.log; then
     echo "filter should have prevented zero"
     fail=1
 fi
-grep 'connection=1 Zero' nozero3.log
-if grep 'connection=1 Zero' nozero4.log; then
+if grep 'connection=1 Zero' nozero3.log; then
     echo "filter should have converted zero into write"
     fail=1
 fi
-grep 'connection=1 Zero' nozero5b.log
-if grep 'connection=1 Zero' nozero5a.log; then
+if grep 'connection=1 Zero' nozero4a.log; then
     echo "nbdkit should have converted zero into write before nbd plugin"
     fail=1
 fi
-grep 'connection=1 Zero' nozero6.log
+grep 'connection=1 Zero' nozero5.log
 # Sanity check on contents - all 5 files should read identically
 cmp nozero1.img nozero2.img
 cmp nozero2.img nozero3.img
 cmp nozero3.img nozero4.img
 cmp nozero4.img nozero5.img
-cmp nozero5.img nozero6.img
-# Sanity check on sparseness: images 2-6 should not be sparse (although the
+# Sanity check on sparseness: images 2-5 should not be sparse (although the
 # filesystem may have reserved additional space due to our writes)
-for i in {2..6}; do
+for i in {2..5}; do
     if test "$(stat -c %b nozero$i.img)" -lt "${sizes[$i]}"; then
         echo "nozero$i.img was trimmed by mistake"
         fail=1
-- 
2.36.1