Current code uses NBD_CMD_TRIM for writing zeroes without allocating
space (--allocate not specified). This incorrect and can lead to
corrupting the target image.
NBD manual warns about NBD_CMD_TRIM:
After issuing this command, a client MUST NOT make any assumptions
about the contents of the export affected by this command, until
overwriting it again with NBD_CMD_WRITE or NBD_CMD_WRITE_ZEROES
Add allocate flag to the zero operation, to allow zeroing by punching
hole (default) or zeroing by allocating space.
nbd-ops use NBD_CMD_WRITE_ZEROES with or without NBD_FLAG_NO_HOLE
flag. file-ops use fallocate with PUNCH_HOLE or ZERO_RANGE flag.
The copy-sparse tests was modified to require zero operation with
may_trim flag.
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
copy/copy-sparse.sh | 66 ++++++++++++++---------------
copy/file-ops.c | 82 +++++++++++++++++++++++--------------
copy/multi-thread-copying.c | 20 +++------
copy/nbd-ops.c | 11 ++---
copy/nbdcopy.h | 5 ++-
copy/null-ops.c | 4 +-
copy/synch-copying.c | 8 ++--
7 files changed, 106 insertions(+), 90 deletions(-)
diff --git a/copy/copy-sparse.sh b/copy/copy-sparse.sh
index 846767b..c43b41a 100755
--- a/copy/copy-sparse.sh
+++ b/copy/copy-sparse.sh
@@ -33,7 +33,7 @@ cleanup_fn rm -f $out
# Copy from a sparse data disk to an nbdkit-eval-plugin instance which
# is logging everything. This allows us to see exactly what nbdcopy
-# is writing, to ensure it is writing and trimming the target as
+# is writing, to ensure it is writing and zeroing the target as
# expected.
$VG nbdcopy -S 0 -- \
[ nbdkit --exit-with-parent data data='
@@ -60,38 +60,38 @@ if [ "$(cat $out)" != "pwrite 1 4294967296
pwrite 32768 0
pwrite 32768 1073709056
pwrite 32768 4294934528
-trim 134184960 32768
-trim 134184960 4160749568
-trim 134184960 939524096
-trim 134217728 1073741824
-trim 134217728 1207959552
-trim 134217728 134217728
-trim 134217728 1342177280
-trim 134217728 1476395008
-trim 134217728 1610612736
-trim 134217728 1744830464
-trim 134217728 1879048192
-trim 134217728 2013265920
-trim 134217728 2147483648
-trim 134217728 2281701376
-trim 134217728 2415919104
-trim 134217728 2550136832
-trim 134217728 268435456
-trim 134217728 2684354560
-trim 134217728 2818572288
-trim 134217728 2952790016
-trim 134217728 3087007744
-trim 134217728 3221225472
-trim 134217728 3355443200
-trim 134217728 3489660928
-trim 134217728 3623878656
-trim 134217728 3758096384
-trim 134217728 3892314112
-trim 134217728 402653184
-trim 134217728 4026531840
-trim 134217728 536870912
-trim 134217728 671088640
-trim 134217728 805306368" ]; then
+zero 134184960 32768 may_trim
+zero 134184960 4160749568 may_trim
+zero 134184960 939524096 may_trim
+zero 134217728 1073741824 may_trim
+zero 134217728 1207959552 may_trim
+zero 134217728 1342177280 may_trim
+zero 134217728 134217728 may_trim
+zero 134217728 1476395008 may_trim
+zero 134217728 1610612736 may_trim
+zero 134217728 1744830464 may_trim
+zero 134217728 1879048192 may_trim
+zero 134217728 2013265920 may_trim
+zero 134217728 2147483648 may_trim
+zero 134217728 2281701376 may_trim
+zero 134217728 2415919104 may_trim
+zero 134217728 2550136832 may_trim
+zero 134217728 2684354560 may_trim
+zero 134217728 268435456 may_trim
+zero 134217728 2818572288 may_trim
+zero 134217728 2952790016 may_trim
+zero 134217728 3087007744 may_trim
+zero 134217728 3221225472 may_trim
+zero 134217728 3355443200 may_trim
+zero 134217728 3489660928 may_trim
+zero 134217728 3623878656 may_trim
+zero 134217728 3758096384 may_trim
+zero 134217728 3892314112 may_trim
+zero 134217728 4026531840 may_trim
+zero 134217728 402653184 may_trim
+zero 134217728 536870912 may_trim
+zero 134217728 671088640 may_trim
+zero 134217728 805306368 may_trim" ]; then
echo "$0: output does not match expected"
exit 1
fi
diff --git a/copy/file-ops.c b/copy/file-ops.c
index 6d098c2..d22273a 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -221,11 +221,9 @@ file_synch_write (struct rw *rw,
}
static bool
-file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
+file_punch_hole(int fd, uint64_t offset, uint64_t count)
{
#ifdef FALLOC_FL_PUNCH_HOLE
- struct rw_file *rwf = (struct rw_file *)rw;
- int fd = rwf->fd;
int r;
r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
@@ -235,42 +233,66 @@ file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
exit (EXIT_FAILURE);
}
return true;
-#else /* !FALLOC_FL_PUNCH_HOLE */
- return false;
#endif
+ return false;
}
static bool
-file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count)
+file_zero_range(int fd, uint64_t offset, uint64_t count)
{
- struct rw_file *rwf = (struct rw_file *)rw;
-
- if (! rwf->is_block) {
#ifdef FALLOC_FL_ZERO_RANGE
- int fd = rwf->fd;
- int r;
+ int r;
- r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
- if (r == -1) {
- perror ("fallocate: FALLOC_FL_ZERO_RANGE");
- exit (EXIT_FAILURE);
- }
- return true;
-#endif
+ r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
+ if (r == -1) {
+ perror ("fallocate: FALLOC_FL_ZERO_RANGE");
+ exit (EXIT_FAILURE);
}
- else if (rwf->is_block && IS_ALIGNED (offset | count, rwf->sector_size))
{
+ return true;
+#endif
+ return false;
+}
+
+static bool
+file_zeroout(int fd, uint64_t offset, uint64_t count)
+{
#ifdef BLKZEROOUT
- int fd = rwf->fd;
- int r;
- uint64_t range[2] = {offset, count};
+ int r;
+ uint64_t range[2] = {offset, count};
- r = ioctl (fd, BLKZEROOUT, &range);
- if (r == -1) {
- perror ("ioctl: BLKZEROOUT");
- exit (EXIT_FAILURE);
- }
- return true;
+ r = ioctl (fd, BLKZEROOUT, &range);
+ if (r == -1) {
+ perror ("ioctl: BLKZEROOUT");
+ exit (EXIT_FAILURE);
+ }
+ return true;
#endif
+ return false;
+}
+
+static bool
+file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
+{
+ struct rw_file *rwf = (struct rw_file *)rw;
+
+ return file_punch_hole (rwf->fd, offset, count);
+}
+
+static bool
+file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)
+{
+ struct rw_file *rwf = (struct rw_file *)rw;
+
+ if (!rwf->is_block) {
+ if (allocate) {
+ return file_zero_range (rwf->fd, offset, count);
+ } else {
+ return file_punch_hole (rwf->fd, offset, count);
+ }
+ }
+ else if (IS_ALIGNED (offset | count, rwf->sector_size)) {
+ /* Always allocate, discard and gurantee zeroing. */
+ return file_zeroout (rwf->fd, offset, count);
}
return false;
@@ -320,9 +342,9 @@ file_asynch_trim (struct rw *rw, struct command *command,
static bool
file_asynch_zero (struct rw *rw, struct command *command,
- nbd_completion_callback cb)
+ nbd_completion_callback cb, bool allocate)
{
- if (!file_synch_zero (rw, command->offset, command->slice.len))
+ if (!file_synch_zero (rw, command->offset, command->slice.len, allocate))
return false;
errno = 0;
if (cb.callback (cb.user_data, &errno) == -1) {
diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c
index 4f57054..768bca1 100644
--- a/copy/multi-thread-copying.c
+++ b/copy/multi-thread-copying.c
@@ -487,11 +487,12 @@ finished_read (void *vp, int *error)
* --destination-is-zero:
* do nothing
*
- * --allocated: we must write zeroes either using an efficient
+ * --allocated: write zeroes allocating space using an efficient
* zeroing command or writing a command of zeroes
*
- * (neither flag) try trimming if supported, else write zeroes
- * as above
+ * (neither flag) write zeroes punching a hole using an efficient
+ * zeroing command or fallback to writing a command
+ * of zeroes.
*
* This takes over ownership of the command and frees it eventually.
*/
@@ -503,22 +504,13 @@ fill_dst_range_with_zeroes (struct command *command)
if (destination_is_zero)
goto free_and_return;
- if (!allocated) {
- /* Try trimming. */
- if (dst->ops->asynch_trim (dst, command,
- (nbd_completion_callback) {
- .callback = free_command,
- .user_data = command,
- }))
- return;
- }
-
/* Try efficient zeroing. */
if (dst->ops->asynch_zero (dst, command,
(nbd_completion_callback) {
.callback = free_command,
.user_data = command,
- }))
+ },
+ allocated))
return;
/* Fall back to loop writing zeroes. This is going to be slow
diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
index f001d02..388bd53 100644
--- a/copy/nbd-ops.c
+++ b/copy/nbd-ops.c
@@ -277,15 +277,16 @@ nbd_ops_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
}
static bool
-nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count)
+nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count,
+ bool allocate)
{
struct rw_nbd *rwn = (struct rw_nbd *) rw;
if (!rwn->can_zero)
return false;
- if (nbd_zero (rwn->handles.ptr[0],
- count, offset, LIBNBD_CMD_FLAG_NO_HOLE) == -1) {
+ if (nbd_zero (rwn->handles.ptr[0], count, offset,
+ allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) {
fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
exit (EXIT_FAILURE);
}
@@ -346,7 +347,7 @@ nbd_ops_asynch_trim (struct rw *rw, struct command *command,
static bool
nbd_ops_asynch_zero (struct rw *rw, struct command *command,
- nbd_completion_callback cb)
+ nbd_completion_callback cb, bool allocate)
{
struct rw_nbd *rwn = (struct rw_nbd *) rw;
@@ -357,7 +358,7 @@ nbd_ops_asynch_zero (struct rw *rw, struct command *command,
if (nbd_aio_zero (rwn->handles.ptr[command->index],
command->slice.len, command->offset,
- cb, LIBNBD_CMD_FLAG_NO_HOLE) == -1) {
+ cb, allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) {
fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
exit (EXIT_FAILURE);
}
diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
index 97e3b49..d553399 100644
--- a/copy/nbdcopy.h
+++ b/copy/nbdcopy.h
@@ -152,7 +152,8 @@ struct rw_ops {
bool (*synch_trim) (struct rw *rw, uint64_t offset, uint64_t count);
/* Synchronously zero. If not possible, returns false. */
- bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count);
+ bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count,
+ bool allocate);
/* Asynchronous I/O operations. These start the operation and call
* 'cb' on completion.
@@ -180,7 +181,7 @@ struct rw_ops {
* returns false.
*/
bool (*asynch_zero) (struct rw *rw, struct command *command,
- nbd_completion_callback cb);
+ nbd_completion_callback cb, bool allocate);
/* Number of asynchronous commands in flight for a particular thread. */
unsigned (*in_flight) (struct rw *rw, uintptr_t index);
diff --git a/copy/null-ops.c b/copy/null-ops.c
index b75ca1f..aed2a03 100644
--- a/copy/null-ops.c
+++ b/copy/null-ops.c
@@ -105,7 +105,7 @@ null_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
}
static bool
-null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count)
+null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)
{
return true;
}
@@ -144,7 +144,7 @@ null_asynch_trim (struct rw *rw, struct command *command,
static bool
null_asynch_zero (struct rw *rw, struct command *command,
- nbd_completion_callback cb)
+ nbd_completion_callback cb, bool allocate)
{
errno = 0;
if (cb.callback (cb.user_data, &errno) == -1) {
diff --git a/copy/synch-copying.c b/copy/synch-copying.c
index 985f005..17bda16 100644
--- a/copy/synch-copying.c
+++ b/copy/synch-copying.c
@@ -69,10 +69,10 @@ synch_copying (void)
assert (exts.ptr[i].length <= count);
if (exts.ptr[i].zero) {
- if (!dst->ops->synch_trim (dst, offset, exts.ptr[i].length) &&
- !dst->ops->synch_zero (dst, offset, exts.ptr[i].length)) {
- /* If neither trimming nor efficient zeroing are possible,
- * write zeroes the hard way.
+ if (!dst->ops->synch_zero (dst, offset, exts.ptr[i].length, false)
&&
+ !dst->ops->synch_zero (dst, offset, exts.ptr[i].length, true)) {
+ /* If efficient zeroing (punching a hole or allocating
+ * space) are possible, write zeroes the hard way.
*/
memset (buf, 0, exts.ptr[i].length);
dst->ops->synch_write (dst, buf, exts.ptr[i].length, offset);
--
2.26.2