On 07/19/22 11:45, Richard W.M. Jones wrote:
I think the new assert from commit aaf5484462cd is wrong; from the
filter's perspective, it is OK if the trim/zero request extends beyond
the currently allocated disk size. The checkwrite_trim_zero() function
starts with this comment:
/* Trim and zero are effectively the same operation for this plugin.
* We have to check that the underlying plugin contains all zeroes.
*
* Note we don't check that the extents exactly match since a valid
* copying operation is to either add sparseness (qemu-img convert -S)
* or create a fully allocated target (nbdcopy --allocated).
The note states that the trim request/zero request (i.e., "count") may
extend beyond the extent list that is available from the underlying
plugin.
IOW, I think nbdkit_extents_full() does not guarantee that the final
extent ends exactly where "count" does. Here's the function:
/* This is a convenient wrapper around next_ops->extents which can
be
* used from filters where you want to get a complete set of extents
* covering the region [offset..offset+count-1].
*/
NBDKIT_DLL_PUBLIC struct nbdkit_extents *
nbdkit_extents_full (struct context *next_c,
uint32_t count, uint64_t offset, uint32_t flags,
int *err)
{
struct nbdkit_next_ops *next = &next_c->next;
struct nbdkit_extents *ret;
/* Clear REQ_ONE to ask the plugin for as much information as it is
* willing to return (the plugin may still truncate if it is too
* costly to provide everything).
*/
flags &= ~NBDKIT_FLAG_REQ_ONE;
ret = nbdkit_extents_new (offset, offset+count);
So this sets ret->start=offset and ret->end=offset+count, and
ret->extents to the empty vector.
if (ret == NULL) goto error0;
while (count > 0) {
const uint64_t old_offset = offset;
size_t i;
CLEANUP_EXTENTS_FREE struct nbdkit_extents *t
= nbdkit_extents_new (offset, offset+count);
if (t == NULL) goto error1;
if (next->extents (next_c, count, offset, flags, t, err) == -1)
goto error0;
for (i = 0; i < nbdkit_extents_count (t); ++i) {
const struct nbdkit_extent e = nbdkit_get_extent (t, i);
if (nbdkit_add_extent (ret, e.offset, e.length, e.type) == -1)
goto error1;
So here we add the underlying plugin's extents one by one, trimming each
in nbdkit_add_extent() if necessary. However, if the source extent list
does not extend up to offset+count, then our output ("ret") extent list
will also not extend up to that point.
One example: what if the input file is completely empty (zero bytes in
size)? I think that would even mean an extent list that has 0 entries.
Thus, I think the properties of the extent list produced by
nbdkit_extents_full() are crucial. They are:
- There may be 0 extents in the extent list.
- The first extent starts precisely at "offset".
- The (exclusive) end of the last extent is *less than or equal to*
"offset+count".
Based on these properties (especially the last two), I think we should /
could modify the checkwrite_trim_zero() function as follows:
diff --git a/filters/checkwrite/checkwrite.c
b/filters/checkwrite/checkwrite.c
index ce8fa137aa80..9abe64ca7aa0 100644
--- a/filters/checkwrite/checkwrite.c
+++ b/filters/checkwrite/checkwrite.c
@@ -164,91 +164,84 @@ static int
checkwrite_trim_zero (nbdkit_next *next,
void *handle, uint32_t count, uint64_t offset,
uint32_t flags, int *err)
{
/* If the plugin supports extents, speed this up by using them. */
if (next->can_extents (next)) {
size_t i, n;
CLEANUP_EXTENTS_FREE struct nbdkit_extents *exts =
nbdkit_extents_full (next, count, offset, 0, err);
if (exts == NULL)
return -1;
n = nbdkit_extents_count (exts);
- for (i = 0; i < n && count > 0; ++i) {
+ for (i = 0; i < n; ++i) {
const struct nbdkit_extent e = nbdkit_get_extent (exts, i);
- const uint64_t next_extent_offset = e.offset + e.length;
+ uint64_t left_in_extent = e.length;
+
+ assert (count >= left_in_extent);
/* Anything that reads back as zero is good. */
if ((e.type & NBDKIT_EXTENT_ZERO) != 0) {
- const uint64_t zerolen = MIN (count, next_extent_offset - offset);
-
- offset += zerolen;
- count -= zerolen;
+ offset += left_in_extent;
+ count -= left_in_extent;
continue;
}
/* Otherwise we have to read the underlying data and check. */
if (flags & NBDKIT_FLAG_FAST_ZERO) {
*err = ENOTSUP;
return -1;
}
- while (offset < next_extent_offset && count > 0) {
- size_t buflen = MIN (MAX_REQUEST_SIZE, count);
- buflen = MIN (buflen, next_extent_offset - offset);
+ while (left_in_extent > 0) {
+ size_t buflen = MIN (MAX_REQUEST_SIZE, left_in_extent);
CLEANUP_FREE char *buf = malloc (buflen);
if (buf == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
return -1;
}
if (next->pread (next, buf, buflen, offset, 0, err) == -1)
return -1;
if (! is_zero (buf, buflen))
return data_does_not_match (err);
count -= buflen;
offset += buflen;
+ left_in_extent -= buflen;
}
} /* for extent */
-
- /* Assert that the loop above has actually checked the whole
- * region. If this fires then it could be because
- * nbdkit_extents_full isn't returning a full range of extents for
- * the whole region ... or maybe the loop above is wrong.
- */
- assert (count == 0);
}
/* Otherwise the plugin does not support extents, so do this the
* slow way.
*/
else {
...
}
return 0;
}
Note that, with this change, the *only* things we use "count" for are:
- the initial nbdkit_extents_full() call,
- an assertion that we're still "inside count".
This makes sense, based on the properties of nbdkit_extents_full().
nbdkit_extents_full() gives us an extent list that starts precisely at
"offset", and extends as far as possible towards "offset+count". Once
we
have those extents, it's enough to just check the extents' contents; we
can ignore "count" altogether.
If the current extent is a zero extent, we can skip it in full; if it is
a data extent, then we need to chunk it with MAX_REQUEST_SIZE. Either
way, "count" will never run out.
(NB: the "assert (count == 0)" is an independent revert of
aaf5484462cd.)
A more aggressive simplification would be (for the same end goal) the
following. This eliminates the usage of the "count" variable altogether,
except in the initial nbdkit_extents_full() call:
diff --git a/filters/checkwrite/checkwrite.c
b/filters/checkwrite/checkwrite.c
index ce8fa137aa80..9dac501904eb 100644
--- a/filters/checkwrite/checkwrite.c
+++ b/filters/checkwrite/checkwrite.c
@@ -164,91 +164,80 @@ static int
checkwrite_trim_zero (nbdkit_next *next,
void *handle, uint32_t count, uint64_t offset,
uint32_t flags, int *err)
{
/* If the plugin supports extents, speed this up by using them. */
if (next->can_extents (next)) {
size_t i, n;
CLEANUP_EXTENTS_FREE struct nbdkit_extents *exts =
nbdkit_extents_full (next, count, offset, 0, err);
if (exts == NULL)
return -1;
n = nbdkit_extents_count (exts);
- for (i = 0; i < n && count > 0; ++i) {
+ for (i = 0; i < n; ++i) {
const struct nbdkit_extent e = nbdkit_get_extent (exts, i);
- const uint64_t next_extent_offset = e.offset + e.length;
+ uint64_t left_in_extent = e.length;
/* Anything that reads back as zero is good. */
if ((e.type & NBDKIT_EXTENT_ZERO) != 0) {
- const uint64_t zerolen = MIN (count, next_extent_offset - offset);
-
- offset += zerolen;
- count -= zerolen;
+ offset += left_in_extent;
continue;
}
/* Otherwise we have to read the underlying data and check. */
if (flags & NBDKIT_FLAG_FAST_ZERO) {
*err = ENOTSUP;
return -1;
}
- while (offset < next_extent_offset && count > 0) {
- size_t buflen = MIN (MAX_REQUEST_SIZE, count);
- buflen = MIN (buflen, next_extent_offset - offset);
+ while (left_in_extent > 0) {
+ size_t buflen = MIN (MAX_REQUEST_SIZE, left_in_extent);
CLEANUP_FREE char *buf = malloc (buflen);
if (buf == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
return -1;
}
if (next->pread (next, buf, buflen, offset, 0, err) == -1)
return -1;
if (! is_zero (buf, buflen))
return data_does_not_match (err);
- count -= buflen;
offset += buflen;
+ left_in_extent -= buflen;
}
} /* for extent */
-
- /* Assert that the loop above has actually checked the whole
- * region. If this fires then it could be because
- * nbdkit_extents_full isn't returning a full range of extents for
- * the whole region ... or maybe the loop above is wrong.
- */
- assert (count == 0);
}
/* Otherwise the plugin does not support extents, so do this the
* slow way.
*/
else {
...
}
return 0;
}
Laszlo