Port the zero strategy from nbdkit file plugin, improving the
reliability and compatibility with block devices on modern kernels.
Local rw now has capability flags: can_punch_hole, can_zero_range,
can_fallocate, and can_zeroout. The flags are initialized based on the
type of the file descriptor and compile-time checks:
- For a regular file, we enable can_punch_hole, can_zero_range, and
can_fallocate.
- For a block device, we enable can_punch_hole, can_zero_range, and
can_zeroout.
- For pipes and sockets we don't enable anything.
When calling zero() for the first time, we try the following methods,
returning on the first success:
- If we don't need to allocate, try to punch a hole.
- Try to zero the range
- Try to combine punching a hole and fallocate
- Try BLKZEROOUT ioctl
If a method is not supported, we disable the capability flag, so the
next call can try only what works.
The fallocate and ioctl wrappers return false when the call is not
supported by the underlying storage so we can disable the capability.
Previously the process would exit with an error.
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
copy/file-ops.c | 108 +++++++++++++++++++++++++++++++++++++-----------
copy/main.c | 19 +++++++++
copy/nbdcopy.h | 6 +++
3 files changed, 110 insertions(+), 23 deletions(-)
diff --git a/copy/file-ops.c b/copy/file-ops.c
index d0b9447..c4d8a67 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -99,6 +99,12 @@ file_synch_write (struct rw *rw,
}
}
+static inline bool
+is_not_supported (int err)
+{
+ return err == ENOTSUP || err == EOPNOTSUPP;
+}
+
static bool
file_punch_hole(int fd, uint64_t offset, uint64_t count)
{
@@ -108,6 +114,9 @@ file_punch_hole(int fd, uint64_t offset, uint64_t count)
r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, count);
if (r == -1) {
+ if (is_not_supported (errno))
+ return false;
+
perror ("fallocate: FALLOC_FL_PUNCH_HOLE");
exit (EXIT_FAILURE);
}
@@ -120,14 +129,17 @@ static bool
file_zero_range(int fd, uint64_t offset, uint64_t count)
{
#ifdef FALLOC_FL_ZERO_RANGE
- int r;
+ int r;
- r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
- if (r == -1) {
- perror ("fallocate: FALLOC_FL_ZERO_RANGE");
- exit (EXIT_FAILURE);
- }
- return true;
+ r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count);
+ if (r == -1) {
+ if (is_not_supported (errno))
+ return false;
+
+ perror ("fallocate: FALLOC_FL_ZERO_HOLE");
+ exit (EXIT_FAILURE);
+ }
+ return true;
#endif
return false;
}
@@ -136,15 +148,18 @@ static bool
file_zeroout(int fd, uint64_t offset, uint64_t count)
{
#ifdef BLKZEROOUT
- int r;
- uint64_t range[2] = {offset, count};
+ int r;
+ uint64_t range[2] = {offset, count};
- r = ioctl (fd, BLKZEROOUT, &range);
- if (r == -1) {
- perror ("ioctl: BLKZEROOUT");
- exit (EXIT_FAILURE);
- }
- return true;
+ r = ioctl (fd, BLKZEROOUT, &range);
+ if (r == -1) {
+ if (errno == ENOTTY)
+ return false;
+
+ perror ("ioctl: BLKZEROOUT");
+ exit (EXIT_FAILURE);
+ }
+ return true;
#endif
return false;
}
@@ -152,7 +167,14 @@ file_zeroout(int fd, uint64_t offset, uint64_t count)
static bool
file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count)
{
- return file_punch_hole(rw->u.local.fd, offset, count);
+ if (rw->u.local.can_punch_hole) {
+ if (file_punch_hole(rw->u.local.fd, offset, count))
+ return true;
+
+ rw->u.local.can_punch_hole = false;
+ }
+
+ return false;
}
static bool
@@ -160,17 +182,57 @@ file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count,
bool allocate)
{
int fd = rw->u.local.fd;
- if (S_ISREG (rw->u.local.stat.st_mode)) {
- if (allocate) {
- return file_zero_range (fd, offset, count);
+ /* The first call will try several options, discovering the capabilities of
+ * the underlying storage, and disabling non working options. The next calls
+ * will try only what works.
+ *
+ * If we don't need to allocate try to punch a hole. This works for both
+ * files and block devices with modern kernels.
+ */
+
+ if (!allocate && rw->u.local.can_punch_hole) {
+ if (file_punch_hole (fd, offset, count))
+ return true;
+
+ rw->u.local.can_punch_hole = false;
+ }
+
+ /* Try to zero the range. This works for both files and block devices with
+ * modern kernels.
+ */
+
+ if (rw->u.local.can_zero_range) {
+ if (file_zero_range (fd, offset, count))
+ return true;
+
+ rw->u.local.can_zero_range = false;
+ }
+
+ /* If we can punch a hole and fallocate, we can combine both operations. This
+ * is expected to be more efficient than actually writing zeroes. This works
+ * only for files.
+ */
+
+ if (rw->u.local.can_punch_hole && rw->u.local.can_fallocate) {
+ if (file_punch_hole (fd, offset, count)) {
+ if (fallocate (fd, 0, offset, count))
+ return true;
+
+ rw->u.local.can_fallocate = false;
} else {
- return file_punch_hole (fd, offset, count);
+ rw->u.local.can_punch_hole = false;
}
}
- else if (S_ISBLK (rw->u.local.stat.st_mode) &&
+
+ /* Finally try BLKZEROOUT. This works only for block device if offset and
+ * count are aligned to device sector size.
+ */
+ else if (rw->u.local.can_zeroout &&
IS_ALIGNED (offset | count, rw->u.local.sector_size)) {
- /* Always allocate, discard and gurantee zeroing. */
- return file_zeroout (fd, offset, count);
+ if (file_zeroout(fd, offset, count))
+ return true;
+
+ rw->u.local.can_zeroout = false;
}
return false;
diff --git a/copy/main.c b/copy/main.c
index 68a6030..78fdff8 100644
--- a/copy/main.c
+++ b/copy/main.c
@@ -523,6 +523,16 @@ open_local (const char *prog,
#ifdef BLKSSZGET
if (ioctl (fd, BLKSSZGET, &rw->u.local.sector_size))
fprintf (stderr, "warning: cannot get sector size: %s: %m",
rw->name);
+#endif
+ /* Possible efficient zero methods for block device. */
+#ifdef FALLOC_FL_PUNCH_HOLE
+ rw->u.local.can_punch_hole = true;
+#endif
+#ifdef FALLOC_FL_ZERO_RANGE
+ rw->u.local.can_zero_range = true;
+#endif
+#ifdef BLKZEROOUT
+ rw->u.local.can_zeroout = true;
#endif
}
else if (S_ISREG (rw->u.local.stat.st_mode)) {
@@ -530,6 +540,15 @@ open_local (const char *prog,
rw->ops = &file_ops;
rw->size = rw->u.local.stat.st_size;
rw->u.local.seek_hole_supported = seek_hole_supported (fd);
+
+ /* Possible efficient zero methods for regular file. */
+#ifdef FALLOC_FL_PUNCH_HOLE
+ rw->u.local.can_punch_hole = true;
+#endif
+#ifdef FALLOC_FL_ZERO_RANGE
+ rw->u.local.can_zero_range = true;
+#endif
+ rw->u.local.can_fallocate = true;
}
else {
/* Probably stdin/stdout, a pipe or a socket. Set size == -1
diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
index 21d09bf..e33ea8e 100644
--- a/copy/nbdcopy.h
+++ b/copy/nbdcopy.h
@@ -51,6 +51,12 @@ struct rw {
struct stat stat;
bool seek_hole_supported;
int sector_size;
+
+ /* We try to use the most efficient zeroing first. If an efficient zero
+ * method is not available, we disable the flag so next time we use the
+ * working method.
+ */
+ bool can_punch_hole, can_zero_range, can_fallocate, can_zeroout;
} local;
struct {
handles handles; /* For NBD, one handle per connection. */
--
2.26.2