For destinations, especially NBD, which have a preferred block size,
adjust our write strategy so we always write whole blocks / zeroes of
the preferred size.
---
copy/file-ops.c | 8 ++++
copy/main.c | 82 ++++++++++++++++++++++++++++++++++++-
copy/multi-thread-copying.c | 2 +
copy/nbd-ops.c | 21 ++++++++++
copy/nbdcopy.h | 5 +++
copy/null-ops.c | 7 ++++
copy/pipe-ops.c | 7 ++++
copy/synch-copying.c | 2 +
8 files changed, 132 insertions(+), 2 deletions(-)
diff --git a/copy/file-ops.c b/copy/file-ops.c
index 847043417..3daf55484 100644
--- a/copy/file-ops.c
+++ b/copy/file-ops.c
@@ -369,6 +369,13 @@ file_is_read_only (struct rw *rw)
return false;
}
+static uint64_t
+file_get_preferred_block_size (struct rw *rw)
+{
+ /* XXX Could return the filesystem block size here. */
+ return 4096;
+}
+
static bool
file_can_extents (struct rw *rw)
{
@@ -726,6 +733,7 @@ static struct rw_ops file_ops = {
.ops_name = "file_ops",
.close = file_close,
.is_read_only = file_is_read_only,
+ .get_preferred_block_size = file_get_preferred_block_size,
.can_extents = file_can_extents,
.can_multi_conn = file_can_multi_conn,
.start_multi_conn = file_start_multi_conn,
diff --git a/copy/main.c b/copy/main.c
index 67788b5d9..82537bafc 100644
--- a/copy/main.c
+++ b/copy/main.c
@@ -38,12 +38,15 @@
#include <libnbd.h>
+#include "isaligned.h"
#include "ispowerof2.h"
#include "human-size.h"
+#include "rounding.h"
#include "version.h"
#include "nbdcopy.h"
bool allocated; /* --allocated flag */
+uint64_t blocksize; /* destination block size */
unsigned connections = 4; /* --connections */
bool destination_is_zero; /* --destination-is-zero flag */
bool extents = true; /* ! --no-extents flag */
@@ -367,6 +370,29 @@ main (int argc, char *argv[])
if (threads < connections)
connections = threads;
+ /* Check the destination preferred block size. */
+ blocksize = dst->ops->get_preferred_block_size (dst);
+ if (!is_power_of_2 (blocksize)) {
+ fprintf (stderr, "%s: error: "
+ "destination block size is not a power of 2: %" PRIu64
"\n",
+ prog, blocksize);
+ exit (EXIT_FAILURE);
+ }
+ if (request_size < blocksize) {
+ fprintf (stderr, "%s: error: "
+ "request size (%u) < destination block size (%" PRIu64 "),
"
+ "use the --request-size option to pick a larger request size"
+ "\n",
+ prog, request_size, blocksize);
+ exit (EXIT_FAILURE);
+ }
+ if (blocksize > THREAD_WORK_SIZE) {
+ fprintf (stderr, "%s: error: "
+ "destination block size is too large for nbdcopy: %" PRIu64
"\n",
+ prog, blocksize);
+ exit (EXIT_FAILURE);
+ }
+
/* Truncate the destination to the same size as the source. Only
* has an effect on regular files.
*/
@@ -392,9 +418,10 @@ main (int argc, char *argv[])
print_rw (src, "nbdcopy: src", stderr);
print_rw (dst, "nbdcopy: dst", stderr);
fprintf (stderr, "nbdcopy: connections=%u requests=%u threads=%u "
- "synchronous=%s\n",
+ "synchronous=%s blocksize=%" PRIu64 "\n",
connections, max_requests, threads,
- synchronous ? "true" : "false");
+ synchronous ? "true" : "false",
+ blocksize);
}
/* If multi-conn is enabled on either side, then at this point we
@@ -537,6 +564,57 @@ default_get_extents (struct rw *rw, uintptr_t index,
}
}
+/* Reading extents from the source drives writes and zeroes to the
+ * destination. We need to adjust the extents list so that any
+ * extents smaller than the destination block size are eliminated.
+ */
+void
+adjust_extents_for_blocksize (extent_list *exts)
+{
+ size_t i, j;
+ uint64_t end;
+
+ if (blocksize <= 1)
+ return;
+
+ /* Note we iterate [0..len-2] because the very last extent is
+ * permitted to be shorter than a whole block.
+ */
+ for (i = 0; i < exts->len - 1; ++i) {
+ assert (IS_ALIGNED (exts->ptr[i].offset, blocksize));
+ if (IS_ALIGNED (exts->ptr[i].length, blocksize))
+ continue;
+
+ if (!exts->ptr[i].zero) /* data */ {
+ /* Data extent: round the length up to the next block, delete
+ * any completely overlapped extents, and if still needed move
+ * up the start of the next extent.
+ */
+ exts->ptr[i].length = ROUND_UP (exts->ptr[i].length, blocksize);
+ end = exts->ptr[i].offset + exts->ptr[i].length;
+
+ for (j = i+1; j < exts->len && exts->ptr[j].offset < end; ++j)
{
+ if (exts->ptr[j].offset + exts->ptr[j].length <= end)
+ extent_list_remove (exts, j);
+ if (exts->ptr[j].offset < end)
+ exts->ptr[j].offset = end;
+ }
+ }
+ else {
+ /* Zero extent: Round the length down to blocksize, and if now
+ * zero-length, remove it.
+ */
+ exts->ptr[i].length = ROUND_DOWN (exts->ptr[i].length, blocksize);
+ if (i+1 < exts->len)
+ exts->ptr[i+1].offset = exts->ptr[i].offset + exts->ptr[i].length;
+ if (exts->ptr[i].length == 0) {
+ extent_list_remove (exts, i);
+ i--;
+ }
+ }
+ }
+}
+
/* Implementations of get_polling_fd and asynch_notify_* for backends
* which don't support polling.
*/
diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c
index b17ca598b..b436ba779 100644
--- a/copy/multi-thread-copying.c
+++ b/copy/multi-thread-copying.c
@@ -154,6 +154,8 @@ worker_thread (void *indexp)
else
default_get_extents (src, index, offset, count, &exts);
+ adjust_extents_for_blocksize (&exts);
+
for (i = 0; i < exts.len; ++i) {
struct command *command;
struct buffer *buffer;
diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
index dfc62f203..1134ddd4e 100644
--- a/copy/nbd-ops.c
+++ b/copy/nbd-ops.c
@@ -195,6 +195,26 @@ nbd_ops_is_read_only (struct rw *rw)
return false;
}
+static uint64_t
+nbd_ops_get_preferred_block_size (struct rw *rw)
+{
+ struct rw_nbd *rwn = (struct rw_nbd *) rw;
+ struct nbd_handle *nbd;
+ int64_t r;
+
+ if (rwn->handles.len == 0)
+ return 4096;
+
+ nbd = rwn->handles.ptr[0];
+ r = nbd_get_block_size (nbd, LIBNBD_SIZE_PREFERRED);
+ if (r == -1) {
+ fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ());
+ exit (EXIT_FAILURE);
+ }
+
+ return r == 0 ? 4096 : (uint64_t) r;
+}
+
static bool
nbd_ops_can_extents (struct rw *rw)
{
@@ -491,6 +511,7 @@ static struct rw_ops nbd_ops = {
.ops_name = "nbd_ops",
.close = nbd_ops_close,
.is_read_only = nbd_ops_is_read_only,
+ .get_preferred_block_size = nbd_ops_get_preferred_block_size,
.can_extents = nbd_ops_can_extents,
.can_multi_conn = nbd_ops_can_multi_conn,
.start_multi_conn = nbd_ops_start_multi_conn,
diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
index e7fe1eab2..c6505a1d4 100644
--- a/copy/nbdcopy.h
+++ b/copy/nbdcopy.h
@@ -120,6 +120,9 @@ struct rw_ops {
/* Return true if this is a read-only connection. */
bool (*is_read_only) (struct rw *rw);
+ /* Read the preferred block size. */
+ uint64_t (*get_preferred_block_size) (struct rw *rw);
+
/* For source only, does it support reading extents? */
bool (*can_extents) (struct rw *rw);
@@ -206,12 +209,14 @@ struct rw_ops {
extern void default_get_extents (struct rw *rw, uintptr_t index,
uint64_t offset, uint64_t count,
extent_list *ret);
+extern void adjust_extents_for_blocksize (extent_list *exts);
extern void get_polling_fd_not_supported (struct rw *rw, uintptr_t index,
int *fd_rtn, int *direction_rtn);
extern void asynch_notify_read_write_not_supported (struct rw *rw,
uintptr_t index);
extern bool allocated;
+extern uint64_t blocksize;
extern unsigned connections;
extern bool destination_is_zero;
extern bool extents;
diff --git a/copy/null-ops.c b/copy/null-ops.c
index a38666d62..5e87f3502 100644
--- a/copy/null-ops.c
+++ b/copy/null-ops.c
@@ -66,6 +66,12 @@ null_is_read_only (struct rw *rw)
return false;
}
+static uint64_t
+null_get_preferred_block_size (struct rw *rw)
+{
+ return 1;
+}
+
static bool
null_can_extents (struct rw *rw)
{
@@ -154,6 +160,7 @@ static struct rw_ops null_ops = {
.ops_name = "null_ops",
.close = null_close,
.is_read_only = null_is_read_only,
+ .get_preferred_block_size = null_get_preferred_block_size,
.can_extents = null_can_extents,
.can_multi_conn = null_can_multi_conn,
.start_multi_conn = null_start_multi_conn,
diff --git a/copy/pipe-ops.c b/copy/pipe-ops.c
index f9b8599a3..f2a1f0090 100644
--- a/copy/pipe-ops.c
+++ b/copy/pipe-ops.c
@@ -72,6 +72,12 @@ pipe_is_read_only (struct rw *rw)
return false;
}
+static uint64_t
+pipe_get_preferred_block_size (struct rw *rw)
+{
+ return 1;
+}
+
static bool
pipe_can_extents (struct rw *rw)
{
@@ -165,6 +171,7 @@ static struct rw_ops pipe_ops = {
.close = pipe_close,
.is_read_only = pipe_is_read_only,
+ .get_preferred_block_size = pipe_get_preferred_block_size,
.can_extents = pipe_can_extents,
.can_multi_conn = pipe_can_multi_conn,
.start_multi_conn = pipe_start_multi_conn,
diff --git a/copy/synch-copying.c b/copy/synch-copying.c
index c9899c343..8d6c4d3d0 100644
--- a/copy/synch-copying.c
+++ b/copy/synch-copying.c
@@ -70,6 +70,8 @@ synch_copying (void)
else
default_get_extents (src, 0, offset, count, &exts);
+ adjust_extents_for_blocksize (&exts);
+
for (i = 0; i < exts.len; ++i) {
assert (exts.ptr[i].length <= count);
--
2.32.0