Currently the cache and cow filters break up large requests into many
single block-sized requests to the underlying plugin. For some
plugins (e.g. curl) this is very inefficient and causes huge
slowdowns.

For example, I tested nbdkit + curl alone versus nbdkit + cache +
curl against a slow, remote VMware server. A simple run of
virt-inspector was at least 6-7 times slower. (It was so slow that I
didn't let it run to completion; I estimated the slowdown multiple
from interim debug messages.)

As part of fixing this, introduce a blk_read_multiple primitive. At
the moment the implementation is simply a loop that calls blk_read,
so this won't make any performance difference yet; this change is
just refactoring.
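
For illustration, here is a minimal standalone sketch (not nbdkit
code; the demo_* names and the BLKSIZE value are hypothetical) of the
same loop-based decomposition: the caller computes how many whole
blocks fit in the aligned body, and the multiple-block read makes one
per-block call, advancing the buffer each time.

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  #define BLKSIZE 4096          /* hypothetical block size */

  /* Hypothetical per-block reader: fills one whole block. */
  static int
  demo_read_block (uint64_t blknum, uint8_t *block)
  {
    memset (block, (int) (blknum & 0xff), BLKSIZE);
    return 0;
  }

  /* Loop-based multiple-block read, mirroring blk_read_multiple
   * in the patch below: one per-block call, buffer advanced by
   * BLKSIZE each iteration.
   */
  static int
  demo_read_multiple (uint64_t blknum, uint64_t nrblocks, uint8_t *buf)
  {
    while (nrblocks > 0) {
      if (demo_read_block (blknum, buf) == -1)
        return -1;
      blknum++;
      nrblocks--;
      buf += BLKSIZE;
    }
    return 0;
  }

  int
  main (void)
  {
    /* An aligned body of 3 whole blocks starting at block 10, as
     * cache_pread/cow_pread compute it: nrblocks = count / BLKSIZE.
     */
    static uint8_t buf[3 * BLKSIZE];
    uint64_t blknum = 10, nrblocks = (3 * BLKSIZE) / BLKSIZE;

    if (demo_read_multiple (blknum, nrblocks, buf) == -1)
      return 1;
    printf ("read %" PRIu64 " blocks\n", nrblocks);
    return 0;
  }

Keeping (blknum, nrblocks) in the signature, rather than having
callers loop over single blocks themselves, should let a later commit
turn the loop into one large request to the plugin without touching
the callers.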
---
filters/cache/blk.h | 6 ++++++
filters/cow/blk.h | 6 ++++++
filters/cache/blk.c | 16 ++++++++++++++++
filters/cache/cache.c | 21 ++++++++-------------
filters/cow/blk.c | 20 ++++++++++++++++++--
filters/cow/cow.c | 21 ++++++++-------------
6 files changed, 62 insertions(+), 28 deletions(-)
diff --git a/filters/cache/blk.h b/filters/cache/blk.h
index 87c753e2..1ee33ed7 100644
--- a/filters/cache/blk.h
+++ b/filters/cache/blk.h
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
__attribute__((__nonnull__ (1, 3, 4)));
+/* As above, but read multiple blocks. */
+extern int blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+ __attribute__((__nonnull__ (1, 4, 5)));
+
/* If a single block is not cached, copy it from the plugin. */
extern int blk_cache (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index e6fd7417..b066c602 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
__attribute__((__nonnull__ (1, 3, 4)));
+/* Read multiple blocks from the overlay or plugin. */
+extern int blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+ __attribute__((__nonnull__ (1, 4, 5)));
+
/* Cache mode for blocks not already in overlay */
enum cache_mode {
BLK_CACHE_IGNORE, /* Do nothing */
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index f52f30e3..0d15411b 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -256,6 +256,22 @@ blk_read (nbdkit_next *next,
}
}
+int
+blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+{
+ while (nrblocks > 0) {
+ if (blk_read (next, blknum, block, err) == -1)
+ return -1;
+ blknum++;
+ nrblocks--;
+ block += blksize;
+ }
+
+ return 0;
+}
+
int
blk_cache (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 499aec68..9c081948 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -313,7 +313,7 @@ cache_pread (nbdkit_next *next,
uint32_t flags, int *err)
{
CLEANUP_FREE uint8_t *block = NULL;
- uint64_t blknum, blkoffs;
+ uint64_t blknum, blkoffs, nrblocks;
int r;
assert (!flags);
@@ -348,22 +348,17 @@ cache_pread (nbdkit_next *next,
}
/* Aligned body */
- /* XXX This breaks up large read requests into smaller ones, which
- * is a problem for plugins which have a large, fixed per-request
- * overhead (hello, curl). We should try to keep large requests
- * together as much as possible, but that requires us to be much
- * smarter here.
- */
- while (count >= blksize) {
+ nrblocks = count / blksize;
+ if (nrblocks > 0) {
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
- r = blk_read (next, blknum, buf, err);
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
if (r == -1)
return -1;
- buf += blksize;
- count -= blksize;
- offset += blksize;
- blknum++;
+ buf += nrblocks * blksize;
+ count -= nrblocks * blksize;
+ offset += nrblocks * blksize;
+ blknum += nrblocks;
}
/* Unaligned tail */
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 0f12d510..9d6e3e67 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -223,8 +223,8 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
*trimmed = state == BLOCK_TRIMMED;
}
-/* These are the block operations. They always read or write a single
- * whole block of size ‘blksize’.
+/* These are the block operations. They always read or write whole
+ * blocks of size ‘blksize’.
*/
int
blk_read (nbdkit_next *next,
@@ -280,6 +280,22 @@ blk_read (nbdkit_next *next,
}
}
+int
+blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+{
+ while (nrblocks > 0) {
+ if (blk_read (next, blknum, block, err) == -1)
+ return -1;
+ blknum++;
+ nrblocks--;
+ block += BLKSIZE;
+ }
+
+ return 0;
+}
+
int
blk_cache (nbdkit_next *next,
uint64_t blknum, uint8_t *block, enum cache_mode mode, int *err)
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 3bd09399..f74c0a34 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -210,7 +210,7 @@ cow_pread (nbdkit_next *next,
uint32_t flags, int *err)
{
CLEANUP_FREE uint8_t *block = NULL;
- uint64_t blknum, blkoffs;
+ uint64_t blknum, blkoffs, nrblocks;
int r;
if (!IS_ALIGNED (count | offset, BLKSIZE)) {
@@ -243,21 +243,16 @@ cow_pread (nbdkit_next *next,
}
/* Aligned body */
- /* XXX This breaks up large read requests into smaller ones, which
- * is a problem for plugins which have a large, fixed per-request
- * overhead (hello, curl). We should try to keep large requests
- * together as much as possible, but that requires us to be much
- * smarter here.
- */
- while (count >= BLKSIZE) {
- r = blk_read (next, blknum, buf, err);
+ nrblocks = count / BLKSIZE;
+ if (nrblocks > 0) {
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
if (r == -1)
return -1;
- buf += BLKSIZE;
- count -= BLKSIZE;
- offset += BLKSIZE;
- blknum++;
+ buf += nrblocks * BLKSIZE;
+ count -= nrblocks * BLKSIZE;
+ offset += nrblocks * BLKSIZE;
+ blknum += nrblocks;
}
/* Unaligned tail */
--
2.32.0