On Mon, Jul 26, 2021 at 06:28:59PM +0100, Richard W.M. Jones wrote:
This allows you to choose a larger block size. I found experimentally
that this improves performance because of locality in access patterns.
The idea came from qcow2, which implicitly does the same thing because
of its relatively large cluster size (32K).
nbdkit + cache-filter with 4K block size + copy-on-read + curl
(to a very slow remote site):
=> virt-inspector took 22 mins
same with 64K block size:
=> virt-inspector took 19 mins
However, compared to a qcow2 file using copy-on-read, backed by
nbdkit + curl (i.e. an implicit 32K block size), we are still a lot
slower, possibly because having the cache inside virt-inspector
greatly reduces round trips:
=> virt-inspector took 13 mins
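
For illustration, the kind of command line this enables (the URL here
is just a placeholder):

# Serve a remote image through the cache filter, caching reads in 64K
# blocks.  nbdkit listens on the default NBD port.
nbdkit --filter=cache curl https://example.com/disk.img \
       cache-on-read=true cache-min-block-size=64K

# Run inspection against the NBD server.
virt-inspector --format=raw -a nbd://localhost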
---
filters/cache/nbdkit-cache-filter.pod | 9 ++++
tests/Makefile.am | 2 +
filters/cache/cache.h | 3 ++
filters/cache/blk.c | 2 +-
filters/cache/cache.c | 36 ++++++++++----
tests/test-cache-block-size.sh | 70 +++++++++++++++++++++++++++
6 files changed, 112 insertions(+), 10 deletions(-)
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index 2ac307e0..9511e91b 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -5,6 +5,7 @@ nbdkit-cache-filter - nbdkit caching filter
=head1 SYNOPSIS
nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe]
+ [cache-min-block-size=SIZE]
[cache-max-size=SIZE]
[cache-high-threshold=N]
[cache-low-threshold=N]
@@ -59,6 +60,14 @@ This is dangerous and can cause data loss, but this may be acceptable
if you only use it for testing or with data that you don't care about
or can cheaply reconstruct.
+=item B<cache-min-block-size=>SIZE
+
+Set the minimum block size used by the cache. This must be a power of
+2 and E<ge> 4096.
+
+The default is 4096, or the block size of the filesystem containing
+the temporary file which stores the cache, whichever is larger.
+
=item B<cache-max-size=>SIZE
=item B<cache-high-threshold=>N
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8e0304d4..7146a2d1 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1373,12 +1373,14 @@ EXTRA_DIST += test-blocksize.sh test-blocksize-extents.sh
# cache filter test.
TESTS += \
test-cache.sh \
+ test-cache-block-size.sh \
test-cache-on-read.sh \
test-cache-max-size.sh \
test-cache-unaligned.sh \
$(NULL)
EXTRA_DIST += \
test-cache.sh \
+ test-cache-block-size.sh \
test-cache-on-read.sh \
test-cache-max-size.sh \
test-cache-unaligned.sh \
diff --git a/filters/cache/cache.h b/filters/cache/cache.h
index a559adef..5c32c37c 100644
--- a/filters/cache/cache.h
+++ b/filters/cache/cache.h
@@ -45,6 +45,9 @@ extern enum cache_mode {
/* Size of a block in the cache. */
extern unsigned blksize;
+/* Minimum block size (cache-min-block-size parameter). */
+extern unsigned min_block_size;
+
/* Maximum size of the cache and high/low thresholds. */
extern int64_t max_size;
extern unsigned hi_thresh, lo_thresh;
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 19f79605..6276985f 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -149,7 +149,7 @@ blk_init (void)
nbdkit_error ("fstatvfs: %s: %m", tmpdir);
return -1;
}
- blksize = MAX (4096, statvfs.f_bsize);
+ blksize = MAX (min_block_size, statvfs.f_bsize);
nbdkit_debug ("cache: block size: %u", blksize);
bitmap_init (&bm, blksize, 2 /* bits per block */);
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 8af52106..48a20c3b 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -40,6 +40,7 @@
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
+#include <limits.h>
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
@@ -62,6 +63,7 @@
#include "blk.h"
#include "reclaim.h"
#include "isaligned.h"
+#include "ispowerof2.h"
#include "minmax.h"
#include "rounding.h"
@@ -70,7 +72,8 @@
*/
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
-unsigned blksize;
+unsigned blksize; /* actual block size (picked by blk.c) */
+unsigned min_block_size = 4096;
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
int64_t max_size = -1;
unsigned hi_thresh = 95, lo_thresh = 80;
@@ -80,13 +83,6 @@ const char *cor_path;
static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags,
int *err);
-static void
-cache_load (void)
-{
- if (blk_init () == -1)
- exit (EXIT_FAILURE);
-}
-
static void
cache_unload (void)
{
@@ -116,6 +112,19 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
return -1;
}
}
+ else if (strcmp (key, "cache-min-block-size") == 0) {
+ int64_t r;
+
+ r = nbdkit_parse_size (value);
+ if (r == -1)
+ return -1;
+ if (r < 4096 || !is_power_of_2 (r) || r > UINT_MAX) {
+ nbdkit_error ("cache-min-block-size is not a power of 2, or is too small or
too large");
+ return -1;
+ }
+ min_block_size = r;
+ return 0;
+ }
#ifdef HAVE_CACHE_RECLAIM
else if (strcmp (key, "cache-max-size") == 0) {
int64_t r;
@@ -220,6 +229,15 @@ cache_config_complete (nbdkit_next_config_complete *next,
return next (nxdata);
}
+static int
+cache_get_ready (int thread_model)
+{
+ if (blk_init () == -1)
+ return -1;
+
+ return 0;
+}
+
/* Get the file size, set the cache size. */
static int64_t
cache_get_size (nbdkit_next *next,
@@ -691,11 +709,11 @@ cache_cache (nbdkit_next *next,
static struct nbdkit_filter filter = {
.name = "cache",
.longname = "nbdkit caching filter",
- .load = cache_load,
.unload = cache_unload,
.config = cache_config,
.config_complete = cache_config_complete,
.config_help = cache_config_help,
+ .get_ready = cache_get_ready,
.prepare = cache_prepare,
.get_size = cache_get_size,
.can_cache = cache_can_cache,
diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh
new file mode 100755
index 00000000..a2a27407
--- /dev/null
+++ b/tests/test-cache-block-size.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cache
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="cache-block-size.img $sock cache-block-size.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Create an empty base image.
+truncate -s 128K cache-block-size.img
+
+# Run nbdkit with the caching filter.
+start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
+ file cache-block-size.img cache-min-block-size=64K
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+# Write some pattern data to the overlay and check it reads back OK.
+buf = b"abcd" * 16384
+h.pwrite(buf, 32768)
+zero = h.pread(32768, 0)
+assert zero == bytearray(32768)
+buf2 = h.pread(65536, 32768)
+assert buf == buf2
+
+# Flushing should write through to the underlying file.
+h.flush()
+
+with open("cache-block-size.img", "rb") as file:
+ zero = file.read(32768)
+ assert zero == bytearray(32768)
+ buf2 = file.read(65536)
+ assert buf == buf2
+'
I do not see how this checks that the min block size is respected. I
would expect something along these lines (see the sketch below):
1) read 32k
2) write to the underlying file past those 32k
3) read another 32k (from offset == 32k)
4) check that the last read returns the original data and not the data
   written in step 2
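
Untested sketch of what I mean, reusing the filenames from the test
above; note it assumes cache-on-read=true so that the initial read
actually populates the cache:

start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
             file cache-block-size.img \
             cache-min-block-size=64K cache-on-read=true

nbdsh --connect "nbd+unix://?socket=$sock" \
      -c '
# 1) Read the first 32k.  With cache-on-read and a 64k minimum block
# size this should pull the whole first 64k block into the cache.
zero = h.pread(32768, 0)
assert zero == bytearray(32768)

# 2) Bypass nbdkit and overwrite the second 32k of the underlying file.
with open("cache-block-size.img", "r+b") as file:
    file.seek(32768)
    file.write(b"WXYZ" * 8192)

# 3) + 4) Read the second 32k through the NBD connection.  If the 64k
# block size is respected this returns the original zeroes from the
# cache, not the data written in step 2.
buf = h.pread(32768, 32768)
assert buf == bytearray(32768)
'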