The whole point of the cache filter is to avoid visiting the plugin
more than once for a range of data; as such, passing cache requests
through to the plugin is wrong, and a client request for caching a
range of the file obviously means we want the data locally. Our
filter defaults to cache_on_read=false, where we normally only cache
data that has been written but not yet flushed; but it can be presumed
that an explicit cache request should always pull data locally,
regardless of the cache_on_read setting. And even when
cache_on_read=true, we can implement caching more efficiently than
discarding the buffer of a naive pread.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
filters/cache/nbdkit-cache-filter.pod | 6 +--
filters/cache/blk.h | 10 ++++-
filters/cache/blk.c | 50 ++++++++++++++++++++++++-
filters/cache/cache.c | 53 +++++++++++++++++++++++++++
4 files changed, 113 insertions(+), 6 deletions(-)
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index 5993831..8b50231 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -70,9 +70,9 @@ Limit the size of the cache to C<SIZE>. See L</CACHE MAXIMUM SIZE> below.
=item B<cache-on-read=true>
-Cache read requests as well as write requests. Any time a block is
-read from the plugin, it is saved in the cache (if there is sufficient
-space) so the same data can be served more quickly later.
+Cache read requests as well as write and cache requests. Any time a
+block is read from the plugin, it is saved in the cache (if there is
+sufficient space) so the same data can be served more quickly later.
Note that if the underlying data served by the plugin can be modified
by some other means (eg. something else can write to a file which is
diff --git a/filters/cache/blk.h b/filters/cache/blk.h
index 974a118..0d84f74 100644
--- a/filters/cache/blk.h
+++ b/filters/cache/blk.h
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018 Red Hat Inc.
+ * Copyright (C) 2018-2019 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -49,11 +49,17 @@ extern void blk_free (void);
/* Allocate or resize the cache file and bitmap. */
extern int blk_set_size (uint64_t new_size);
-/* Read a single block from the cache or plugin. */
+/* Read a single block from the cache or plugin. If cache_on_read is set,
+ * also ensure it is cached. */
extern int blk_read (struct nbdkit_next_ops *next_ops, void *nxdata,
uint64_t blknum, uint8_t *block, int *err)
__attribute__((__nonnull__ (1, 4, 5)));
+/* If a single block is not cached, copy it from the plugin.
+ *
+ * 'block' is a caller-supplied buffer of at least one cache block; it
+ * receives the data read from the plugin when the block was not yet
+ * cached.  The copy is made regardless of the cache-on-read setting.
+ * If the block is already cached the plugin is not read; where
+ * posix_fadvise is available a will-need hint is issued on the cache
+ * file instead.  Either way the block is marked recently accessed.
+ *
+ * Returns 0 on success or -1 on failure; *err is set when the plugin
+ * read or the write to the cache file fails.
+ */
+extern int blk_cache (struct nbdkit_next_ops *next_ops, void *nxdata,
+ uint64_t blknum, uint8_t *block, int *err)
+ __attribute__((__nonnull__ (1, 4, 5)));
+
/* Write to the cache and the plugin. */
extern int blk_writethrough (struct nbdkit_next_ops *next_ops, void *nxdata,
uint64_t blknum, const uint8_t *block,
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index acbed61..cf7145d 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2018 Red Hat Inc.
+ * Copyright (C) 2018-2019 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -217,6 +217,54 @@ blk_read (struct nbdkit_next_ops *next_ops, void *nxdata,
}
}
+/* Make sure a single block is present in the cache file.
+ *
+ * If the block is not cached yet, it is read from the plugin into
+ * 'block' (a scratch buffer of at least blksize bytes) and written to
+ * the cache file, regardless of the cache-on-read setting.  If it is
+ * already cached (clean or dirty) the plugin is not contacted; we only
+ * hint that the cached data will be needed soon, where posix_fadvise is
+ * available.  In both cases the block is marked recently accessed.
+ *
+ * Returns 0 on success, or -1 with *err set on failure.
+ */
+int
+blk_cache (struct nbdkit_next_ops *next_ops, void *nxdata,
+           uint64_t blknum, uint8_t *block, int *err)
+{
+  off_t offset = blknum * blksize;
+  enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED);
+
+  /* NOTE(review): state is sampled before reclaim() runs; presumably
+   * reclaim() never evicts the block being examined here - confirm.
+   */
+  reclaim (fd, &bm);
+
+  nbdkit_debug ("cache: blk_cache block %" PRIu64
+                " (offset %" PRIu64 ") is %s",
+                blknum, (uint64_t) offset,
+                state == BLOCK_NOT_CACHED ? "not cached" :
+                state == BLOCK_CLEAN ? "clean" :
+                state == BLOCK_DIRTY ? "dirty" :
+                "unknown");
+
+  if (state == BLOCK_NOT_CACHED) {
+    /* Read underlying plugin, copy to cache regardless of cache-on-read. */
+    if (next_ops->pread (nxdata, block, blksize, offset, 0, err) == -1)
+      return -1;
+
+    nbdkit_debug ("cache: cache block %" PRIu64
+                  " (offset %" PRIu64 ")",
+                  blknum, (uint64_t) offset);
+
+    if (pwrite (fd, block, blksize, offset) == -1) {
+      *err = errno;
+      nbdkit_error ("pwrite: %m");
+      return -1;
+    }
+    bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
+    lru_set_recently_accessed (blknum);
+  }
+  else {
+#if HAVE_POSIX_FADVISE
+    /* posix_fadvise returns the error number directly instead of
+     * setting errno, so propagate it to the caller through *err and
+     * copy it into errno for the benefit of %m.
+     */
+    int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
+    if (r) {
+      *err = r;
+      errno = r;
+      nbdkit_error ("posix_fadvise: %m");
+      return -1;
+    }
+#endif
+    lru_set_recently_accessed (blknum);
+  }
+  return 0;
+}
+
int
blk_writethrough (struct nbdkit_next_ops *next_ops, void *nxdata,
uint64_t blknum, const uint8_t *block, uint32_t flags,
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index e215cac..2d2f39d 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -230,6 +230,13 @@ cache_prepare (struct nbdkit_next_ops *next_ops, void *nxdata,
return 0;
}
+/* Override the plugin's .can_cache, because we are caching here instead.
+ * Unconditionally reports NBDKIT_CACHE_NATIVE without consulting
+ * next_ops, so the answer does not depend on what the plugin supports.
+ */
+static int
+cache_can_cache (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle)
+{
+ return NBDKIT_CACHE_NATIVE;
+}
+
/* Read data. */
static int
cache_pread (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -548,6 +555,50 @@ flush_dirty_block (uint64_t blknum, void *datav)
return 0; /* continue scanning and flushing. */
}
+/* Cache data: pull every block touched by the byte range
+ * [offset, offset+count) into the local cache via blk_cache.
+ *
+ * The range is widened to whole blocks first.  Returns 0 on success;
+ * on failure returns -1, either with *err set from a failed malloc or
+ * with whatever blk_cache reported.
+ */
+static int
+cache_cache (struct nbdkit_next_ops *next_ops, void *nxdata,
+             void *handle, uint32_t count, uint64_t offset,
+             uint32_t flags, int *err)
+{
+  CLEANUP_FREE uint8_t *scratch = NULL;
+  uint64_t blknum;
+  uint64_t span;   /* 64 bits: rounding out could exceed 32 bits */
+
+  assert (!flags);
+
+  scratch = malloc (blksize);
+  if (scratch == NULL) {
+    *err = errno;
+    nbdkit_error ("malloc: %m");
+    return -1;
+  }
+
+  /* Extend the head back to the start of its block, then round the
+   * tail up so that the span covers a whole number of blocks.
+   */
+  span = (uint64_t) count + offset % blksize;
+  span = ROUND_UP (span, blksize);
+
+  for (blknum = offset / blksize; span > 0; span -= blksize, blknum++) {
+    /* The lock is scoped to a single iteration, i.e. one block. */
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
+
+    if (blk_cache (next_ops, nxdata, blknum, scratch, err) == -1)
+      return -1;
+  }
+
+  return 0;
+}
+
static struct nbdkit_filter filter = {
.name = "cache",
.longname = "nbdkit caching filter",
@@ -558,10 +609,12 @@ static struct nbdkit_filter filter = {
.config_complete = cache_config_complete,
.prepare = cache_prepare,
.get_size = cache_get_size,
+ .can_cache = cache_can_cache,
.pread = cache_pread,
.pwrite = cache_pwrite,
.zero = cache_zero,
.flush = cache_flush,
+ .cache = cache_cache,
};
NBDKIT_REGISTER_FILTER(filter)
--
2.20.1