Continuing from the previous commit, reimplement blk_read_multiple so
that it does not break up "runs" of blocks which all have the same
cache state.
---
filters/cache/blk.c | 83 ++++++++++++++++++++++++++++++---------------
filters/cow/blk.c | 67 ++++++++++++++++++++++--------------
2 files changed, 96 insertions(+), 54 deletions(-)
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 0d15411b..f85ada35 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -44,6 +44,7 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
+#include <limits.h>
#include <errno.h>
#ifdef HAVE_SYS_STATVFS_H
@@ -193,26 +194,40 @@ blk_set_size (uint64_t new_size)
return 0;
}
-int
-blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
+static int
+_blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
{
off_t offset = blknum * blksize;
- enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED);
+ bool not_cached =
+ bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
+ uint64_t b, runblocks;
- reclaim (fd, &bm);
+ assert (nrblocks > 0);
if (cache_debug_verbose)
- nbdkit_debug ("cache: blk_read block %" PRIu64
+ nbdkit_debug ("cache: blk_read_multiple block %" PRIu64
" (offset %" PRIu64 ") is %s",
blknum, (uint64_t) offset,
- state == BLOCK_NOT_CACHED ? "not cached" :
- state == BLOCK_CLEAN ? "clean" :
- state == BLOCK_DIRTY ? "dirty" :
- "unknown");
+ not_cached ? "not cached" : "cached");
- if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
- unsigned n = blksize, tail = 0;
+ /* Find out how many of the following blocks form a "run" with the
+ * same cached/not-cached state. We can process that many blocks in
+ * one go.
+ */
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
+ bool s =
+ bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
+ if (not_cached != s)
+ break;
+ }
+
+ if (not_cached) { /* Read underlying plugin. */
+ unsigned n, tail = 0;
+
+ assert (blksize * runblocks <= UINT_MAX);
+ n = blksize * runblocks;
if (offset + n > size) {
tail = offset + n - size;
@@ -228,32 +243,44 @@ blk_read (nbdkit_next *next,
*/
memset (block + n, 0, tail);
- /* If cache-on-read, copy the block to the cache. */
+ /* If cache-on-read, copy the blocks to the cache. */
if (cache_on_read) {
if (cache_debug_verbose)
nbdkit_debug ("cache: cache-on-read block %" PRIu64
" (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
}
- bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
- lru_set_recently_accessed (blknum);
+ for (b = 0; b < runblocks; ++b) {
+ bitmap_set_blk (&bm, blknum + b, BLOCK_CLEAN);
+ lru_set_recently_accessed (blknum + b);
+ }
}
- return 0;
}
else { /* Read cache. */
- if (pread (fd, block, blksize, offset) == -1) {
+ if (pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
}
- lru_set_recently_accessed (blknum);
- return 0;
+ for (b = 0; b < runblocks; ++b)
+ lru_set_recently_accessed (blknum + b);
}
+
+ /* If all done, return. */
+ if (runblocks == nrblocks)
+ return 0;
+
+ /* Recurse to read remaining blocks. */
+ return _blk_read_multiple (next,
+ blknum + runblocks,
+ nrblocks - runblocks,
+ block + blksize * runblocks,
+ err);
}
int
@@ -261,15 +288,15 @@ blk_read_multiple (nbdkit_next *next,
uint64_t blknum, uint64_t nrblocks,
uint8_t *block, int *err)
{
- while (nrblocks > 0) {
- if (blk_read (next, blknum, block, err) == -1)
- return -1;
- blknum++;
- nrblocks--;
- block += blksize;
- }
+ reclaim (fd, &bm);
+ return _blk_read_multiple (next, blknum, nrblocks, block, err);
+}
- return 0;
+int
+blk_read (nbdkit_next *next,
+ uint64_t blknum, uint8_t *block, int *err)
+{
+ return blk_read_multiple (next, blknum, 1, block, err);
}
int
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 9d6e3e67..9e6c8879 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -79,6 +79,7 @@
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
+#include <limits.h>
#include <errno.h>
#include <sys/types.h>
@@ -227,29 +228,44 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
* blocks of size ‘blksize’.
*/
int
-blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
+blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
{
off_t offset = blknum * BLKSIZE;
enum bm_entry state;
+ uint64_t b, runblocks;
- /* The state might be modified from another thread - for example
- * another thread might write (BLOCK_NOT_ALLOCATED ->
- * BLOCK_ALLOCATED) while we are reading from the plugin, returning
- * the old data. However a read issued after the write returns
- * should always return the correct data.
+ /* Find out how many of the following blocks form a "run" with the
+ * same state. We can process that many blocks in one go.
+ *
+ * About the locking: The state might be modified from another
+ * thread - for example another thread might write
+ * (BLOCK_NOT_ALLOCATED -> BLOCK_ALLOCATED) while we are reading
+ * from the plugin, returning the old data. However a read issued
+ * after the write returns should always return the correct data.
*/
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
+
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
+ enum bm_entry s = bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_ALLOCATED);
+ if (state != s)
+ break;
+ }
}
if (cow_debug_verbose)
- nbdkit_debug ("cow: blk_read block %" PRIu64 " (offset %" PRIu64
") is %s",
+ nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
+ " (offset %" PRIu64 ") is %s",
blknum, (uint64_t) offset, state_to_string (state));
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
- unsigned n = BLKSIZE, tail = 0;
+ unsigned n, tail = 0;
+
+ assert (BLKSIZE * runblocks <= UINT_MAX);
+ n = BLKSIZE * runblocks;
if (offset + n > size) {
tail = offset + n - size;
@@ -264,36 +280,35 @@ blk_read (nbdkit_next *next,
* zeroing the tail.
*/
memset (block + n, 0, tail);
- return 0;
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
- if (pread (fd, block, BLKSIZE, offset) == -1) {
+ if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
}
- return 0;
}
else /* state == BLOCK_TRIMMED */ {
- memset (block, 0, BLKSIZE);
+ memset (block, 0, BLKSIZE * runblocks);
+ }
+
+ /* If all done, return. */
+ if (runblocks == nrblocks)
return 0;
- }
+
+ /* Recurse to read remaining blocks. */
+ return blk_read_multiple (next,
+ blknum + runblocks,
+ nrblocks - runblocks,
+ block + BLKSIZE * runblocks,
+ err);
}
int
-blk_read_multiple (nbdkit_next *next,
- uint64_t blknum, uint64_t nrblocks,
- uint8_t *block, int *err)
+blk_read (nbdkit_next *next,
+ uint64_t blknum, uint8_t *block, int *err)
{
- while (nrblocks > 0) {
- if (blk_read (next, blknum, block, err) == -1)
- return -1;
- blknum++;
- nrblocks--;
- block += BLKSIZE;
- }
-
- return 0;
+ return blk_read_multiple (next, blknum, 1, block, err);
}
int
--
2.32.0