On platforms with 64K page sizes (RHEL + aarch64 or ppc64le) we have
hit a problem in test-memory-allocator-malloc-mlock.sh which appears
to be caused by:
1. The test requests a 2K mlocked buffer.
2. The allocated buffer straddles a page boundary sometimes.
3. Linux expands the mlock request to two pages, ie. the requested
size becomes 2 * 64K = 128K
4. The standard mlock limit on Linux is 64K so the mlock request is
rejected and the test fails.
Also POSIX requires mlock parameters to be page aligned (although
Linux does not require this).
This commit attempts to fix both problems by ensuring the mlock
request is always page aligned. On the separate mlock path we use the
usual vector reserve function to do overflow checks and so on, but
then we reallocate the buffer using posix_memalign (or valloc) so it
is page aligned before locking it. This requires a duplicate
allocation and memcpy but this path is not performance critical.
I extended the test mlock size from 2K to 8K to try to provoke the
original bug (if it still happens) more often.
Thanks: Laszlo Ersek
---
configure.ac | 4 +-
common/allocators/malloc.c | 89 ++++++++++++++++++---
tests/test-memory-allocator-malloc-mlock.sh | 16 +++-
3 files changed, 94 insertions(+), 15 deletions(-)
diff --git a/configure.ac b/configure.ac
index c426aec8..1ab85e3e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -384,7 +384,9 @@ AC_CHECK_FUNCS([\
pipe \
pipe2 \
ppoll \
- posix_fadvise])
+ posix_fadvise \
+ posix_memalign \
+ valloc])
dnl Check for structs and members.
AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [[#include <dirent.h>]])
diff --git a/common/allocators/malloc.c b/common/allocators/malloc.c
index eea44432..986a45c9 100644
--- a/common/allocators/malloc.c
+++ b/common/allocators/malloc.c
@@ -36,6 +36,9 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
@@ -46,6 +49,7 @@
#include <nbdkit-plugin.h>
#include "cleanup.h"
+#include "rounding.h"
#include "vector.h"
#include "allocator.h"
@@ -81,13 +85,46 @@ m_alloc_free (struct allocator *a)
}
}
-/* Extend the underlying bytearray if needed. mlock if requested. */
+/* Extend the underlying bytearray if needed. */
static int
-extend (struct m_alloc *ma, uint64_t new_size)
+extend_without_mlock (struct m_alloc *ma, uint64_t new_size)
{
- ACQUIRE_WRLOCK_FOR_CURRENT_SCOPE (&ma->lock);
size_t old_size, n;
+ if (ma->ba.cap < new_size) {
+ old_size = ma->ba.cap;
+ n = new_size - ma->ba.cap;
+
+ if (bytearray_reserve (&ma->ba, n) == -1) {
+ nbdkit_error ("realloc: %m");
+ return -1;
+ }
+
+ /* Initialize the newly allocated memory to 0. */
+ memset (ma->ba.ptr + old_size, 0, n);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_MLOCK
+static int
+extend_with_mlock (struct m_alloc *ma, uint64_t new_size)
+{
+ size_t old_size, n;
+ void *p;
+#ifdef HAVE_POSIX_MEMALIGN
+ int r;
+#endif
+
+ long pagesize = sysconf (_SC_PAGE_SIZE);
+ assert (pagesize > 0);
+
+ /* POSIX requires both base and size of mlock to be page aligned,
+ * even though Linux does not.
+ */
+ new_size = ROUND_UP (new_size, pagesize);
+
if (ma->ba.cap < new_size) {
old_size = ma->ba.cap;
n = new_size - ma->ba.cap;
@@ -96,30 +133,62 @@ extend (struct m_alloc *ma, uint64_t new_size)
/* Since the memory might be moved by realloc, we must unlock the
* original array.
*/
- if (ma->use_mlock)
+ if (ma->use_mlock && ma->ba.ptr != NULL)
munlock (ma->ba.ptr, ma->ba.cap);
#endif
+ /* Call the normal vector reserve function so that it does the
+ * overflow checks.
+ */
if (bytearray_reserve (&ma->ba, n) == -1) {
nbdkit_error ("realloc: %m");
return -1;
}
+ assert (new_size <= ma->ba.cap);
/* Initialize the newly allocated memory to 0. */
memset (ma->ba.ptr + old_size, 0, n);
-#ifdef HAVE_MLOCK
- if (ma->use_mlock) {
- if (mlock (ma->ba.ptr, ma->ba.cap) == -1) {
- nbdkit_error ("allocator=malloc: mlock: %m");
- return -1;
- }
+ /* But now reallocate it as a page-aligned buffer so we can mlock it. */
+#ifdef HAVE_POSIX_MEMALIGN
+ if ((r = posix_memalign (&p, pagesize, ma->ba.cap)) != 0) {
+ errno = r;
+ nbdkit_error ("posix_memalign: %m");
+ return -1;
}
+#elif HAVE_VALLOC
+ p = valloc (ma->ba.cap);
+ if (p == NULL) {
+ nbdkit_error ("valloc: %m");
+ return -1;
+ }
+#else
+#error "this platform does not have posix_memalign or valloc"
#endif
+
+ memcpy (p, ma->ba.ptr, new_size);
+ if (mlock (ma->ba.ptr, new_size) == -1) {
+ nbdkit_error ("allocator=malloc: mlock: %m");
+ return -1;
+ }
}
return 0;
}
+#endif /* HAVE_MLOCK */
+
+static int
+extend (struct m_alloc *ma, uint64_t new_size)
+{
+ ACQUIRE_WRLOCK_FOR_CURRENT_SCOPE (&ma->lock);
+
+#ifdef HAVE_MLOCK
+ if (ma->use_mlock)
+ return extend_with_mlock (ma, new_size);
+#endif
+
+ return extend_without_mlock (ma, new_size);
+}
static int
m_alloc_set_size_hint (struct allocator *a, uint64_t size_hint)
diff --git a/tests/test-memory-allocator-malloc-mlock.sh
b/tests/test-memory-allocator-malloc-mlock.sh
index 4ef92db6..983ef22b 100755
--- a/tests/test-memory-allocator-malloc-mlock.sh
+++ b/tests/test-memory-allocator-malloc-mlock.sh
@@ -46,9 +46,9 @@ if ! nbdkit memory --dump-plugin | grep -sq mlock=yes; then
fi
# ulimit -l is measured in kilobytes and so for this test must be at
-# least 2 (kilobytes) and we actually check it's a bit larger to allow
+# least 8 (kilobytes) and we actually check it's a bit larger to allow
# room for error. On Linux the default is usually 64.
-requires test `ulimit -l` -gt 8
+requires test `ulimit -l` -gt 16
sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
files="memory-allocator-malloc-mlock.pid $sock"
@@ -57,17 +57,21 @@ cleanup_fn rm -f $files
# Run nbdkit with memory plugin.
start_nbdkit -P memory-allocator-malloc-mlock.pid -U $sock \
- memory 2048 allocator=malloc,mlock=true
+ memory 8192 allocator=malloc,mlock=true
nbdsh --connect "nbd+unix://?socket=$sock" \
-c '
-# Write some stuff to the beginning, middle and end.
+# Write some stuff.
buf1 = b"1" * 512
h.pwrite(buf1, 0)
buf2 = b"2" * 512
h.pwrite(buf2, 1024)
buf3 = b"3" * 512
h.pwrite(buf3, 1536)
+buf4 = b"4" * 512
+h.pwrite(buf4, 4096)
+buf5 = b"5" * 1024
+h.pwrite(buf5, 8192-1024)
# Read it back.
buf11 = h.pread(len(buf1), 0)
@@ -76,4 +80,8 @@ buf22 = h.pread(len(buf2), 1024)
assert buf2 == buf22
buf33 = h.pread(len(buf3), 1536)
assert buf3 == buf33
+buf44 = h.pread(len(buf4), 4096)
+assert buf4 == buf44
+buf55 = h.pread(len(buf5), 8192-1024)
+assert buf5 == buf55
'
--
2.32.0