On 01/27/22 17:39, Richard W.M. Jones wrote:
Allow vectors to be page aligned (eg. for performance or page
protection reasons). To create a page aligned vector you have to use
<vector>_reserve_page_aligned instead of plain <vector>_reserve.
Note that <vector>_duplicate creates unaligned duplicates.
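For context, a minimal (untested) usage sketch of the new call; DEFINE_VECTOR_TYPE comes from common/utils/vector.h, while the byte_vector type and fill_one_page function are invented purely for illustration:

  #include <stdlib.h>

  #include "vector.h"

  DEFINE_VECTOR_TYPE (byte_vector, unsigned char);

  static int
  fill_one_page (void)
  {
    byte_vector v = { .ptr = NULL, .len = 0, .cap = 0 };

    /* Ask for a page-aligned allocation up front.  A later plain
     * _reserve (or _append) that grows the buffer goes through
     * realloc and is not guaranteed to keep the alignment.
     */
    if (byte_vector_reserve_page_aligned (&v, 4096) == -1)
      return -1;

    while (v.len < v.cap)
      v.ptr[v.len++] = 0;

    free (v.ptr);
    return 0;
  }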
---
configure.ac | 4 ++-
common/utils/vector.h | 12 +++++++
common/utils/vector.c | 82 +++++++++++++++++++++++++++++++++++++++++--
3 files changed, 94 insertions(+), 4 deletions(-)
diff --git a/configure.ac b/configure.ac
index c426aec8..1ab85e3e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -384,7 +384,9 @@ AC_CHECK_FUNCS([\
pipe \
pipe2 \
ppoll \
- posix_fadvise])
+ posix_fadvise \
+ posix_memalign \
+ valloc])
dnl Check for structs and members.
AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [[#include <dirent.h>]])
diff --git a/common/utils/vector.h b/common/utils/vector.h
index 1d04f812..50dd895d 100644
--- a/common/utils/vector.h
+++ b/common/utils/vector.h
@@ -101,6 +101,15 @@
sizeof (type)); \
} \
\
+ /* Same as _reserve, but the allocation will be page aligned. \
+ */ \
+ static inline int \
+ name##_reserve_page_aligned (name *v, size_t n) \
+ { \
+ return generic_vector_reserve_page_aligned ((struct generic_vector *)v, \
+ n, sizeof (type)); \
+ } \
+ \
/* Insert at i'th element. i=0 => beginning, i=len => append */ \
static inline int \
name##_insert (name *v, type elem, size_t i) \
@@ -197,4 +206,7 @@ struct generic_vector {
extern int generic_vector_reserve (struct generic_vector *v,
size_t n, size_t itemsize);
+extern int generic_vector_reserve_page_aligned (struct generic_vector *v,
+ size_t n, size_t itemsize);
+
#endif /* NBDKIT_VECTOR_H */
diff --git a/common/utils/vector.c b/common/utils/vector.c
index 54e6b810..8f0c33b1 100644
--- a/common/utils/vector.c
+++ b/common/utils/vector.c
@@ -34,15 +34,17 @@
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#include <errno.h>
+#include <assert.h>
#include "checked-overflow.h"
#include "vector.h"
-int
-generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
+static int
+calculate_capacity (struct generic_vector *v, size_t n, size_t itemsize,
+ size_t *newcap_r, size_t *newbytes_r)
{
- void *newptr;
size_t reqcap, reqbytes, newcap, newbytes, t;
/* New capacity requested. We must allocate this minimum (or fail).
@@ -71,6 +73,20 @@ generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
newbytes = reqbytes;
}
+ *newcap_r = newcap;
+ *newbytes_r = newbytes;
+ return 0;
+}
+
+int
+generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
+{
+ void *newptr;
+ size_t newcap, newbytes;
+
+ if (calculate_capacity (v, n, itemsize, &newcap, &newbytes) == -1)
+ return -1;
+
newptr = realloc (v->ptr, newbytes);
if (newptr == NULL)
return -1;
@@ -79,3 +95,63 @@ generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
v->cap = newcap;
return 0;
}
+
+/* Always allocates a buffer which is page aligned (both base and size). */
+int
+generic_vector_reserve_page_aligned (struct generic_vector *v,
+ size_t n, size_t itemsize)
+{
+#ifdef HAVE_POSIX_MEMALIGN
+ int r;
+#endif
+ void *newptr;
+ size_t newcap, newbytes;
+ long pagesize;
+ size_t extra, extra_items;
+
+ pagesize = sysconf (_SC_PAGE_SIZE);
+ assert (pagesize > 0);
+
+ if (calculate_capacity (v, n, itemsize, &newcap, &newbytes) == -1)
+ return -1;
+
+ /* If the new size (in bytes) is not a full page then we have to
+ * round up to the next page. Note we make the implicit assumption
+ * here that itemsize is divisible by pagesize, but I assume you
(1) This should be "pagesize is divisible by itemsize".
(2) I think this limitation belongs in the API description, in the
header file.
... In fact, we could even check it above, near assert (pagesize > 0).
+ * know what you're doing if you're using this function.
+ */
+ extra = newbytes & (pagesize - 1);
(3) Suggest updating the assert() to
assert (pagesize > 1)
:)
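Putting (1)-(3) together, the checks could look like this (sketch only,
reusing the assert already in the patch):

    pagesize = sysconf (_SC_PAGE_SIZE);
    assert (pagesize > 1);              /* per (3) */
    assert (pagesize % itemsize == 0);  /* per (1)/(2); also document it in vector.h */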
+ if (extra > 0) {
+ extra_items = (pagesize - extra) / itemsize;
  newbytes = k * itemsize
  pagesize = n * itemsize          ("n" items per page)

  newbytes = k * itemsize
           = M * pagesize + extra
           = M * (n * itemsize) + extra   ("M" pages fully covered)

so

  (k - M * n) * itemsize = extra

  pagesize - extra = n * itemsize - (k - M * n) * itemsize
                   = (n - (k - M * n)) * itemsize
                   = (n - k + M * n) * itemsize
                   = ((M + 1) * n - k) * itemsize

Thus

  extra_items = (M + 1) * n - k

Meaning we cover M pages in full, then add one page, then subtract the
actual element count we have. OK!
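A quick numeric spot-check (made-up numbers; pagesize = 4096, itemsize = 8,
so n = 512 items/page):

  k = 600 items  ->  newbytes = 4800 = 1 * 4096 + 704   (M = 1, extra = 704)
  extra_items    =   (4096 - 704) / 8 = 424 = (M + 1) * n - k = 2 * 512 - 600
  afterwards     :   newcap = 1024 items, newbytes = 8192 = exactly 2 pages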
+
+ if (ADD_OVERFLOW (newcap, extra_items, &newcap) ||
+ ADD_OVERFLOW (newbytes, extra_items * itemsize, &newbytes)) {
(4) "extra_items * itemsize" seems correct, but perhaps it's simpler to
write (pagesize - extra)?
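That is, something like (equivalent, untested):

    if (ADD_OVERFLOW (newcap, extra_items, &newcap) ||
        ADD_OVERFLOW (newbytes, pagesize - extra, &newbytes)) {
      errno = ENOMEM;
      return -1;
    }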
Looks OK to me otherwise!
Thanks,
Laszlo
+ errno = ENOMEM;
+ return -1;
+ }
+ }
+
+#ifdef HAVE_POSIX_MEMALIGN
+ if ((r = posix_memalign (&newptr, pagesize, newbytes)) != 0) {
+ errno = r;
+ return -1;
+ }
+#elif HAVE_VALLOC
+ newptr = valloc (newbytes);
+ if (newptr == NULL)
+ return -1;
+#else
+#error "this platform does not have posix_memalign or valloc"
+#endif
+
+ /* NB: reserve always makes the buffer larger so we don't have to
+ * deal with the case of a shrinking buffer here. Also reserve
+ * (like the underlying realloc) does not zero the reserved
+ * elements. So we're just copying over the old elements here.
+ */
+ memcpy (newptr, v->ptr, v->len * itemsize);
+ free (v->ptr);
+ v->ptr = newptr;
+ v->cap = newcap;
+ return 0;
+}