Instead of locking around every request, use explicit and (slightly)
more fine-grained locking, and change the thread model to parallel. The
same change is made to both the memory plugin and the data plugin.
This improves performance slightly. Using fio with 8 threads and
multi-conn enabled with -C 8:
Before:
read: IOPS=103k, BW=401MiB/s (420MB/s)(46.0GiB/120002msec)
write: IOPS=103k, BW=401MiB/s (420MB/s)(46.0GiB/120002msec)
After:
read: IOPS=112k, BW=437MiB/s (458MB/s)(51.2GiB/120001msec)
write: IOPS=112k, BW=437MiB/s (458MB/s)(51.2GiB/120001msec)
For comparison:
The memory plugin implemented using a simple malloc instead of a
sparse array:
read: IOPS=133k, BW=518MiB/s (544MB/s)(60.7GiB/120002msec)
write: IOPS=133k, BW=518MiB/s (543MB/s)(60.7GiB/120002msec)
Directly running fio against /dev/shm:
read: IOPS=1018k, BW=3978MiB/s (4171MB/s)(466GiB/120001msec)
write: IOPS=1018k, BW=3979MiB/s (4172MB/s)(466GiB/120001msec)
---
plugins/data/data.c | 22 +++++++++++++++++++---
plugins/memory/memory.c | 21 +++++++++++++++++++--
2 files changed, 38 insertions(+), 5 deletions(-)
diff --git a/plugins/data/data.c b/plugins/data/data.c
index 11a7b69..1d4c5b9 100644
--- a/plugins/data/data.c
+++ b/plugins/data/data.c
@@ -39,6 +39,8 @@
#include <inttypes.h>
#include <string.h>
+#include <pthread.h>
+
#if defined(HAVE_GNUTLS) && defined(HAVE_GNUTLS_BASE64_DECODE2)
#include <gnutls/gnutls.h>
#endif
@@ -56,8 +58,11 @@ static int64_t size = -1;
/* Size of data specified on the command line. */
static int64_t data_size = -1;
-/* Sparse array. */
+/* Sparse array - the lock must be held when accessing this from
+ * connected callbacks.
+ */
static struct sparse_array *sa;
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
/* Debug directory operations (-D data.dir=1). */
int data_debug_dir;
@@ -305,7 +310,7 @@ data_dump_plugin (void)
#endif
}
-#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
/* No meaning, just used as the address for the handle. */
static int dh;
@@ -335,7 +340,9 @@ data_can_multi_conn (void *handle)
static int
data_pread (void *handle, void *buf, uint32_t count, uint64_t offset)
{
+ pthread_mutex_lock (&lock);
sparse_array_read (sa, buf, count, offset);
+ pthread_mutex_unlock (&lock);
return 0;
}
@@ -343,14 +350,21 @@ data_pread (void *handle, void *buf, uint32_t count, uint64_t offset)
static int
data_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset)
{
- return sparse_array_write (sa, buf, count, offset);
+ int r;
+
+ pthread_mutex_lock (&lock);
+ r = sparse_array_write (sa, buf, count, offset);
+ pthread_mutex_unlock (&lock);
+ return r;
}
/* Zero. */
static int
data_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
{
+ pthread_mutex_lock (&lock);
sparse_array_zero (sa, count, offset);
+ pthread_mutex_unlock (&lock);
return 0;
}
@@ -358,7 +372,9 @@ data_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
static int
data_trim (void *handle, uint32_t count, uint64_t offset)
{
+ pthread_mutex_lock (&lock);
sparse_array_zero (sa, count, offset);
+ pthread_mutex_unlock (&lock);
return 0;
}
diff --git a/plugins/memory/memory.c b/plugins/memory/memory.c
index 0084401..e27e127 100644
--- a/plugins/memory/memory.c
+++ b/plugins/memory/memory.c
@@ -42,6 +42,8 @@
#include <errno.h>
#include <assert.h>
+#include <pthread.h>
+
#include <nbdkit-plugin.h>
#include "sparse.h"
@@ -52,7 +54,11 @@ static int64_t size = 0;
/* Debug directory operations (-D memory.dir=1). */
int memory_debug_dir;
+/* Sparse array - the lock must be held when accessing this from
+ * connected callbacks.
+ */
static struct sparse_array *sa;
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void
memory_load (void)
@@ -106,7 +112,7 @@ memory_open (int readonly)
return NBDKIT_HANDLE_NOT_NEEDED;
}
-#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
/* Get the disk size. */
static int64_t
@@ -126,7 +132,9 @@ memory_can_multi_conn (void *handle)
static int
memory_pread (void *handle, void *buf, uint32_t count, uint64_t offset)
{
+ pthread_mutex_lock (&lock);
sparse_array_read (sa, buf, count, offset);
+ pthread_mutex_unlock (&lock);
return 0;
}
@@ -134,14 +142,21 @@ memory_pread (void *handle, void *buf, uint32_t count, uint64_t offset)
static int
memory_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset)
{
- return sparse_array_write (sa, buf, count, offset);
+ int r;
+
+ pthread_mutex_lock (&lock);
+ r = sparse_array_write (sa, buf, count, offset);
+ pthread_mutex_unlock (&lock);
+ return r;
}
/* Zero. */
static int
memory_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
{
+ pthread_mutex_lock (&lock);
sparse_array_zero (sa, count, offset);
+ pthread_mutex_unlock (&lock);
return 0;
}
@@ -149,7 +164,9 @@ memory_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
static int
memory_trim (void *handle, uint32_t count, uint64_t offset)
{
+ pthread_mutex_lock (&lock);
sparse_array_zero (sa, count, offset);
+ pthread_mutex_unlock (&lock);
return 0;
}
--
2.19.2