The sh plugin makes it easy to test whether we leak fds into a plugin
that fork()s as part of handling a client request:
./nbdkit -U - --filter=log --filter=stats sh - \
logfile=/dev/null statsfile=/dev/null \
--run 'qemu-io -r -f raw -c "r 0 1" $nbd' <<\EOF
case $1 in
get_size) echo 1m;;
pread) ls -l /proc/$$/fd >/dev/tty
dd iflag=count_bytes count=$3 if=/dev/zero || exit 1 ;;
*) exit 2 ;;
esac
EOF
Pre-patch, this demonstrates that we are leaking internal socket and
filter's log fds, as well as internal pipe fds fixed a couple patches
ago (the listing should only include fds 0, 1, 2, and whatever sh has
open on the temporary script file; bash picks 255 for that). This
patch deals with the server leaks, the next one with the filter leaks.
In the case of accept, we either require atomicity or a mutex to
ensure that it is not possible for one thread to be accepting a new
client while another thread is processing .pread or similar with a
fork(), so that we avoid this race:
client1 client2
fd1 = accept
.pread
fd2 = accept
fork
fcntl(F_SETFD, FD_CLOEXEC)
where the fd2 socket for the second client leaks into the fork for
handling the first client's pread callback. If the server is running
with a thread model of SERIALIZE_ALL_REQUESTS or stricter, then we
already have such a lock (although we have to tweak lock_request to
work with a NULL connection), but the race could bite a server in the
looser SERIALIZE_REQUESTS or PARALLEL thread models. But since we
just added the .fork_safe witness of whether a plugin can tolerate fd
leaks, we can now easily use that as the decision of whether to stick
with accept4() (not yet in POSIX, but documented [1] for several
years, and present on RHEL 6 and FreeBSD 10), or cripple parallel
plugins that aren't prepared for leaked fds (for platforms like Haiku
that still need atomic CLOEXEC [2]).
[1]
http://austingroupbugs.net/view.php?id=411
[2]
https://dev.haiku-os.org/ticket/15219
Signed-off-by: Eric Blake <eblake@redhat.com>
---
configure.ac | 1 +
server/internal.h | 6 ++----
common/utils/utils.c | 6 ++++--
server/locks.c | 4 ++--
server/plugins.c | 8 ++++++--
server/sockets.c | 32 ++++++++++++++++++++++++++++----
6 files changed, 43 insertions(+), 14 deletions(-)
diff --git a/configure.ac b/configure.ac
index cabec5c8..054ad64a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -193,6 +193,7 @@ AC_CHECK_HEADERS([\
dnl Check for functions in libc, all optional.
AC_CHECK_FUNCS([\
+ accept4 \
fdatasync \
get_current_dir_name \
mkostemp \
diff --git a/server/internal.h b/server/internal.h
index 6207f0cf..76f2139f 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -312,10 +312,8 @@ extern struct backend *filter_register (struct backend *next, size_t index,
extern void lock_init_thread_model (void);
extern void lock_connection (void);
extern void unlock_connection (void);
-extern void lock_request (struct connection *conn)
- __attribute__((__nonnull__ (1)));
-extern void unlock_request (struct connection *conn)
- __attribute__((__nonnull__ (1)));
+extern void lock_request (struct connection *conn);
+extern void unlock_request (struct connection *conn);
extern void lock_unload (void);
extern void unlock_unload (void);
diff --git a/common/utils/utils.c b/common/utils/utils.c
index 9ac3443b..32bc96a7 100644
--- a/common/utils/utils.c
+++ b/common/utils/utils.c
@@ -140,13 +140,15 @@ exit_status_to_nbd_error (int status, const char *cmd)
*/
int
set_cloexec (int fd) {
-#if defined SOCK_CLOEXEC && defined HAVE_MKOSTEMP && defined HAVE_PIPE2
+#if (defined SOCK_CLOEXEC && defined HAVE_MKOSTEMP && defined HAVE_PIPE2 && \
+     defined HAVE_ACCEPT4)
nbdkit_error ("prefer creating fds with CLOEXEC atomically set");
close (fd);
errno = EBADF;
return -1;
#else
-# if defined SOCK_CLOEXEC || defined HAVE_MKOSTEMP || defined HAVE_PIPE2
+# if (defined SOCK_CLOEXEC || defined HAVE_MKOSTEMP || defined HAVE_PIPE2 || \
+      defined HAVE_ACCEPT4)
# error "Unexpected: your system has incomplete atomic CLOEXEC support"
# endif
int f;
diff --git a/server/locks.c b/server/locks.c
index 1c50186e..739e22a1 100644
--- a/server/locks.c
+++ b/server/locks.c
@@ -77,7 +77,7 @@ lock_request (struct connection *conn)
pthread_mutex_lock (&all_requests_lock))
abort ();
- if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS &&
+  if (conn && thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS &&
pthread_mutex_lock (&conn->request_lock))
abort ();
@@ -91,7 +91,7 @@ unlock_request (struct connection *conn)
if (pthread_rwlock_unlock (&unload_prevention_lock))
abort ();
- if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS &&
+  if (conn && thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS &&
pthread_mutex_unlock (&conn->request_lock))
abort ();
diff --git a/server/plugins.c b/server/plugins.c
index 0e6c05b0..917f0eec 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -90,14 +90,18 @@ plugin_thread_model (struct backend *b)
int thread_model = p->plugin._thread_model;
int r;
- /* For now, we leak fds on all platforms; once that is fixed, this
- * restriction can be limited to only occur when !HAVE_ACCEPT4.
+#ifndef HAVE_ACCEPT4
+ /* If the server is unable to atomically set CLOEXEC when accepting
+ * a new connection, then we must serialize to ensure that we are
+ * not attempting to fork() in one thread while the server is
+ * accepting the next client in another thread.
*/
   if (p->plugin._thread_model > NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS &&
       p->plugin.fork_safe == 0) {
nbdkit_debug (".fork_safe not set, serializing to avoid fd leaks");
thread_model = NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS;
}
+#endif
if (p->plugin.thread_model) {
r = p->plugin.thread_model ();
diff --git a/server/sockets.c b/server/sockets.c
index 5adf5af5..70d999ee 100644
--- a/server/sockets.c
+++ b/server/sockets.c
@@ -1,5 +1,5 @@
/* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -183,7 +183,14 @@ bind_tcpip_socket (size_t *nr_socks)
set_selinux_label ();
- sock = socket (a->ai_family, a->ai_socktype, a->ai_protocol);
+#ifdef SOCK_CLOEXEC
+    sock = socket (a->ai_family, a->ai_socktype | SOCK_CLOEXEC, a->ai_protocol);
+#else
+ /* Fortunately, this code is only run at startup, so there is no
+ * risk of the fd leaking to a plugin's fork()
+ */
+ sock = set_cloexec (socket (a->ai_family, a->ai_socktype, a->ai_protocol));
+#endif
if (sock == -1) {
perror ("bind_tcpip_socket: socket");
exit (EXIT_FAILURE);
@@ -294,8 +301,25 @@ accept_connection (int listen_sock)
thread_data->instance_num = instance_num++;
thread_data->addrlen = sizeof thread_data->addr;
again:
- thread_data->sock = accept (listen_sock,
- &thread_data->addr, &thread_data->addrlen);
+#ifdef HAVE_ACCEPT4
+ thread_data->sock = accept4 (listen_sock,
+ &thread_data->addr, &thread_data->addrlen,
+ SOCK_CLOEXEC);
+#else
+ /* If we are fully parallel, then this function could be accepting
+ * connections in one thread while another thread could be in a
+ * plugin trying to fork. But thanks to .fork_safe, we either know
+ * that the plugin can tolerate the fd leak, or we've restricted
+ * things to serialize_all_requests so that we don't perform the
+ * accept until the plugin is not running. Therefore, not being
+ * atomic is okay.
+ */
+ lock_request (NULL);
+ thread_data->sock = set_cloexec (accept (listen_sock,
+ &thread_data->addr,
+ &thread_data->addrlen));
+ unlock_request (NULL);
+#endif
if (thread_data->sock == -1) {
if (errno == EINTR || errno == EAGAIN)
goto again;
--
2.20.1