Commit d43f482adce2 ("p2v: avoid connecting to ourself while probing
qemu-nbd (RHBZ#1167774)", 2014-12-15) resolved the following race
condition:
start_conversion() [conversion.c]
open_data_connection() [ssh.c]
*local_port = nbd_local_port;
nbd_local_port++;
start_qemu_nbd() [conversion.c]
fork()
/* in the child */
/* execute "qemu-nbd ... -p port_str" */
bind()
wait_qemu_nbd() [conversion.c]
connect()
The race was between the child process (qemu-nbd) reaching bind() and the
parent process reaching connect(). In case the parent process "won", then
the kernel would assign an ephemeral source port to the connecting socket
from such a pool of ports from which the child process had not bound its
listening socket yet. Arguably due to a kernel bug, the kernel could then
assign the same port number to the local end of the connecting socket as
the port number the socket was connecting to, effectively causing the
parent process to connect to itself.
This race no longer exists: due to the previous patches, only socket
activation is possible, in which bind() is called by the parent process
for the listening socket strictly before the parent process calls
connect():
start_conversion() [conversion.c]
start_nbd_server() [nbd.c]
open_listening_socket() [nbd.c]
bind_tcpip_socket() [nbd.c]
bind()
start_nbdkit() [nbd.c]
fork()
/* in the child */
socket_activation()
/* execute "nbdkit", letting it
* inherit the already bound socket
*/
wait_for_nbd_server_to_start() [nbd.c]
connect_with_source_port() [nbd.c]
bind_source_port() [nbd.c]
bind()
connect()
Remove bind_source_port(). Rename connect_with_source_port() to
connect_to_nbdkit(), and remove its "source_port" parameter.
Ref:
https://listman.redhat.com/archives/libguestfs/2022-March/028475.html
Suggested-by: Richard W.M. Jones <rjones(a)redhat.com>
Signed-off-by: Laszlo Ersek <lersek(a)redhat.com>
Reviewed-by: Richard W.M. Jones <rjones(a)redhat.com>
---
Notes:
v2:
- pick up Rich's R-b
nbd.c | 74 +-------------------
1 file changed, 3 insertions(+), 71 deletions(-)
diff --git a/nbd.c b/nbd.c
index cb849e75dc84..f64afdc53dd3 100644
--- a/nbd.c
+++ b/nbd.c
@@ -83,8 +83,7 @@ static enum nbd_server use_server;
static pid_t start_nbdkit (const char *device, int *fds, size_t nr_fds);
static int open_listening_socket (int **fds, size_t *nr_fds);
static int bind_tcpip_socket (const char *port, int **fds, size_t *nr_fds);
-static int connect_with_source_port (int dest_port, int source_port);
-static int bind_source_port (int sockfd, int family, int source_port);
+static int connect_to_nbdkit (int dest_port);
static char *nbd_error;
@@ -467,178 +466,111 @@ int
wait_for_nbd_server_to_start (int port)
{
int sockfd = -1;
int result = -1;
time_t start_t, now_t;
struct timespec half_sec = { .tv_sec = 0, .tv_nsec = 500000000 };
struct timeval timeout = { .tv_usec = 0 };
char magic[8]; /* NBDMAGIC */
size_t bytes_read = 0;
ssize_t recvd;
time (&start_t);
for (;;) {
time (&now_t);
if (now_t - start_t >= WAIT_NBD_TIMEOUT) {
set_nbd_error ("timed out waiting for NBD server to start");
goto cleanup;
}
- /* Source port for probing NBD server should be one greater than
- * port. It's not guaranteed to always bind to this port, but it
- * will hint the kernel to start there and try incrementally
- * higher ports if needed. This avoids the case where the kernel
- * selects port as our source port, and we immediately connect to
- * ourself. See:
- *
https://bugzilla.redhat.com/show_bug.cgi?id=1167774#c9
- */
- sockfd = connect_with_source_port (port, port+1);
+ sockfd = connect_to_nbdkit (port);
if (sockfd >= 0)
break;
nanosleep (&half_sec, NULL);
}
time (&now_t);
timeout.tv_sec = (start_t + WAIT_NBD_TIMEOUT) - now_t;
if (setsockopt (sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) == -1)
{
set_nbd_error ("waiting for NBD server to start: "
"setsockopt(SO_RCVTIMEO): %m");
goto cleanup;
}
do {
recvd = recv (sockfd, magic, sizeof magic - bytes_read, 0);
if (recvd == -1) {
set_nbd_error ("waiting for NBD server to start: recv: %m");
goto cleanup;
}
bytes_read += recvd;
} while (bytes_read < sizeof magic);
if (memcmp (magic, "NBDMAGIC", sizeof magic) != 0) {
set_nbd_error ("waiting for NBD server to start: "
"'NBDMAGIC' was not received from NBD server");
goto cleanup;
}
result = 0;
cleanup:
if (sockfd >= 0)
close (sockfd);
return result;
}
/**
* Connect to C<localhost:dest_port>, resolving the address using
* L<getaddrinfo(3)>.
*
- * This also sets the source port of the connection to the first free
- * port number E<ge> C<source_port>.
- *
* This may involve multiple connections - to IPv4 and IPv6 for
* instance.
*/
static int
-connect_with_source_port (int dest_port, int source_port)
+connect_to_nbdkit (int dest_port)
{
struct addrinfo hints;
struct addrinfo *results, *rp;
char dest_port_str[16];
int r, sockfd = -1;
- int reuseaddr = 1;
snprintf (dest_port_str, sizeof dest_port_str, "%d", dest_port);
memset (&hints, 0, sizeof hints);
hints.ai_family = AF_UNSPEC; /* allow IPv4 or IPv6 */
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_NUMERICSERV; /* numeric dest port number */
hints.ai_protocol = 0; /* any protocol */
r = getaddrinfo ("localhost", dest_port_str, &hints, &results);
if (r != 0) {
set_nbd_error ("getaddrinfo: localhost/%s: %s",
dest_port_str, gai_strerror (r));
return -1;
}
for (rp = results; rp != NULL; rp = rp->ai_next) {
sockfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
if (sockfd == -1)
continue;
- /* If we run p2v repeatedly (say, running the tests in a loop),
- * there's a decent chance we'll end up trying to bind() to a port
- * that is in TIME_WAIT from a prior run. Handle that gracefully
- * with SO_REUSEADDR.
- */
- if (setsockopt (sockfd, SOL_SOCKET, SO_REUSEADDR,
- &reuseaddr, sizeof reuseaddr) == -1)
- perror ("warning: setsockopt");
-
- /* Need to bind the source port. */
- if (bind_source_port (sockfd, rp->ai_family, source_port) == -1) {
- close (sockfd);
- sockfd = -1;
- continue;
- }
-
/* Connect. */
if (connect (sockfd, rp->ai_addr, rp->ai_addrlen) == -1) {
set_nbd_error ("waiting for NBD server to start: "
"connect to localhost/%s: %m", dest_port_str);
close (sockfd);
sockfd = -1;
continue;
}
break;
}
freeaddrinfo (results);
return sockfd;
}
-
-static int
-bind_source_port (int sockfd, int family, int source_port)
-{
- struct addrinfo hints;
- struct addrinfo *results, *rp;
- char source_port_str[16];
- int r;
-
- snprintf (source_port_str, sizeof source_port_str, "%d", source_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_PASSIVE | AI_NUMERICSERV; /* numeric port number */
- hints.ai_protocol = 0; /* any protocol */
-
- r = getaddrinfo ("localhost", source_port_str, &hints, &results);
- if (r != 0) {
- set_nbd_error ("getaddrinfo (bind): localhost/%s: %s",
- source_port_str, gai_strerror (r));
- return -1;
- }
-
- for (rp = results; rp != NULL; rp = rp->ai_next) {
- if (bind (sockfd, rp->ai_addr, rp->ai_addrlen) == 0)
- goto bound;
- }
-
- set_nbd_error ("waiting for NBD server to start: "
- "bind to source port %d: %m",
- source_port);
- freeaddrinfo (results);
- return -1;
-
- bound:
- freeaddrinfo (results);
- return 0;
-}
--
2.19.1.3.g30247aa5d201