Any time we reply to NBD_CMD_READ or NBD_CMD_BLOCK_STATUS, we end up
calling conn->send() more than once. Now that we've disabled Nagle's
algorithm, the small header is pushed out immediately instead of being
batched with the rest of the payload, which adds overhead to the
actual network traffic.
For interfaces that support corking (gnutls, or Linux TCP sockets), we
can give a hint that the separate send() calls should be batched into
a single network packet where practical.
This patch just wires up support; the next one will actually use it
and provide performance measurements.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
server/internal.h | 3 +++
server/connections.c | 25 +++++++++++++++++++++++++
server/crypto.c | 19 +++++++++++++++++++
3 files changed, 47 insertions(+)
diff --git a/server/internal.h b/server/internal.h
index 2ee5e23..cb34323 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -145,6 +145,8 @@ typedef int (*connection_recv_function) (struct connection *,
typedef int (*connection_send_function) (struct connection *,
const void *buf, size_t len)
__attribute__((__nonnull__ (1, 2)));
+typedef int (*connection_cork_function) (struct connection *, bool)
+ __attribute__((__nonnull__ (1)));
typedef void (*connection_close_function) (struct connection *)
__attribute__((__nonnull__ (1)));
@@ -180,6 +182,7 @@ struct connection {
int sockin, sockout;
connection_recv_function recv;
connection_send_function send;
+ connection_cork_function cork;
connection_close_function close;
};
diff --git a/server/connections.c b/server/connections.c
index b7d9a6a..9b0b75c 100644
--- a/server/connections.c
+++ b/server/connections.c
@@ -38,6 +38,8 @@
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
#include "internal.h"
@@ -51,6 +53,7 @@ static void free_connection (struct connection *conn);
/* Don't call these raw socket functions directly. Use conn->recv etc. */
static int raw_recv (struct connection *, void *buf, size_t len);
static int raw_send (struct connection *, const void *buf, size_t len);
+static int raw_cork (struct connection *conn, bool cork);
static void raw_close (struct connection *);
int
@@ -288,6 +291,15 @@ new_connection (int sockin, int sockout, int nworkers)
conn->send = raw_send;
conn->close = raw_close;
+ /* Install a cork handler, but only if corking works */
+#ifdef TCP_CORK
+ {
+ int opt = 0;
+ if (setsockopt (sockout, IPPROTO_TCP, TCP_CORK, &opt, sizeof opt) == 0)
+ conn->cork = raw_cork;
+ }
+#endif
+
return conn;
}
@@ -344,6 +356,19 @@ raw_send (struct connection *conn, const void *vbuf, size_t len)
return 0;
}
+#ifdef TCP_CORK
+/* Cork (true) or uncork (false) conn->sockout to batch a group of send
+ * calls.  Best effort: setsockopt errors are ignored; always returns 0. */
+static int
+raw_cork (struct connection *conn, bool cork)
+{
+  int opt = cork;
+  /* Ignore failure; new_connection() checked that uncork should work */
+  setsockopt (conn->sockout, IPPROTO_TCP, TCP_CORK, &opt, sizeof opt);
+  return 0;
+}
+#endif /* TCP_CORK */
+
/* Read buffer from conn->sockin and either succeed completely
* (returns > 0), read an EOF (returns 0), or fail (returns -1).
*/
diff --git a/server/crypto.c b/server/crypto.c
index 978a843..e4abca2 100644
--- a/server/crypto.c
+++ b/server/crypto.c
@@ -371,6 +371,24 @@ crypto_send (struct connection *conn, const void *vbuf, size_t len)
return 0;
}
+/* Toggle corking on the TLS session so a group of send calls is batched.
+ * Returns 0 on success, or -1 if gnutls_record_uncork reports an error.
+ */
+static int
+crypto_cork (struct connection *conn, bool cork)
+{
+  gnutls_session_t session = conn->crypto_session;
+
+  assert (session != NULL);
+
+  if (cork) {
+    gnutls_record_cork (session);
+    return 0;
+  }
+
+  return gnutls_record_uncork (session, GNUTLS_RECORD_WAIT) < 0 ? -1 : 0;
+}
+
/* There's no place in the NBD protocol to send back errors from
* close, so this function ignores errors.
*/
@@ -504,6 +522,7 @@ crypto_negotiate_tls (struct connection *conn, int sockin, int sockout)
*/
conn->crypto_session = session;
conn->recv = crypto_recv;
+ conn->cork = crypto_cork;
conn->send = crypto_send;
conn->close = crypto_close;
return 0;
--
2.20.1