When structured replies are negotiated, the server may advertise
support for the DF flag (the server promises to return at most one
data/hole chunk, or to fail with NBD_EOVERFLOW if the chunk would be
too large). As both nbdkit and qemu-nbd support this flag (the former
only trivially, but the latter by not compressing holes over the
wire), it is worth exposing to clients, if only for testing purposes.
I chose to specifically reject the flag for plain nbd_[aio_]pread
(about all it can do is cause a read to fail that would otherwise
succeed) and only accept it for nbd_[aio_]pread_structured, but it
would be easy enough to change the one to forward to the other if we
wanted.
---
generator/generator | 22 +++++++++++++++++++---
interop/structured-read.c | 31 ++++++++++++++++++++++++-------
lib/flags.c | 11 +++++++++++
lib/nbd-protocol.h | 1 +
lib/protocol.c | 1 +
lib/rw.c | 14 ++++++++++++--
python/t/405-pread-structured.py | 6 ++++++
7 files changed, 74 insertions(+), 12 deletions(-)
diff --git a/generator/generator b/generator/generator
index cf9876f..da60486 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1243,6 +1243,17 @@ Returns true if the server supports the zero command
the server does not.";
};
+ "can_df", {
+ default_call with
+ args = []; ret = RBool;
+  shortdesc = "does the server support the don't fragment flag to pread?";
+ longdesc = "\
+Returns true if the server supports structured reads with an
+ability to request a non-fragmented read (see C<nbd_pread_structured>,
+C<nbd_aio_pread_structured>). Returns false if the server either lacks
+structured reads or if it does not support a non-fragmented read request.";
+ };
+
"can_multi_conn", {
default_call with
args = []; ret = RBool;
@@ -1306,7 +1317,8 @@ at C<offset> and ending at C<offset> + C<count> - 1. NBD
can only read all or nothing using this call. The call
returns when the data has been read fully into C<buf> or there is an
error. See also C<nbd_pread_structured>, if finer visibility is
-required into the server's replies.
+required into the server's replies, or if you want to use
+C<LIBNBD_CMD_FLAG_DF>.
The C<flags> parameter must be C<0> for now (it exists for future NBD
protocol extensions).";
@@ -1379,8 +1391,11 @@ the server obeyed the requirement that a read call must not have
overlapping chunks and must not succeed without enough chunks to cover
the entire request.
-The C<flags> parameter must be C<0> for now (it exists for future NBD
-protocol extensions).";
+The C<flags> parameter may be C<0> for no flags, or may contain
+C<LIBNBD_CMD_FLAG_DF> meaning that the server should not reply with
+more than one fragment (if that is supported - some servers cannot do
+this, see C<nbd_can_df>). Libnbd does not validate that the server
+actually obeys the flag.";
};
"pwrite", {
@@ -2025,6 +2040,7 @@ let constants = [
"CMD_FLAG_FUA", 1 lsl 0;
"CMD_FLAG_NO_HOLE", 1 lsl 1;
+ "CMD_FLAG_DF", 1 lsl 2;
"CMD_FLAG_REQ_ONE", 1 lsl 3;
"READ_DATA", 1;
diff --git a/interop/structured-read.c b/interop/structured-read.c
index ea47254..cf8b893 100644
--- a/interop/structured-read.c
+++ b/interop/structured-read.c
@@ -38,7 +38,7 @@ static const char *unixsocket;
static char rbuf[1024];
struct data {
- //XXX bool df; /* input: true if DF flag was passed to request */
+ bool df; /* input: true if DF flag was passed to request */
int count; /* input: count of expected remaining calls */
bool fail; /* input: true to return failure */
bool seen_hole; /* output: true if hole encountered */
@@ -60,15 +60,24 @@ read_cb (void *opaque, const void *bufv, size_t count, uint64_t offset,
switch (status) {
case LIBNBD_READ_DATA:
- // XXX if (df...)
- assert (buf == rbuf + 512);
- assert (count == 512);
- assert (offset == 2048 + 512);
- assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+ if (data->df) {
+ assert (buf == rbuf);
+ assert (count == 1024);
+ assert (offset == 2048);
+ assert (buf[0] == 0 && memcmp (buf, buf + 1, 511) == 0);
+ assert (buf[512] == 1 && memcmp (buf + 512, buf + 513, 511) == 0);
+ }
+ else {
+ assert (buf == rbuf + 512);
+ assert (count == 512);
+ assert (offset == 2048 + 512);
+ assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+ }
assert (!data->seen_data);
data->seen_data = true;
break;
case LIBNBD_READ_HOLE:
+ assert (!data->df); /* Relies on qemu-nbd's behavior */
assert (buf == rbuf);
assert (count == 512);
assert (offset == 2048);
@@ -134,7 +143,15 @@ main (int argc, char *argv[])
}
assert (data.seen_data && data.seen_hole);
- // XXX Repeat with DF flag
+ /* Repeat with DF flag. */
+ memset (rbuf, 2, sizeof rbuf);
+ data = (struct data) { .df = true, .count = 1, };
+ if (nbd_pread_structured (nbd, rbuf, sizeof rbuf, 2048, &data, read_cb,
+ LIBNBD_CMD_FLAG_DF) == -1) {
+ fprintf (stderr, "%s\n", nbd_get_error ());
+ exit (EXIT_FAILURE);
+ }
+ assert (data.seen_data && !data.seen_hole);
/* Trigger a failed callback, to prove connection stays up. With
* reads, all chunks trigger a callback even after failure, but the
diff --git a/lib/flags.c b/lib/flags.c
index 421a7d2..cdbc28f 100644
--- a/lib/flags.c
+++ b/lib/flags.c
@@ -42,6 +42,11 @@ nbd_internal_set_size_and_flags (struct nbd_handle *h,
return -1;
}
+ if (eflags & NBD_FLAG_SEND_DF && !h->structured_replies) {
+ debug (h, "server lacks structured replies, ignoring claim of df");
+ eflags &= ~NBD_FLAG_SEND_DF;
+ }
+
h->exportsize = exportsize;
h->eflags = eflags;
return 0;
@@ -95,6 +100,12 @@ nbd_unlocked_can_zero (struct nbd_handle *h)
return get_flag (h, NBD_FLAG_SEND_WRITE_ZEROES);
}
+int
+nbd_unlocked_can_df (struct nbd_handle *h)
+{
+ return get_flag (h, NBD_FLAG_SEND_DF);
+}
+
int
nbd_unlocked_can_multi_conn (struct nbd_handle *h)
{
diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
index 071971e..405af3e 100644
--- a/lib/nbd-protocol.h
+++ b/lib/nbd-protocol.h
@@ -245,6 +245,7 @@ struct nbd_structured_reply_error {
#define NBD_ENOMEM 12
#define NBD_EINVAL 22
#define NBD_ENOSPC 28
+#define NBD_EOVERFLOW 75
#define NBD_ESHUTDOWN 108
#endif /* NBD_PROTOCOL_H */
diff --git a/lib/protocol.c b/lib/protocol.c
index d3ac0b4..6087887 100644
--- a/lib/protocol.c
+++ b/lib/protocol.c
@@ -35,6 +35,7 @@ nbd_internal_errno_of_nbd_error (uint32_t error)
case NBD_ENOMEM: return ENOMEM;
case NBD_EINVAL: return EINVAL;
case NBD_ENOSPC: return ENOSPC;
+ case NBD_EOVERFLOW: return EOVERFLOW;
case NBD_ESHUTDOWN: return ESHUTDOWN;
default: return EINVAL;
}
diff --git a/lib/rw.c b/lib/rw.c
index 24dbc4e..2dc60de 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -238,6 +238,10 @@ int64_t
nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
size_t count, uint64_t offset, uint32_t flags)
{
+ /* We could silently accept flag DF, but it really only makes sense
+ * with callbacks, because otherwise there is no observable change
+ * except that the server may fail where it would otherwise succeed.
+ */
if (flags != 0) {
set_error (EINVAL, "invalid flag: %" PRIu32, flags);
return -1;
@@ -254,12 +258,18 @@ nbd_unlocked_aio_pread_structured (struct nbd_handle *h, void *buf,
{
struct command_cb cb = { .opaque = opaque, .fn.read = read, };
- if (flags != 0) {
+ if ((flags & ~LIBNBD_CMD_FLAG_DF) != 0) {
set_error (EINVAL, "invalid flag: %" PRIu32, flags);
return -1;
}
- return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
+ if ((flags & LIBNBD_CMD_FLAG_DF) != 0 &&
+ nbd_unlocked_can_df (h) != 1) {
+ set_error (EINVAL, "server does not support the DF flag");
+ return -1;
+ }
+
+ return nbd_internal_command_common (h, flags, NBD_CMD_READ, offset, count,
buf, &cb);
}
diff --git a/python/t/405-pread-structured.py b/python/t/405-pread-structured.py
index 51f0144..1bfa162 100644
--- a/python/t/405-pread-structured.py
+++ b/python/t/405-pread-structured.py
@@ -35,3 +35,9 @@ buf = h.pread_structured (512, 0, 42, f)
print ("%r" % buf)
assert buf == expected
+
+buf = h.pread_structured (512, 0, 42, f, nbd.CMD_FLAG_DF)
+
+print ("%r" % buf)
+
+assert buf == expected
--
2.20.1