On Tue, May 30, 2023 at 01:50:59PM +0200, Laszlo Ersek wrote:
On 5/25/23 15:00, Eric Blake wrote:
> Add the magic numbers and new structs necessary to implement the NBD
> protocol extension of extended headers providing 64-bit lengths. This
> corresponds to upstream nbd commits 36abf47d and a9384e2f on the
> extension-ext-header branch[1] (commit e6f3b94a for
> NBD_FLAG_BLOCK_STATUS_PAYLOAD is saved for a later patch).
>
> [1]
https://github.com/NetworkBlockDevice/nbd/blob/extension-ext-header/doc/p...
>
> Signed-off-by: Eric Blake <eblake(a)redhat.com>
> ---
> lib/nbd-protocol.h | 66 ++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 52 insertions(+), 14 deletions(-)
>
> diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
> index f4640a98..b6fa9b8a 100644
> --- a/lib/nbd-protocol.h
> +++ b/lib/nbd-protocol.h
> @@ -124,6 +124,7 @@ struct nbd_fixed_new_option_reply {
> #define NBD_OPT_STRUCTURED_REPLY 8
> #define NBD_OPT_LIST_META_CONTEXT 9
> #define NBD_OPT_SET_META_CONTEXT 10
> +#define NBD_OPT_EXTENDED_HEADERS 11
>
> #define NBD_REP_ERR(val) (0x80000000 | (val))
> #define NBD_REP_IS_ERR(val) (!!((val) & 0x80000000))
> @@ -141,6 +142,7 @@ struct nbd_fixed_new_option_reply {
> #define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR (7)
> #define NBD_REP_ERR_BLOCK_SIZE_REQD NBD_REP_ERR (8)
> #define NBD_REP_ERR_TOO_BIG NBD_REP_ERR (9)
> +#define NBD_REP_ERR_EXT_HEADER_REQD NBD_REP_ERR (10)
>
> #define NBD_INFO_EXPORT 0
> #define NBD_INFO_NAME 1
> @@ -182,16 +184,26 @@ struct nbd_fixed_new_option_reply_meta_context {
> /* followed by a string */
> } NBD_ATTRIBUTE_PACKED;
>
> -/* Request (client -> server). */
> +/* Compact request (client -> server). */
> struct nbd_request {
> uint32_t magic; /* NBD_REQUEST_MAGIC. */
> - uint16_t flags; /* Request flags. */
> - uint16_t type; /* Request type. */
> + uint16_t flags; /* Request flags: NBD_CMD_FLAG_*. */
> + uint16_t type; /* Request type: NBD_CMD_*. */
> uint64_t handle; /* Opaque handle. */
> uint64_t offset; /* Request offset. */
> uint32_t count; /* Request length. */
> } NBD_ATTRIBUTE_PACKED;
>
> +/* Extended request (client -> server). */
> +struct nbd_request_ext {
> + uint32_t magic; /* NBD_EXTENDED_REQUEST_MAGIC. */
> + uint16_t flags; /* Request flags: NBD_CMD_FLAG_*. */
> + uint16_t type; /* Request type: NBD_CMD_*. */
> + uint64_t handle; /* Opaque handle. */
> + uint64_t offset; /* Request offset. */
> + uint64_t count; /* Request effect or payload length. */
> +} NBD_ATTRIBUTE_PACKED;
> +
> /* Simple reply (server -> client). */
> struct nbd_simple_reply {
> uint32_t magic; /* NBD_SIMPLE_REPLY_MAGIC. */
> @@ -208,6 +220,16 @@ struct nbd_structured_reply {
> uint32_t length; /* Length of payload which follows. */
> } NBD_ATTRIBUTE_PACKED;
>
> +/* Extended reply (server -> client). */
> +struct nbd_extended_reply {
> + uint32_t magic; /* NBD_EXTENDED_REPLY_MAGIC. */
> + uint16_t flags; /* NBD_REPLY_FLAG_* */
> + uint16_t type; /* NBD_REPLY_TYPE_* */
> + uint64_t handle; /* Opaque handle. */
> + uint64_t offset; /* Client's offset. */
> + uint64_t length; /* Length of payload which follows. */
> +} NBD_ATTRIBUTE_PACKED;
> +
> struct nbd_structured_reply_offset_data {
> uint64_t offset; /* offset */
> /* Followed by data. */
> @@ -224,11 +246,23 @@ struct nbd_block_descriptor {
> uint32_t status_flags; /* block type (hole etc) */
> } NBD_ATTRIBUTE_PACKED;
>
> +/* NBD_REPLY_TYPE_BLOCK_STATUS_EXT block descriptor. */
> +struct nbd_block_descriptor_ext {
> + uint64_t length; /* length of block */
> + uint64_t status_flags; /* block type (hole etc) */
I wonder if making the status_flags fields uint64_t too was really
necessary, or just a consequence of us wanting to treat a sequence of
these records as a (double as long) sequence of uint64_t's. Anyway, this
is a spec-level question, and I totally don't want to question the spec. :)
The actual interpretation of the status flag is up to the spec of the
metadata namespace that is being used for this particular request.
Some metadata namespaces treat the "flags" field as an unsigned integer,
to specify offsets or counters. I that context, the flags field should
indeed be extended to 64 bits.
> +} NBD_ATTRIBUTE_PACKED;
> +
> struct nbd_structured_reply_block_status_hdr {
> uint32_t context_id; /* metadata context ID */
> /* followed by array of nbd_block_descriptor extents */
> } NBD_ATTRIBUTE_PACKED;
>
> +struct nbd_structured_reply_block_status_ext_hdr {
> + uint32_t context_id; /* metadata context ID */
> + uint32_t count; /* length of following array */
(1) I think that "length of following array" is confusing here. Per
spec, this is "descriptor count" -- that's clearer. "Length"
could be
mistaken for "number of bytes".
Also, what was the justification for *not* making "count" uint64_t?
(Just asking.) I do understand a server is permitted to respond with a
block status reply that covers less than the requested range, so I
understand a server, if it needs to, *can* truncate its response for the
sake of fitting "count" into a uint32_t.
This is, as you say, the number of block descriptor messages we are
going to send to the client. Each such message is at 8 bytes long.
That would mean that by the time you overflow a uint32_t with the number
of extents that are to be sent, you'll be promising to send 2^32 * 8
(i.e., 2^36) bytes of data, or 32 GiB.
That would be a ridiculously amount of data, and no user is going to
wait for a client to finish parsing that amount of data without a hard
reboot of their client.
The only change that would be reasonable here is to reduce the size of
this field 16 to bits, rather than increasing it (but then we lose
alignment, so that's also not a good idea)
> + /* followed by array of nbd_block_descriptor_ext extents */
> +} NBD_ATTRIBUTE_PACKED;
> +
> struct nbd_structured_reply_error {
> uint32_t error; /* NBD_E* error number */
> uint16_t len; /* Length of human readable error. */
> @@ -236,8 +270,10 @@ struct nbd_structured_reply_error {
> } NBD_ATTRIBUTE_PACKED;
>
> #define NBD_REQUEST_MAGIC 0x25609513
> +#define NBD_EXTENDED_REQUEST_MAGIC 0x21e41c71
> #define NBD_SIMPLE_REPLY_MAGIC 0x67446698
> #define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef
> +#define NBD_EXTENDED_REPLY_MAGIC 0x6e8a278c
>
> /* Structured reply flags. */
> #define NBD_REPLY_FLAG_DONE (1<<0)
> @@ -246,12 +282,13 @@ struct nbd_structured_reply_error {
> #define NBD_REPLY_TYPE_IS_ERR(val) (!!((val) & (1<<15)))
>
> /* Structured reply types. */
> -#define NBD_REPLY_TYPE_NONE 0
> -#define NBD_REPLY_TYPE_OFFSET_DATA 1
> -#define NBD_REPLY_TYPE_OFFSET_HOLE 2
> -#define NBD_REPLY_TYPE_BLOCK_STATUS 5
> -#define NBD_REPLY_TYPE_ERROR NBD_REPLY_TYPE_ERR (1)
> -#define NBD_REPLY_TYPE_ERROR_OFFSET NBD_REPLY_TYPE_ERR (2)
> +#define NBD_REPLY_TYPE_NONE 0
> +#define NBD_REPLY_TYPE_OFFSET_DATA 1
> +#define NBD_REPLY_TYPE_OFFSET_HOLE 2
> +#define NBD_REPLY_TYPE_BLOCK_STATUS 5
> +#define NBD_REPLY_TYPE_BLOCK_STATUS_EXT 6
> +#define NBD_REPLY_TYPE_ERROR NBD_REPLY_TYPE_ERR (1)
> +#define NBD_REPLY_TYPE_ERROR_OFFSET NBD_REPLY_TYPE_ERR (2)
>
> /* NBD commands. */
> #define NBD_CMD_READ 0
> @@ -263,11 +300,12 @@ struct nbd_structured_reply_error {
> #define NBD_CMD_WRITE_ZEROES 6
> #define NBD_CMD_BLOCK_STATUS 7
>
> -#define NBD_CMD_FLAG_FUA (1<<0)
> -#define NBD_CMD_FLAG_NO_HOLE (1<<1)
> -#define NBD_CMD_FLAG_DF (1<<2)
> -#define NBD_CMD_FLAG_REQ_ONE (1<<3)
> -#define NBD_CMD_FLAG_FAST_ZERO (1<<4)
> +#define NBD_CMD_FLAG_FUA (1<<0)
> +#define NBD_CMD_FLAG_NO_HOLE (1<<1)
> +#define NBD_CMD_FLAG_DF (1<<2)
> +#define NBD_CMD_FLAG_REQ_ONE (1<<3)
> +#define NBD_CMD_FLAG_FAST_ZERO (1<<4)
> +#define NBD_CMD_FLAG_PAYLOAD_LEN (1<<5)
Ah, this seems like a nice addition: "... the client sets
NBD_CMD_FLAG_PAYLOAD_LEN in order to pass a payload that informs the
server to limit its replies to the metacontext id(s) in the client's
request payload, rather than giving an answer on all possible
metacontext ids".
>
> /* NBD error codes. */
> #define NBD_SUCCESS 0
With (1) clarified:
Reviewed-by: Laszlo Ersek <lersek(a)redhat.com>
--
w(a)uter.{be,co.za}
wouter(a){grep.be,fosdem.org,debian.org}
I will have a Tin-Actinium-Potassium mixture, thanks.