On Mon, Sep 04, 2023 at 09:52:53AM +0200, Laszlo Ersek wrote:
On 9/3/23 17:22, Richard W.M. Jones wrote:
> This is broadly simple code motion, intended so that we can reuse the
> same code in libnbd.
> ---
> common/include/Makefile.am | 6 ++
> common/include/human-size.h | 137 +++++++++++++++++++++++++++++++
> common/include/test-human-size.c | 133 ++++++++++++++++++++++++++++++
> server/public.c | 78 ++----------------
> .gitignore | 1 +
> 5 files changed, 283 insertions(+), 72 deletions(-)
>
> diff --git a/common/include/Makefile.am b/common/include/Makefile.am
> index 3162e92c2..8e4de04f3 100644
> --- a/common/include/Makefile.am
> +++ b/common/include/Makefile.am
> @@ -42,6 +42,7 @@ EXTRA_DIST = \
> checked-overflow.h \
> compiler-macros.h \
> hexdigit.h \
> + human-size.h \
> isaligned.h \
> ispowerof2.h \
> iszero.h \
> @@ -63,6 +64,7 @@ TESTS = \
> test-ascii-string \
> test-byte-swapping \
> test-checked-overflow \
> + test-human-size \
> test-isaligned \
> test-ispowerof2 \
> test-iszero \
> @@ -93,6 +95,10 @@ test_checked_overflow_SOURCES = test-checked-overflow.c
checked-overflow.h
> test_checked_overflow_CPPFLAGS = -I$(srcdir)
> test_checked_overflow_CFLAGS = $(WARNINGS_CFLAGS)
>
> +test_human_size_SOURCES = test-human-size.c human-size.h
> +test_human_size_CPPFLAGS = -I$(srcdir)
> +test_human_size_CFLAGS = $(WARNINGS_CFLAGS)
> +
> test_isaligned_SOURCES = test-isaligned.c isaligned.h
> test_isaligned_CPPFLAGS = -I$(srcdir)
> test_isaligned_CFLAGS = $(WARNINGS_CFLAGS)
> diff --git a/common/include/human-size.h b/common/include/human-size.h
> new file mode 100644
> index 000000000..788dbd7ba
> --- /dev/null
> +++ b/common/include/human-size.h
> @@ -0,0 +1,137 @@
> +/* nbdkit
> + * Copyright Red Hat
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + *
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * * Neither the name of Red Hat nor the names of its contributors may be
> + * used to endorse or promote products derived from this software without
> + * specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS''
AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
> + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
> + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#ifndef NBDKIT_HUMAN_SIZE_H
> +#define NBDKIT_HUMAN_SIZE_H
> +
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +
> +/* Attempt to parse a string with a possible scaling suffix, such as
> + * "2M". Disk sizes cannot usefully exceed off_t (which is signed)
> + * and cannot be negative.
> + *
> + * On error, returns -1 and sets *error and *pstr. You can form a
> + * final error message by appending "<error>: <pstr>".
> + */
> +static inline int64_t
> +human_size_parse (const char *str,
> + const char **error, const char **pstr)
> +{
> + int64_t size;
> + char *end;
> + uint64_t scale = 1;
> +
> + /* XXX Should we also parse things like '1.5M'? */
> + /* XXX Should we allow hex? If so, hex cannot use scaling suffixes,
> + * because some of them are valid hex digits.
> + */
> + errno = 0;
> + size = strtoimax (str, &end, 10);
(1) A further improvement here (likely best done in a separate patch)
could be to change the type of "size" to "intmax_t", from
"int64_t".
That way, the assignment will be safe even theoretically, *and* the
overflow check at the bottom of the function (with the division &
comparison of the quotient against INT_MAX) will work just the same.
I'm always very unsure how this all works. In particular I seem to
recall that intmax_t is no longer really the maximum possible int
(because of int128) and so it's always 64 bit on platforms we care
about. Can Eric comment?
> + if (str == end) {
> + *error = "could not parse size string";
> + *pstr = str;
> + return -1;
> + }
> + if (size < 0) {
> + *error = "size cannot be negative";
> + *pstr = str;
> + return -1;
> + }
> + if (errno) {
> + *error = "size exceeds maximum value";
> + *pstr = str;
> + return -1;
> + }
> +
> + switch (*end) {
> + /* No suffix */
> + case '\0':
> + end--; /* Safe, since we already filtered out empty string */
> + break;
> +
> + /* Powers of 1024 */
> + case 'e': case 'E':
> + scale *= 1024;
> + /* fallthru */
> + case 'p': case 'P':
> + scale *= 1024;
> + /* fallthru */
> + case 't': case 'T':
> + scale *= 1024;
> + /* fallthru */
> + case 'g': case 'G':
> + scale *= 1024;
> + /* fallthru */
> + case 'm': case 'M':
> + scale *= 1024;
> + /* fallthru */
> + case 'k': case 'K':
> + scale *= 1024;
> + /* fallthru */
> + case 'b': case 'B':
> + break;
> +
> + /* "sectors", ie. units of 512 bytes, even if that's not the
real
> + * sector size
> + */
> + case 's': case 'S':
> + scale = 512;
> + break;
> +
> + default:
> + *error = "could not parse size: unknown suffix";
> + *pstr = end;
> + return -1;
> + }
> +
> + /* XXX Maybe we should support 'MiB' as a synonym for 'M'; and
'MB'
> + * for powers of 1000, for similarity to GNU tools. But for now,
> + * anything beyond 'M' is dropped.
> + */
> + if (end[1]) {
> + *error = "could not parse size: unknown suffix";
> + *pstr = end;
> + return -1;
> + }
> +
> + if (INT64_MAX / scale < size) {
> + *error = "could not parse size: size * scale overflows";
> + *pstr = str;
> + return -1;
> + }
> +
> + return size * scale;
> +}
> +
> +#endif /* NBDKIT_HUMAN_SIZE_H */
> diff --git a/common/include/test-human-size.c b/common/include/test-human-size.c
> new file mode 100644
> index 000000000..e8cbe7f41
> --- /dev/null
> +++ b/common/include/test-human-size.c
> @@ -0,0 +1,133 @@
> +/* nbdkit
> + * Copyright Red Hat
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + *
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * * Neither the name of Red Hat nor the names of its contributors may be
> + * used to endorse or promote products derived from this software without
> + * specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS''
AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
> + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
> + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <config.h>
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +
> +#include "array-size.h"
> +#include "human-size.h"
> +
> +int
> +main (void)
> +{
> + size_t i;
> + bool pass = true;
> + struct pair {
> + const char *str;
> + int64_t res;
> + } pairs[] = {
> + /* Bogus strings */
> + { "", -1 },
> + { "0x0", -1 },
> + { "garbage", -1 },
> + { "0garbage", -1 },
> + { "8E", -1 },
> + { "8192P", -1 },
> +
> + /* Strings leading to overflow */
> + { "9223372036854775808", -1 }, /* INT_MAX + 1 */
> + { "18446744073709551614", -1 }, /* UINT64_MAX - 1 */
> + { "18446744073709551615", -1 }, /* UINT64_MAX */
> + { "18446744073709551616", -1 }, /* UINT64_MAX + 1 */
> + { "999999999999999999999999", -1 },
> +
> + /* Strings representing negative values */
> + { "-1", -1 },
> + { "-2", -1 },
> + { "-9223372036854775809", -1 }, /* INT64_MIN - 1 */
> + { "-9223372036854775808", -1 }, /* INT64_MIN */
> + { "-9223372036854775807", -1 }, /* INT64_MIN + 1 */
> + { "-18446744073709551616", -1 }, /* -UINT64_MAX - 1 */
> + { "-18446744073709551615", -1 }, /* -UINT64_MAX */
> + { "-18446744073709551614", -1 }, /* -UINT64_MAX + 1 */
> +
> + /* Strings we may want to support in the future */
> + { "M", -1 },
> + { "1MB", -1 },
> + { "1MiB", -1 },
> + { "1.5M", -1 },
> +
> + /* Valid strings */
> + { "-0", 0 },
> + { "0", 0 },
> + { "+0", 0 },
> + { " 08", 8 },
> + { "1", 1 },
> + { "+1", 1 },
> + { "1234567890", 1234567890 },
> + { "+1234567890", 1234567890 },
> + { "9223372036854775807", INT64_MAX },
> + { "1s", 512 },
> + { "2S", 1024 },
> + { "1b", 1 },
> + { "1B", 1 },
> + { "1k", 1024 },
> + { "1K", 1024 },
> + { "1m", 1024 * 1024 },
> + { "1M", 1024 * 1024 },
> + { "+1M", 1024 * 1024 },
> + { "1g", 1024 * 1024 * 1024 },
> + { "1G", 1024 * 1024 * 1024 },
> + { "1t", 1024LL * 1024 * 1024 * 1024 },
> + { "1T", 1024LL * 1024 * 1024 * 1024 },
> + { "1p", 1024LL * 1024 * 1024 * 1024 * 1024 },
> + { "1P", 1024LL * 1024 * 1024 * 1024 * 1024 },
> + { "8191p", 1024LL * 1024 * 1024 * 1024 * 1024 * 8191 },
> + { "1e", 1024LL * 1024 * 1024 * 1024 * 1024 * 1024 },
> + { "1E", 1024LL * 1024 * 1024 * 1024 * 1024 * 1024 },
> + };
> +
> + for (i = 0; i < ARRAY_SIZE (pairs); i++) {
> + const char *error = NULL, *pstr = NULL;
> + int64_t r;
> +
> + r = human_size_parse (pairs[i].str, &error, &pstr);
> + if (r != pairs[i].res) {
> + fprintf (stderr,
> + "Wrong parse for %s, got %" PRId64 ", expected
%" PRId64 "\n",
> + pairs[i].str, r, pairs[i].res);
> + pass = false;
> + }
> + if (r == -1) {
> + if (error == NULL || pstr == NULL) {
> + fprintf (stderr, "Wrong error message handling for %s\n",
pairs[i].str);
> + pass = false;
> + }
> + }
> + }
> +
> + exit (pass ? EXIT_SUCCESS : EXIT_FAILURE);
> +}
(2) I don't like that we're repeating the test cases here, from
test_nbdkit_parse_size() [server/test-public.c].
Originally I intended to ask "why not just *move* that code as well",
but I think I see the point...
Namely, in test_nbdkit_parse_size(), we still need to test
nbdkit_error() -- via "error_flagged" --, and nbdkit_error() remains
unique to test_nbdkit_parse_size(), after factoring out
human_size_parse(). And so, for triggering the errors, we need to keep
the same test cases.
... Would it be possible to move the "pairs" array into a separate C
file under "common"? (Not necessarily under "common/include".)
We'd need
a new header file (for defining the "pair" type, for declaring the
"pairs" array, and for declaring the "num_pairs" constant, which
would
have to be a global variable then.)
If that's too difficult or intrusive, then please at least
cross-reference each source file from the other, in new comments, so
that whenever we update one of them, we don't forget the other.
Actually it was easy enough to make this into a common file
("common/include/human-size-test-cases.h") in the final version.
(3) Calling "exit" at the end is a bit awkward to me.
Correct, but
"return" would work just as fine.
With the cross-refs added:
Reviewed-by: Laszlo Ersek <lersek(a)redhat.com>
Thanks,
I pushed this as commit 29ce734ad.
Rich.
Laszlo
> diff --git a/server/public.c b/server/public.c
> index 705ac3a47..a1ba603d4 100644
> --- a/server/public.c
> +++ b/server/public.c
> @@ -76,6 +76,7 @@
> #include "ascii-string.h"
> #include "get_current_dir_name.h"
> #include "getline.h"
> +#include "human-size.h"
> #include "poll.h"
> #include "realpath.h"
> #include "strndup.h"
> @@ -343,83 +344,16 @@ nbdkit_parse_uint64_t (const char *what, const char *str,
uint64_t *rp)
> NBDKIT_DLL_PUBLIC int64_t
> nbdkit_parse_size (const char *str)
> {
> + const char *error, *pstr;
> int64_t size;
> - char *end;
> - uint64_t scale = 1;
>
> - /* Disk sizes cannot usefully exceed off_t (which is signed) and
> - * cannot be negative. */
> - /* XXX Should we also parse things like '1.5M'? */
> - /* XXX Should we allow hex? If so, hex cannot use scaling suffixes,
> - * because some of them are valid hex digits */
> - errno = 0;
> - size = strtoimax (str, &end, 10);
> - if (str == end) {
> - nbdkit_error ("could not parse size string (%s)", str);
> - return -1;
> - }
> - if (size < 0) {
> - nbdkit_error ("size cannot be negative (%s)", str);
> - return -1;
> - }
> - if (errno) {
> - nbdkit_error ("size (%s) exceeds maximum value", str);
> - return -1;
> - }
> -
> - switch (*end) {
> - /* No suffix */
> - case '\0':
> - end--; /* Safe, since we already filtered out empty string */
> - break;
> -
> - /* Powers of 1024 */
> - case 'e': case 'E':
> - scale *= 1024;
> - /* fallthru */
> - case 'p': case 'P':
> - scale *= 1024;
> - /* fallthru */
> - case 't': case 'T':
> - scale *= 1024;
> - /* fallthru */
> - case 'g': case 'G':
> - scale *= 1024;
> - /* fallthru */
> - case 'm': case 'M':
> - scale *= 1024;
> - /* fallthru */
> - case 'k': case 'K':
> - scale *= 1024;
> - /* fallthru */
> - case 'b': case 'B':
> - break;
> -
> - /* "sectors", ie. units of 512 bytes, even if that's not the
real
> - * sector size */
> - case 's': case 'S':
> - scale = 512;
> - break;
> -
> - default:
> - nbdkit_error ("could not parse size: unknown suffix '%s'",
end);
> - return -1;
> - }
> -
> - /* XXX Maybe we should support 'MiB' as a synonym for 'M'; and
'MB'
> - * for powers of 1000, for similarity to GNU tools. But for now,
> - * anything beyond 'M' is dropped. */
> - if (end[1]) {
> - nbdkit_error ("could not parse size: unknown suffix '%s'",
end);
> - return -1;
> - }
> -
> - if (INT64_MAX / scale < size) {
> - nbdkit_error ("overflow computing size (%s)", str);
> + size = human_size_parse (str, &error, &pstr);
> + if (size == -1) {
> + nbdkit_error ("%s: %s", error, pstr);
> return -1;
> }
>
> - return size * scale;
> + return size;
> }
>
> NBDKIT_DLL_PUBLIC int
> diff --git a/.gitignore b/.gitignore
> index 49af3998f..04fdcd723 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -40,6 +40,7 @@ plugins/*/*.3
> /common/include/test-ascii-string
> /common/include/test-byte-swapping
> /common/include/test-checked-overflow
> +/common/include/test-human-size
> /common/include/test-isaligned
> /common/include/test-ispowerof2
> /common/include/test-iszero
--
Richard Jones, Virtualization Group, Red Hat
http://people.redhat.com/~rjones
Read my programming and virtualization blog:
http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines. Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v