On 9/21/19 6:56 AM, Richard W.M. Jones wrote:
sscanf is sadly not safe (because it doesn't handle integer
overflow
correctly), and strto*l functions are a pain to use correctly.
Therefore add some functions to hide the pain of parsing integers from
the command line.
The simpler uses of sscanf and strto*l are replaced. There are still
a few where we are using advanced features of sscanf.
I note you are not permitting specification of a base (that is,
hard-coding base=0 in the underlying strto*l); that appears to be fine
for all users thus converted.
=head1 PARSING COMMAND LINE PARAMETERS
+=head2 Parsing numbers
+
+There are several functions for parsing numbers. These all deal
+correctly with overflow, out of range and parse errors, and you should
+use them instead of unsafe functions like L<sscanf(3)>, L<atoi(3)> and
+similar.
+
+ int nbdkit_parse_int (const char *what, const char *str, int *r);
+ int nbdkit_parse_unsigned (const char *what,
+ const char *str, unsigned *r);
+ int nbdkit_parse_long (const char *what, const char *str, long *r);
+ int nbdkit_parse_unsigned_long (const char *what,
+ const char *str, unsigned long *r);
Do we really want to encourage the use of parse_long and
parse_unsigned_long? Those differ between 32- and 64-bit platforms, at
which point, it's often better to explicitly call out the intended size.
It would also make a bit more sense that you don't offer
parse_long_long, if the explicit-sized versions are good enough when
64-bit values are needed.
It's also a bit odd to not offer a parse_long_long for strtoll, altagain
arguing that the explicit parse_int64_t is probably better to encourage.
+ int nbdkit_parse_int16_t (const char *what,
+ const char *str, int16_t *r);
+ int nbdkit_parse_uint16_t (const char *what,
+ const char *str, uint16_t *r);
I guess we can add [u]int8_t variants later if a need arises? Or should
we do it now?
+ int nbdkit_parse_int32_t (const char *what,
+ const char *str, int32_t *r);
+ int nbdkit_parse_uint32_t (const char *what,
+ const char *str, uint32_t *r);
+ int nbdkit_parse_int64_t (const char *what,
+ const char *str, int64_t *r);
+ int nbdkit_parse_uint64_t (const char *what,
+ const char *str, uint64_t *r);
+ int nbdkit_parse_ssize_t (const char *what,
+ const char *str, ssize_t *r);
+ int nbdkit_parse_size_t (const char *what,
+ const char *str, size_t *r);
[s]size_t is another one that can differ between 32- and 64-bit
platforms; but I'm fine with having these function, especially since
there are still efforts being considered about an alternative kernel ABI
with 64-bit integers but 32-bit size_t.
+
+Parse string C<str> into an integer of various types. These functions
+parse a decimal, hexadecimal (C<"0x...">) or octal
(C<"0...">) number.
+
+On success the functions return C<0> and set C<*r> to the parsed value
+(unless C<*r == NULL> in which case the result is discarded). On
+error, C<nbdkit_error> is called and the functions return C<-1>.
+
+The C<what> parameter is printed in error messages to provide context.
+It should usually be a short descriptive string of what you are trying
+to parse, eg:
+
+ if (nbdkit_parse_int ("random seed", argv[1], &seed) == -1)
+ return -1;
+
+might print an error:
+
+ random seed: could not parse number: "lalala"
+
Do we want to make it part of our documented contract that *r is
unchanged if we return -1?
+++ b/filters/cache/cache.c
@@ -70,7 +70,7 @@ static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
unsigned blksize;
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
int64_t max_size = -1;
-int hi_thresh = 95, lo_thresh = 80;
+unsigned hi_thresh = 95, lo_thresh = 80;
So you're fixing typing errors at the same time as using the new helper
functions. Makes the patch a bit bigger to review, but it's not worth
the effort to split it now.
bool cache_on_read = false;
static int cache_flush (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle,
uint32_t flags, int *err);
@@ -129,22 +129,20 @@ cache_config (nbdkit_next_config *next, void *nxdata,
return 0;
}
else if (strcmp (key, "cache-high-threshold") == 0) {
- if (sscanf (value, "%d", &hi_thresh) != 1) {
- nbdkit_error ("invalid cache-high-threshold parameter: %s", value);
+ if (nbdkit_parse_unsigned ("cache-high-threshold",
+ value, &hi_thresh) == -1)
Semantic change; previously "010" was '10' and "08" was
'8', now "010"
is '8' and "08" is a failure (because we used "%d" instead of
"%i").
(Multiple spots in this patch). Not a big deal to me, but we may want
to call it out in the commit message as intentional.
+++ b/include/nbdkit-common.h
@@ -84,6 +84,28 @@ extern void nbdkit_vdebug (const char *msg, va_list args);
extern char *nbdkit_absolute_path (const char *path);
extern int64_t nbdkit_parse_size (const char *str);
extern int nbdkit_parse_bool (const char *str);
+extern int nbdkit_parse_int (const char *what, const char *str,
+ int *r);
Should we mark 'what' and 'str' as being non-null parameters? But you
definitely document 'r' as valid when NULL.
+++ b/plugins/curl/curl.c
@@ -204,7 +204,9 @@ curl_config (const char *key, const char *value)
}
else if (strcmp (key, "timeout") == 0) {
- if (sscanf (value, "%ld", &timeout) != 1 || timeout < 0) {
+ if (nbdkit_parse_long ("timeout", value, &timeout) == -1)
+ return -1;
+ if (timeout < 0) {
nbdkit_error ("'timeout' must be 0 or a positive timeout in
seconds");
return -1;
Why not change to 'unsigned timeout'? 'long' is rather oversized on
64-bit platforms, and keeping things signed doesn't buy much compared to
just going unsigned. (Yep, that's me trying to get rid of a need for
parse_long).
+++ b/plugins/partitioning/partitioning.c
@@ -211,10 +211,8 @@ partitioning_config (const char *key, const char *value)
else if (strcmp (key, "mbr-id") == 0) {
if (strcasecmp (value, "default") == 0)
mbr_id = DEFAULT_MBR_ID;
- else if (sscanf (value, "%i", &mbr_id) != 1) {
- nbdkit_error ("could not parse mbr-id: %s", value);
+ else if (nbdkit_parse_int ("mbr-id", value, &mbr_id) == -1)
return -1;
Is 'int' the best type for this?
+++ b/plugins/ssh/ssh.c
@@ -59,7 +59,7 @@ static bool verify_remote_host = true;
static const char *known_hosts = NULL;
static const char **identity = NULL;
static size_t nr_identities = 0;
-static long timeout = 0;
+static unsigned long timeout = 0;
Again, a 32-bit timeout regardless of 32- or 64-bit platform is probably
saner.
+++ b/plugins/vddk/vddk.c
@@ -271,10 +271,8 @@ vddk_config (const char *key, const char *value)
return -1;
}
else if (strcmp (key, "nfchostport") == 0) {
- if (sscanf (value, "%d", &nfc_host_port) != 1) {
- nbdkit_error ("cannot parse nfchostport: %s", value);
+ if (nbdkit_parse_int ("nfchostport", value, &nfc_host_port) == -1)
return -1;
- }
Is 'int' the right type here?
+++ b/server/public.c
@@ -45,6 +45,7 @@
#include <string.h>
#include <unistd.h>
#include <limits.h>
+#include <ctype.h>
#include <termios.h>
#include <errno.h>
#include <poll.h>
@@ -108,6 +109,223 @@ nbdkit_realpath (const char *path)
return ret;
}
+/* Common code for parsing integers. */
+#define PARSE_COMMON_TAIL \
+ if (errno != 0) { \
+ nbdkit_error ("%s: could not parse number: \"%s\": %m",
\
+ what, str); \
+ return -1; \
+ } \
+ if (end == str) { \
+ nbdkit_error ("%s: empty string where we expected a number", \
+ what); \
+ return -1; \
+ } \
+ if (*end) { \
+ nbdkit_error ("%s: could not parse number: \"%s\": trailing
garbage", \
+ what, str); \
+ return -1; \
+ } \
+ \
+ if (rp) \
+ *rp = r; \
+ return 0
+
A bit more painful for gdb debugging to have this much in a macro, but
not too bad given our unit tests exercise it.
Leaves *rp untouched if we return -1; I don't mind that, but think we
should document it as intentional.
+/* Functions for parsing signed integers. */
+int
+nbdkit_parse_int (const char *what, const char *str, int *rp)
+{
+ long r;
+ char *end;
+
+ errno = 0;
+ r = strtol (str, &end, 0);
+#if INT_MAX != LONG_MAX
+ if (r < INT_MIN || r > INT_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
Looks correct.
+
+int
+nbdkit_parse_long (const char *what, const char *str, long *rp)
+{
+ long r;
+ char *end;
+
+ errno = 0;
+ r = strtol (str, &end, 0);
+ PARSE_COMMON_TAIL;
+}
I argue we don't want this one.
+
+int
+nbdkit_parse_int16_t (const char *what, const char *str, int16_t *rp)
+{
+ long r;
+ char *end;
+
+ errno = 0;
+ r = strtol (str, &end, 0);
+ if (r < INT16_MIN || r > INT16_MAX)
+ errno = ERANGE;
+ PARSE_COMMON_TAIL;
+}
No int8_t counterpart? But looks correct.
+
+int
+nbdkit_parse_int32_t (const char *what, const char *str, int32_t *rp)
+{
+ long r;
+ char *end;
+
+ errno = 0;
+ r = strtol (str, &end, 0);
+#if INT32_MAX != LONG_MAX
+ if (r < INT32_MIN || r > INT32_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
+
+int
+nbdkit_parse_int64_t (const char *what, const char *str, int64_t *rp)
+{
+ long long r;
+ char *end;
+
+ errno = 0;
+ r = strtoll (str, &end, 0);
+#if INT64_MAX != LONGLONG_MAX
+ if (r < INT64_MIN || r > INT64_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
Probably a dead #if, but doesn't hurt to leave it in.
+
+int
+nbdkit_parse_ssize_t (const char *what, const char *str, ssize_t *rp)
+{
+ long long r;
+ char *end;
+
+ errno = 0;
+ r = strtoll (str, &end, 0);
+#if SSIZE_MAX != LONGLONG_MAX
+#ifndef SSIZE_MIN
+#define SSIZE_MIN (-SSIZE_MAX-1)
Assumes twos-complement, but that's sane enough for the platforms we target.
+#endif
+ if (r < SSIZE_MIN || r > SSIZE_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
+
+/* Functions for parsing unsigned integers. */
+
+/* strtou* functions have surprising behaviour if the first character
+ * (after whitespace) is '-', so deny this.
+ */
+#define PARSE_ERROR_IF_NEGATIVE \
+ do { \
+ const char *p = str; \
+ while (isspace (*p)) \
+ p++; \
+ if (*p == '-') { \
+ nbdkit_error ("%s: negative numbers are not allowed", what); \
+ return -1; \
+ } \
+ } while (0)
+
Agree. However, why not modify str in-place, so that the subsequent
strtoul() does not have to repeat our prefix scan?
+int
+nbdkit_parse_unsigned (const char *what, const char *str, unsigned *rp)
+{
+ unsigned long r;
+ char *end;
+
+ PARSE_ERROR_IF_NEGATIVE;
+ errno = 0;
+ r = strtoul (str, &end, 0);
+#if UINT_MAX != ULONG_MAX
+ if (r > UINT_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
Much easier to write for both 32- and 64-bit platforms when you
pre-filter out the surprising '-' behavior. Looks correct.
+
+int
+nbdkit_parse_unsigned_long (const char *what, const char *str,
+ unsigned long *rp)
+{
+ unsigned long r;
+ char *end;
+
+ PARSE_ERROR_IF_NEGATIVE;
+ errno = 0;
+ r = strtoul (str, &end, 0);
+ PARSE_COMMON_TAIL;
+}
Again, I argue we don't need this one.
+
+int
+nbdkit_parse_uint16_t (const char *what, const char *str, uint16_t *rp)
+{
+ unsigned long r;
+ char *end;
+
+ PARSE_ERROR_IF_NEGATIVE;
+ errno = 0;
+ r = strtoul (str, &end, 0);
+ if (r > UINT16_MAX)
+ errno = ERANGE;
+ PARSE_COMMON_TAIL;
+}
+
+int
+nbdkit_parse_uint32_t (const char *what, const char *str, uint32_t *rp)
+{
+ unsigned long r;
+ char *end;
+
+ PARSE_ERROR_IF_NEGATIVE;
+ errno = 0;
+ r = strtoul (str, &end, 0);
+#if UINT32_MAX != ULONG_MAX
+ if (r > UINT32_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
+
+int
+nbdkit_parse_uint64_t (const char *what, const char *str, uint64_t *rp)
+{
+ unsigned long long r;
+ char *end;
+
+ PARSE_ERROR_IF_NEGATIVE;
+ errno = 0;
+ r = strtoull (str, &end, 0);
+#if UINT64_MAX != ULONGLONG_MAX
+ if (r > UINT64_MAX)
+ errno = ERANGE;
+#endif
Another probably dead #if, but I'm fine with leaving it.
+ PARSE_COMMON_TAIL;
+}
+
+int
+nbdkit_parse_size_t (const char *what, const char *str, size_t *rp)
+{
+ unsigned long long r;
+ char *end;
+
+ PARSE_ERROR_IF_NEGATIVE;
+ errno = 0;
+ r = strtoull (str, &end, 0);
+#if SIZE_MAX != ULONGLONG_MAX
+ if (r > SIZE_MAX)
+ errno = ERANGE;
+#endif
+ PARSE_COMMON_TAIL;
+}
+
/* Parse a string as a size with possible scaling suffix, or return -1
* after reporting the error.
*/
Hmm, seeing this comment, do we really need raw [s]size_t parsing, or
can we get away with scaled parsing anywhere that sizes are intended?
But the code looks correct if you want to keep it.
diff --git a/server/socket-activation.c b/server/socket-activation.c
index b9db67c..50df4ca 100644
--- a/server/socket-activation.c
+++ b/server/socket-activation.c
@@ -59,11 +59,8 @@ get_socket_activation (void)
s = getenv ("LISTEN_PID");
if (s == NULL)
return 0;
- if (sscanf (s, "%u", &pid) != 1) {
- fprintf (stderr, "%s: malformed %s environment variable (ignored)\n",
- program_name, "LISTEN_PID");
+ if (nbdkit_parse_unsigned ("LISTEN_PID", s, &pid) == -1)
return 0;
- }
Possible semantic change: on parse failure, are we still printing
something to stderr, or is nbdkit_error() not effective this early?
if (pid != getpid ()) {
fprintf (stderr, "%s: %s was not for us (ignored)\n",
program_name, "LISTEN_PID");
@@ -73,11 +70,8 @@ get_socket_activation (void)
s = getenv ("LISTEN_FDS");
if (s == NULL)
return 0;
- if (sscanf (s, "%u", &nr_fds) != 1) {
- fprintf (stderr, "%s: malformed %s environment variable (ignored)\n",
- program_name, "LISTEN_FDS");
+ if (nbdkit_parse_unsigned ("LISTEN_FDS", s, &nr_fds) == -1)
return 0;
- }
and again
+++ b/server/test-public.c
+static bool
+test_nbdkit_parse_ints (void)
+{
+ bool pass = true;
+ char s[64];
+
+#define PARSE(TYPE, FORMAT, TEST, RET, EXPECTED) \
+ do { \
+ error_flagged = false; \
+ TYPE i = 0; \
+ int r = nbdkit_parse_##TYPE ("test", TEST, &i); \
+ if (r != RET || i != EXPECTED) { \
+ fprintf (stderr, \
+ "%s: %d: wrong parse for %s: r=%d i=" FORMAT "\n",
\
+ __FILE__, __LINE__, TEST, r, i); \
+ pass = false; \
+ } \
+ if ((r == -1) != error_flagged) { \
+ fprintf (stderr, \
+ "%s: %d: wrong error message handling for %s\n", \
+ __FILE__, __LINE__, TEST); \
+ pass = false; \
+ } \
+ } while (0)
+#define OK 0
+#define BAD -1
Nice.
+
+ /* Test the basic parsing of decimals, hexadecimal, octal and
+ * negative numbers.
+ */
+ PARSE (int, "%d", "0", OK, 0);
+ PARSE (int, "%d", "1", OK, 1);
+ PARSE (int, "%d", "99", OK, 99);
+ PARSE (int, "%d", "0x1", OK, 1);
+ PARSE (int, "%d", "0xf", OK, 15);
+ PARSE (int, "%d", "0x10", OK, 16);
+ PARSE (int, "%d", "0xff", OK, 255);
+ PARSE (int, "%d", "01", OK, 1);
+ PARSE (int, "%d", "07", OK, 7);
+ PARSE (int, "%d", "010", OK, 8);
+ PARSE (int, "%d", "+0", OK, 0);
+ PARSE (int, "%d", "+99", OK, 99);
+ PARSE (int, "%d", "+0xf", OK, 15);
+ PARSE (int, "%d", "+010", OK, 8);
+ PARSE (int, "%d", "-0", OK, 0);
+ PARSE (int, "%d", "-99", OK, -99);
+ PARSE (int, "%d", "-0xf", OK, -15);
I'd test upper case at least once, such as -0XF here. Testing leading
whitespace would also be nice.
+ PARSE (int, "%d", "-010", OK, -8);
+ PARSE (int, "%d", "2147483647", OK, 2147483647); /* INT_MAX */
+ PARSE (int, "%d", "-2147483648", OK, -2147483648); /* INT_MIN */
+ PARSE (int, "%d", "0x7fffffff", OK, 0x7fffffff);
+ PARSE (int, "%d", "-0x80000000", OK, -0x80000000);
Matches our earlier assumptions of twos-complement targets.
+
+ /* Test basic error handling. */
+ PARSE (int, "%d", "", BAD, 0);
+ PARSE (int, "%d", "-", BAD, 0);
I might also test "- 0" as BAD.
+ PARSE (int, "%d", "+", BAD, 0);
+ PARSE (int, "%d", "++", BAD, 0);
+ PARSE (int, "%d", "++0", BAD, 0);
+ PARSE (int, "%d", "--0", BAD, 0);
+ PARSE (int, "%d", "0x", BAD, 0);
I'd also test "08" as BAD.
+ PARSE (int, "%d", "42x", BAD, 0);
+ PARSE (int, "%d", "42-", BAD, 0);
+ PARSE (int, "%d", "garbage", BAD, 0);
+ PARSE (int, "%d", "2147483648", BAD, 0); /* INT_MAX + 1 */
+ PARSE (int, "%d", "-2147483649", BAD, 0); /* INT_MIN - 1 */
+ PARSE (int, "%d", "999999999999999999999999", BAD, 0);
+ PARSE (int, "%d", "-999999999999999999999999", BAD, 0);
It might also be worth testing "0x1p1" as invalid (and not an accidental
hex-float). Other possible non-integer strings to be sure we don't
accidentally parse include "inf", "nan", "0.0".
+
+ /* Test nbdkit_parse_unsigned. */
+ PARSE (unsigned, "%u", "0", OK, 0);
+ PARSE (unsigned, "%u", "1", OK, 1);
+ PARSE (unsigned, "%u", "99", OK, 99);
+ PARSE (unsigned, "%u", "0x1", OK, 1);
+ PARSE (unsigned, "%u", "0xf", OK, 15);
+ PARSE (unsigned, "%u", "0x10", OK, 16);
+ PARSE (unsigned, "%u", "0xff", OK, 255);
+ PARSE (unsigned, "%u", "01", OK, 1);
+ PARSE (unsigned, "%u", "07", OK, 7);
+ PARSE (unsigned, "%u", "010", OK, 8);
+ PARSE (unsigned, "%u", "+0", OK, 0);
+ PARSE (unsigned, "%u", "+99", OK, 99);
+ PARSE (unsigned, "%u", "+0xf", OK, 15);
+ PARSE (unsigned, "%u", "+010", OK, 8);
+ PARSE (unsigned, "%u", "-0", BAD, 0);
+ PARSE (unsigned, "%u", "-99", BAD, 0);
+ PARSE (unsigned, "%u", "-0xf", BAD, 0);
+ PARSE (unsigned, "%u", "-010", BAD, 0);
I'd also test at least " 0" and " -0" (or similar), to prove we
handled
leading spaces correctly.
+ PARSE (unsigned, "%u", "2147483647", OK,
2147483647); /* INT_MAX */
+ PARSE (unsigned, "%u", "-2147483648", BAD, 0); /* INT_MIN */
+ PARSE (unsigned, "%u", "0x7fffffff", OK, 0x7fffffff);
+ PARSE (unsigned, "%u", "-0x80000000", BAD, 0);
+
+ /* Test nbdkit_parse_long. */
+ PARSE (long, "%ld", "0", OK, 0);
+ assert (snprintf (s, sizeof s, "%" PRIi64, (int64_t) LONG_MAX) != -1);
+ PARSE (long, "%ld", s, OK, LONG_MAX);
+ assert (snprintf (s, sizeof s, "%" PRIi64, (int64_t) LONG_MIN) != -1);
+ PARSE (long, "%ld", s, OK, LONG_MIN);
+ PARSE (long, "%ld", "999999999999999999999999", BAD, 0);
+ PARSE (long, "%ld", "-999999999999999999999999", BAD, 0);
snprintf() side effects inside assert(). Tsk tsk - the test will now
fail under -DNDEBUG. And that's part of why I don't think we want to
expose parse_long.
+
+ /* Test nbdkit_parse_ssize_t. */
+ PARSE (ssize_t, "%zd", "0", OK, 0);
+ assert (snprintf (s, sizeof s, "%" PRIi64, (int64_t) SSIZE_MAX) != -1);
Again, bad side effects. But I'm less sure whether to avoid exposing
parse_ssize_t.
+ PARSE (ssize_t, "%zd", s, OK, SSIZE_MAX);
+#ifndef SSIZE_MIN
+#define SSIZE_MIN (-SSIZE_MAX-1)
+#endif
+ assert (snprintf (s, sizeof s, "%" PRIi64, (int64_t) SSIZE_MIN) != -1);
+ PARSE (ssize_t, "%zd", s, OK, SSIZE_MIN);
+ PARSE (ssize_t, "%zd", "999999999999999999999999", BAD, 0);
+ PARSE (ssize_t, "%zd", "-999999999999999999999999", BAD, 0);
+
+ /* Test nbdkit_parse_size_t. */
+ PARSE (size_t, "%zu", "0", OK, 0);
+ assert (snprintf (s, sizeof s, "%" PRIu64, (uint64_t) SIZE_MAX) != -1);
+ PARSE (size_t, "%zu", s, OK, SIZE_MAX);
+ PARSE (size_t, "%zu", "999999999999999999999999", BAD, 0);
+ PARSE (size_t, "%zu", "-1", BAD, 0);
+
+#undef PARSE
+#undef OK
+#undef BAD
+ return pass;
+}
A few tweaks still needed, but overall looking good.
+++ b/server/usergroup.c
@@ -97,7 +97,7 @@ parseuser (const char *id)
saved_errno = errno;
- if (sscanf (id, "%d", &val) == 1)
+ if (nbdkit_parse_int ("parseuser", id, &val) == 0)
Is 'int' still the right type for this?
return val;
fprintf (stderr, "%s: -u option: %s is not a valid user name or uid",
@@ -125,7 +125,7 @@ parsegroup (const char *id)
saved_errno = errno;
- if (sscanf (id, "%d", &val) == 1)
+ if (nbdkit_parse_int ("parsegroup", id, &val) == 0)
return val;
fprintf (stderr, "%s: -g option: %s is not a valid group name or gid",
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3226
Virtualization:
qemu.org |
libvirt.org