[Libguestfs] [PATCH libnbd INCOMPLETE 2/3] fuzzing: Add fuzzed data provider equivalent in C

Tuesday, 5 March 2024

This adds a fuzzed data provider, the C equivalent of:

https://github.com/llvm/llvm-project/blob/main/compiler-rt/include/fuzzer/FuzzedDataProvider.h
https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider

This is not very well documented except by reading the LLVM sources,
but the general idea is that we define a format which is robust
against the sort of input changes that fuzzers do, such as flipping
bits, or inserting and removing bytes.

This means, for example, that we cannot use any <length> + <data>
fields (eg. for binary data) because a fuzzer would easily break this,
either by flipping bits in the <length> field without "knowing" that
it needs to adjust the <data>, or by inserting or removing bytes from
<data> without "knowing" that it needs to update the <length>.
Various other techniques are employed to improve robustness which are
described in the code.

Note the license is BSD because we want to use this in future in
nbdkit.
---
 fuzzing/Makefile.am            |  16 ++-
 fuzzing/fuzzed-data-provider.h | 223 +++++++++++++++++++++++++++++++++
 2 files changed, 236 insertions(+), 3 deletions(-)

diff --git a/fuzzing/Makefile.am b/fuzzing/Makefile.am
index 450aaf1..1d878ee 100644
--- a/fuzzing/Makefile.am
+++ b/fuzzing/Makefile.am
@@ -29,10 +29,20 @@ if ENABLE_LIBFUZZER
 noinst_PROGRAMS += libnbd-libfuzzer-test
 endif
 
-libnbd_fuzz_wrapper_SOURCES = libnbd-fuzz-wrapper.c
-libnbd_fuzz_wrapper_CPPFLAGS = -I$(top_srcdir)/include
+libnbd_fuzz_wrapper_SOURCES = \
+	libnbd-fuzz-wrapper.c \
+	fuzzed-data-provider.h \
+	$(NULL)
+libnbd_fuzz_wrapper_CPPFLAGS = \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/common/include \
+	-I$(top_srcdir)/common/vector \
+	$(NULL)
 libnbd_fuzz_wrapper_CFLAGS = $(WARNINGS_CFLAGS)
-libnbd_fuzz_wrapper_LDADD = $(top_builddir)/lib/libnbd.la
+libnbd_fuzz_wrapper_LDADD = \
+	$(top_builddir)/common/utils/libutils.la \
+	$(top_builddir)/lib/libnbd.la \
+	$(NULL)
 
 libnbd_libfuzzer_test_SOURCES = libnbd-libfuzzer-test.c
 libnbd_libfuzzer_test_CPPFLAGS = -I$(top_srcdir)/include
diff --git a/fuzzing/fuzzed-data-provider.h b/fuzzing/fuzzed-data-provider.h
new file mode 100644
index 0000000..8f836f6
--- /dev/null
+++ b/fuzzing/fuzzed-data-provider.h
@@ -0,0 +1,223 @@
+/* fuzzed-data-provider.h
+ * Copyright Red Hat
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* This is a C equivalent of the LLVM header:
+ * https://github.com/llvm/llvm-project/blob/main/compiler-rt/include/fuzzer/FuzzedDataProvider.h
+ * See also:
+ * https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
+ */
+
+#ifndef FUZZED_DATA_PROVIDER_H
+#define FUZZED_DATA_PROVIDER_H
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <minmax.h>
+#include <vector.h>
+#include <string-vector.h>
+#include <const-string-vector.h>
+
+/* Before calling any other functions, exactly one of your source
+ * files must define:
+ *
+ * byte_vector fuzzed_data;
+ *
+ * and 'fuzzed_data' must be initialized with the full data read from
+ * the fuzzer.  This header only declares it 'extern'.
+ */
+DEFINE_VECTOR_TYPE (byte_vector, uint8_t);
+extern byte_vector fuzzed_data;
+
+/* Report whether any fuzzer input is left to consume.  Use this to
+ * terminate a loop that pulls data with the other functions in this
+ * file: they keep returning something even when the input has run
+ * out, so the loop would otherwise never end.
+ *
+ * Implementation note: We actually consume bytes from the end of the
+ * fuzzer input first, as that is easier with our vector type.  This
+ * should make no practical difference.
+ */
+static inline bool
+more_fuzzed_data (void)
+{
+  return fuzzed_data.len != 0;
+}
+
+/* Consume and return a single byte of fuzzer input.
+ *
+ * The byte is taken from the tail of 'fuzzed_data', which shrinks by
+ * one.  Once the input is exhausted this returns zero and leaves
+ * 'fuzzed_data' untouched.
+ */
+static inline uint8_t
+fuzzed_data_consume_byte (void)
+{
+  if (fuzzed_data.len == 0)
+    return 0;
+
+  fuzzed_data.len--;
+  return fuzzed_data.ptr[fuzzed_data.len];
+}
+
+/* Fill 'result' (at least 'size' bytes long) with exactly 'size'
+ * bytes taken from the tail of the fuzzer input.
+ *
+ * If there is insufficient input, this pads the output with zeroes.
+ */
+static inline void
+fuzzed_data_consume_bytes (size_t size, void *result)
+{
+  uint8_t *r = result;
+  const size_t n = MIN (size, fuzzed_data.len);
+  /* Skip zero-length copies: passing a NULL pointer there is UB. */
+  if (n > 0)
+    memcpy (r, &fuzzed_data.ptr[fuzzed_data.len - n], n);
+  if (size > n)
+    memset (&r[n], 0, size - n);
+  fuzzed_data.len -= n;
+}
+
+/* Return exactly 'size' bytes, as a newly allocated vector of length
+ * 'size'.  If there is insufficient input, this pads the output with
+ * zeroes.  Aborts if the allocation fails.
+ */
+static inline byte_vector
+fuzzed_data_consume_bytes_as_vector (size_t size)
+{
+  byte_vector result = empty_vector;
+
+  /* We want to use reserve_exactly here so that ASAN can detect
+   * buffer overflow in the caller (and also because we are not
+   * expecting to need to expand this vector).
+   */
+  if (byte_vector_reserve_exactly (&result, size) == -1)
+    abort ();
+
+  fuzzed_data_consume_bytes (size, result.ptr);
+  result.len = size;    /* reserving does not update len */
+  return result;
+}
+
+/* Return a variable sized buffer of data.  The length is chosen by
+ * the fuzzer itself, using an encoding which is robust to normal
+ * fuzzer mutations.
+ *
+ * Implementation: Non-zero bytes are copied to the output as-is.  A
+ * zero byte followed by a non-zero byte terminates the buffer (the
+ * non-zero byte is left in the input for the next consumer).  Two
+ * zero bytes in a row are an "escape sequence" which stores a single
+ * zero byte in the output and carries on scanning.  Scanning also
+ * stops when the fuzzer input runs out.
+ *
+ * If there is no input, this returns a zero length buffer.
+ *
+ * NOTE(review): the 'size' parameter is currently unused.
+ */
+static inline byte_vector
+fuzzed_data_consume_buffer (size_t size)
+{
+  byte_vector buf = empty_vector;
+
+  while (more_fuzzed_data ()) {
+    uint8_t out = fuzzed_data_consume_byte ();
+
+    if (out == 0) {
+      /* A zero byte needs its successor to be interpreted.  A lone
+       * zero at the very end of the input is simply dropped.
+       */
+      if (!more_fuzzed_data ())
+        break;
+
+      if (fuzzed_data_consume_byte () != 0) {
+        /* End of the buffer marker = zero byte + non-zero byte.
+         * "Put back" the non-zero byte we just consumed.
+         */
+        fuzzed_data.len++;
+        break;
+      }
+      /* Escape sequence = zero byte + zero byte: emit one zero. */
+    }
+
+    if (byte_vector_append (&buf, out) == -1)
+      abort ();
+  }
+
+  return buf;
+}
+
+/* Return an unsigned integer between bounds [min, max] inclusive,
+ * aborting if min > max.  At most 8 bytes of input are consumed.
+ * If there is no more input, this returns 'min'.
+ */
+static inline uint64_t
+fuzzed_data_consume_uint64_t (uint64_t min, uint64_t max)
+{
+  uint64_t range, result = 0;
+  size_t offset_bits = 0;
+
+  if (min > max)
+    abort ();
+
+  range = max - min;
+  /* Consume one byte per 8 bits of 'range'.  Testing the bit count
+   * first avoids ever shifting a 64 bit value by 64 bits (UB). */
+  while (offset_bits < 64 && (range >> offset_bits) > 0) {
+    result = (result << 8) | fuzzed_data_consume_byte ();
+    offset_bits += 8;
+  }
+
+  /* Make sure the result is in range, but avoid division by 0 if
+   * range + 1 would overflow.
+   */
+  if (range < UINT64_MAX)
+    result = result % (range + 1);
+
+  return min + result;
+}
+
+/* As fuzzed_data_consume_uint64_t, but for unsigned [min..max]. */
+static inline unsigned
+fuzzed_data_consume_unsigned (unsigned min, unsigned max)
+{
+  return (unsigned) fuzzed_data_consume_uint64_t (min, max);
+}
+
+/* As fuzzed_data_consume_uint64_t, but returns an int in [0..max]. */
+static inline int
+fuzzed_data_consume_enum (int max)
+{
+  return (int) fuzzed_data_consume_uint64_t (0, (uint64_t) max);
+}
+
+#endif /* FUZZED_DATA_PROVIDER_H */
-- 
2.43.1

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

[Libguestfs] [PATCH libnbd INCOMPLETE 2/3] fuzzing: Add fuzzed data provider equivalent in C