Fast testing whether the AIO buffer (or regions within it) contain all
zeroes, which allows Python code to quickly do sparsification when
copying.
This includes the iszero.h header from nbdkit which is distributed
under a compatible license.
---
common/include/Makefile.am | 5 +--
common/include/iszero.h | 63 +++++++++++++++++++++++++++++++++++++
generator/generator | 17 ++++++++--
python/Makefile.am | 3 +-
python/handle.c | 47 +++++++++++++++++++++++++++
python/t/580-aio-is-zero.py | 53 +++++++++++++++++++++++++++++++
6 files changed, 183 insertions(+), 5 deletions(-)
diff --git a/common/include/Makefile.am b/common/include/Makefile.am
index 89cc61e..79f0351 100644
--- a/common/include/Makefile.am
+++ b/common/include/Makefile.am
@@ -1,5 +1,5 @@
# nbd client library in userspace
-# Copyright (C) 2013-2019 Red Hat Inc.
+# Copyright (C) 2013-2020 Red Hat Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -18,4 +18,5 @@
include $(top_srcdir)/subdir-rules.mk
EXTRA_DIST = \
- byte-swapping.h
+ byte-swapping.h \
+ iszero.h
diff --git a/common/include/iszero.h b/common/include/iszero.h
new file mode 100644
index 0000000..7fdbb52
--- /dev/null
+++ b/common/include/iszero.h
@@ -0,0 +1,63 @@
+/* nbdkit
+ * Copyright (C) 2018 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_ISZERO_H
+#define NBDKIT_ISZERO_H
+
+#include <string.h>
+#include <stdbool.h>
+
+/* Return true iff the buffer is all zero bytes.
+ *
+ * The clever approach here was suggested by Eric Blake. See:
+ *
https://www.redhat.com/archives/libguestfs/2017-April/msg00171.html
+ *
https://rusty.ozlabs.org/?p=560
+ *
+ * See also:
+ *
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69908
+ */
+static inline bool __attribute__((__nonnull__ (1)))
+is_zero (const char *buffer, size_t size)
+{
+ size_t i;
+ const size_t limit = size < 16 ? size : 16;
+
+ for (i = 0; i < limit; ++i)
+ if (buffer[i])
+ return false;
+ if (size != limit)
+ return ! memcmp (buffer, buffer + 16, size - 16);
+
+ return true;
+}
+
+#endif /* NBDKIT_ISZERO_H */
diff --git a/generator/generator b/generator/generator
index 31fe9dd..20fd872 100755
--- a/generator/generator
+++ b/generator/generator
@@ -5014,7 +5014,8 @@ raise_exception ()
"alloc_aio_buffer";
"aio_buffer_from_bytearray";
"aio_buffer_to_bytearray";
- "aio_buffer_size" ] @ List.map fst handle_calls);
+ "aio_buffer_size";
+ "aio_buffer_is_zero" ] @ List.map fst handle_calls);
pr "\n";
pr "#endif /* LIBNBD_METHODS_H */\n"
@@ -5044,7 +5045,8 @@ let generate_python_libnbdmod_c () =
"alloc_aio_buffer";
"aio_buffer_from_bytearray";
"aio_buffer_to_bytearray";
- "aio_buffer_size" ] @ List.map fst handle_calls);
+ "aio_buffer_size";
+ "aio_buffer_is_zero" ] @ List.map fst handle_calls);
pr " { NULL, NULL, 0, NULL }\n";
pr "};\n";
pr "\n";
@@ -5667,6 +5669,17 @@ class Buffer (object):
'''return the size of an AIO buffer'''
return libnbdmod.aio_buffer_size (self._o)
+ def is_zero (self, offset=0, size=-1):
+ '''
+ Returns true if and only if the buffer contains zero bytes.
+ Note that a freshly allocated buffer is uninitialized, not zero.
+
+ By default this tests the whole buffer, but you can restrict
+ the test to a sub-range of the buffer using the optional
+ offset and size parameters.
+ '''
+ return libnbdmod.aio_buffer_is_zero (self._o, offset, size)
+
class NBD (object):
'''NBD handle'''
diff --git a/python/Makefile.am b/python/Makefile.am
index 48aabea..c621eb7 100644
--- a/python/Makefile.am
+++ b/python/Makefile.am
@@ -50,7 +50,8 @@ libnbdmod_la_SOURCES = \
utils.c
libnbdmod_la_CPPFLAGS = \
$(PYTHON_CFLAGS) \
- -I$(top_srcdir)/include
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/common/include
libnbdmod_la_CFLAGS = \
$(WARNINGS_CFLAGS)
libnbdmod_la_LIBADD = \
diff --git a/python/handle.c b/python/handle.c
index ef81ab8..da0c417 100644
--- a/python/handle.c
+++ b/python/handle.c
@@ -35,6 +35,8 @@
#include <libnbd.h>
+#include "iszero.h"
+
#include "methods.h"
static inline PyObject *
@@ -213,3 +215,48 @@ nbd_internal_py_aio_buffer_size (PyObject *self, PyObject *args)
return PyLong_FromSsize_t (buf->len);
}
+
+PyObject *
+nbd_internal_py_aio_buffer_is_zero (PyObject *self, PyObject *args)
+{
+ PyObject *obj;
+ struct py_aio_buffer *buf;
+ Py_ssize_t offset, size;
+
+ if (!PyArg_ParseTuple (args,
+ (char *) "Onn:nbd_internal_py_aio_buffer_size",
+ &obj, &offset, &size))
+ return NULL;
+
+ if (size == 0)
+ Py_RETURN_TRUE;
+
+ buf = nbd_internal_py_get_aio_buffer (obj);
+ if (buf == NULL)
+ return NULL;
+
+ /* Check the bounds of the offset. */
+ if (offset < 0 || offset > buf->len) {
+ PyErr_SetString (PyExc_IndexError, "offset out of range");
+ return NULL;
+ }
+
+ /* Compute or check the length. */
+ if (size == -1)
+ size = buf->len - offset;
+ else if (size < 0) {
+ PyErr_SetString (PyExc_IndexError,
+ "size cannot be negative, "
+ "except -1 to mean to the end of the buffer");
+ return NULL;
+ }
+ else if ((size_t) offset + size > buf->len) {
+ PyErr_SetString (PyExc_IndexError, "size out of range");
+ return NULL;
+ }
+
+ if (is_zero (buf->data + offset, size))
+ Py_RETURN_TRUE;
+ else
+ Py_RETURN_FALSE;
+}
diff --git a/python/t/580-aio-is-zero.py b/python/t/580-aio-is-zero.py
new file mode 100644
index 0000000..5bda9ff
--- /dev/null
+++ b/python/t/580-aio-is-zero.py
@@ -0,0 +1,53 @@
+# libnbd Python bindings
+# Copyright (C) 2010-2020 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# This tests the nbd.Buffer is_zero method. We can do this entirely
+# without using the NBD protocol.
+
+import os
+import nbd
+
+# Simplest case: A Buffer initialized with zeros should be zero.
+ba = bytearray (2**20)
+buf = nbd.Buffer.from_bytearray (ba)
+assert buf.is_zero ()
+
+# The above buffer is 2**20 (= 1MB), slices of it should also be zero.
+for i in range (0, 7):
+ assert buf.is_zero (i * 2**17, 2**17)
+
+# A Buffer initialized with non-zeroes should not be zero.
+ba = bytearray (b'\xff') * 2**20
+buf = nbd.Buffer.from_bytearray (ba)
+assert not buf.is_zero ()
+
+# Slices should not be zero.
+for i in range (0, 15):
+ assert not buf.is_zero (i * 2**16, 2**16)
+
+# Buffer with a block of non-zeroes and block of zeroes.
+ba = bytearray (b'\xff') * 2**20 + bytearray (2**20)
+buf = nbd.Buffer.from_bytearray (ba)
+assert not buf.is_zero ()
+assert not buf.is_zero (0, 2**20)
+assert buf.is_zero (2**20)
+assert not buf.is_zero (2**19)
+assert buf.is_zero (2**20, 2**19)
+assert not buf.is_zero (2**20-1, 1)
+assert buf.is_zero (2**20, 1)
+assert not buf.is_zero (0, 1)
+assert buf.is_zero (2**21-1, 1)
--
2.24.1