This uses the gnulib TLS macros.
---
.gitignore | 2 +
Makefile.am | 1 +
bootstrap | 1 +
common/mlpcre/Makefile.am | 142 +++++++++++++++++++++++++++++
common/mlpcre/PCRE.ml | 32 +++++++
common/mlpcre/PCRE.mli | 74 ++++++++++++++++
common/mlpcre/dummy.c | 2 +
common/mlpcre/pcre-c.c | 212 ++++++++++++++++++++++++++++++++++++++++++++
common/mlpcre/pcre_tests.ml | 70 +++++++++++++++
configure.ac | 1 +
m4/.gitignore | 1 +
11 files changed, 538 insertions(+)
diff --git a/.gitignore b/.gitignore
index 4699933d3..0e7a649f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -123,6 +123,8 @@ Makefile.in
/common/errnostring/errnostring-gperf.gperf
/common/errnostring/errnostring.h
/common/miniexpect/miniexpect.3
+/common/mlpcre/.depend
+/common/mlpcre/pcre_tests
/common/mlprogress/.depend
/common/mlstdutils/.depend
/common/mlstdutils/bytes.ml
diff --git a/Makefile.am b/Makefile.am
index 84b00393d..fb4c99db5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -156,6 +156,7 @@ SUBDIRS += csharp
# OCaml tools. Note 'common/ml*', 'mllib' and 'customize'
contain
# shared code used by other OCaml tools, so these must come first.
if HAVE_OCAML
+SUBDIRS += common/mlpcre
SUBDIRS += common/mlprogress
SUBDIRS += common/mlvisit
SUBDIRS += common/mlxml
diff --git a/bootstrap b/bootstrap
index 77a95a25b..4e3d4bc51 100755
--- a/bootstrap
+++ b/bootstrap
@@ -95,6 +95,7 @@ symlinkat
sys_select
sys_types
sys_wait
+tls
vasprintf
vc-list-files
warnings
diff --git a/common/mlpcre/Makefile.am b/common/mlpcre/Makefile.am
new file mode 100644
index 000000000..aa638cd94
--- /dev/null
+++ b/common/mlpcre/Makefile.am
@@ -0,0 +1,142 @@
+# Bindings for Perl-compatible Regular Expressions.
+# Copyright (C) 2017 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+include $(top_srcdir)/subdir-rules.mk
+
+EXTRA_DIST = \
+ $(SOURCES_MLI) \
+ $(SOURCES_ML) \
+ $(SOURCES_C) \
+ pcre_tests.ml
+
+SOURCES_MLI = \
+ PCRE.mli
+
+SOURCES_ML = \
+ PCRE.ml
+
+SOURCES_C = \
+ pcre-c.c
+
+if HAVE_OCAML
+
+# We pretend that we're building a C library. automake handles the
+# compilation of the C sources for us. At the end we take the C
+# objects and OCaml objects and link them into the OCaml library.
+# This C library is never used.
+
+noinst_LIBRARIES = libmlpcre.a
+
+if !HAVE_OCAMLOPT
+MLPCRE_CMA = mlpcre.cma
+else
+MLPCRE_CMA = mlpcre.cmxa
+endif
+
+noinst_DATA = $(MLPCRE_CMA)
+
+libmlpcre_a_SOURCES = $(SOURCES_C)
+libmlpcre_a_CPPFLAGS = \
+ -I. \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/gnulib/lib -I$(top_builddir)/gnulib/lib \
+ -I$(top_srcdir)/common/utils -I$(top_builddir)/common/utils \
+ -I$(shell $(OCAMLC) -where)
+libmlpcre_a_CFLAGS = \
+ $(WARN_CFLAGS) $(WERROR_CFLAGS) \
+ -fPIC
+
+BOBJECTS = $(SOURCES_ML:.ml=.cmo)
+XOBJECTS = $(BOBJECTS:.cmo=.cmx)
+
+# -I $(top_builddir)/lib/.libs is a hack which forces corresponding -L
+# option to be passed to gcc, so we don't try linking against an
+# installed copy of libguestfs.
+OCAMLPACKAGES = \
+ -I $(top_builddir)/gnulib/lib/.libs \
+ -I $(top_builddir)/common/utils/.libs \
+ -I $(builddir)
+OCAMLPACKAGES_TESTS = $(MLPCRE_CMA)
+
+OCAMLFLAGS = $(OCAML_FLAGS) $(OCAML_WARN_ERROR)
+
+if !HAVE_OCAMLOPT
+OBJECTS = $(BOBJECTS)
+else
+OBJECTS = $(XOBJECTS)
+endif
+
+libmlpcre_a_DEPENDENCIES = $(OBJECTS)
+
+$(MLPCRE_CMA): $(OBJECTS) libmlpcre.a
+ $(OCAMLFIND) mklib $(OCAMLPACKAGES) \
+ $(OBJECTS) $(libmlpcre_a_OBJECTS) -cclib -lpcre -o mlpcre
+
+# Tests.
+
+pcre_tests_SOURCES = dummy.c
+pcre_tests_BOBJECTS = pcre_tests.cmo
+pcre_tests_XOBJECTS = $(pcre_tests_BOBJECTS:.cmo=.cmx)
+
+# Can't call the following as <test>_OBJECTS because automake gets confused.
+if !HAVE_OCAMLOPT
+pcre_tests_THEOBJECTS = $(pcre_tests_BOBJECTS)
+pcre_tests.cmo: OCAMLPACKAGES += $(OCAMLPACKAGES_TESTS)
+else
+pcre_tests_THEOBJECTS = $(pcre_tests_XOBJECTS)
+pcre_tests.cmx: OCAMLPACKAGES += $(OCAMLPACKAGES_TESTS)
+endif
+
+OCAMLLINKFLAGS = $(LINK_CUSTOM_OCAMLC_ONLY)
+
+pcre_tests_DEPENDENCIES = \
+ $(pcre_tests_THEOBJECTS) \
+ $(MLPCRE_CMA) \
+ $(top_srcdir)/ocaml-link.sh
+pcre_tests_LINK = \
+ $(top_srcdir)/ocaml-link.sh \
+ -cclib '-lutils -lpcre -lgnu' -- \
+ $(OCAMLFIND) $(BEST) $(OCAMLFLAGS) $(OCAMLLINKFLAGS) \
+ $(OCAMLPACKAGES) $(OCAMLPACKAGES_TESTS) \
+ $(pcre_tests_THEOBJECTS) -o $@
+
+TESTS_ENVIRONMENT = $(top_builddir)/run --test
+LOG_COMPILER = $(VG)
+
+check_PROGRAMS = pcre_tests
+TESTS = pcre_tests
+
+check-valgrind:
+ $(MAKE) VG="@VG@" check
+
+# Dependencies.
+depend: .depend
+
+.depend: $(wildcard $(abs_srcdir)/*.mli) $(wildcard $(abs_srcdir)/*.ml)
+ rm -f $@ $@-t
+ $(OCAMLFIND) ocamldep -I ../../ocaml -I $(abs_srcdir) $^ | \
+ $(SED) 's/ *$$//' | \
+ $(SED) -e :a -e '/ *\\$$/N; s/ *\\\n */ /; ta' | \
+ $(SED) -e 's,$(abs_srcdir)/,$(builddir)/,g' | \
+ sort > $@-t
+ mv $@-t $@
+
+-include .depend
+
+endif
+
+.PHONY: depend docs
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
new file mode 100644
index 000000000..94eea4b34
--- /dev/null
+++ b/common/mlpcre/PCRE.ml
@@ -0,0 +1,32 @@
+(* Bindings for Perl-compatible Regular Expressions.
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+(* Lightweight bindings for the PCRE library. *)
+
+exception Error of string * int
+
+type regexp
+
+external compile : string -> regexp = "guestfs_int_pcre_compile"
+
+external matches : regexp -> string -> bool = "guestfs_int_pcre_matches"
+
+external sub : int -> string = "guestfs_int_pcre_sub"
+
+let () =
+ Callback.register_exception "PCRE.Error" (Error ("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
new file mode 100644
index 000000000..882fb6a79
--- /dev/null
+++ b/common/mlpcre/PCRE.mli
@@ -0,0 +1,74 @@
+(* Bindings for Perl-compatible Regular Expressions.
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+(** Lightweight bindings for the PCRE library.
+
+ Note this is {i not} Markus Mottl's ocaml-pcre, and doesn't
+ work like that library.
+
+ To match a regular expression:
+
+{v
+let re = PCRE.compile "(a+)b"
+...
+
+if PCRE.matches re "ccaaaabb" then (
+ let whole = PCRE.sub 0 in (* returns "aaaab" *)
+ let first = PCRE.sub 1 in (* returns "aaaa" *)
+ ...
+)
+v}
+
+ Note that there is implicit global state stored between the
+ call to {!matches} and {!sub}. This is stored in thread
+ local storage so it is safe provided there are no other calls
+ to {!matches} in the same thread.
+*)
+
+exception Error of string * int
+(** PCRE error raised by various functions.
+
+ The string is the printable error message. The integer is one
+ of the negative [PCRE_*] error codes (see pcreapi(3)), but may
+ be 0 if there was no error code information. *)
+
+type regexp
+(** The type of a compiled regular expression. *)
+
+val compile : string -> regexp
+(** Compile a regular expression. This can raise {!Error}. *)
+
+val matches : regexp -> string -> bool
+(** Test whether the regular expression matches the string. This
+ returns true if the regexp matches or false otherwise.
+
+ This also saves any matched substrings in thread-local storage
+ until either the next call to {!matches} in the current thread
+ or the program exits.
+
+ This can raise {!Error} if PCRE returns an error. *)
+
+val sub : int -> string
+(** Return the nth sub matched by the previous call to
+ {!matches} in the current thread.
+
+ If [n == 0] it returns the whole matching part of the string.
+
+ If [n >= 1] it returns the nth substring.
+
+ If there was no nth substring then this raises [Not_found]. *)
diff --git a/common/mlpcre/dummy.c b/common/mlpcre/dummy.c
new file mode 100644
index 000000000..ebab6198c
--- /dev/null
+++ b/common/mlpcre/dummy.c
@@ -0,0 +1,2 @@
+/* Dummy source, to be used for OCaml-based tools with no C sources. */
+enum { foo = 1 };
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
new file mode 100644
index 000000000..15de16570
--- /dev/null
+++ b/common/mlpcre/pcre-c.c
@@ -0,0 +1,212 @@
+/* Bindings for Perl-compatible Regular Expressions.
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <pcre.h>
+
+#include <caml/alloc.h>
+#include <caml/callback.h>
+#include <caml/custom.h>
+#include <caml/fail.h>
+#include <caml/memory.h>
+#include <caml/mlvalues.h>
+
+#include "cleanups.h"
+
+#include "glthread/tls.h"
+
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
+/* Data on the most recent match is stored in this thread-local
+ * variable. It is freed either by the next call to PCRE.matches or
+ * by (clean) thread exit.
+ */
+static gl_tls_key_t last_match;
+
+struct last_match {
+ char *subject; /* subject string */
+ int *vec;
+ int r; /* value returned by pcre_exec */
+};
+
+static void
+free_last_match (struct last_match *data)
+{
+ if (data) {
+ free (data->subject);
+ free (data->vec);
+ free (data);
+ }
+}
+
+static void init (void) __attribute__((constructor));
+
+static void
+init (void)
+{
+ gl_tls_key_init (last_match, (void (*) (void *))free_last_match);
+}
+
+static void
+raise_error (const char *msg, int errcode)
+{
+ value *exn = caml_named_value ("PCRE.Error");
+ value args[2];
+
+ args[0] = caml_copy_string (msg);
+ args[1] = Val_int (errcode);
+ caml_raise_with_args (*exn, 2, args);
+}
+
+/* Wrap and unwrap pcre regular expression handles, with a finalizer. */
+#define Regexp_val(rv) (*(pcre **)Data_custom_val(rv))
+
+static void
+regexp_finalize (value rev)
+{
+ pcre *re = Regexp_val (rev);
+ if (re) pcre_free (re);
+}
+
+static struct custom_operations custom_operations = {
+ (char *) "pcre_custom_operations",
+ regexp_finalize,
+ custom_compare_default,
+ custom_hash_default,
+ custom_serialize_default,
+ custom_deserialize_default
+};
+
+static value
+Val_regexp (pcre *re)
+{
+ CAMLparam0 ();
+ CAMLlocal1 (rv);
+
+ rv = caml_alloc_custom (&custom_operations, sizeof (pcre *), 0, 1);
+ Regexp_val (rv) = re;
+
+ CAMLreturn (rv);
+}
+
+value
+guestfs_int_pcre_compile (value pattv)
+{
+ CAMLparam1 (pattv);
+ pcre *re;
+ const char *err;
+ int offset;
+
+ re = pcre_compile (String_val (pattv), 0, &err, &offset, NULL);
+ if (re == NULL)
+ raise_error (err, 0);
+
+ CAMLreturn (Val_regexp (re));
+}
+
+value
+guestfs_int_pcre_matches (value rev, value strv)
+{
+ CAMLparam2 (rev, strv);
+ pcre *re = Regexp_val (rev);
+ struct last_match *m, *oldm;
+ size_t len = caml_string_length (strv);
+ int capcount, r;
+ int veclen;
+
+ /* Calculate maximum number of substrings, and hence the vector needed. */
+ r = pcre_fullinfo (re, NULL, PCRE_INFO_CAPTURECOUNT, (int *) &capcount);
+ /* I believe that errors should never occur because of OCaml
+ * type safety, so we should abort here. If this ever happens
+ * we will need to look at it again.
+ */
+ assert (r == 0);
+ veclen = 3 * (1 + capcount);
+
+ m = malloc (sizeof *m);
+ if (m == NULL)
+ caml_raise_out_of_memory ();
+
+ /* We will need the original subject string when fetching
+ * substrings, so take a copy.
+ */
+ m->subject = malloc (len+1);
+ if (m->subject == NULL) {
+ free (m);
+ caml_raise_out_of_memory ();
+ }
+ memcpy (m->subject, String_val (strv), len+1);
+
+ m->vec = malloc (veclen * sizeof (int));
+ if (m->vec == NULL) {
+ free (m->subject);
+ free (m);
+ caml_raise_out_of_memory ();
+ }
+
+ m->r = pcre_exec (re, NULL, m->subject, len, 0, 0, m->vec, veclen);
+ if (m->r < 0 && m->r != PCRE_ERROR_NOMATCH)
+ /* The C code in lib/match.c ignores errors. */
+ raise_error ("pcre_exec", m->r);
+
+ /* This error would indicate that pcre_exec ran out of space in the
+ * vector. However if we are calculating the size of the vector
+ * correctly above, then this should never happen.
+ */
+ assert (m->r != 0);
+
+ /* Replace the old TLS match data. */
+ oldm = gl_tls_get (last_match);
+ free_last_match (oldm);
+ gl_tls_set (last_match, m);
+
+ CAMLreturn (m->r == PCRE_ERROR_NOMATCH ? Val_false : Val_true);
+}
+
+value
+guestfs_int_pcre_sub (value nv)
+{
+ CAMLparam1 (nv);
+ CAMLlocal1 (strv);
+ int len;
+ CLEANUP_FREE char *str = NULL;
+ struct last_match *m = gl_tls_get (last_match);
+
+ if (m == NULL)
+ raise_error ("PCRE.sub called without calling PCRE.matches", 0);
+
+ len = pcre_get_substring (m->subject, m->vec, m->r, Int_val (nv),
+ (const char **) &str);
+
+ if (len == PCRE_ERROR_NOSUBSTRING)
+ caml_raise_not_found ();
+
+ if (len < 0)
+ raise_error ("pcre_get_substring", len);
+
+ strv = caml_alloc_string (len);
+ memcpy (String_val (strv), str, len);
+ CAMLreturn (strv);
+}
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
new file mode 100644
index 000000000..59be2b36d
--- /dev/null
+++ b/common/mlpcre/pcre_tests.ml
@@ -0,0 +1,70 @@
+(* Test bindings for Perl-compatible Regular Expressions.
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+open Printf
+
+let compile patt =
+ eprintf "PCRE.compile %s\n%!" patt;
+ PCRE.compile patt
+
+let matches re str =
+ eprintf "PCRE.matches %s ->%!" str;
+ let r = PCRE.matches re str in
+ eprintf " %b\n%!" r;
+ r
+
+let sub i =
+ eprintf "PCRE.sub %d ->%!" i;
+ let r = PCRE.sub i in
+ eprintf " %s\n%!" r;
+ r
+
+let () =
+ try
+ let re0 = compile "a+b" in
+ let re1 = compile "(a+)b" in
+ let re2 = compile "(a+)(b*)" in
+
+ assert (matches re0 "ccaaabbbb" = true);
+ assert (sub 0 = "aaab");
+
+ assert (matches re0 "aaa" = false);
+
+ assert (matches re0 "xyz" = false);
+
+ assert (matches re0 "aaabc" = true);
+ assert (sub 0 = "aaab");
+
+ assert (matches re1 "ccaaabb" = true);
+ assert (sub 1 = "aaa");
+ assert (sub 0 = "aaab");
+
+ assert (matches re2 "ccabbc" = true);
+ assert (sub 1 = "a");
+ assert (sub 2 = "bb");
+ assert (sub 0 = "abb");
+
+ assert (matches re2 "ccac" = true);
+ assert (sub 1 = "a");
+ assert (sub 2 = "");
+ assert (sub 0 = "a");
+ with
+ | Not_found ->
+ failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
+ | PCRE.Error (msg, code) ->
+ failwith (sprintf "PCRE error: %s (PCRE error code %d)" msg code)
diff --git a/configure.ac b/configure.ac
index 3cf9d0a28..651b48b3b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -196,6 +196,7 @@ AC_CONFIG_FILES([Makefile
common/errnostring/Makefile
common/edit/Makefile
common/miniexpect/Makefile
+ common/mlpcre/Makefile
common/mlprogress/Makefile
common/mlstdutils/Makefile
common/mlstdutils/guestfs_config.ml
diff --git a/m4/.gitignore b/m4/.gitignore
index 07960ed7b..a84b22e5c 100644
--- a/m4/.gitignore
+++ b/m4/.gitignore
@@ -248,6 +248,7 @@
/thread.m4
/time_h.m4
/timespec.m4
+/tls.m4
/ttyname_r.m4
/ulonglong.m4
/ungetc.m4
--
2.13.2