Similar to Perl s/// but lacks backreferences.
---
common/mlpcre/PCRE.ml | 25 +++++++++++++++++++++++++
common/mlpcre/PCRE.mli | 13 +++++++++++++
common/mlpcre/pcre_tests.ml | 23 ++++++++++++++++++++++-
3 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 5269d41f8..0eb7eb2ec 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -27,5 +27,30 @@ external matches : regexp -> string -> bool =
"guestfs_int_pcre_matches"
external sub : int -> string = "guestfs_int_pcre_sub"
external subi : int -> int * int = "guestfs_int_pcre_subi"
+(* [replace ?global patt subst subj] returns [subj] with the first
+ * match of [patt] (every match if [global]) replaced by [subst];
+ * [subj] is returned unchanged if [patt] does not match.
+ *
+ * After an empty match one byte of input is copied through before
+ * searching again, so patterns such as "x*" cannot loop forever.
+ *)
+let replace ?(global = false) patt subst subj =
+  let buf = Buffer.create (String.length subj) in
+  let rec loop s =
+    let len = String.length s in
+    if not (matches patt s) then Buffer.add_string buf s
+    else (
+      let i1, i2 = subi 0 in (* start / end offsets of the match in s *)
+      Buffer.add_substring buf s 0 i1;
+      Buffer.add_string buf subst;
+      if not global then Buffer.add_substring buf s i2 (len - i2)
+      else if i2 > i1 then loop (String.sub s i2 (len - i2))
+      else if i1 < len then (
+        Buffer.add_char buf s.[i1];
+        loop (String.sub s (i1+1) (len - i1 - 1)))
+    )
+  in
+  loop subj; Buffer.contents buf
+
let () =
Callback.register_exception "PCRE.Error" (Error ("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 02f16d19d..634cc600c 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -91,3 +91,16 @@ val subi : int -> int * int
for exact details).
If there was no nth substring then this raises [Not_found]. *)
+
+val replace : ?global:bool -> regexp -> string -> string -> string
+(** [replace ?global patt subst subj] performs a search and replace
+    on the subject string ([subj]).  Where [patt] matches the
+    string, [subst] is substituted; if it never matches, [subj] is
+    returned unchanged.  This works similarly to Perl's [s///].
+
+    The [?global] flag defaults to false, so only the first
+    instance of [patt] in the string is replaced.  If set to true
+    then every instance of [patt] in the string is replaced; each
+    further search is run on the text after the previous match.
+
+    Backreferences are not allowed; captures in [patt] are ignored. *)
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 316a4348e..b5f712d20 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -28,6 +28,12 @@ let matches re str =
eprintf " %b\n%!" r;
r
+let replace ?(global = false) patt subst subj = (* traced PCRE.replace *)
+  eprintf "PCRE.replace global:%b <patt> %s %s ->%!" global subst subj;
+  let r = PCRE.replace ~global patt subst subj in
+  eprintf " %s\n%!" r; (* echo the result before it reaches the assert *)
+  r
+
let sub i =
eprintf "PCRE.sub %d ->%!" i;
let r = PCRE.sub i in
@@ -45,6 +51,7 @@ let () =
let re0 = compile "a+b" in
let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
+ let re3 = compile "[^A-Za-z0-9_]" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
@@ -71,7 +78,21 @@ let () =
assert (sub 0 = "a");
assert (subi 0 = (2, 3));
assert (subi 1 = (2, 3));
- assert (subi 2 = (3, 3))
+ assert (subi 2 = (3, 3));
+
+    assert (replace re0 "dd" "abcabcaabccca" = "ddcabcaabccca");
+    assert (replace ~global:true re0 "dd" "abcabcaabccca" = "ddcddcddccca");
+
+    (* This example copies a usage from customize/firstboot.ml.
+     * "\xc2\xa3" is UTF-8 for the GBP sign.  Ideally PCRE would
+     * recognize that this is a single character, however doing that
+     * would involve passing the PCRE_UTF8 flag when compiling
+     * patterns, and that could be problematic if PCRE was built
+     * without Unicode support (XXX).
+     *)
+    assert (replace ~global:true re3 "-" "this is a\xc2\xa3funny.name?"
+            (* = "this-is-a-funny-name-" if UTF-8 worked *)
+            = "this-is-a--funny-name-");
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
--
2.13.2