This idea was previously proposed by Tomáš Golembiovský in
https://www.redhat.com/archives/libguestfs/2017-January/msg00138.html
---
common/mlstdutils/std_utils.ml | 28 ++++++++++++++++------------
common/mlstdutils/std_utils.mli | 11 ++++++++---
common/mlstdutils/std_utils_tests.ml | 29 +++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 15 deletions(-)
diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml
index b731b8fd5..37eef0348 100644
--- a/common/mlstdutils/std_utils.ml
+++ b/common/mlstdutils/std_utils.ml
@@ -147,18 +147,7 @@ module String = struct
done;
if not !r then s else Bytes.to_string b2
- let rec nsplit sep str =
- let len = length str in
- let seplen = length sep in
- let i = find str sep in
- if i = -1 then [str]
- else (
- let s' = sub str 0 i in
- let s'' = sub str (i+seplen) (len-i-seplen) in
- s' :: nsplit sep s''
- )
-
- let split sep str =
+ let rec split sep str =
let len = length sep in
let seplen = length str in
let i = find str sep in
@@ -167,6 +156,21 @@ module String = struct
sub str 0 i, sub str (i + len) (seplen - i - len)
)
+ and nsplit ?(max = 0) sep str =
+ if max < 0 then
+ invalid_arg "String.nsplit: max parameter should not be negative";
+
+ (* If we reached the limit, OR if the pattern does not match the string
+ * at all, return the rest of the string as a single element list.
+ *)
+ if max = 1 || find str sep = -1 then
+ [str]
+ else (
+ let s1, s2 = split sep str in
+ let max = if max = 0 then 0 else max - 1 in
+ s1 :: nsplit ~max sep s2
+ )
+
let rec lines_split str =
let buf = Buffer.create 16 in
let len = length str in
diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli
index d217e48d4..c08e51360 100644
--- a/common/mlstdutils/std_utils.mli
+++ b/common/mlstdutils/std_utils.mli
@@ -88,14 +88,19 @@ module String : sig
[str] with [s2]. *)
val replace_char : string -> char -> char -> string
(** Replace character in string. *)
- val nsplit : string -> string -> string list
- (** [nsplit sep str] splits [str] into multiple strings at each
- separator [sep]. *)
val split : string -> string -> string * string
(** [split sep str] splits [str] at the first occurrence of the
separator [sep], returning the part before and the part after.
If separator is not found, return the whole string and an
empty string. *)
+ val nsplit : ?max:int -> string -> string -> string list
+ (** [nsplit ?max sep str] splits [str] into multiple strings at each
+ separator [sep].
+
+ As with the Perl split function, you can give an optional
+ [?max] parameter to limit the number of strings returned. The
+ final element of the list will contain the remainder of the
+ input string. *)
val lines_split : string -> string list
(** [lines_split str] splits [str] into lines, keeping continuation
characters (i.e. [\] at the end of lines) into account. *)
diff --git a/common/mlstdutils/std_utils_tests.ml b/common/mlstdutils/std_utils_tests.ml
index ce49c7606..dcd237dab 100644
--- a/common/mlstdutils/std_utils_tests.ml
+++ b/common/mlstdutils/std_utils_tests.ml
@@ -18,6 +18,8 @@
(* This file tests the Std_utils module. *)
+open Printf
+
open OUnit2
open Std_utils
@@ -26,6 +28,7 @@ let assert_equal_string = assert_equal ~printer:(fun x -> x)
let assert_equal_int = assert_equal ~printer:(fun x -> string_of_int x)
let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x)
let assert_equal_stringlist = assert_equal ~printer:(fun x -> "(" ^ (String.escaped (String.concat "," x)) ^ ")")
+let assert_equal_stringpair = assert_equal ~printer:(fun (x, y) -> sprintf "%S, %S" x y)
let test_subdirectory ctx =
assert_equal_string "" (subdirectory "/foo" "/foo");
@@ -83,6 +86,30 @@ let test_string_find ctx =
assert_equal_int (-1) (String.find "" "baz");
assert_equal_int (-1) (String.find "foobar" "baz")
+(* Test Std_utils.String.split. *)
+let test_string_split ctx =
+ assert_equal_stringpair ("a", "b") (String.split " " "a b");
+ assert_equal_stringpair ("", "ab") (String.split " " " ab");
+ assert_equal_stringpair ("", "abc") (String.split "" "abc");
+ assert_equal_stringpair ("abc", "") (String.split " " "abc");
+ assert_equal_stringpair ("", "") (String.split " " "")
+
+(* Test Std_utils.String.nsplit. *)
+let test_string_nsplit ctx =
+ (* XXX Not clear if the next test case indicates an error in
+ * String.nsplit. However this is how it has historically worked.
+ *)
+ assert_equal_stringlist [""] (String.nsplit " " "");
+ assert_equal_stringlist ["abc"] (String.nsplit " " "abc");
+ assert_equal_stringlist ["a"; "b"; "c"] (String.nsplit " " "a b c");
+ assert_equal_stringlist ["a"; "b"; "c"; ""] (String.nsplit " " "a b c ");
+ assert_equal_stringlist [""; "a"; "b"; "c"] (String.nsplit " " " a b c");
+ assert_equal_stringlist [""; "a"; "b"; "c"; ""] (String.nsplit " " " a b c ");
+ assert_equal_stringlist ["a b c d"] (String.nsplit ~max:1 " " "a b c d");
+ assert_equal_stringlist ["a"; "b c d"] (String.nsplit ~max:2 " " "a b c d");
+ assert_equal_stringlist ["a"; "b"; "c d"] (String.nsplit ~max:3 " " "a b c d");
+ assert_equal_stringlist ["a"; "b"; "c"; "d"] (String.nsplit ~max:10 " " "a b c d")
+
(* Test Std_utils.String.lines_split. *)
let test_string_lines_split ctx =
assert_equal_stringlist [""] (String.lines_split "");
@@ -129,6 +156,8 @@ let suite =
"strings.is_prefix" >:: test_string_is_prefix;
"strings.is_suffix" >:: test_string_is_suffix;
"strings.find" >:: test_string_find;
+ "strings.split" >:: test_string_split;
+ "strings.nsplit" >:: test_string_nsplit;
"strings.lines_split" >:: test_string_lines_split;
"strings.span" >:: test_string_span;
"strings.chomp" >:: test_string_chomp;
--
2.13.2