On 11/18/2015 01:03 AM, Richard W.M. Jones wrote:
 This simple parser has (limited) understanding of the Windows '*.inf'
 file format.  This is a Windows config file with some peculiarities.
 This commit also has a unit test.
 ---
   po/POTFILES-ml        |   1 +
   v2v/Makefile.am       |   5 +-
   v2v/v2v_unit_tests.ml | 104 +++++++++++++++++++++++++++++++++++-
   v2v/windows_inf.ml    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++
   v2v/windows_inf.mli   |  58 ++++++++++++++++++++
   5 files changed, 308 insertions(+), 3 deletions(-)
   create mode 100644 v2v/windows_inf.ml
   create mode 100644 v2v/windows_inf.mli
 diff --git a/po/POTFILES-ml b/po/POTFILES-ml
 index c02ffc0..88db39b 100644
 --- a/po/POTFILES-ml
 +++ b/po/POTFILES-ml
 @@ -127,4 +127,5 @@ v2v/v2v.ml
   v2v/v2v_unit_tests.ml
   v2v/vCenter.ml
   v2v/windows.ml
 +v2v/windows_inf.ml
   v2v/xml.ml
 diff --git a/v2v/Makefile.am b/v2v/Makefile.am
 index 5dfef6e..c46594c 100644
 --- a/v2v/Makefile.am
 +++ b/v2v/Makefile.am
 @@ -69,6 +69,7 @@ SOURCES_MLI = \
   	utils.mli \
   	vCenter.mli \
   	windows.mli \
 +	windows_inf.mli \
   	xml.mli
   
   SOURCES_ML = \
 @@ -82,8 +83,9 @@ SOURCES_ML = \
   	DOM.ml \
   	changeuid.ml \
   	OVF.ml \
 -	linux.ml \
 +	windows_inf.ml \
   	windows.ml \
 +	linux.ml \
   	modules_list.ml \
   	input_disk.ml \
   	input_libvirtxml.ml \
 @@ -309,6 +311,7 @@ v2v_unit_tests_BOBJECTS = \
   	utils.cmo \
   	DOM.cmo \
   	OVF.cmo \
 +	windows_inf.cmo \
   	windows.cmo \
   	v2v_unit_tests.cmo
   v2v_unit_tests_XOBJECTS = $(v2v_unit_tests_BOBJECTS:.cmo=.cmx)
 diff --git a/v2v/v2v_unit_tests.ml b/v2v/v2v_unit_tests.ml
 index a2dca32..169eea9 100644
 --- a/v2v/v2v_unit_tests.ml
 +++ b/v2v/v2v_unit_tests.ml
 @@ -18,13 +18,21 @@
   
   (* This file tests individual virt-v2v functions. *)
   
 +open Printf
   open OUnit2
 +
 +open Common_utils
 +
   open Types
   
 -open Printf
 -
   external identity : 'a -> 'a = "%identity"
   
 +let (//) = Filename.concat
 +
 +let srcdir =
 +  try Sys.getenv "srcdir"
 +  with Not_found -> failwith "environment variable $srcdir must be set"
 +
   let inspect_defaults = {
     i_type = ""; i_distro = ""; i_arch = "";
     i_major_version = 0; i_minor_version = 0;
 @@ -126,6 +134,97 @@ let test_drive_index ctx =
     assert_raises exn (fun () -> Utils.drive_index "Z");
     assert_raises exn (fun () -> Utils.drive_index "aB")
   
 +(* Test parsing a [*.inf] file. *)
 +let test_windows_inf_of_string ctx =
 +  let printer = Windows_inf.to_string in
 +
 +  (* There is nothing special about this choice.  It is just a driver
 +   * [*.inf] file picked at random.
 +   *)
 +  let path = srcdir // ".." // "test-data" // "fake-virtio-win" //
 +               "cd" // "Balloon" // "2k12" // "amd64" // "balloon.inf" in
 +
 +  let sections = Windows_inf.load path in
 +
 +  let expected = [
 +    "version", [
 +      "signature", "\"$WINDOWS NT$\"";
 +      "class", "System";
 +      "classguid", "{4d36e97d-e325-11ce-bfc1-08002be10318}";
 +      "provider", "%RHEL%";
 +      "driverver", "12/04/2014,62.71.104.9600";
 +      "catalogfile", "Balloon.cat";
 +      "driverpackagetype", "PlugAndPlay";
 +      "driverpackagedisplayname", "%BALLOON.DeviceDesc%";
 +      "pnplockdown", "1";
 +    ];
 +    "destinationdirs", [
 +      "defaultdestdir", "12";
 +    ];
 +    "sourcedisksnames", [
 +      "1", "%DiskId1%,,,\"\"";
 +    ];
 +    "sourcedisksfiles", [
 +      "balloon.sys", "1,,";
 +    ];
 +    "manufacturer", [
 +      "%rhel%", "Standard,NTamd64";
 +    ];
 +    "standard", [
 +      "%balloon.devicedesc%", "BALLOON_Device, PCI\\VEN_1AF4&DEV_1002&SUBSYS_00051AF4&REV_00";
 +    ];
 +    "standard.ntamd64", [
 +      "%balloon.devicedesc%", "BALLOON_Device, PCI\\VEN_1AF4&DEV_1002&SUBSYS_00051AF4&REV_00";
 +    ];
 +    "balloon_device.nt", [
 +      "copyfiles", "Drivers_Dir";
 +    ];
 +    "drivers_dir", [];
 +    "balloon_device.nt.services", [
 +      "addservice", "BALLOON,%SPSVCINST_ASSOCSERVICE%, BALLOON_Service_Inst, BALLOON_Logging_Inst";
 +    ];
 +    "balloon_service_inst", [
 +      "displayname", "%BALLOON.SVCDESC%";
 +      "servicetype", "1";
 +      "starttype", "3";
 +      "errorcontrol", "1";
 +      "servicebinary", "%12%\\balloon.sys";
 +    ];
 +    "balloon_logging_inst", [
 +      "addreg", "BALLOON_Logging_Inst_AddReg";
 +    ];
 +    "balloon_logging_inst_addreg", [];
 +    "destinationdirs", [
 +      "balloon_device_coinstaller_copyfiles", "11";
 +    ];
 +    "balloon_device.nt.coinstallers", [
 +      "addreg", "BALLOON_Device_CoInstaller_AddReg";
 +      "copyfiles", "BALLOON_Device_CoInstaller_CopyFiles";
 +    ];
 +    "balloon_device_coinstaller_addreg", [];
 +    "balloon_device_coinstaller_copyfiles", [];
 +    "sourcedisksfiles", [
 +      "wdfcoinstaller01011.dll", "1";
 +    ];
 +    "balloon_device.nt.wdf", [
 +      "kmdfservice", "BALLOON, BALLOON_wdfsect";
 +    ];
 +    "balloon_wdfsect", [
 +      "kmdflibraryversion", "1.11";
 +    ];
 +    "strings", [
 +      "spsvcinst_assocservice", "0x00000002";
 +      "rhel", "\"Red Hat, Inc.\"";
 +      "diskid1", "\"VirtIO Balloon Installation Disk #1\"";
 +      "balloon.devicedesc", "\"VirtIO Balloon Driver\"";
 +      "balloon.svcdesc", "\"VirtIO Balloon Service\"";
 +      "classname", "\"VirtIO Balloon Device\"";
 +    ];
 +  ] in
 +
 +  assert_equal ~printer expected sections
 +
 +(* Test the code which matches [*.inf] files to Windows guests. *)
   let test_virtio_iso_path_matches_guest_os ctx =
     (* Windows OSes fake inspection data. *)
     let make_win name major minor variant arch = {
 @@ -772,6 +871,7 @@ let suite =
         "OVF.get_ostype" >:: test_get_ostype;
         "Utils.drive_name" >:: test_drive_name;
         "Utils.drive_index" >:: test_drive_index;
 +      "Windows_inf.of_string" >:: test_windows_inf_of_string;
         "Windows.virtio_iso_path_matches_guest_os" >::
           test_virtio_iso_path_matches_guest_os;
       ]
 diff --git a/v2v/windows_inf.ml b/v2v/windows_inf.ml
 new file mode 100644
 index 0000000..2066a2e
 --- /dev/null
 +++ b/v2v/windows_inf.ml
 @@ -0,0 +1,143 @@
 +(* virt-v2v
 + * Copyright (C) 2015 Red Hat Inc.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with this program; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + *)
 +
 +open Printf
 +
 +open Common_utils
 +
 +type t = section list
 +and section = string * data list
 +and data = string * string
 +
 +let crlf_rex = Str.regexp "\r?\n"
 +
 +(* Match [[header]] in a Windows [*.inf] file. *)
 +let section_header_rex =
 +  Str.regexp "^[ \t]*\\[[ \t]*\\(.*\\)[ \t]*\\][ \t]*$"
 +
 +let match_section_header line = Str.string_match section_header_rex line 0
 +let not_section_header line = not (match_section_header line)
 +
 +(* Match [key = value] in a Windows [*.inf] file. *)
 +let key_value_rex =
 +  Str.regexp_case_fold
 +    "^[ \t]*\\([a-z0-9%_.]+\\)[ \t]*=[ \t]*\\(.*\\)[ \t]*$"
 +
 +(* Match comment preceded by whitespace (so comments can be removed). *)
 +let comment_rex = Str.regexp "[ \t]*;.*"
 +
 +(* Parse a Windows [*.inf] file into headers and section lines. *)
 +let of_string content =
 +  (* Split up the inf file (possibly with DOS line endings) into lines. *)
 +  let lines = Str.split crlf_rex content in
 +
 +  (* Split the file into section headers + section content. *)
 +  let rec loop = function
 +    | [] -> []
 +    | header :: xs when match_section_header header ->
 +       let header = Str.matched_group 1 header in
 +       let lines = takewhile not_section_header xs in
 +       let ys = dropwhile not_section_header xs in
 +       (header, lines) :: loop ys
 +    | xs ->
 +       (* Put all initial lines before the first section into a
 +        * section with no name.
 +        *)
 +       let lines = takewhile not_section_header xs in
 +       let ys = dropwhile not_section_header xs in
 +       ("", lines) :: loop ys
 +  in
 +  let sections = loop lines in
 +
 +  (* Split the lines that match "key = value" into [(key, value)] pairs.
 +   * Ignore any other lines.
 +   *)
 +  let sections = List.map (
 +    fun (header, lines) ->
 +      let lines = filter_map (
 +        fun line ->
 +          if Str.string_match key_value_rex line 0 then (
 +            let key = Str.matched_group 1 line in
 +            let value = Str.matched_group 2 line in
 +            Some (key, value)
 +          )
 +          else None (* ignore the non-matching line *)
 +      ) lines in
 +      header, lines
 +  ) sections in
 +
 +  (* If the dummy section at the beginning is now completely empty,
 +   * remove it.
 +   *)
 +  let sections =
 +    match sections with
 +    | ("", []) :: sections -> sections
 +    | sections -> sections in
 +
 +  (* Remove any comments from values, conservatively though because
 +   * we don't really understand the value format.
 +   *)
 +  let sections = List.map (
 +    fun (header, lines) ->
 +      let lines = List.map (
 +        fun (key, value) ->
 +          let value =
 +            if String.contains value '"' then value
 +            else Str.replace_first comment_rex "" value in
 +          key, value
 +      ) lines in
 +      header, lines
 +  ) sections in
 +
 +  (* Normalize (by lowercasing) the section headers and keys (but not
 +   * the values).
 +   *)
 +  let sections = List.map (
 +    fun (header, lines) ->
 +      let header = String.lowercase_ascii header in
 +      let lines = List.map (
 +        fun (key, value) ->
 +          String.lowercase_ascii key, value
 +      ) lines in
 +      header, lines
 +  ) sections in
 +
 +  sections
 +
 +let find_section t section_name =
 +  let section_name = String.lowercase_ascii section_name in
 +  List.assoc section_name t
 +
 +let find_key t section_name key_name =
 +  let data = find_section t section_name in
 +  let key_name = String.lowercase_ascii key_name in
 +  List.assoc key_name data
 +
 +let load filename =
 +  of_string (read_whole_file filename)
 +
 +let rec to_string sections =
 +  String.concat "\n" (List.map string_of_section sections)
 +
 +and string_of_section (header, body) =
 +  let header = sprintf "[%s]" header in
 +  let body = List.map string_of_key_value body in
 +  String.concat "\n" (header :: body)
 +
 +and string_of_key_value (key, value) =
 +  sprintf "%s = %s" key value
 diff --git a/v2v/windows_inf.mli b/v2v/windows_inf.mli
 new file mode 100644
 index 0000000..1cb8040
 --- /dev/null
 +++ b/v2v/windows_inf.mli
 @@ -0,0 +1,58 @@
 +(* virt-v2v
 + * Copyright (C) 2009-2015 Red Hat Inc.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with this program; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + *)
 +
 +(** Handle Windows driver [*.inf] files. *)
 +
 +type t = section list
 +(** Type of a parsed Windows driver [*.inf] file. *)
 +
 +and section = string * data list
 +(** A single section consists of a header and a list of lines.  If
 +    the file doesn't start with a section header, then the initial
 +    section has a dummy header name [""].
 +
 +    The section header is always normalized to lowercase ASCII. *)
 +
 +and data = string * string
 +(** A [key = value] pair appearing within a section body.  The key
 +    (but {i not} the value) is always normalized to lowercase ASCII. *)
 +
 +val of_string : string -> t
 +(** Parse an [*.inf] file from the string.  No parse errors are
 +    possible since this parser accepts anything as a possible [*.inf]
 +    file. *)
 +
 +val load : string -> t
 +(** Same as {!of_string} except we load the content from
 +    a host file. *)
 +
 +val to_string : t -> string
 +(** Convert an inf file back to a string.  This should probably only
 +    be used for debugging, since we don't preserve comments and it's
 +    not tested that Windows would be able to parse what we write out. *)
 +
 +val find_section : t -> string -> data list
 +(** [find_section t section_name] finds and returns a section by name.
 +
 +    Raises [Not_found] if not found. *)
 +
 +val find_key : t -> string -> string -> string
 +(** [find_key t section_name key_name] finds and returns a key within
 +    a particular section.
 +
 +    Raises [Not_found] if not found. *) 
adding roman to CC: