Factor out the complex code that handles dealing with multiple
different OVA file formats into a separate Parse_ova module.
This is largely straightforward code refactoring -- there should be no
significant functional change.
However:
- Parse_ova now checks up front if the OVA contains any compressed
disks and avoids the tar optimization in that case. This is a
regression for the case of an OVA containing a mix of both
compressed and uncompressed disks (we expect this to be rare).
The change is nevertheless good because it reduces the coupling
between two parts of the code.
- I had to simplify an error message.
---
v2v/Makefile.am | 2 +
v2v/input_ova.ml | 375 +++++++++++++-----------------------------------------
v2v/parse_ova.ml | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++
v2v/parse_ova.mli | 73 +++++++++++
v2v/utils.ml | 59 ---------
v2v/utils.mli | 7 -
6 files changed, 523 insertions(+), 353 deletions(-)
diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index d832f75c0..c9ed1fc88 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -76,6 +76,7 @@ SOURCES_MLI = \
output_rhv_upload_plugin_source.mli \
output_rhv_upload_precheck_source.mli \
output_vdsm.mli \
+ parse_ova.mli \
parse_ovf_from_ova.mli \
parse_libvirt_xml.mli \
parse_vmx.mli \
@@ -99,6 +100,7 @@ SOURCES_ML = \
DOM.ml \
changeuid.ml \
parse_ovf_from_ova.ml \
+ parse_ova.ml \
create_ovf.ml \
linux.ml \
windows.ml \
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index f23a1f2a9..fc8fde4bc 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -20,242 +20,53 @@ open Printf
open Std_utils
open Tools_utils
-open Unix_utils
open Common_gettext.Gettext
open Types
-open Utils
+open Parse_ova
open Parse_ovf_from_ova
open Name_from_disk
-(* Return true if [libvirt] supports ["json:"] pseudo-URLs and accepts the
- * ["raw"] driver. Function also returns true if [libvirt] backend is not
- * used. This didn't work in libvirt < 3.1.0.
- *)
-let libvirt_supports_json_raw_driver () =
- if backend_is_libvirt () then (
- let sup = Libvirt_utils.libvirt_get_version () >= (3, 1, 0) in
- debug "libvirt supports \"raw\" driver in json URL: %B" sup;
- sup
- )
- else
- true
-
-let pigz_available =
- let test = lazy (shell_command "pigz --help >/dev/null 2>&1" = 0)
in
- fun () -> Lazy.force test
-
-let pxz_available =
- let test = lazy (shell_command "pxz --help >/dev/null 2>&1" = 0)
in
- fun () -> Lazy.force test
-
-let zcat_command_of_format = function
- | `GZip ->
- if pigz_available () then "pigz -c -d" else "gzip -c -d"
- | `XZ ->
- if pxz_available () then "pxz -c -d" else "xz -c -d"
-
-(* Untar part or all files from tar archive. If [paths] is specified it is
- * a list of paths in the tar archive.
- *)
-let untar ?format ?(paths = []) file outdir =
- let paths = String.concat " " (List.map quote paths) in
- let cmd =
- match format with
- | None ->
- sprintf "tar -xf %s -C %s %s"
- (quote file) (quote outdir) paths
- | Some ((`GZip|`XZ) as format) ->
- sprintf "%s %s | tar -xf - -C %s %s"
- (zcat_command_of_format format) (quote file)
- (quote outdir) paths in
- if shell_command cmd <> 0 then
- error (f_"error unpacking %s, see earlier error messages") file
-
-(* Untar only ovf and manifest from the archive *)
-let untar_metadata file outdir =
- let files = external_command (sprintf "tar -tf %s" (Filename.quote file)) in
- let files =
- List.filter_map (
- fun f ->
- if Filename.check_suffix f ".ovf" ||
- Filename.check_suffix f ".mf" then Some f
- else None
- ) files in
- untar ~paths:files file outdir
-
-(* Uncompress the first few bytes of [file] and return it as
- * [(bytes, len)].
- *)
-let uncompress_head format file =
- let cmd = sprintf "%s %s" (zcat_command_of_format format) (quote file) in
- let chan_out, chan_in, chan_err = Unix.open_process_full cmd [||] in
- let b = Bytes.create 512 in
- let len = input chan_out b 0 (Bytes.length b) in
- (* We're expecting the subprocess to fail because we close
- * the pipe early, so:
- *)
- ignore (Unix.close_process_full (chan_out, chan_in, chan_err));
- b, len
-
-(* Run [detect_file_type] on a compressed file, returning the
- * type of the uncompressed content (if known).
- *)
-let uncompressed_type format file =
- let head, headlen = uncompress_head format file in
- let tmpfile, chan =
- Filename.open_temp_file "ova.file." "" in
- output chan head 0 headlen;
- close_out chan;
- let ret = detect_file_type tmpfile in
- Sys.remove tmpfile;
- ret
-
-(* Find files in [dir] ending with [ext]. *)
-let find_files dir ext =
- let rec loop = function
- | [] -> []
- | dir :: rest ->
- let files = Array.to_list (Sys.readdir dir) in
- let files = List.map (Filename.concat dir) files in
- let dirs, files = List.partition Sys.is_directory files in
- let files =
- List.filter (fun x -> Filename.check_suffix x ext) files in
- files @ loop (rest @ dirs)
- in
- loop [dir]
-
-class input_ova ova =
- let tmpdir =
- let base_dir = (open_guestfs ())#get_cachedir () in
- let t = Mkdtemp.temp_dir ~base_dir "ova." in
- rmdir_on_exit t;
- t in
-object
+class input_ova ova = object
inherit input
method as_options = "-i ova " ^ ova
method source () =
(* Extract ova file. *)
- let exploded, partial =
- (* The spec allows a directory to be specified as an ova. This
- * is also pretty convenient.
- *)
- if is_directory ova then ova, false
- else (
- match detect_file_type ova with
- | `Tar ->
- (* Normal ovas are tar file (not compressed). *)
- if qemu_img_supports_offset_and_size () &&
- libvirt_supports_json_raw_driver () then (
- (* In newer QEMU we don't have to extract everything.
- * We can access disks inside the tar archive directly.
- *)
- untar_metadata ova tmpdir;
- tmpdir, true
- ) else (
- untar ova tmpdir;
- tmpdir, false
- )
+ let ova_t = parse_ova ova in
- | `Zip ->
- (* However, although not permitted by the spec, people ship
- * zip files as ova too.
- *)
- let cmd = [ "unzip" ] @
- (if verbose () then [] else [ "-q" ]) @
- [ "-j"; "-d"; tmpdir; ova ] in
- if run_command cmd <> 0 then
- error (f_"error unpacking %s, see earlier error messages") ova;
- tmpdir, false
+ (* Extract ovf file from ova. *)
+ let ovf = get_ovf_file ova_t in
- | (`GZip|`XZ) as format ->
- (match uncompressed_type format ova with
- | `Tar ->
- untar ~format ova tmpdir;
- tmpdir, false
- | `Zip | `GZip | `XZ | `Unknown ->
- error (f_"%s: unsupported file format\n\nFormats which we currently
understand for '-i ova' are: tar (uncompressed, compress with gzip or xz),
zip") ova
- )
+ (* Extract the manifest from *.mf files in the ova. *)
+ let manifest = get_manifest ova_t in
- | `Unknown ->
- error (f_"%s: unsupported file format\n\nFormats which we currently
understand for '-i ova' are: tar (uncompressed, compress with gzip or xz),
zip") ova
- ) in
-
- (* Exploded path must be absolute (RHBZ#1155121). *)
- let exploded = absolute_path exploded in
-
- (* If virt-v2v is running as root, and the backend is libvirt, then
- * we have to chmod the directory to 0755 and files to 0644
- * so it is readable by qemu.qemu. This is libvirt bug RHBZ#890291.
- *)
- if Unix.geteuid () = 0 && backend_is_libvirt () then (
- warning (f_"making OVA directory public readable to work around libvirt bug
https://bugzilla.redhat.com/1045069");
- let cmd = [ "chmod"; "-R"; "go=u,go-w"; exploded ] @
- if partial then [ ova ] else [] in
- ignore (run_command cmd)
- );
-
- (* Search for the ovf file. *)
- let ovf = find_files exploded ".ovf" in
- let ovf =
- match ovf with
- | [] ->
- error (f_"no .ovf file was found in %s") ova
- | [x] -> x
- | _ :: _ ->
- error (f_"more than one .ovf file was found in %s") ova in
-
- (* Read any .mf (manifest) files and verify sha1. *)
- let mf = find_files exploded ".mf" in
- let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$" in
+ (* Verify checksums of files listed in the manifest. *)
List.iter (
- fun mf ->
- debug "processing manifest %s" mf;
- let mf_folder = Filename.dirname mf in
- let mf_subfolder = subdirectory exploded mf_folder in
- with_open_in mf (
- fun chan ->
- let rec loop () =
- let line = input_line chan in
- if PCRE.matches rex line then (
- let mode = PCRE.sub 1
- and disk = PCRE.sub 2
- and expected = PCRE.sub 3 in
- let csum = Checksums.of_string mode expected in
- match
- if partial then
- Checksums.verify_checksum csum
- ~tar:ova (mf_subfolder // disk)
- else
- Checksums.verify_checksum csum (mf_folder // disk)
- with
- | Checksums.Good_checksum -> ()
- | Checksums.Mismatched_checksum (_, actual) ->
- error (f_"checksum of disk %s does not match manifest %s (actual
%s(%s) = %s, expected %s(%s) = %s)")
- disk mf mode disk actual mode disk expected
- | Checksums.Missing_file ->
- (* RHBZ#1570407: Some OVA files generated by VMware
- * reference non-existent components in the *.mf file.
- * Generate a warning and ignore it.
- *)
- warning (f_"%s has a checksum for non-existent file %s
(ignored)")
- mf disk
- )
- else
- warning (f_"unable to parse line from manifest file: %S")
line;
- loop ()
- in
- (try loop () with End_of_file -> ())
- )
- ) mf;
-
- let ovf_folder = Filename.dirname ovf in
+ fun (file_ref, csum) ->
+ let filename, r =
+ match file_ref with
+ | LocalFile filename ->
+ filename, Checksums.verify_checksum csum filename
+ | TarFile (tar, filename) ->
+ filename, Checksums.verify_checksum csum ~tar filename in
+ match r with
+ | Checksums.Good_checksum -> ()
+ | Checksums.Mismatched_checksum (_, actual) ->
+ error (f_"checksum of disk %s does not match manifest (actual = %s,
expected = %s)")
+ filename actual (Checksums.string_of_csum_t csum)
+ | Checksums.Missing_file ->
+ (* RHBZ#1570407: Some OVA files generated by VMware
+ * reference non-existent components in the *.mf file.
+ * Generate a warning and ignore it.
+ *)
+ warning (f_"manifest has a checksum for non-existent file %s
(ignored)")
+ filename
+ ) manifest;
(* Parse the ovf file. *)
- let name, memory, vcpu, cpu_topology, firmware,
- disks, removables, nics =
+ let name, memory, vcpu, cpu_topology, firmware, disks, removables, nics =
parse_ovf_from_ova ovf in
let name =
@@ -265,81 +76,71 @@ object
name_from_disk ova
| Some name -> name in
- let disks = List.map (
- fun ({ href; compressed } as disk) ->
- let partial =
- if compressed && partial then (
- (* We cannot access compressed disk inside the tar;
- * we have to extract it.
- *)
- untar ~paths:[(subdirectory exploded ovf_folder) // href]
- ova tmpdir;
- false
- )
- else
- partial in
+ (* Convert the disk hrefs into qemu URIs. *)
+ let qemu_uris = List.map (
+ fun { href; compressed } ->
+ let file_ref = get_file_ref ova_t href in
- let filename =
- if partial then
- (subdirectory exploded ovf_folder) // href
- else (
- (* Does the file exist and is it readable? *)
- Unix.access (ovf_folder // href) [Unix.R_OK];
- ovf_folder // href
- ) in
+ match compressed, file_ref with
+ | false, LocalFile filename ->
+ filename
- (* The spec allows the file to be gzip-compressed, in which case
- * we must uncompress it into the tmpdir.
- *)
- let filename =
- if compressed then (
- let new_filename = tmpdir // String.random8 () ^ ".vmdk" in
- let cmd =
- sprintf "zcat %s > %s" (quote filename) (quote new_filename)
in
- if shell_command cmd <> 0 then
- error (f_"error uncompressing %s, see earlier error messages")
- filename;
- new_filename
- )
- else filename in
+ | true, LocalFile filename ->
+ (* The spec allows the file to be gzip-compressed, in
+ * which case we must uncompress it into a temporary.
+ *)
+ let temp_dir = (open_guestfs ())#get_cachedir () in
+ let new_filename = Filename.temp_file ~temp_dir "ova"
".vmdk" in
+ unlink_on_exit new_filename;
+ let cmd =
+ sprintf "zcat %s > %s" (quote filename) (quote new_filename)
in
+ if shell_command cmd <> 0 then
+ error (f_"error uncompressing %s, see earlier error messages")
+ filename;
+ new_filename
- let qemu_uri =
- if not partial then (
- filename
- )
- else (
- let offset, size =
- try find_file_in_tar ova filename
- with
- | Not_found ->
- error (f_"file ‘%s’ not found in the ova") filename
- | Failure msg -> error (f_"%s") msg in
- (* QEMU requires size aligned to 512 bytes. This is safe because
- * tar also works with 512 byte blocks.
- *)
- let size = roundup64 size 512L in
+ | false, TarFile (tar, filename) ->
+ (* This is the tar optimization. *)
+ let offset, size =
+ try Parse_ova.get_tar_offet_and_size tar filename
+ with
+ | Not_found ->
+ error (f_"file ‘%s’ not found in the ova") filename
+ | Failure msg -> error (f_"%s") msg in
+ (* QEMU requires size aligned to 512 bytes. This is safe because
+ * tar also works with 512 byte blocks.
+ *)
+ let size = roundup64 size 512L in
- (* Workaround for libvirt bug RHBZ#1431652. *)
- let ova_path = absolute_path ova in
+ (* Workaround for libvirt bug RHBZ#1431652. *)
+ let tar_path = absolute_path tar in
- let doc = [
- "file", JSON.Dict [
- "driver", JSON.String "raw";
- "offset", JSON.Int64 offset;
- "size", JSON.Int64 size;
- "file", JSON.Dict [
- "driver", JSON.String
"file";
- "filename", JSON.String ova_path]
- ]
- ] in
- let uri =
- sprintf "json:%s" (JSON.string_of_doc ~fmt:JSON.Compact doc) in
- debug "json: %s" uri;
- uri
- ) in
+ let doc = [
+ "file", JSON.Dict [
+ "driver", JSON.String "raw";
+ "offset", JSON.Int64 offset;
+ "size", JSON.Int64 size;
+ "file", JSON.Dict [
+ "driver", JSON.String "file";
+ "filename", JSON.String tar_path]
+ ]
+ ] in
+ let uri =
+ sprintf "json:%s" (JSON.string_of_doc ~fmt:JSON.Compact doc) in
+ uri
- { disk.source_disk with s_qemu_uri = qemu_uri }
- ) disks in
+ | true, TarFile _ ->
+ (* This should not happen since {!Parse_ova} knows that
+ * qemu cannot handle compressed files here.
+ *)
+ assert false
+ ) disks in
+
+ (* Get a final list of source disks. *)
+ let disks =
+ List.map (fun ({ source_disk }, qemu_uri) ->
+ { source_disk with s_qemu_uri = qemu_uri })
+ (List.combine disks qemu_uris) in
let source = {
s_hypervisor = VMware;
diff --git a/v2v/parse_ova.ml b/v2v/parse_ova.ml
new file mode 100644
index 000000000..431cbe8d0
--- /dev/null
+++ b/v2v/parse_ova.ml
@@ -0,0 +1,360 @@
+(* virt-v2v
+ * Copyright (C) 2009-2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+open Printf
+
+open Std_utils
+open Tools_utils
+open Unix_utils
+open Common_gettext.Gettext
+
+open Utils
+open Parse_ovf_from_ova
+
+type t = {
+ (* Save the original OVA name, for error messages. *)
+ orig_ova : string;
+
+ (* Top directory of OVA. If the OVA was already a directory then
+ * this is just that directory. However in normal cases this is
+ * a temporary directory that we create, unpacking either just the
+ * OVF and MF files, or those plus the disks. This temporary
+ * directory will be cleaned up on exit.
+ *)
+ top_dir : string;
+
+ ova_type : ova_type;
+}
+
+and ova_type =
+ (* The original OVA was a directory. Or the OVA was fully unpacked
+ * into a temporary directory.
+ *
+ * In either case everything is available in [top_dir].
+ *)
+ | Directory
+
+ (* The original OVA was an uncompressed tar file and we are able
+ * to optimize access to the disks by keeping them in the tarball.
+ *
+ * The OVF and MF files only have been unpacked in [top_dir].
+ *)
+ | TarOptimized of string (* tarball *)
+
+type file_ref =
+ | LocalFile of string
+ | TarFile of string * string
+
+type mf_record = file_ref * Checksums.csum_t
+
+let rec parse_ova ova =
+ (* The spec allows a directory to be specified as an ova. This
+ * is also pretty convenient.
+ *)
+ let top_dir, ova_type =
+ if is_directory ova then ova, Directory
+ else (
+ let tmpdir =
+ let base_dir = (open_guestfs ())#get_cachedir () in
+ let t = Mkdtemp.temp_dir ~base_dir "ova." in
+ rmdir_on_exit t;
+ t in
+
+ match detect_file_type ova with
+ | `Tar ->
+ (* Normal ovas are tar file (not compressed). *)
+
+ (* In newer QEMU we don't have to extract everything.
+ * We can access disks inside the tar archive directly.
+ *)
+ if qemu_img_supports_offset_and_size () &&
+ libvirt_supports_json_raw_driver () &&
+ (untar_metadata ova tmpdir;
+ no_disks_are_compressed ova tmpdir) then
+ tmpdir, TarOptimized ova
+ else (
+ (* If qemu/libvirt is too old or any disk is compressed
+ * then we must fall back on the slow path.
+ *)
+ untar ova tmpdir;
+ tmpdir, Directory
+ )
+
+ | `Zip ->
+ (* However, although not permitted by the spec, people ship
+ * zip files as ova too.
+ *)
+ let cmd =
+ [ "unzip" ] @ (if verbose () then [] else [ "-q" ]) @
+ [ "-j"; "-d"; tmpdir; ova ] in
+ if run_command cmd <> 0 then
+ error (f_"error unpacking %s, see earlier error messages") ova;
+ tmpdir, Directory
+
+ | (`GZip|`XZ) as format ->
+ (match uncompressed_type format ova with
+ | `Tar ->
+ untar ~format ova tmpdir;
+ tmpdir, Directory
+ | `Zip | `GZip | `XZ | `Unknown ->
+ error (f_"%s: unsupported file format\n\nFormats which we currently
understand for '-i ova' are: tar (uncompressed, compress with gzip or xz),
zip") ova
+ )
+
+ | `Unknown ->
+ error (f_"%s: unsupported file format\n\nFormats which we currently
understand for '-i ova' are: tar (uncompressed, compress with gzip or xz),
zip") ova
+ ) in
+
+ (* Exploded path must be absolute (RHBZ#1155121). *)
+ let top_dir = absolute_path top_dir in
+
+ (* If virt-v2v is running as root, and the backend is libvirt, then
+ * we have to chmod the directory to 0755 and files to 0644
+ * so it is readable by qemu.qemu. This is libvirt bug RHBZ#890291.
+ *)
+ if Unix.geteuid () = 0 && backend_is_libvirt () then (
+ warning (f_"making OVA directory public readable to work around libvirt bug
https://bugzilla.redhat.com/1045069");
+ let what =
+ match ova_type with
+ | Directory -> [ top_dir ]
+ | TarOptimized ova -> [ top_dir; ova ] in
+ let cmd = [ "chmod"; "-R"; "go=u,go-w" ] @ what in
+ ignore (run_command cmd)
+ );
+
+ { orig_ova = ova; top_dir; ova_type }
+
+(* Return true if [libvirt] supports ["json:"] pseudo-URLs and accepts the
+ * ["raw"] driver. Function also returns true if [libvirt] backend is not
+ * used. This didn't work in libvirt < 3.1.0.
+ *)
+and libvirt_supports_json_raw_driver () =
+ if backend_is_libvirt () then (
+ let sup = Libvirt_utils.libvirt_get_version () >= (3, 1, 0) in
+ debug "libvirt supports \"raw\" driver in json URL: %B" sup;
+ sup
+ )
+ else
+ true
+
+(* No disks compressed? We need to check the OVF file. *)
+and no_disks_are_compressed ova tmpdir =
+ let t = { orig_ova = ova; top_dir = tmpdir; ova_type = Directory } in
+ let ovf = get_ovf_file t in
+ let disks = parse_disks ovf in
+ not (List.exists (fun { compressed } -> compressed) disks)
+
+and pigz_available =
+ let test = lazy (shell_command "pigz --help >/dev/null 2>&1" = 0)
in
+ fun () -> Lazy.force test
+
+and pxz_available =
+ let test = lazy (shell_command "pxz --help >/dev/null 2>&1" = 0)
in
+ fun () -> Lazy.force test
+
+and zcat_command_of_format = function
+ | `GZip ->
+ if pigz_available () then "pigz -c -d" else "gzip -c -d"
+ | `XZ ->
+ if pxz_available () then "pxz -c -d" else "xz -c -d"
+
+(* Untar part or all files from tar archive. If [paths] is specified it is
+ * a list of paths in the tar archive.
+ *)
+and untar ?format ?(paths = []) file outdir =
+ let paths = String.concat " " (List.map quote paths) in
+ let cmd =
+ match format with
+ | None ->
+ sprintf "tar -xf %s -C %s %s"
+ (quote file) (quote outdir) paths
+ | Some ((`GZip|`XZ) as format) ->
+ sprintf "%s %s | tar -xf - -C %s %s"
+ (zcat_command_of_format format) (quote file)
+ (quote outdir) paths in
+ if shell_command cmd <> 0 then
+ error (f_"error unpacking %s, see earlier error messages") file
+
+(* Untar only ovf and manifest from the archive *)
+and untar_metadata file outdir =
+ let files = external_command (sprintf "tar -tf %s" (Filename.quote file)) in
+ let files =
+ List.filter_map (
+ fun f ->
+ if Filename.check_suffix f ".ovf" ||
+ Filename.check_suffix f ".mf" then Some f
+ else None
+ ) files in
+ untar ~paths:files file outdir
+
+(* Uncompress the first few bytes of [file] and return it as
+ * [(bytes, len)].
+ *)
+and uncompress_head format file =
+ let cmd = sprintf "%s %s" (zcat_command_of_format format) (quote file) in
+ let chan_out, chan_in, chan_err = Unix.open_process_full cmd [||] in
+ let b = Bytes.create 512 in
+ let len = input chan_out b 0 (Bytes.length b) in
+ (* We're expecting the subprocess to fail because we close
+ * the pipe early, so:
+ *)
+ ignore (Unix.close_process_full (chan_out, chan_in, chan_err));
+ b, len
+
+(* Run [detect_file_type] on a compressed file, returning the
+ * type of the uncompressed content (if known).
+ *)
+and uncompressed_type format file =
+ let head, headlen = uncompress_head format file in
+ let tmpfile, chan =
+ Filename.open_temp_file "ova.file." "" in
+ output chan head 0 headlen;
+ close_out chan;
+ let ret = detect_file_type tmpfile in
+ Sys.remove tmpfile;
+ ret
+
+(* Find files in [dir] ending with [ext]. *)
+and find_files dir ext =
+ let rec loop = function
+ | [] -> []
+ | dir :: rest ->
+ let files = Array.to_list (Sys.readdir dir) in
+ let files = List.map (Filename.concat dir) files in
+ let dirs, files = List.partition Sys.is_directory files in
+ let files =
+ List.filter (fun x -> Filename.check_suffix x ext) files in
+ files @ loop (rest @ dirs)
+ in
+ loop [dir]
+
+and get_ovf_file { orig_ova; top_dir } =
+ let ovf = find_files top_dir ".ovf" in
+ match ovf with
+ | [] ->
+ error (f_"no .ovf file was found in %s") orig_ova
+ | [x] -> x
+ | _ :: _ ->
+ error (f_"more than one .ovf file was found in %s") orig_ova
+
+let rex = PCRE.compile "^(SHA1|SHA256)\\((.*)\\)= ([0-9a-fA-F]+)\r?$"
+
+let get_manifest { top_dir; ova_type } =
+ let mf_files = find_files top_dir ".mf" in
+ let manifest =
+ List.map (
+ fun mf ->
+ debug "ova: processing manifest file %s" mf;
+ let mf_folder = Filename.dirname mf in
+ let mf_subfolder = subdirectory top_dir mf_folder in
+ with_open_in mf (
+ fun chan ->
+ let ret = ref [] in
+ let rec loop () =
+ let line = input_line chan in
+ if PCRE.matches rex line then (
+ let csum_type = PCRE.sub 1
+ and filename = PCRE.sub 2
+ and expected = PCRE.sub 3 in
+ let csum = Checksums.of_string csum_type expected in
+ let file_ref =
+ match ova_type with
+ | Directory ->
+ LocalFile (mf_folder // filename)
+ | TarOptimized tar ->
+ TarFile (tar, mf_subfolder // filename) in
+ List.push_front (file_ref, csum) ret
+ )
+ else
+ warning (f_"unable to parse line from manifest file: %S")
line;
+ loop ()
+ in
+ (try loop () with End_of_file -> ());
+ !ret
+ )
+ ) mf_files in
+
+ List.flatten manifest
+
+let get_file_ref ({ top_dir; ova_type } as t) href =
+ let ovf = get_ovf_file t in
+ let ovf_folder = Filename.dirname ovf in
+
+ match ova_type with
+ | Directory -> LocalFile (ovf_folder // href)
+ | TarOptimized tar ->
+ let filename = subdirectory top_dir ovf_folder // href in
+ TarFile (tar, filename)
+
+let ws = PCRE.compile "\\s+"
+let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
+
+let get_tar_offet_and_size tar filename =
+ let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in
+ let rec loop lines =
+ match lines with
+ | [] -> raise Not_found
+ | line :: lines -> (
+ (* Lines have the form:
+ * block <offset>: <perms> <owner>/<group> <size>
<mdate> <mtime> <file>
+ * or:
+ * block <offset>: ** Block of NULs **
+ * block <offset>: ** End of File **
+ *)
+ if PCRE.matches re_tar_message line then
+ loop lines (* ignore "** Block of NULs **" etc. *)
+ else (
+ let elems = PCRE.nsplit ~max:8 ws line in
+ if List.length elems = 8 && List.hd elems = "block" then (
+ let elems = Array.of_list elems in
+ let offset = elems.(1) in
+ let size = elems.(4) in
+ let fname = elems.(7) in
+
+ if fname <> filename then
+ loop lines
+ else (
+ let offset =
+ try
+ (* There should be a colon at the end *)
+ let i = String.rindex offset ':' in
+ if i == (String.length offset)-1 then
+ Int64.of_string (String.sub offset 0 i)
+ else
+ failwith "colon at wrong position"
+ with Failure _ | Not_found ->
+ failwithf (f_"invalid offset returned by tar: %S") offset in
+
+ let size =
+ try Int64.of_string size
+ with Failure _ ->
+ failwithf (f_"invalid size returned by tar: %S") size in
+
+ (* Note: Offset is actualy block number and there is a single
+ * block with tar header at the beginning of the file. So skip
+ * the header and convert the block number to bytes before
+ * returning.
+ *)
+ (offset +^ 1L) *^ 512L, size
+ )
+ )
+ else
+ failwithf (f_"failed to parse line returned by tar: %S") line
+ )
+ )
+ in
+ loop lines
diff --git a/v2v/parse_ova.mli b/v2v/parse_ova.mli
new file mode 100644
index 000000000..54df752ad
--- /dev/null
+++ b/v2v/parse_ova.mli
@@ -0,0 +1,73 @@
+(* virt-v2v
+ * Copyright (C) 2009-2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+(** Helper functions for dealing with the OVA pseudo-format. *)
+
+type t
+
+val parse_ova : string -> t
+(** The parameter references either an OVA file or a directory
+ containing an unpacked OVA.
+
+ The OVA is "opened". If necessary, parts of the OVA are
+ unpacked into a temporary directory. This can consume a lot
+ of space, although we are able to optimize some common cases.
+
+ This does {b not} parse or verify the OVF, MF or disks. *)
+
+val get_ovf_file : t -> string
+(** Return the filename of the OVF file from the OVA. This will
+ be a local file (might be a temporary file) valid for the
+ lifetime of the handle.
+
+ The filename can be passed directly to
+ {!Parse_ovf_from_ova.parse_ovf_from_ova}. *)
+
+type file_ref =
+ | LocalFile of string (** A local filename. *)
+ | TarFile of string * string (** Tar file containing file. *)
+(** A file reference, pointing usually to a disk. If the OVA
+ is unpacked during parsing then this points to a local file.
+ It might be a temporary file, but it is valid for the lifetime
+ of the handle. If we are optimizing access to the OVA then
+ it might also be a reference to a file within a tarball. *)
+
+type mf_record = file_ref * Checksums.csum_t
+(** A manifest record: (file reference, checksum of file). *)
+
+val get_manifest : t -> mf_record list
+(** Find and parse all manifest ([*.mf]) files in the OVA.
+ Parse out the filenames and checksums from these files
+ and return the full manifest as a single list.
+
+ Note the checksums are returned, but this function does not
+ verify them. Also VMware-generated OVAs can return
+ non-existent files in this list. *)
+
+val get_file_ref : t -> string -> file_ref
+(** Convert an OVF [href] into an actual file reference.
+
+ Note this does not check that the file really exists. *)
+
+val get_tar_offet_and_size : string -> string -> int64 * int64
+(** [get_tar_offet_and_size tar filename] looks up file in the [tar]
+ archive and returns a tuple containing at which byte it starts
+ and how long the file is.
+
+ Function raises [Not_found] if there is no such file inside [tar] and
+ [Failure] if there is any error parsing the tar output. *)
diff --git a/v2v/utils.ml b/v2v/utils.ml
index d73011f9f..67e2028f3 100644
--- a/v2v/utils.ml
+++ b/v2v/utils.ml
@@ -146,65 +146,6 @@ let error_if_no_ssh_agent () =
with Not_found ->
error (f_"ssh-agent authentication has not been set up ($SSH_AUTH_SOCK is not
set). This is required by qemu to do passwordless ssh access. See the virt-v2v(1) man
page for more information.")
-let ws = PCRE.compile "\\s+"
-let re_tar_message = PCRE.compile "\\*\\* [^*]+ \\*\\*$"
-
-let find_file_in_tar tar filename =
- let lines = external_command (sprintf "tar tRvf %s" (Filename.quote tar)) in
- let rec loop lines =
- match lines with
- | [] -> raise Not_found
- | line :: lines -> (
- (* Lines have the form:
- * block <offset>: <perms> <owner>/<group> <size>
<mdate> <mtime> <file>
- * or:
- * block <offset>: ** Block of NULs **
- * block <offset>: ** End of File **
- *)
- if PCRE.matches re_tar_message line then
- loop lines (* ignore "** Block of NULs **" etc. *)
- else (
- let elems = PCRE.nsplit ~max:8 ws line in
- if List.length elems = 8 && List.hd elems = "block" then (
- let elems = Array.of_list elems in
- let offset = elems.(1) in
- let size = elems.(4) in
- let fname = elems.(7) in
-
- if fname <> filename then
- loop lines
- else (
- let offset =
- try
- (* There should be a colon at the end *)
- let i = String.rindex offset ':' in
- if i == (String.length offset)-1 then
- Int64.of_string (String.sub offset 0 i)
- else
- failwith "colon at wrong position"
- with Failure _ | Not_found ->
- failwithf (f_"invalid offset returned by tar: %S") offset in
-
- let size =
- try Int64.of_string size
- with Failure _ ->
- failwithf (f_"invalid size returned by tar: %S") size in
-
- (* Note: Offset is actualy block number and there is a single
- * block with tar header at the beginning of the file. So skip
- * the header and convert the block number to bytes before
- * returning.
- *)
- (offset +^ 1L) *^ 512L, size
- )
- )
- else
- failwithf (f_"failed to parse line returned by tar: %S") line
- )
- )
- in
- loop lines
-
(* Wait for a file to appear until a timeout. *)
let rec wait_for_file filename timeout =
if Sys.file_exists filename then true
diff --git a/v2v/utils.mli b/v2v/utils.mli
index 4a444aaa0..fd91387a7 100644
--- a/v2v/utils.mli
+++ b/v2v/utils.mli
@@ -55,13 +55,6 @@ val backend_is_libvirt : unit -> bool
val error_if_no_ssh_agent : unit -> unit
-val find_file_in_tar : string -> string -> int64 * int64
-(** [find_file_in_tar tar filename] looks up file in [tar] archive and returns
- a tuple containing at which byte it starts and how long the file is.
-
- Function raises [Not_found] if there is no such file inside [tar] and
- [Failure] if there is any error parsing the tar output. *)
-
val wait_for_file : string -> int -> bool
(** [wait_for_file filename timeout] waits up to [timeout] seconds for
[filename] to appear. It returns [true] if the file appeared. *)
--
2.16.2