On Friday, 4 November 2016 14:52:52 CET Tomáš Golembiovský wrote:
We don't have to always extract all files from the OVA archive.
The OVA,
as defined in the standard, is plain tar. We can work directly over the
tar archive if we use correct 'offset' and 'size' options when defining
the backing file for QEMU.
This leads to improvements in speed and puts much lower requirement on
available disk space.
Signed-off-by: Tomáš Golembiovský <tgolembi(a)redhat.com>
---
Just few notes, still need to review the actual bulk of the work done
by this patch.
v2v/input_ova.ml | 113
+++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 98 insertions(+), 15 deletions(-)
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index f76fe82..60dbaf1 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -39,17 +39,20 @@ object
method source () =
- let untar ?(format = "") file outdir =
- let cmd = [ "tar"; sprintf "-x%sf" format; file;
"-C"; outdir ] in
+ (* Untar part or all files from tar archive. If [path] is specified it is
+ * a path in the tar archive.
+ *)
+ let untar ?(format = "") ?(path="") file outdir =
The default value for 'path' can be omitted here, checking whether
~path was actually specified in the 'untar' call:
+ let cmd = [ "tar"; sprintf "-x%sf" format;
file; "-C"; outdir; path ] in
let cmd =
[ "tar"; sprintf "-x%sf" format; file; "-C"; outdir; ]
@ (match path with None -> [] | Some p -> [p]) in
In this case, having an empty default value for 'path' means an empty
parameter will be passed as argument for 'tar'.
if run_command cmd <> 0 then
error (f_"error unpacking %s, see earlier error messages") ova in
(* Extract ova file. *)
- let exploded =
+ let exploded, partial =
(* The spec allows a directory to be specified as an ova. This
* is also pretty convenient.
*)
- if is_directory ova then ova
+ if is_directory ova then ova, false
else (
let uncompress_head zcat file =
let cmd = sprintf "%s %s" zcat (quote file) in
@@ -67,11 +70,19 @@ object
tmpfile in
+ (* Untar only ovf and manifest from the archive *)
+ let untar_partial file outdir =
+ let cmd1 = [ "tar"; "-tf" ; file ] in
+ let cmd2 = [ "grep"; "\\.\\(ovf\\|mf\\)$" ] in
+ let cmd3 = [ "xargs"; "tar"; "-xf" ; file;
"-C"; outdir ] in
+ if shell_command ((stringify_args cmd1) ^ " | " ^ (stringify_args
cmd2) ^ " | " ^ (stringify_args cmd3)) <> 0 then
Hmm... maybe it's just taste, but I find this command concatenation
confusing, and prone to break. I'd use:
1) external_command to get the list of files in the archive
2) filter the filenames (see Filename.check_suffix) to pick only the
needed ones
3) maybe using the helper 'untar' function above to extract the files
+ error (f_"error unpacking %s, see earlier error
messages") ova in
+
match detect_file_type ova with
| `Tar ->
(* Normal ovas are tar file (not compressed). *)
- untar ova tmpdir;
- tmpdir
+ untar_partial ova tmpdir;
+ tmpdir, true
| `Zip ->
(* However, although not permitted by the spec, people ship
* zip files as ova too.
@@ -81,7 +92,7 @@ object
[ "-j"; "-d"; tmpdir; ova ] in
if run_command cmd <> 0 then
error (f_"error unpacking %s, see earlier error messages") ova;
- tmpdir
+ tmpdir, false
| (`GZip|`XZ) as format ->
let zcat, tar_fmt =
match format with
@@ -94,7 +105,7 @@ object
(match tmpfiletype with
| `Tar ->
untar ~format:tar_fmt ova tmpdir;
- tmpdir
+ tmpdir, false
| `Zip | `GZip | `XZ | `Unknown ->
error (f_"%s: unsupported file format\n\nFormats which we currently
understand for '-i ova' are: tar (uncompressed, compress with gzip or xz),
zip") ova
)
@@ -135,6 +146,49 @@ object
loop [dir]
in
+ (* Find file in [tar] archive and return at which byte it starts and how
+ * long it is.
+ *)
+ let find_file_in_tar tar filename =
+ let cmd1 = [ "tar"; "tRvf"; tar ] in
+ let cmd2 = [ "awk"; sprintf
+ "$8 == \"%s\" {print substr($2, 1, index($2, \":\")-1),
$5}"
+ filename ]
+ in
As above, I'd use external_command + manual filtering code in OCaml.
YMMV though.
+ let lines =
+ external_command ((stringify_args cmd1) ^ " | " ^ (stringify_args
cmd2))
+ in
+ if (List.length lines < 1) then
No need for parenthesis, and it can be even simplified as:
if lines = [] then
+ raise Not_found
+ else
+ let soffset, ssize = String.split " " (List.hd lines) in
+ let offset =
+ try Int64.of_string soffset
+ with Failure _ ->
+ error (f_"Invalid offset returned by `tar`: %s") soffset
+ in
+ let size =
+ try Int64.of_string ssize
+ with Failure _ ->
+ error (f_"Invalid size returend by `tar': %s") ssize
Typo in the error message, and different wrt the above one (`tar` vs
`tar') .
+ in
+ (* Note: Offset is actualy block number and there is a single block
+ * with tar header at the beginning of the file. So skip the header and
+ * convert the block number to bytes before returning.
+ *)
+ (offset +^ 1L) *^ 512L, size
+ in
+
+ let subfolder folder parent =
+ if String.is_prefix folder (parent // "") then
+ let len = String.length parent in
+ String.sub folder (len+1) (String.length folder-len-1)
+ else if folder = parent then
+ ""
+ else
+ assert(false)
No need for parenthesis.
--
Pino Toscano