On 11/22/22 16:47, Richard W.M. Jones wrote:
 This tool can be used to estimate the disk space needed before doing a
 virt-v2v conversion.
 
 It is a replacement for the old --print-estimate option which was
 dropped in virt-v2v 2.0 (commit 5828c9c7d5 "v2v: Remove
 --print-estimate option").
 ---
  docs/Makefile.am            |  15 ++
  docs/test-v2v-docs.sh       |   9 +
  docs/virt-v2v-inspector.pod | 252 +++++++++++++++++++
  docs/virt-v2v.pod           |   4 +
  configure.ac                |   1 +
  Makefile.am                 |   3 +-
  inspector/Makefile.am       | 129 ++++++++++
  tests/Makefile.am           |   2 +
  inspector/inspector.mli     |  19 ++
  inspector/inspector.ml      | 472 ++++++++++++++++++++++++++++++++++++
  inspector/dummy.c           |   2 +
  tests/test-v2v-inspector.sh |  76 ++++++
  .gitignore                  |   3 +
  run.in                      |   3 +-
  14 files changed, 988 insertions(+), 2 deletions(-) 
I've skimmed this. It seems very useful and mostly okay, where "mostly
okay" qualifies *my level of understanding* of the code, and not the
code itself. There is a bit of code duplication with other parts of the
tree, but I think that's entirely OK: this is a new (experimental?)
tool, for further feature enablement, so intrusive refactorings are not
called for at this point, IMO. I suggest that we merge this ASAP and let
people start testing it.
One suggestion: migrate the nice info from the cover letter into this
actual commit message.
Acked-by: Laszlo Ersek <lersek(a)redhat.com>
Laszlo
 
 diff --git a/docs/Makefile.am b/docs/Makefile.am
 index 3668fd4f0c..012c672294 100644
 --- a/docs/Makefile.am
 +++ b/docs/Makefile.am
 @@ -24,6 +24,7 @@ EXTRA_DIST = \
  	virt-v2v-in-place.pod \
  	virt-v2v-input-vmware.pod \
  	virt-v2v-input-xen.pod \
 +	virt-v2v-inspector.pod \
  	virt-v2v-output-local.pod \
  	virt-v2v-output-openstack.pod \
  	virt-v2v-output-rhv.pod \
 @@ -40,6 +41,7 @@ man_MANS = \
  	virt-v2v-in-place.1 \
  	virt-v2v-input-vmware.1 \
  	virt-v2v-input-xen.1 \
 +	virt-v2v-inspector.1 \
  	virt-v2v-output-local.1 \
  	virt-v2v-output-openstack.1 \
  	virt-v2v-output-rhv.1 \
 @@ -53,6 +55,7 @@ noinst_DATA = \
  	$(top_builddir)/website/virt-v2v-in-place.1.html \
  	$(top_builddir)/website/virt-v2v-input-vmware.1.html \
  	$(top_builddir)/website/virt-v2v-input-xen.1.html \
 +	$(top_builddir)/website/virt-v2v-inspector.1.html \
  	$(top_builddir)/website/virt-v2v-output-local.1.html \
  	$(top_builddir)/website/virt-v2v-output-openstack.1.html \
  	$(top_builddir)/website/virt-v2v-output-rhv.1.html \
 @@ -117,6 +120,18 @@ stamp-virt-v2v-input-xen.pod: virt-v2v-input-xen.pod
  	  $<
  	touch $@
  
 +virt-v2v-inspector.1 $(top_builddir)/website/virt-v2v-inspector.1.html:
stamp-virt-v2v-inspector.pod
 +
 +stamp-virt-v2v-inspector.pod: virt-v2v-inspector.pod
 +	$(PODWRAPPER) \
 +	  --man virt-v2v-inspector.1 \
 +	  --html $(top_builddir)/website/virt-v2v-inspector.1.html \
 +	  --path $(top_srcdir)/common/options \
 +	  --license GPLv2+ \
 +	  --warning safe \
 +	  $<
 +	touch $@
 +
  virt-v2v-output-local.1 $(top_builddir)/website/virt-v2v-output-local.1.html:
stamp-virt-v2v-output-local.pod
  
  stamp-virt-v2v-output-local.pod: virt-v2v-output-local.pod
 diff --git a/docs/test-v2v-docs.sh b/docs/test-v2v-docs.sh
 index 92ae39ee57..c0de5a20ce 100755
 --- a/docs/test-v2v-docs.sh
 +++ b/docs/test-v2v-docs.sh
 @@ -75,3 +75,12 @@ $srcdir/../podcheck.pl virt-v2v-in-place.pod virt-v2v-in-place \
  --oo,\
  --op,\
  --os
 +
 +$srcdir/../podcheck.pl virt-v2v-inspector.pod virt-v2v-inspector \
 +  --path $srcdir/../common/options \
 +  --ignore=\
 +--ic,\
 +--if,\
 +--io,\
 +--ip,\
 +--it
 diff --git a/docs/virt-v2v-inspector.pod b/docs/virt-v2v-inspector.pod
 new file mode 100644
 index 0000000000..d2f0b66e4f
 --- /dev/null
 +++ b/docs/virt-v2v-inspector.pod
 @@ -0,0 +1,252 @@
 +=head1 NAME
 +
 +virt-v2v-inspector - Estimate disk space needed before virt-v2v conversion
 +
 +=head1 SYNOPSIS
 +
 + virt-v2v-inspector [-i* options] guest
 +
 +=head1 DESCRIPTION
 +
 +Virt-v2v-inspector is a companion tool for L<virt-v2v(1)> which can be
 +used before conversion to estimate the number of output disks and disk
 +space that will be required to complete the virt-v2v conversion.  The
 +common use for this is to preallocate target disks on management
 +systems that need this (like Kubevirt).
 +
 +This manual page only documents the estimation feature, not all of the
 +I<-i*> options which are the same as virt-v2v.  You should read
 +L<virt-v2v(1)> first.
 +
 +=head2 Selecting the input guest
 +
 +You can run virt-v2v-inspector with the same I<-i*> options as
 +virt-v2v.  (Don't use any I<-o*> options).  This will select the guest
 +that you want to estimate.
 +
 +For example, to estimate the space required for a guest stored in a
 +local disk called F<filename.img> you could do:
 +
 + virt-v2v-inspector -i disk filename.img
 +
 +=head2 Output
 +
 +The output from this tool is an XML document (written to stdout).
 +
 +=over 4
 +
 +=item *
 +
 +Fields which are annotated with an C<estimated='true'> attribute are
 +estimated.  Virt-v2v cannot always know exactly the final size of some
 +things, such as the exact real size of the output disk, since there
 +might be small perturbations between runs.  Estimates are usually very
 +close to the final values.
 +
 +=item *
 +
 +Elements (including sub-trees) which are annotated with an
 +C<informational='true'> attribute are for information only.  These
 +elements might be changed or removed in future versions.  If you would
 +like to rely on this data in your program please contact the
 +developers.
 +
 +=item *
 +
 +Numbers representing sizes are always given in bytes.
 +
 +=back
 +
 + <?xml version='1.0' encoding='utf-8'?>
 + <v2v-inspection>
 +   <program>virt-v2v-inspector</program>
 +   <package>virt-v2v</package>
 +   <version>2.1.9</version>
 +
 +The E<lt>programE<gt>, E<lt>packageE<gt> and E<lt>versionE<gt>
 +elements refer to the current version of virt-v2v-inspector and are
 +useful for debugging.  Make sure you use the same version of
 +virt-v2v-inspector and virt-v2v.
 +
 +   <disks>
 +     <disk index='0'>
 +       <virtual-size>6442450944</virtual-size>
 +       <allocated estimated='true'>1400897536</allocated>
 +     </disk>
 +     <disk index='1'>
 +       <virtual-size>6442450944</virtual-size>
 +       <allocated estimated='true'>45131520</allocated>
 +     </disk>
 +   </disks>
 +
 +The E<lt>disksE<gt> element lists information about each guest disk.
 +The example virtual machine above has two disks.
 +E<lt>virtual-sizeE<gt> describes the size of the disk as seen from
 +inside the guest, while E<lt>allocatedE<gt> is an estimate of how much
 +storage will be needed on the host after conversion.  This is assuming
 +you use S<I<-oa sparse>> - see the notes below.
 +
 +   <operatingsystem>
 +     <name>linux</name>
 +     <distro>fedora</distro>
 +     <osinfo>fedora32</osinfo>
 +     <arch>x86_64</arch>
 +     [...]
 +   </operatingsystem>
 +
 +The E<lt>operatingsystemE<gt> element lists information about the
 +guest operating system gleaned during conversion, in a manner similar
 +to the L<virt-inspector(1)> tool from guestfs-tools.
 +
 +=head2 Output allocation mode and output format
 +
 +Virt-v2v supports selecting the output allocation mode (I<-oa> option)
 +and output format (I<-of> option, eg. S<I<-of qcow2>>).  Since it is
 +difficult to predict the effect of these options on the actual space
 +occupied by the final image this tool does not account for them.
 +
 +As a rule of thumb:
 +
 +=over 4
 +
 +=item S<virt-v2v -oa preallocated>
 +
 +causes the disk images on the target to consume their full virtual
 +size (excluding the effect of zero allocations, which depends so much
 +on the underlying storage that it is often hard even for experts to
 +predict).
 +
 +=item S<virt-v2v -of qcow2>
 +
 +uses the QCOW2 format where supported which means that the apparent
 +size of the file will be equal to its sparse size, but otherwise
 +should not affect estimates very much.
 +
 +=back
 +
 +=head1 OPTIONS
 +
 +=over 4
 +
 +=item B<--help>
 +
 +Display help.
 +
 +=item B<-v>
 +
 +=item B<--verbose>
 +
 +Enable verbose messages for debugging.
 +
 +=item B<-V>
 +
 +=item B<--version>
 +
 +Display version number and exit.
 +
 +=item B<-x>
 +
 +Enable tracing of libguestfs API calls.
 +
 +=item B<-i> ...
 +
 +=item B<-ic> ...
 +
 +=item B<-if> ...
 +
 +=item B<-io> ...
 +
 +=item B<-ip> ...
 +
 +=item B<-it> ...
 +
 +All of the I<-i*> options supported by virt-v2v are also supported by
 +virt-v2v-inspector.
 +
 +=item B<-b> ...
 +
 +=item B<--bridge> ...
 +
 +=item B<--colors>
 +
 +=item B<--colours>
 +
 +=item B<--echo-keys>
 +
 +=item B<--key> ...
 +
 +=item B<--keys-from-stdin>
 +
 +=item B<--mac> ...
 +
 +=item B<--machine-readable>
 +
 +=item B<--machine-readable>=format
 +
 +=item B<-n> ...
 +
 +=item B<--network> ...
 +
 +=item B<-q>
 +
 +=item B<--quiet>
 +
 +=item B<--root> ...
 +
 +=item B<--wrap>
 +
 +These options work in the same way as the equivalent virt-v2v options.
 +
 +=back
 +
 +=head1 FILES
 +
 +Files used are the same as for virt-v2v.  See L<virt-v2v(1)/FILES>.
 +
 +=head1 ENVIRONMENT VARIABLES
 +
 +Environment variables used are the same as for virt-v2v.  See
 +L<virt-v2v(1)/ENVIRONMENT VARIABLES>.
 +
 +=head1 SEE ALSO
 +
 +L<virt-v2v(1)>,
 +L<virt-p2v(1)>,
 +L<virt-inspector(1)>,
 +L<guestfs(3)>,
 +L<guestfish(1)>,
 +L<qemu-img(1)>,
 +L<nbdkit(1)>,
 +L<http://libguestfs.org/>.
 +
 +=head1 AUTHORS
 +
 +Matthew Booth
 +
 +Cédric Bosdonnat
 +
 +Laszlo Ersek
 +
 +Tomáš Golembiovský
 +
 +Shahar Havivi
 +
 +Richard W.M. Jones
 +
 +Roman Kagan
 +
 +Mike Latimer
 +
 +Nir Soffer
 +
 +Pino Toscano
 +
 +Xiaodai Wang
 +
 +Ming Xie
 +
 +Tingting Zheng
 +
 +=head1 COPYRIGHT
 +
 +Copyright (C) 2009-2022 Red Hat Inc.
 diff --git a/docs/virt-v2v.pod b/docs/virt-v2v.pod
 index 4901c8407f..4f3d977a15 100644
 --- a/docs/virt-v2v.pod
 +++ b/docs/virt-v2v.pod
 @@ -21,6 +21,9 @@ There is also a companion front-end called L<virt-p2v(1)> which
comes
  as an ISO, CD or PXE image that can be booted on physical machines to
  virtualize those machines (physical to virtual, or p2v).
  
 +To estimate the disk space needed before conversion, see
 +L<virt-v2v-inspector(1)>.
 +
  For in-place conversion, there is a separate tool called
  L<virt-v2v-in-place(1)>.
  
 @@ -1624,6 +1627,7 @@
L<https://rwmj.wordpress.com/2015/09/18/importing-kvm-guests-to-ovirt-...
  =head1 SEE ALSO
  
  L<virt-p2v(1)>,
 +L<virt-v2v-inspector(1)>,
  L<virt-v2v-in-place(1)>,
  L<virt-customize(1)>,
  L<virt-df(1)>,
 diff --git a/configure.ac b/configure.ac
 index b2396781d6..f8e2836551 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -146,6 +146,7 @@ AC_CONFIG_FILES([Makefile
                   gnulib/lib/Makefile
                   in-place/Makefile
                   input/Makefile
 +                 inspector/Makefile
                   lib/Makefile
                   lib/config.ml
                   output/Makefile
 diff --git a/Makefile.am b/Makefile.am
 index cec68d76ce..16cd5f36d9 100644
 --- a/Makefile.am
 +++ b/Makefile.am
 @@ -42,6 +42,7 @@ SUBDIRS += input
  SUBDIRS += output
  SUBDIRS += convert
  SUBDIRS += v2v
 +SUBDIRS += inspector
  SUBDIRS += in-place
  
  SUBDIRS += tests
 @@ -111,7 +112,7 @@ po/POTFILES: configure.ac
  po/POTFILES-ml: configure.ac
  	rm -f $@ $@-t
  	cd $(srcdir); \
 -	find common/ml* lib in-place input output v2v -name '*.ml' | \
 +	find common/ml* lib in-place input inspector output v2v -name '*.ml' | \
  	grep -v '^common/mlprogress/' | \
  	grep -v '^common/mlvisit/' | \
  	grep -v '^lib/config.ml$$' | \
 diff --git a/inspector/Makefile.am b/inspector/Makefile.am
 new file mode 100644
 index 0000000000..30e6a297fa
 --- /dev/null
 +++ b/inspector/Makefile.am
 @@ -0,0 +1,129 @@
 +# libguestfs virt-v2v-inspector tool
 +# Copyright (C) 2009-2022 Red Hat Inc.
 +#
 +# This program is free software; you can redistribute it and/or modify
 +# it under the terms of the GNU General Public License as published by
 +# the Free Software Foundation; either version 2 of the License, or
 +# (at your option) any later version.
 +#
 +# This program is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write to the Free Software
 +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 +
 +include $(top_srcdir)/subdir-rules.mk
 +
 +EXTRA_DIST = \
 +	$(SOURCES_MLI) \
 +	$(SOURCES_ML) \
 +	$(SOURCES_C)
 +
 +SOURCES_MLI = \
 +	inspector.mli
 +
 +SOURCES_ML = \
 +	inspector.ml
 +
 +SOURCES_C = \
 +	dummy.c
 +
 +bin_PROGRAMS = virt-v2v-inspector
 +
 +virt_v2v_inspector_SOURCES = $(SOURCES_C)
 +virt_v2v_inspector_CPPFLAGS = \
 +	-DCAML_NAME_SPACE \
 +	-I. \
 +	-I$(top_builddir) \
 +	-I$(shell $(OCAMLC) -where) \
 +	-I$(top_srcdir)/lib
 +virt_v2v_inspector_CFLAGS = \
 +	-pthread \
 +	$(WARN_CFLAGS) $(WERROR_CFLAGS) \
 +	$(LIBGUESTFS_CFLAGS) \
 +	$(LIBVIRT_CFLAGS) \
 +	$(LIBOSINFO_CFLAGS)
 +
 +BOBJECTS = $(SOURCES_ML:.ml=.cmo)
 +XOBJECTS = $(BOBJECTS:.cmo=.cmx)
 +
 +OCAMLPACKAGES = \
 +	-package str,unix,guestfs,libvirt,nbd \
 +	-I $(top_builddir)/common/utils/.libs \
 +	-I $(top_builddir)/common/qemuopts/.libs \
 +	-I $(top_builddir)/gnulib/lib/.libs \
 +	-I $(top_builddir)/lib \
 +	-I $(top_builddir)/input \
 +	-I $(top_builddir)/convert \
 +	-I $(top_builddir)/common/mlstdutils \
 +	-I $(top_builddir)/common/mlutils \
 +	-I $(top_builddir)/common/mlgettext \
 +	-I $(top_builddir)/common/mlpcre \
 +	-I $(top_builddir)/common/mlxml \
 +	-I $(top_builddir)/common/mltools \
 +	-I $(top_builddir)/common/mlcustomize \
 +	-I $(top_builddir)/common/mlv2v
 +if HAVE_OCAML_PKG_GETTEXT
 +OCAMLPACKAGES += -package gettext-stub
 +endif
 +
 +OCAMLCLIBS = \
 +	-pthread \
 +	-lqemuopts \
 +	$(LIBGUESTFS_LIBS) \
 +	$(LIBVIRT_LIBS) \
 +	$(LIBXML2_LIBS) \
 +	$(JANSSON_LIBS) \
 +	$(LIBOSINFO_LIBS) \
 +	$(LIBINTL) \
 +	$(LIBNBD_LIBS) \
 +	-lgnu
 +
 +OCAMLFLAGS = $(OCAML_FLAGS) $(OCAML_WARN_ERROR) -ccopt '$(CFLAGS)'
 +
 +if !HAVE_OCAMLOPT
 +OBJECTS = $(BOBJECTS)
 +else
 +OBJECTS = $(XOBJECTS)
 +endif
 +
 +OCAMLLINKFLAGS = \
 +	mlstdutils.$(MLARCHIVE) \
 +	mlgettext.$(MLARCHIVE) \
 +	mlpcre.$(MLARCHIVE) \
 +	mlxml.$(MLARCHIVE) \
 +	mlcutils.$(MLARCHIVE) \
 +	mltools.$(MLARCHIVE) \
 +	mllibvirt.$(MLARCHIVE) \
 +	mlcustomize.$(MLARCHIVE) \
 +	mlv2v.$(MLARCHIVE) \
 +	mlv2vlib.$(MLARCHIVE) \
 +	mlconvert.$(MLARCHIVE) \
 +	mlinput.$(MLARCHIVE) \
 +	$(LINK_CUSTOM_OCAMLC_ONLY)
 +
 +virt_v2v_inspector_DEPENDENCIES = \
 +	$(OBJECTS) \
 +	$(top_builddir)/input/mlinput.$(MLARCHIVE) \
 +	$(top_builddir)/convert/mlconvert.$(MLARCHIVE) \
 +	$(top_builddir)/lib/mlv2vlib.$(MLARCHIVE) \
 +	$(top_srcdir)/ocaml-link.sh
 +virt_v2v_inspector_LINK = \
 +	$(top_srcdir)/ocaml-link.sh -cclib '$(OCAMLCLIBS)' -- \
 +	  $(OCAMLFIND) $(BEST) $(OCAMLFLAGS) $(OCAMLPACKAGES) $(OCAMLLINKFLAGS) \
 +	  $(OBJECTS) -o $@
 +
 +# Data directory.
 +
 +virttoolsdatadir = $(datadir)/virt-tools
 +
 +# Dependencies.
 +.depend: \
 +	$(srcdir)/*.mli \
 +	$(srcdir)/*.ml \
 +	$(filter %.ml,$(BUILT_SOURCES))
 +	$(top_builddir)/ocaml-dep.sh $^
 +-include .depend
 diff --git a/tests/Makefile.am b/tests/Makefile.am
 index fb068624c7..de3f1fe9e2 100644
 --- a/tests/Makefile.am
 +++ b/tests/Makefile.am
 @@ -78,6 +78,7 @@ TESTS = \
  	test-v2v-i-disk.sh \
  	test-v2v-i-ova.sh \
  	test-v2v-in-place.sh \
 +	test-v2v-inspector.sh \
  	test-v2v-mac.sh \
  	test-v2v-machine-readable.sh \
  	test-v2v-networks-and-bridges.sh \
 @@ -235,6 +236,7 @@ EXTRA_DIST += \
  	test-v2v-i-vmx-6.vmx \
  	test-v2v-i-vmx-7.vmx \
  	test-v2v-in-place.sh \
 +	test-v2v-inspector.sh \
  	test-v2v-it-vddk-io-query.sh \
  	test-v2v-machine-readable.sh \
  	test-v2v-mac-expected.xml \
 diff --git a/inspector/inspector.mli b/inspector/inspector.mli
 new file mode 100644
 index 0000000000..af7cc31cb3
 --- /dev/null
 +++ b/inspector/inspector.mli
 @@ -0,0 +1,19 @@
 +(* virt-v2v-inspector
 + * Copyright (C) 2009-2022 Red Hat Inc.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with this program; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + *)
 +
 +(* Nothing is exported. *)
 diff --git a/inspector/inspector.ml b/inspector/inspector.ml
 new file mode 100644
 index 0000000000..0ded5d62ae
 --- /dev/null
 +++ b/inspector/inspector.ml
 @@ -0,0 +1,472 @@
 +(* virt-v2v-inspector
 + * Copyright (C) 2009-2022 Red Hat Inc.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with this program; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + *)
 +
 +open Printf
 +open Unix
 +
 +open Std_utils
 +open Tools_utils
 +open Unix_utils
 +open Common_gettext.Gettext
 +open Getopt.OptionName
 +
 +open Types
 +open Utils
 +open DOM
 +
 +(* Matches --mac command line parameters. *)
 +let mac_re = PCRE.compile ~anchored:true
"([[:xdigit:]]{2}:[[:xdigit:]]{2}:[[:xdigit:]]{2}:[[:xdigit:]]{2}:[[:xdigit:]]{2}:[[:xdigit:]]{2}):(network|bridge|ip):(.*)"
 +let mac_ip_re = PCRE.compile ~anchored:true "([[:xdigit:]]|:|\\.)+"
 +
 +let rec main () =
 +  let set_string_option_once optname optref arg =
 +    match !optref with
 +    | Some _ ->
 +       error (f_"%s option used more than once on the command line") optname
 +    | None ->
 +       optref := Some arg
 +  in
 +
 +  let bandwidth = ref None in
 +  let bandwidth_file = ref None in
 +  let input_conn = ref None in
 +  let input_format = ref None in
 +  let input_password = ref None in
 +  let input_transport = ref None in
 +
 +  let input_options = ref [] in
 +  let io_query = ref false in
 +  let set_input_option_compat k v =
 +    List.push_back input_options (k, v)
 +  in
 +  let set_input_option option =
 +    if option = "?" then io_query := true
 +    else (
 +      let k, v = String.split "=" option in
 +      set_input_option_compat k v
 +    )
 +  in
 +
 +  let network_map = Networks.create () in
 +  let static_ips = ref [] in
 +  let rec add_network str =
 +    match String.split ":" str with
 +    | "", "" ->
 +       error (f_"invalid -n/--network parameter")
 +    | out, "" | "", out ->
 +       Networks.add_default_network network_map out
 +    | in_, out ->
 +       Networks.add_network network_map in_ out
 +  and add_bridge str =
 +    match String.split ":" str with
 +    | "", "" ->
 +       error (f_"invalid -b/--bridge parameter")
 +    | out, "" | "", out ->
 +       Networks.add_default_bridge network_map out
 +    | in_, out ->
 +       Networks.add_bridge network_map in_ out
 +  and add_mac str =
 +    if not (PCRE.matches mac_re str) then
 +      error (f_"cannot parse --mac \"%s\" parameter") str;
 +    let mac = PCRE.sub 1 and out = PCRE.sub 3 in
 +    match PCRE.sub 2 with
 +    | "network" ->
 +       Networks.add_mac network_map mac Network out
 +    | "bridge" ->
 +       Networks.add_mac network_map mac Bridge out
 +    | "ip" ->
 +       (match String.nsplit "," out with
 +        | [] -> error (f_"invalid --mac ip option")
 +        | [ip] -> add_static_ip mac ip None None []
 +        | [ip; gw] -> add_static_ip mac ip (Some gw) None []
 +        | ip :: gw :: len :: nameservers ->
 +           add_static_ip mac ip (Some gw) (Some len) nameservers
 +       )
 +    | _ -> assert false
 +  and add_static_ip if_mac_addr if_ip_address if_default_gateway
 +                    if_prefix_length_str if_nameservers =
 +    (* Check the IP addresses and prefix length are sensible.  This
 +     * is only a very simple test that they are sane, since IP addresses
 +     * come in too many valid forms to check thoroughly.
 +     *)
 +    let rec error_unless_ip_addr what addr =
 +      if not (PCRE.matches mac_ip_re addr) then
 +        error (f_"cannot parse --mac ip %s: doesn’t look like “%s” is an IP
address") what addr
 +    in
 +    error_unless_ip_addr "ipaddr" if_ip_address;
 +    Option.may (error_unless_ip_addr "gw") if_default_gateway;
 +    List.iter (error_unless_ip_addr "nameserver") if_nameservers;
 +    let if_prefix_length =
 +      match if_prefix_length_str with
 +      | None -> None
 +      | Some len ->
 +         let len =
 +           try int_of_string len with
 +           | Failure _ -> error (f_"cannot parse --mac ip prefix length field as
an integer: %s") len in
 +         if len < 0 || len > 128 then
 +           error (f_"--mac ip prefix length field is out of range");
 +         Some len in
 +    List.push_back static_ips
 +      { if_mac_addr; if_ip_address; if_default_gateway;
 +        if_prefix_length; if_nameservers }
 +  in
 +
 +  let root_choice = ref AskRoot in
 +  let set_root_choice = function
 +    | "ask" -> root_choice := AskRoot
 +    | "single" -> root_choice := SingleRoot
 +    | "first" -> root_choice := FirstRoot
 +    | dev when String.is_prefix dev "/dev/" -> root_choice := RootDev dev
 +    | s ->
 +      error (f_"unknown --root option: %s") s
 +  in
 +
 +  let input_mode = ref `Not_set in
 +  let set_input_mode mode =
 +    if !input_mode <> `Not_set then
 +      error (f_"%s option used more than once on the command line")
"-i";
 +    match mode with
 +    | "disk" | "local" -> input_mode := `Disk
 +    | "libvirt" -> input_mode := `Libvirt
 +    | "libvirtxml" -> input_mode := `LibvirtXML
 +    | "ova" -> input_mode := `OVA
 +    | "vmx" -> input_mode := `VMX
 +    | s ->
 +       error (f_"unknown -i option: %s") s
 +  in
 +
 +  let argspec = [
 +    [ S 'b'; L"bridge" ], Getopt.String ("in:out",
add_bridge),
 +                                    s_"Map bridge ‘in’ to ‘out’";
 +    [ S 'i' ],       Getopt.String ("disk|libvirt|libvirtxml|ova|vmx",
set_input_mode),
 +                                    s_"Set input mode (default: libvirt)";
 +    [ M"ic" ],       Getopt.String ("uri", set_string_option_once
"-ic" input_conn),
 +                                    s_"Libvirt URI";
 +    [ M"if" ],       Getopt.String ("format", set_string_option_once
"-if" input_format),
 +                                    s_"Input format";
 +    [ M"io" ],       Getopt.String ("option[=value]",
set_input_option),
 +                                    s_"Set option for input mode";
 +    [ M"ip" ],       Getopt.String ("filename",
set_string_option_once "-ip" input_password),
 +                                    s_"Use password from file to connect to input
hypervisor";
 +    [ M"it" ],       Getopt.String ("transport",
set_string_option_once "-it" input_transport),
 +                                    s_"Input transport";
 +    [ L"mac" ],      Getopt.String ("mac:network|bridge|ip:out",
add_mac),
 +                                    s_"Map NIC to network or bridge or assign
static IP";
 +    [ S 'n'; L"network" ], Getopt.String ("in:out",
add_network),
 +                                    s_"Map network ‘in’ to ‘out’";
 +    [ L"root" ],     Getopt.String ("ask|... ", set_root_choice),
 +                                    s_"How to choose root filesystem";
 +  ] in
 +  let args = ref [] in
 +  let anon_fun s = List.push_front s args in
 +  let usage_msg =
 +    sprintf (f_"\
 +%s: estimate disk space needed before virt-v2v conversion
 +
 +virt-v2v-inspector -i disk disk.img
 +
 +A short summary of the options is given below.  For detailed help please
 +read the man page virt-v2v-inspector(1).
 +")
 +      prog in
 +  let opthandle = create_standard_options argspec ~anon_fun ~key_opts:true
~machine_readable:true usage_msg in
 +  Getopt.parse opthandle.getopt;
 +
 +  (* Print the version, easier than asking users to tell us. *)
 +  debug "%s: %s %s (%s)"
 +        prog Config.package_name Config.package_version_full
 +        Config.host_cpu;
 +
 +  (* Print the libvirt version if debugging. *)
 +  if verbose () then (
 +    let major, minor, release = Libvirt_utils.libvirt_get_version () in
 +    debug "libvirt version: %d.%d.%d" major minor release
 +  );
 +
 +  (* Create the v2v directory to control conversion. *)
 +  let v2vdir = create_v2v_directory () in
 +
 +  (* Dereference the arguments. *)
 +  let args = List.rev !args in
 +  let input_conn = !input_conn in
 +  let input_mode = !input_mode in
 +  let input_transport =
 +    match !input_transport with
 +    | None -> None
 +    | Some "ssh" -> Some `SSH
 +    | Some "vddk" -> Some `VDDK
 +    | Some transport ->
 +       error (f_"unknown input transport ‘-it %s’") transport in
 +  let root_choice = !root_choice in
 +  let static_ips = !static_ips in
 +
 +  (* No arguments and machine-readable mode?  Print out some facts
 +   * about what this binary supports.
 +   *)
 +  (match args, machine_readable () with
 +   | [], Some { pr } ->
 +      pr "virt-v2v-inspector\n";
 +      pr "libguestfs-rewrite\n";
 +      pr "colours-option\n";
 +      pr "io\n";
 +      pr "mac-option\n";
 +      pr "mac-ip-option\n";
 +      pr "input:disk\n";
 +      pr "input:libvirt\n";
 +      pr "input:libvirtxml\n";
 +      pr "input:ova\n";
 +      pr "input:vmx\n";
 +      pr "convert:linux\n";
 +      pr "convert:windows\n";
 +      List.iter (pr "ovf:%s\n") Create_ovf.ovf_flavours;
 +      exit 0
 +   | _, _ -> ()
 +  );
 +
 +  (* Get the input module. *)
 +  let (module Input_module) =
 +    match input_mode with
 +    | `Disk -> (module Input_disk.Disk : Input.INPUT)
 +    | `LibvirtXML -> (module Input_libvirt.LibvirtXML)
 +    | `OVA -> (module Input_ova.OVA)
 +    | `VMX -> (module Input_vmx.VMX)
 +    | `Not_set | `Libvirt ->
 +       match input_conn with
 +       | None -> (module Input_libvirt.Libvirt_)
 +       | Some orig_uri ->
 +          let { Xml.uri_server = server; uri_scheme = scheme } =
 +            try Xml.parse_uri orig_uri
 +            with Invalid_argument msg ->
 +              error (f_"could not parse '-ic %s'.  Original error message
was: %s")
 +                orig_uri msg in
 +
 +          match server, scheme, input_transport with
 +          | None, _, _
 +            | Some "", _, _       (* Not a remote URI. *)
 +
 +            | Some _, None, _     (* No scheme? *)
 +            | Some _, Some "", _ ->
 +             (module Input_libvirt.Libvirt_)
 +
 +          (* vCenter over https. *)
 +          | Some server, Some ("esx"|"gsx"|"vpx"), None
->
 +             (module Input_vcenter_https.VCenterHTTPS)
 +
 +          (* vCenter or ESXi using nbdkit vddk plugin *)
 +          | Some server, Some ("esx"|"gsx"|"vpx"), Some
`VDDK ->
 +             (module Input_vddk.VDDK)
 +
 +          (* Xen over SSH *)
 +          | Some server, Some "xen+ssh", _ ->
 +             (module Input_xen_ssh.XenSSH)
 +
 +          (* Old virt-v2v also supported qemu+ssh://.  However I am
 +           * deliberately not supporting this in new virt-v2v.  Don't
 +           * use virt-v2v if a guest already runs on KVM.
 +           *)
 +
 +          (* Unknown remote scheme. *)
 +          | Some _, Some _, _ ->
 +             warning (f_"no support for remote libvirt connections to '-ic
%s'.  The conversion may fail when it tries to read the source disks.")
orig_uri;
 +             (module Input_libvirt.Libvirt_) in
 +
 +  let input_options = {
 +    Input.bandwidth =
 +      (match !bandwidth, !bandwidth_file with
 +       | None, None -> None
 +       | Some rate, None -> Some (StaticBandwidth rate)
 +       | rate, Some filename -> Some (DynamicBandwidth (rate, filename)));
 +    input_conn = input_conn;
 +    input_format = !input_format;
 +    input_options = !input_options;
 +    input_password = !input_password;
 +    input_transport = input_transport;
 +    (* This must always be true so that we do not modify the
 +     * source.  This is set to [false] by in-place mode.
 +     *)
 +    read_only = true;
 +  } in
 +
 +  (* If -io ? then we want to query input options supported in this mode. *)
 +  if !io_query then (
 +    Input_module.query_input_options ();
 +    exit 0
 +  );
 +
 +  (* Get the conversion options. *)
 +  let conv_options = {
 +    Convert.keep_serial_console = true;
 +    ks = opthandle.ks;
 +    network_map;
 +    root_choice;
 +    static_ips;
 +  } in
 +
 +  (* Before starting the input module, check there is sufficient
 +   * free space in the temporary directory on the host.
 +   *)
 +  check_host_free_space ();
 +
 +  (* Start the input module (runs an NBD server in the background). *)
 +  message (f_"Setting up the source: %s")
 +    (Input_module.to_string input_options args);
 +  let source = Input_module.setup v2vdir input_options args in
 +
 +  (* Do the conversion. *)
 +  with_open_out (v2vdir // "convert") (fun _ -> ());
 +  let inspect, _ = Convert.convert v2vdir conv_options source in
 +  unlink (v2vdir // "convert");
 +
 +  (* Debug the v2vdir. *)
 +  if verbose () then (
 +    let cmd = sprintf "ls -alZ %s 1>&2" (quote v2vdir) in
 +    ignore (Sys.command cmd)
 +  );
 +
 +  (* Dump out the information. *)
 +  let doc = inspector_xml v2vdir inspect in
 +  DOM.doc_to_chan Stdlib.stdout doc;
 +
 +  message (f_"Finishing off");
 +  (* As the last thing, write a file indicating success before
 +   * we exit (so before we kill the helpers).  The helpers may
 +   * use the presence or absence of the file to determine if
 +   * on-success or on-fail cleanup is required.
 +   *)
 +  with_open_out (v2vdir // "done") (fun _ -> ())
 +
 +(* Conversion can fail or hang if there is insufficient free space in
 + * the large temporary directory.  Some input modules use large_tmpdir
 + * to unpack OVAs or store qcow2 overlays and some output modules
 + * use it to store temporary files.  In addition the  500 MB guestfs
 + * appliance may be created there.  (RHBZ#1316479, RHBZ#2051394)
 + *)
 +and check_host_free_space () =
 +  let free_space = StatVFS.free_space (StatVFS.statvfs large_tmpdir) in
 +  debug "check_host_free_space: large_tmpdir=%s free_space=%Ld"
 +        large_tmpdir free_space;
 +  if free_space < 1_073_741_824L then
 +    error (f_"insufficient free space in the conversion server temporary directory
%s (%s).\n\nEither free up space in that directory, or set the LIBGUESTFS_CACHEDIR
environment variable to point to another directory with more than 1GB of free
space.\n\nSee also the virt-v2v(1) manual, section \"Minimum free space check in the
host\".")
 +          large_tmpdir (human_size free_space)
 +
 +(* Enumerate the input disks (a copy of {!Output.get_disks}).
 + *
 + * Probes [dir] for the sockets in0, in1, ... and stops at the first
 + * index whose socket file does not exist.  For every socket found the
 + * disk size is read over NBD.  The result is a list of (index, size)
 + * pairs in ascending index order.
 + *)
 +and get_disks dir =
 +  let rec probe idx found =
 +    let socket = sprintf "%s/in%d" dir idx in
 +    if not (Sys.file_exists socket) then
 +      (* First gap in the numbering: return what we collected. *)
 +      List.rev found
 +    else (
 +      let size = Utils.with_nbd_connect_unix ~socket NBD.get_size in
 +      probe (idx+1) ((idx, size) :: found)
 +    )
 +  in
 +  probe 0 []
 +
 +(* This is like {!Utils.get_disk_allocated} but works on the input disks.
 + *
 + * Connects to the NBD socket in<i> inside [dir], asking for the
 + * base:allocation meta context.  Returns [Some n] where n is the sum
 + * of the lengths of all reported extents whose type has bit 0 clear,
 + * or [None] if the connection cannot use that meta context.
 + *)
 +and get_input_disk_allocated dir i =
 +  let socket = sprintf "%s/in%d" dir i
 +  and alloc_ctx = "base:allocation" in
 +  with_nbd_connect_unix ~socket ~meta_contexts:[alloc_ctx]
 +    (fun nbd ->
 +      if NBD.can_meta_context nbd alloc_ctx then (
 +        (* Get the list of extents, using a 2GiB chunk size as hint. *)
 +        let size = NBD.get_size nbd
 +        and allocated = ref 0_L
 +        and fetch_offset = ref 0_L in
 +        (* Keep querying until the extents seen so far cover the disk. *)
 +        while !fetch_offset < size do
 +          let remaining = size -^ !fetch_offset in
 +          let fetch_size = min 0x8000_0000_L remaining in
 +          NBD.block_status nbd fetch_size !fetch_offset
 +            (fun ctx offset entries err ->
 +              assert (ctx = alloc_ctx);
 +              (* [entries] is a flat array of (length, type) pairs.
 +               * Note that this [i] shadows the disk index, which is
 +               * not needed any more at this point.
 +               *)
 +              for i = 0 to Array.length entries / 2 - 1 do
 +                let len = entries.(i * 2)
 +                and typ = entries.(i * 2 + 1) in
 +                assert (len > 0_L);
 +                (* Bit 0 clear: count the extent as allocated.  Only
 +                 * the update of [allocated] is conditional; the
 +                 * [fetch_offset] update below runs for every extent.
 +                 *)
 +                if typ &^ 1_L = 0_L then
 +                  allocated := !allocated +^ len;
 +                fetch_offset := !fetch_offset +^ len
 +              done;
 +              (* Callback result; presumably 0 means success to
 +               * libnbd -- confirm against the NBD bindings.
 +               *)
 +              0
 +            )
 +        done;
 +        Some !allocated
 +      ) else None
 +    )
 +
 +(* This is where we construct the final XML document based on
 + * these inputs:
 + *   - Global configuration like the version of v2v etc.
 + *   - The NBD input sockets: v2vdir // "in0", "in1", etc
 + *   - The inspection data (Types.inspect)
 + *
 + * The resulting <v2v-inspection> document contains, in this order:
 + * a comment and the program, package and version elements; a <disks>
 + * element holding one <disk> per input socket; and an
 + * <operatingsystem> element built from [inspect].
 + *)
 +and inspector_xml v2vdir inspect =
 +  let body = ref [] in
 +
 +  (* Record the version of virt-v2v etc, mainly for debugging. *)
 +  List.push_back_list body [
 +    Comment generated_by;
 +    e "program" [] [PCData "virt-v2v-inspector"];
 +    e "package" [] [PCData Config.package_name];
 +    e "version" [] [PCData Config.package_version];
 +  ];
 +
 +  (* The disks. *)
 +  let disks = ref [] in
 +
 +  List.iter (
 +    fun (i, virtual_size) ->
 +      let elems = ref [] in
 +      List.push_back elems (e "virtual-size" []
 +                              [PCData (Int64.to_string virtual_size)]);
 +      (* <allocated> is only emitted when the allocation of the input
 +       * disk could be queried.
 +       *)
 +      (match get_input_disk_allocated v2vdir i with
 +       | None -> ()
 +       | Some real_size ->
 +          List.push_back elems
 +            (e "allocated" [ "estimated", "true" ]
 +               [PCData (Int64.to_string real_size)])
 +      );
 +
 +      List.push_back disks
 +        (e "disk" [ "index", string_of_int i ] !elems)
 +  ) (get_disks v2vdir);
 +  List.push_back body (e "disks" [] !disks);
 +
 +  (* The inspection data. *)
 +  (* NB: Keep these field names compatible with virt-inspector! *)
 +  let os = ref [] in
 +  List.push_back os (e "name" [] [PCData inspect.i_type]);
 +  List.push_back os (e "distro" [] [PCData inspect.i_distro]);
 +  List.push_back os (e "osinfo" [] [PCData inspect.i_osinfo]);
 +  List.push_back os (e "arch" [] [PCData inspect.i_arch]);
 +  List.push_back os (e "major_version" []
 +                       [PCData (string_of_int inspect.i_major_version)]);
 +  List.push_back os (e "minor_version" []
 +                       [PCData (string_of_int inspect.i_minor_version)]);
 +  (* The remaining fields are optional: skip them when empty. *)
 +  if inspect.i_package_format <> "" then
 +    List.push_back os (e "package_format" []
 +                         [PCData inspect.i_package_format]);
 +  if inspect.i_package_management <> "" then
 +    List.push_back os (e "package_management" []
 +                         [PCData inspect.i_package_management]);
 +  if inspect.i_product_name <> "" then
 +    List.push_back os (e "product_name" [] [PCData inspect.i_product_name]);
 +  List.push_back body (e "operatingsystem" [] !os);
 +
 +  (* Construct the final document. *)
 +  (doc "v2v-inspection" [] !body : DOM.doc)
 +
 +(* Program entry point: run [main] via run_main_and_handle_errors. *)
 +let () = run_main_and_handle_errors main
 diff --git a/inspector/dummy.c b/inspector/dummy.c
 new file mode 100644
 index 0000000000..ebab6198cd
 --- /dev/null
 +++ b/inspector/dummy.c
 @@ -0,0 +1,2 @@
 +/* Dummy source, to be used for OCaml-based tools with no C sources.  The enum below is the file's only declaration. */
 +enum { foo = 1 };
 diff --git a/tests/test-v2v-inspector.sh b/tests/test-v2v-inspector.sh
 new file mode 100755
 index 0000000000..52406ddee1
 --- /dev/null
 +++ b/tests/test-v2v-inspector.sh
 @@ -0,0 +1,76 @@
 +#!/bin/bash -
 +# libguestfs virt-v2v test script
 +# Copyright (C) 2014-2022 Red Hat Inc.
 +#
 +# This program is free software; you can redistribute it and/or modify
 +# it under the terms of the GNU General Public License as published by
 +# the Free Software Foundation; either version 2 of the License, or
 +# (at your option) any later version.
 +#
 +# This program is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write to the Free Software
 +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 +
 +# Test virt-v2v-inspector: inspect the phony Windows guest and check the XML output.
 +
 +unset CDPATH
 +export LANG=C
 +set -e
 +
 +source ./functions.sh
 +set -e
 +set -x
 +
 +skip_if_skipped
 +requires test -f ../test-data/phony-guests/windows.img
 +
 +img="$abs_top_builddir/test-data/phony-guests/windows.img"
 +
 +export VIRT_TOOLS_DATA_DIR="$srcdir/../test-data/fake-virt-tools"
 +export VIRTIO_WIN="$srcdir/../test-data/fake-virtio-win"
 +
 +d=$PWD/test-v2v-inspector.d
 +rm -rf "$d"
 +cleanup_fn rm -r "$d"
 +mkdir "$d"
 +
 +out="$d/out"
 +
 +libvirt_xml="$d/test.xml"
 +rm -f "$libvirt_xml"
 +n=windows
 +cat > "$libvirt_xml" <<EOF
 +<node>
 +  <domain type='test'>
 +    <name>$n</name>
 +    <memory>1048576</memory>
 +    <os>
 +      <type>hvm</type>
 +      <boot dev='hd'/>
 +    </os>
 +    <devices>
 +      <disk type='file' device='disk'>
 +        <driver name='qemu' type='raw'/>
 +        <source file='$img'/>
 +        <target dev='vda' bus='virtio'/>
 +      </disk>
 +    </devices>
 +  </domain>
 +</node>
 +EOF
 +
 +$VG virt-v2v-inspector --quiet --debug-gc -i libvirt -ic "test://$libvirt_xml" $n > "$out"
 +cat "$out"
 +
 +# Expect certain elements to be present (set -e fails the test if a grep finds nothing).
 +grep '^<v2v-inspection' "$out"
 +grep '<program>virt-v2v-inspector</program>' "$out"
 +grep '<disks>' "$out"
 +grep "<disk index='0'>" "$out"
 +grep '<distro>windows</distro>' "$out"
 +grep '<osinfo>win7</osinfo>' "$out"
 diff --git a/.gitignore b/.gitignore
 index 62541b8980..655c794ee7 100644
 --- a/.gitignore
 +++ b/.gitignore
 @@ -51,6 +51,7 @@ Makefile.in
  /docs/virt-v2v-in-place.1
  /docs/virt-v2v-input-vmware.1
  /docs/virt-v2v-input-xen.1
 +/docs/virt-v2v-inspector.1
  /docs/virt-v2v-output-local.1
  /docs/virt-v2v-output-openstack.1
  /docs/virt-v2v-output-rhv.1
 @@ -62,6 +63,8 @@ Makefile.in
  /in-place/.depend
  /in-place/virt-v2v-in-place
  /input/.depend
 +/inspector/.depend
 +/inspector/virt-v2v-inspector
  /installcheck.sh
  /install-sh
  /libtool
 diff --git a/run.in b/run.in
 index 69936a6e2b..c75e4e0f0f 100755
 --- a/run.in
 +++ b/run.in
 @@ -67,9 +67,10 @@ export LIBGUESTFS_CACHEDIR="$b/tmp"
  mkdir -p "$b/tmp"
  chcon --reference=/tmp "$b/tmp" 2>/dev/null ||:
  
 -# Set the PATH to contain the virt-v2v and virt-v2v-in-place binaries.
 +# Set the PATH to contain the virt-v2v and other binaries.
  prepend PATH "$b/v2v"
  prepend PATH "$b/in-place"
 +prepend PATH "$b/inspector"
  export PATH
  
  # This is a cheap way to find some use-after-free and uninitialized