On Tue, Aug 14, 2018 at 8:29 PM Richard W.M. Jones <rjones@redhat.com> wrote:
This option prints the estimated size of the data that will be copied
from the source disk.

For interest, the test prints:

3747840 ../test-data/phony-guests/windows.img
Estimate: 3710976

Why not use qemu-img measure on the overlay?

It gives a conservative estimate that will never fail, based on the allocated
blocks and additional metadata required for the destination file format. 

Nir
 
---
 v2v/Makefile.am                |  2 ++
 v2v/cmdline.ml                 | 17 ++++++++++--
 v2v/cmdline.mli                |  2 ++
 v2v/test-v2v-print-estimate.sh | 47 +++++++++++++++++++++++++++++++++
 v2v/v2v.ml                     | 48 ++++++++++++++++++++++++++++++++++
 v2v/virt-v2v.pod               | 17 ++++++++++++
 6 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/v2v/Makefile.am b/v2v/Makefile.am
index 5461055d1..2ea0dd1d5 100644
--- a/v2v/Makefile.am
+++ b/v2v/Makefile.am
@@ -379,6 +379,7 @@ TESTS += \
        test-v2v-oa-option.sh \
        test-v2v-of-option.sh \
        test-v2v-on-option.sh \
+       test-v2v-print-estimate.sh \
        test-v2v-print-source.sh \
        test-v2v-sound.sh \
        $(SLOW_TESTS) \
@@ -534,6 +535,7 @@ EXTRA_DIST += \
        test-v2v-oa-option.sh \
        test-v2v-of-option.sh \
        test-v2v-on-option.sh \
+       test-v2v-print-estimate.sh \
        test-v2v-print-source.expected \
        test-v2v-print-source.sh \
        test-v2v-print-source.xml \
diff --git a/v2v/cmdline.ml b/v2v/cmdline.ml
index 5b2df3555..74cc27714 100644
--- a/v2v/cmdline.ml
+++ b/v2v/cmdline.ml
@@ -33,10 +33,12 @@ type cmdline = {
   debug_overlays : bool;
   do_copy : bool;
   in_place : bool;
+  machine_readable : bool;
   network_map : Networks.t;
   output_alloc : output_allocation;
   output_format : string option;
   output_name : string option;
+  print_estimate : bool;
   print_source : bool;
   root_choice : root_choice;
 }
@@ -49,6 +51,7 @@ let parse_cmdline () =
   let debug_overlays = ref false in
   let do_copy = ref true in
   let machine_readable = ref false in
+  let print_estimate = ref false in
   let print_source = ref false in
   let qemu_boot = ref false in

@@ -235,6 +238,8 @@ let parse_cmdline () =
                                     s_"Set output storage location";
     [ L"password-file" ], Getopt.String ("filename", set_string_option_once "--password-file" input_password),
                                     s_"Same as ‘-ip filename’";
+    [ L"print-estimate" ], Getopt.Set print_estimate,
+                                    s_"Estimate size of source and stop";
     [ L"print-source" ], Getopt.Set print_source,
                                     s_"Print source and stop";
     [ L"qemu-boot" ], Getopt.Set qemu_boot, s_"Boot in qemu (-o qemu only)";
@@ -330,6 +335,7 @@ read the man page virt-v2v(1).
   let output_options = List.rev !output_options in
   let output_password = !output_password in
   let output_storage = !output_storage in
+  let print_estimate = !print_estimate in
   let print_source = !print_source in
   let qemu_boot = !qemu_boot in
   let root_choice = !root_choice in
@@ -355,6 +361,12 @@ read the man page virt-v2v(1).
     exit 0
   );

+  (* Some options cannot be used with --in-place. *)
+  if in_place then (
+    if print_estimate then
+      error (f_"--in-place and --print-estimate cannot be used together")
+  );
+
   (* Input transport affects whether some input options should or
    * should not be used.
    *)
@@ -620,8 +632,9 @@ read the man page virt-v2v(1).
       output_format, output_alloc in

   {
-    compressed; debug_overlays; do_copy; in_place; network_map;
+    compressed; debug_overlays; do_copy; in_place;
+    machine_readable; network_map;
     output_alloc; output_format; output_name;
-    print_source; root_choice;
+    print_estimate; print_source; root_choice;
   },
   input, output
diff --git a/v2v/cmdline.mli b/v2v/cmdline.mli
index 25beb1c95..9b5bd4098 100644
--- a/v2v/cmdline.mli
+++ b/v2v/cmdline.mli
@@ -23,10 +23,12 @@ type cmdline = {
   debug_overlays : bool;
   do_copy : bool;
   in_place : bool;
+  machine_readable : bool;
   network_map : Networks.t;
   output_alloc : Types.output_allocation;
   output_format : string option;
   output_name : string option;
+  print_estimate : bool;
   print_source : bool;
   root_choice : Types.root_choice;
 }
diff --git a/v2v/test-v2v-print-estimate.sh b/v2v/test-v2v-print-estimate.sh
new file mode 100755
index 000000000..d952d1a37
--- /dev/null
+++ b/v2v/test-v2v-print-estimate.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -
+# libguestfs virt-v2v test script
+# Copyright (C) 2018 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Test --print-estimate option.
+
+set -e
+
+$TEST_FUNCTIONS
+skip_if_skipped
+skip_unless_phony_guest windows.img
+
+f=test-v2v-print-estimate.out
+rm -f $f
+
+du -s -B 1 ../test-data/phony-guests/windows.img
+
+$VG virt-v2v --debug-gc \
+    -i libvirtxml test-v2v-print-source.xml \
+    -o local -os $(pwd) \
+    --print-estimate --quiet > $f
+
+echo -n "Estimate: "
+cat $f
+
+# Check the output is exactly one line consisting only of digits.
+if [ "$(cat $f | wc -l)" -ne 1 ]; then
+    echo "$0: expecting one line of output"
+    exit 1
+fi
+grep -E '^[[:digit:]]+$' $f
+
+rm -f $f
diff --git a/v2v/v2v.ml b/v2v/v2v.ml
index 1775200d3..e24a9adf6 100644
--- a/v2v/v2v.ml
+++ b/v2v/v2v.ml
@@ -104,6 +104,12 @@ let rec main () =
   (* Decrypt the disks. *)
   inspect_decrypt g;

+  (* Print source disk size estimate and stop. *)
+  if cmdline.print_estimate then (
+    print_source_disk_size_estimate cmdline g;
+    exit 0
+  );
+
   (* Inspection - this also mounts up the filesystems. *)
   (match conversion_mode with
    | Copying _ -> message (f_"Inspecting the overlay")
@@ -371,6 +377,48 @@ and print_mpstat chan { mp_dev = dev; mp_path = path;
   fprintf chan "  bsize=%Ld blocks=%Ld bfree=%Ld bavail=%Ld\n"
     s.Guestfs.bsize s.Guestfs.blocks s.Guestfs.bfree s.Guestfs.bavail

+(* Print the estimated size in bytes of the source disk(s) on stdout.
+ *
+ * This is somewhat related to mpstats above, except that we sum over
+ * every device returned by g#list_filesystems: a device that mounts
+ * read-only counts (blocks - bfree) * bsize from statvfs, and one
+ * that cannot be mounted is assumed to be copied completely.
+ *
+ * This function unmounts and remounts filesystems as it goes, but
+ * that's OK because the caller exits afterwards.  cmdline is unused.
+ *)
+and print_source_disk_size_estimate cmdline g =
+  let fses = List.map fst (g#list_filesystems ()) in

+  let size =
+    List.fold_left (
+      fun size dev ->
+        g#umount_all ();
+        let mounted =
+          try g#mount_ro dev "/"; true
+          with G.Error _ -> false in

+        let sz, what =
+          if mounted then (
+            let { Guestfs.bfree; blocks; bsize } = g#statvfs "/" in
+            (blocks -^ bfree) *^ bsize, "filesystem"
+          )
+          else (
+            (* Mount failed: assume the full size of the device
+             * will have to be copied.
+             *)
+            g#blockdev_getsize64 dev, "device"
+          ) in

+        debug "print-estimate: %s %s uses %Ld (%s)"
+              what dev sz (human_size sz);
+        size +^ sz
+    ) 0L fses in

+  g#umount_all ();

+  printf "%Ld\n" size
+
 (* Conversion can fail if there is no space on the guest filesystems
  * (RHBZ#1139543).  To avoid this situation, check there is some
  * headroom.  Mainly we care about the root filesystem.
diff --git a/v2v/virt-v2v.pod b/v2v/virt-v2v.pod
index 303fe425c..80b765983 100644
--- a/v2v/virt-v2v.pod
+++ b/v2v/virt-v2v.pod
@@ -794,6 +794,23 @@ C<root>.
 You will get an error if virt-v2v is unable to mount/write to the
 Export Storage Domain.

+=item B<--print-estimate>
+
+Print the estimated size of the data which will be copied from the
+source disk(s) and stop.
+
+A single number is printed on stdout which is the estimated size of
+data that will be copied, in bytes.
+
+This estimate is the sum across all disks, because guest features such
+as LVM and MD mean that it is not meaningful to provide separate
+estimates for each disk.  The size does not include deleted files and
+empty space in the source, but it cannot detect used-but-zero space
+and so it usually overestimates.
+
+You usually want to use this option in conjunction with the I<--quiet>
+option so that the result is not mixed in with standard messages.
+
 =item B<--print-source>

 Print information about the source guest and stop.  This option is
--
2.18.0 

_______________________________________________
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs