[PATCH v2] launch: add support for autodetection of appliance image format
                                
                                
                                
                                    
                                        by Pavel Butsykin
                                    
                                
                                
                                        This feature allows you to use different image formats for the fixed
appliance. The raw format is used by default.
Signed-off-by: Pavel Butsykin <pbutsykin(a)virtuozzo.com>
---
 lib/launch-direct.c     |  2 ++
 lib/launch-libvirt.c    | 19 ++++++++++++-------
 m4/guestfs_appliance.m4 | 11 +++++++++++
 3 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/lib/launch-direct.c b/lib/launch-direct.c
index 0be662e25..b9b54857a 100644
--- a/lib/launch-direct.c
+++ b/lib/launch-direct.c
@@ -592,7 +592,9 @@ launch_direct (guestfs_h *g, void *datav, const char *arg)
       append_list ("id=appliance");
       append_list ("cache=unsafe");
       append_list ("if=none");
+#ifndef APPLIANCE_FMT_AUTO
       append_list ("format=raw");
+#endif
     } end_list ();
     start_list ("-device") {
       append_list ("scsi-hd");
diff --git a/lib/launch-libvirt.c b/lib/launch-libvirt.c
index 4adb2cfb3..030ea6911 100644
--- a/lib/launch-libvirt.c
+++ b/lib/launch-libvirt.c
@@ -212,9 +212,10 @@ get_source_format_or_autodetect (guestfs_h *g, struct drive *drv)
 
 /**
  * Create a qcow2 format overlay, with the given C<backing_drive>
- * (file).  The C<format> parameter, which must be non-NULL, is the
- * backing file format.  This is used to create the appliance overlay,
- * and also for read-only drives.
+ * (file).  The C<format> parameter is the backing file format.
+ * The C<format> parameter can be NULL, in this case the backing
+ * format will be determined automatically.  This is used to create
+ * the appliance overlay, and also for read-only drives.
  */
 static char *
 make_qcow2_overlay (guestfs_h *g, const char *backing_drive,
@@ -223,8 +224,6 @@ make_qcow2_overlay (guestfs_h *g, const char *backing_drive,
   char *overlay;
   struct guestfs_disk_create_argv optargs;
 
-  assert (format != NULL);
-
   if (guestfs_int_lazy_make_tmpdir (g) == -1)
     return NULL;
 
@@ -232,8 +231,10 @@ make_qcow2_overlay (guestfs_h *g, const char *backing_drive,
 
   optargs.bitmask = GUESTFS_DISK_CREATE_BACKINGFILE_BITMASK;
   optargs.backingfile = backing_drive;
-  optargs.bitmask |= GUESTFS_DISK_CREATE_BACKINGFORMAT_BITMASK;
-  optargs.backingformat = format;
+  if (format) {
+    optargs.bitmask |= GUESTFS_DISK_CREATE_BACKINGFORMAT_BITMASK;
+    optargs.backingformat = format;
+  }
 
   if (guestfs_disk_create_argv (g, overlay, "qcow2", -1, &optargs) == -1) {
     free (overlay);
@@ -461,7 +462,11 @@ launch_libvirt (guestfs_h *g, void *datav, const char *libvirt_uri)
 
   /* Note that appliance can be NULL if using the old-style appliance. */
   if (appliance) {
+#ifdef APPLIANCE_FMT_AUTO
+    params.appliance_overlay = make_qcow2_overlay (g, appliance, NULL);
+#else
     params.appliance_overlay = make_qcow2_overlay (g, appliance, "raw");
+#endif
     if (!params.appliance_overlay)
       goto cleanup;
   }
diff --git a/m4/guestfs_appliance.m4 b/m4/guestfs_appliance.m4
index 81c43879f..4e1ec8135 100644
--- a/m4/guestfs_appliance.m4
+++ b/m4/guestfs_appliance.m4
@@ -139,3 +139,14 @@ AC_SUBST([GUESTFS_DEFAULT_PATH])
 
 AC_DEFINE_UNQUOTED([GUESTFS_DEFAULT_PATH], ["$GUESTFS_DEFAULT_PATH"],
     [Define guestfs default path.])
+
+AC_ARG_ENABLE([appliance-fmt-auto],
+    [AS_HELP_STRING([--enable-appliance-fmt-auto],
+        [enable autodetection of appliance image format @<:@default=no@:>@])],
+        [ENABLE_APPLIANCE_FMT_AUTO="$enableval"],
+        [ENABLE_APPLIANCE_FMT_AUTO=no])
+
+if test "x$ENABLE_APPLIANCE_FMT_AUTO" = "xyes"; then
+    AC_DEFINE([APPLIANCE_FMT_AUTO], [1],
+        [Define to 1 if enabled autodetection of appliance image format.])
+fi
-- 
2.13.0
                                
                         
                        
                                
                                5 years, 9 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        1.39 proposal: Let's split up the libguestfs git repo and tarballs
                                
                                
                                
                                    
                                        by Richard W.M. Jones
                                    
                                
                                
                                        My contention is that the libguestfs git repository is too large and
unwieldy.  There are too many separate, unrelated projects and as a
result of that the source has too many dependencies and takes too long
to build and test.
The project divides (sort of) naturally into layers -- the library,
the bindings, the various virt tools -- and could be split along those
lines into separate projects which can then be released and evolve at
their own pace.
My suggested split would be something like this:
* libguestfs: The library, daemon and appliance.  That would include
  the following directories in a single project:
       appliance
       bash
       contrib
       daemon
       docs
       examples
       gnulib
       lib
       logo
       test-tool
       tmp
       utils
       website
* 1 project for each language binding:
       csharp
       erlang
       gobject
       golang
       haskell
       java
       lua
       ocaml
       php
       perl
       python
       ruby
* virt-customize and related tools, we'd probably call this subproject
  "virt-builder".  It would include virt-builder, virt-customize and
  virt-sysprep, since they share a lot of common code.
* 1 project for each of the following items:
       small tools written in C
          (virt-cat, virt-filesystems, virt-log, virt-ls, virt-tail,
	   virt-diff, virt-edit, virt-format, guestmount, virt-inspector,
	   virt-make-fs, virt-rescue)
       guestfish
       virt-alignment-scan and virt-df
       virt-dib	   
       virt-get-kernel
       virt-resize
       virt-sparsify
       virt-v2v and virt-p2v
       virt-win-reg
* I'd be inclined to drop the legacy Perl tools virt-tar,
  virt-list-filesystems, virt-list-partitions unless someone
  especially wished to step forward to maintain them.
* common code and generator: Off to the side we'd somehow need to
  package up the common code and the generator for use by all of the
  above projects.  It wouldn't be a separate project for downstream
  packagers, but instead the code would be included (ie. duplicated)
  in tarballs and upstream available as a side git repo that you'd
  need to include when building (git submodule?).  This is somewhat
  unspecified.
M4, PO, and tests would be split between the projects as appropriate.
My proposal would be to do this incrementally, rather than all at
once, moving the easier things out first.
Thoughts?
Rich.
-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org
                                
                         
                        
                                
                                5 years, 11 months
                        
                        
                 
         
 
        
            
        
        
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] v2v: ovf: add firmware and machine type element
                                
                                
                                
                                    
                                        by Tomáš Golembiovský
                                    
                                
                                
                                        Add oVirt specific elemnt to OVF. It represents the combination of
machine type (i440fx/q35) and firmware (BIOS/UEFI).
Signed-off-by: Tomáš Golembiovský <tgolembi(a)redhat.com>
---
 v2v/create_ovf.ml                        | 20 +++++++++++++++++++-
 v2v/create_ovf.mli                       |  2 +-
 v2v/output_rhv.ml                        |  6 ++----
 v2v/output_rhv_upload.ml                 |  4 ++--
 v2v/output_vdsm.ml                       |  6 ++----
 v2v/test-v2v-o-rhv.ovf.expected          |  1 +
 v2v/test-v2v-o-vdsm-options.ovf.expected |  1 +
 7 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/v2v/create_ovf.ml b/v2v/create_ovf.ml
index 2cf610333..1cab11dfd 100644
--- a/v2v/create_ovf.ml
+++ b/v2v/create_ovf.ml
@@ -462,6 +462,22 @@ let origin_of_source_hypervisor = function
    *)
   | _ -> None
 
+let get_ovirt_biostype guestcaps target_firmware  =
+  let uefi_firmware =
+    match target_firmware with
+    | TargetBIOS -> None
+    | TargetUEFI -> Some (find_uefi_firmware guestcaps.gcaps_arch) in
+  let secure_boot_required =
+    match uefi_firmware with
+    | Some { Uefi.flags = flags }
+         when List.mem Uefi.UEFI_FLAG_SECURE_BOOT_REQUIRED flags -> true
+    | _ -> false in
+  match target_firmware, secure_boot_required with
+  | TargetUEFI, true -> 3 (* q35 + UEFI + secure boot *)
+  | TargetUEFI, _ -> 2 (* q35 + UEFI *)
+  (* 1 is q35 + SeaBIOS *)
+  | _, _ -> 0 (* i440fx + SeaBIOS *)
+
 (* Generate the .meta file associated with each volume. *)
 let create_meta_files output_alloc sd_uuid image_uuids targets =
   (* Note: Upper case in the .meta, mixed case in the OVF. *)
@@ -506,7 +522,7 @@ let create_meta_files output_alloc sd_uuid image_uuids targets =
   ) (List.combine targets image_uuids)
 
 (* Create the OVF file. *)
-let rec create_ovf source targets guestcaps inspect
+let rec create_ovf source targets guestcaps inspect target_firmware
     output_alloc sd_uuid image_uuids vol_uuids vm_uuid ovf_flavour =
   assert (List.length targets = List.length vol_uuids);
 
@@ -515,6 +531,7 @@ let rec create_ovf source targets guestcaps inspect
   let vmtype = get_vmtype inspect in
   let vmtype = match vmtype with `Desktop -> "0" | `Server -> "1" in
   let ostype = get_ostype inspect in
+  let biostype = get_ovirt_biostype guestcaps target_firmware in
 
   let ovf : doc =
     doc "ovf:Envelope" [
@@ -562,6 +579,7 @@ let rec create_ovf source targets guestcaps inspect
         e "VmType" [] [PCData vmtype];
         (* See https://bugzilla.redhat.com/show_bug.cgi?id=1260590#c17 *)
         e "DefaultDisplayType" [] [PCData "1"];
+        e "BiosType" [] [PCData (string_of_int biostype)];
       ] in
 
       (match source.s_cpu_model with
diff --git a/v2v/create_ovf.mli b/v2v/create_ovf.mli
index 8200b76f9..cb6c12690 100644
--- a/v2v/create_ovf.mli
+++ b/v2v/create_ovf.mli
@@ -43,7 +43,7 @@ val ovf_flavour_to_string : ovf_flavour -> string
     create OVF for another target management system then we would need
     to heavily modify or even duplicate this code. *)
 
-val create_ovf : Types.source -> Types.target list -> Types.guestcaps -> Types.inspect -> Types.output_allocation -> string -> string list -> string list -> string ->  ovf_flavour -> DOM.doc
+val create_ovf : Types.source -> Types.target list -> Types.guestcaps -> Types.inspect -> Types.target_firmware  -> Types.output_allocation -> string -> string list -> string list -> string ->  ovf_flavour -> DOM.doc
 (** Create the OVF file.
 
     Actually a {!DOM} document is created, not a file.  It can be written
diff --git a/v2v/output_rhv.ml b/v2v/output_rhv.ml
index 5260ab030..ce57f0309 100644
--- a/v2v/output_rhv.ml
+++ b/v2v/output_rhv.ml
@@ -115,7 +115,7 @@ object
 
   method as_options = sprintf "-o rhv -os %s" os
 
-  method supported_firmware = [ TargetBIOS ]
+  method supported_firmware = [ TargetBIOS; TargetUEFI ]
 
   (* RHV doesn't support serial consoles.  This causes the conversion
    * step to remove it.
@@ -270,12 +270,10 @@ object
 
   (* This is called after conversion to write the OVF metadata. *)
   method create_metadata source targets _ guestcaps inspect target_firmware =
-    (* See #supported_firmware above. *)
-    assert (target_firmware = TargetBIOS);
 
     (* Create the metadata. *)
     let ovf = Create_ovf.create_ovf source targets guestcaps inspect
-      output_alloc esd_uuid image_uuids vol_uuids vm_uuid
+      target_firmware output_alloc esd_uuid image_uuids vol_uuids vm_uuid
       Create_ovf.RHVExportStorageDomain in
 
     (* Write it to the metadata file. *)
diff --git a/v2v/output_rhv_upload.ml b/v2v/output_rhv_upload.ml
index 63fa2411a..3dcd6d952 100644
--- a/v2v/output_rhv_upload.ml
+++ b/v2v/output_rhv_upload.ml
@@ -253,7 +253,7 @@ object
     sprintf " -oc %s -op %s -os %s"
             output_conn output_password output_storage
 
-  method supported_firmware = [ TargetBIOS ]
+  method supported_firmware = [ TargetBIOS; TargetUEFI ]
 
   (* rhev-apt.exe will be installed (if available). *)
   method install_rhev_apt = true
@@ -407,7 +407,7 @@ If the messages above are not sufficient to diagnose the problem then add the 
     (* Create the metadata. *)
     let ovf =
       Create_ovf.create_ovf source targets guestcaps inspect
-                            output_alloc
+                            target_firmware output_alloc
                             sd_uuid image_uuids vol_uuids vm_uuid
                             OVirt in
     let ovf = DOM.doc_to_string ovf in
diff --git a/v2v/output_vdsm.ml b/v2v/output_vdsm.ml
index 9a1b748bc..f32acedae 100644
--- a/v2v/output_vdsm.ml
+++ b/v2v/output_vdsm.ml
@@ -123,7 +123,7 @@ object
        | flav -> sprintf "-oo vdsm-ovf-flavour=%s"
                          (Create_ovf.ovf_flavour_to_string flav))
 
-  method supported_firmware = [ TargetBIOS ]
+  method supported_firmware = [ TargetBIOS; TargetUEFI ]
 
   (* RHV doesn't support serial consoles.  This causes the conversion
    * step to remove it.
@@ -240,11 +240,9 @@ object
 
   (* This is called after conversion to write the OVF metadata. *)
   method create_metadata source targets _ guestcaps inspect target_firmware =
-    (* See #supported_firmware above. *)
-    assert (target_firmware = TargetBIOS);
-
     (* Create the metadata. *)
     let ovf = Create_ovf.create_ovf source targets guestcaps inspect
+      target_firmware
       output_alloc dd_uuid
       vdsm_options.image_uuids
       vdsm_options.vol_uuids
diff --git a/v2v/test-v2v-o-rhv.ovf.expected b/v2v/test-v2v-o-rhv.ovf.expected
index 7bcc456c5..c6bd05c56 100644
--- a/v2v/test-v2v-o-rhv.ovf.expected
+++ b/v2v/test-v2v-o-rhv.ovf.expected
@@ -25,6 +25,7 @@
     <IsStateless>False</IsStateless>
     <VmType>0</VmType>
     <DefaultDisplayType>1</DefaultDisplayType>
+    <BiosType>0</BiosType>
     <Section ovf:id='#VM_ID#' ovf:required='false' xsi:type='ovf:OperatingSystemSection_Type'>
       <Info>Microsoft Windows 7 Phony Edition</Info>
       <Description>Windows7</Description>
diff --git a/v2v/test-v2v-o-vdsm-options.ovf.expected b/v2v/test-v2v-o-vdsm-options.ovf.expected
index abaf37e54..f0d418b46 100644
--- a/v2v/test-v2v-o-vdsm-options.ovf.expected
+++ b/v2v/test-v2v-o-vdsm-options.ovf.expected
@@ -25,6 +25,7 @@
     <IsStateless>False</IsStateless>
     <VmType>0</VmType>
     <DefaultDisplayType>1</DefaultDisplayType>
+    <BiosType>0</BiosType>
     <OperatingSystemSection ovf:id='VM' ovf:required='false' ovirt:id='11'>
       <Info>Microsoft Windows 7 Phony Edition</Info>
       <Description>Windows7</Description>
-- 
2.18.0
                                
                         
                        
                                
                                7 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH nbdkit 0/4] Add truncate and map filters.
                                
                                
                                
                                    
                                        by Richard W.M. Jones
                                    
                                
                                
                                        This patch series proposes two new filters.
* truncate: This can truncate, extend, round up or round down the size
of a plugin/device.  A typical usage is to fix the qemu problem that
it can only handle devices which are a multiple of 512-bytes:
  nbdkit --filter=truncate random size=500 round-up=512
This will serve a virtual device with size 512 bytes.  Reading from
the last 12 bytes will return zeroes.  And writing is permitted,
provided you only write zeroes.
An alternative might have been to extend the offset filter to deal
with this, but that could have got quite clumsy.
* map: This is an all-purpose remapping filter.  Best to read the man
page to see what this does.
(These filters are not really related to each other, except that the
truncate filter turned out to be necessary to test the map filter.)
Rich.
                                
                         
                        
                                
                                7 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] v2v: -o rhv-upload: PEP8 fixes for rhv-upload-plugin.py
                                
                                
                                
                                    
                                        by Pino Toscano
                                    
                                
                                
                                        - wrap a too long line
- join strings in multiple lines using + (plus) instead of \ (backslash)
---
 v2v/rhv-upload-plugin.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/v2v/rhv-upload-plugin.py b/v2v/rhv-upload-plugin.py
index 4fad27fb8..bdc1e104a 100644
--- a/v2v/rhv-upload-plugin.py
+++ b/v2v/rhv-upload-plugin.py
@@ -86,7 +86,8 @@ def find_host(connection):
 
     hosts_service = system_service.hosts_service()
     hosts = hosts_service.list(
-        search="hw_id=%s and datacenter=%s and status=Up" % (vdsm_id, datacenter.name),
+        search="hw_id=%s and datacenter=%s and status=Up"
+               % (vdsm_id, datacenter.name),
         case_sensitive=True,
     )
     if len(hosts) == 0:
@@ -94,8 +95,8 @@ def find_host(connection):
         # - 'hw_id' equals to 'vdsm_id'
         # - Its status is 'Up'
         # - Belongs to the storage domain's datacenter
-        debug("cannot find a running host with hw_id=%r, " \
-              "that belongs to datacenter '%s', " \
+        debug("cannot find a running host with hw_id=%r, " +
+              "that belongs to datacenter '%s', " +
               "using any host" % (vdsm_id, datacenter.name))
         return None
 
-- 
2.17.1
                                
                         
                        
                                
                                7 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v2] file: Normalize errno value for ENODEV
                                
                                
                                
                                    
                                        by Nir Soffer
                                    
                                
                                
                                        Fix issues Eric found in the original patch:
https://www.redhat.com/archives/libguestfs/2018-July/msg00072.html
- When handling ENODEV, the caller is expecting EOPNOTSUPP to trigger
  fallback.
- ENODEV should be ignored in file_trim.
Tested only on Fedora 28 and RHEL 7.5.
---
 plugins/file/file.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index a7c07fb..a8a6253 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -50,6 +50,21 @@
 
 static char *filename = NULL;
 
+#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
+static int
+do_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+  int r = -1;
+  r = fallocate (fd, mode, offset, len);
+  /* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
+     with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
+  if (r == -1 && errno == ENODEV) {
+    errno = EOPNOTSUPP;
+  }
+  return r;
+}
+#endif
+
 static void
 file_unload (void)
 {
@@ -241,9 +256,9 @@ file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
 
 #ifdef FALLOC_FL_PUNCH_HOLE
   if (may_trim) {
-    r = fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-		   offset, count);
-    if (r == -1 && errno != EOPNOTSUPP && errno != ENODEV) {
+    r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == -1 && errno != EOPNOTSUPP) {
       nbdkit_error ("zero: %m");
     }
     /* PUNCH_HOLE is older; if it is not supported, it is likely that
@@ -253,8 +268,8 @@ file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
 #endif
 
 #ifdef FALLOC_FL_ZERO_RANGE
-  r = fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
-  if (r == -1 && errno != EOPNOTSUPP && errno != ENODEV) {
+  r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+  if (r == -1 && errno != EOPNOTSUPP) {
     nbdkit_error ("zero: %m");
   }
 #else
@@ -288,11 +303,11 @@ file_trim (void *handle, uint32_t count, uint64_t offset)
   struct handle *h = handle;
 
   /* Trim is advisory; we don't care if it fails for anything other
-   * than EIO, EPERM, or ENODEV (kernel 3.10) */
-  r = fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                 offset, count);
+   * than EIO or EPERM. */
+  r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                    offset, count);
   if (r < 0) {
-    if (errno != EPERM && errno != EIO && errno != ENODEV) {
+    if (errno != EPERM && errno != EIO) {
       nbdkit_debug ("ignoring failed fallocate during trim: %m");
       r = 0;
     }
-- 
2.17.1
                                
                         
                        
                                
                                7 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v2] file: Add missing include for FALLOC_FL_*
                                
                                
                                
                                    
                                        by Nir Soffer
                                    
                                
                                
                                        On RHEL 7.5 we need to include <linux/falloc.h> for FALLOC_FL_* macros.
Without the macros, fallocate is never used and we fall back to manual
zeroing.
Here are examples runs with this change with a local file on ext4:
$ export SOCK=/tmp/nbd.sock
$ export FILE=/var/tmp/nbd.img
$ export BLOCK=/dev/loop2
$ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$FILE -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img nbd:unix:$SOCK
real    0m13.361s
user    0m0.127s
sys     0m0.668s
$ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img nbd:unix:$SOCK
real    0m13.491s
user    0m0.129s
sys     0m0.612s
Tested on Fedora 28 and RHEL 7.5.
---
v2:
- Include <linux/falloc.h> only on Linux
v1 was here:
https://www.redhat.com/archives/libguestfs/2018-July/msg00083.html
 plugins/file/file.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index a8a6253..0345115 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -42,6 +42,10 @@
 #include <sys/stat.h>
 #include <errno.h>
 
+#if defined(__linux__)
+#include <linux/falloc.h>   /* For FALLOC_FL_* on RHEL, glibc < 2.18 */
+#endif
+
 #include <nbdkit-plugin.h>
 
 #ifndef O_CLOEXEC
-- 
2.17.1
                                
                         
                        
                                
                                7 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] file: Zero support for block devices and NFS 4.2
                                
                                
                                
                                    
                                        by Nir Soffer
                                    
                                
                                
                                        If we may not trim, we tried ZERO_RANGE, but this is not well supported
yet, for example it is not available on NFS 4.2. ZERO_RANGE and
PUNCH_HOLE are supported now on block devices, but not on RHRL 7, so we
fallback to slow manual zeroing there.
Change the logic to support block devices on RHEL 7, and file systems
that do not support ZERO_RANGE.
The new logic:
- If we may trim, try PUNCH_HOLE
- If we can zero range, Try ZERO_RANGE
- If we can punch hole and fallocate, try fallocate(PUNCH_HOLE) followed
  by fallocate(0).
- If underlying file is a block device, try ioctl(BLKZEROOUT)
- Otherwise fallback to manual zeroing
The handle keeps now the underlying file capabilities, so once we
discover that an operation is not supported, we never try it again.
Here are examples runs on a server based on Intel(R) Xeon(R) CPU E5-2630
v4 @ 2.20GHz, using XtremIO storage via 4G FC HBA and 4 paths to
storage.
$ export SOCK=/tmp/nbd.sock
$ export BLOCK=/dev/e30bfac2-8e13-479d-8cd6-c6da5e306c4e/c9864222-bc52-4359-80d7-76e47d619b15
$ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img nbd:unix:$SOCK
real	0m2.741s
user	0m0.224s
sys	0m0.634s
$ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img nbd:unix:$SOCK
real	0m1.920s
user	0m0.163s
sys	0m0.735s
Issues:
- ioctl(BLKZEROOUT) will fail if offset or count are not aligned to
  logical sector size. I'm not sure if nbdkit or qemu-img ensure this.
- Need testing with NFS
---
 plugins/file/file.c | 126 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 103 insertions(+), 23 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index fb20622..bce2ed1 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -33,6 +33,7 @@
 
 #include <config.h>
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -42,6 +43,8 @@
 #include <sys/stat.h>
 #include <errno.h>
 #include <linux/falloc.h>   /* For FALLOC_FL_* on RHEL, glibc < 2.18 */
+#include <sys/ioctl.h>
+#include <linux/fs.h>
 
 #include <nbdkit-plugin.h>
 
@@ -116,6 +119,10 @@ file_config_complete (void)
 /* The per-connection handle. */
 struct handle {
   int fd;
+  bool is_block_device;
+  bool can_punch_hole;
+  bool can_zero_range;
+  bool can_fallocate;
 };
 
 /* Create the per-connection handle. */
@@ -123,6 +130,7 @@ static void *
 file_open (int readonly)
 {
   struct handle *h;
+  struct stat statbuf;
   int flags;
 
   h = malloc (sizeof *h);
@@ -144,6 +152,23 @@ file_open (int readonly)
     return NULL;
   }
 
+  if (fstat (h->fd, &statbuf) == -1) {
+    nbdkit_error ("fstat: %s: %m", filename);
+    free (h);
+    return NULL;
+  }
+
+  h->is_block_device = S_ISBLK(statbuf.st_mode);
+
+  /* These flags will disabled if an operation is not supported. */
+#ifdef FALLOC_FL_PUNCH_HOLE
+  h->can_punch_hole = true;
+#endif
+#ifdef FALLOC_FL_ZERO_RANGE
+  h->can_zero_range = true;
+#endif
+  h->can_fallocate = true;
+
   return h;
 }
 
@@ -164,27 +189,29 @@ static int64_t
 file_get_size (void *handle)
 {
   struct handle *h = handle;
-  struct stat statbuf;
 
-  if (fstat (h->fd, &statbuf) == -1) {
-    nbdkit_error ("stat: %m");
-    return -1;
-  }
-
-  if (S_ISBLK (statbuf.st_mode)) {
+  if (h->is_block_device) {
+    /* Block device, so st_size will not be the true size. */
     off_t size;
 
-    /* Block device, so st_size will not be the true size. */
     size = lseek (h->fd, 0, SEEK_END);
     if (size == -1) {
       nbdkit_error ("lseek (to find device size): %m");
       return -1;
     }
+
     return size;
-  }
+  } else {
+    /* Regular file. */
+    struct stat statbuf;
+
+    if (fstat (h->fd, &statbuf) == -1) {
+      nbdkit_error ("fstat: %m");
+      return -1;
+    }
 
-  /* Else regular file. */
-  return statbuf.st_size;
+    return statbuf.st_size;
+  }
 }
 
 static int
@@ -250,33 +277,86 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset)
 static int
 file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
 {
-#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
   struct handle *h = handle;
-#endif
   int r = -1;
 
 #ifdef FALLOC_FL_PUNCH_HOLE
-  if (may_trim) {
+  /* If we can and may trim, punching hole is our best option. */
+  if (h->can_punch_hole && may_trim) {
     r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                       offset, count);
-    if (r == -1 && errno != EOPNOTSUPP) {
+    if (r == 0)
+        return 0;
+
+    if (errno != EOPNOTSUPP) {
       nbdkit_error ("zero: %m");
+      return r;
     }
-    /* PUNCH_HOLE is older; if it is not supported, it is likely that
-       ZERO_RANGE will not work either, so fall back to write. */
-    return r;
+
+    h->can_punch_hole = false;
   }
 #endif
 
 #ifdef FALLOC_FL_ZERO_RANGE
-  r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
-  if (r == -1 && errno != EOPNOTSUPP) {
-    nbdkit_error ("zero: %m");
+  /* ZERO_RANGE is not well supported yet, but it the next best option. */
+  if (h->can_zero_range) {
+    r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+    if (r == 0)
+      return 0;
+
+    if (errno != EOPNOTSUPP) {
+      nbdkit_error ("zero: %m");
+      return r;
+    }
+
+    h->can_zero_range = false;
   }
-#else
+#endif
+
+#ifdef FALLOC_FL_PUNCH_HOLE
+  /* If we can punch hole but may not trim, we can combine punching hole and
+     fallocate to zero a range. This is much more efficient than writing zeros
+     manually. */
+  if (h->can_punch_hole && h->can_fallocate) {
+    r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == 0) {
+      r = do_fallocate(h->fd, 0, offset, count);
+      if (r == 0)
+        return 0;
+
+      if (errno != EOPNOTSUPP) {
+        nbdkit_error ("zero: %m");
+        return r;
+      }
+
+      h->can_fallocate = false;
+    } else {
+      if (errno != EOPNOTSUPP) {
+        nbdkit_error ("zero: %m");
+        return r;
+      }
+
+      h->can_punch_hole = false;
+    }
+  }
+#endif
+
+  /* For block devices, we can use BLKZEROOUT.
+     NOTE: count and offset must be aligned to logical block size. */
+  if (h->is_block_device) {
+    uint64_t range[2] = {offset, count};
+
+    r = ioctl(h->fd, BLKZEROOUT, &range);
+    if (r == 0)
+      return 0;
+
+    nbdkit_error("zero: %m");
+    return r;
+  }
+
   /* Trigger a fall back to writing */
   errno = EOPNOTSUPP;
-#endif
 
   return r;
 }
-- 
2.17.1
                                
                         
                        
                                
                                7 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH 1/2] file: Add missing include for FALLOC_FL_*
                                
                                
                                
                                    
                                        by Nir Soffer
                                    
                                
                                
                                        On RHEL 7.5 we need to include <linux/falloc.h> for FALLOC_FL_* macros.
Without the macros, fallocate is never used and we fall back to manual
zeroing.
Here are examples runs with this change with a local file on ext4:
$ export SOCK=/tmp/nbd.sock
$ export FILE=/var/tmp/nbd.img
$ export BLOCK=/dev/loop2
$ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$FILE -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img nbd:unix:$SOCK
real    0m13.361s
user    0m0.127s
sys     0m0.668s
$ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img nbd:unix:$SOCK
real    0m13.491s
user    0m0.129s
sys     0m0.612s
---
 plugins/file/file.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 4210adb..fb20622 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -41,6 +41,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <errno.h>
+#include <linux/falloc.h>   /* For FALLOC_FL_* on RHEL, glibc < 2.18 */
 
 #include <nbdkit-plugin.h>
 
-- 
2.17.1
                                
                         
                        
                                
                                7 years, 3 months