On Mon, Nov 13, 2017 at 02:56:12PM +0100, Cédric Bosdonnat wrote:
 +let checksums_get_sha512 = function
 +  | None -> None
 +  | Some csums ->
 +      try
 +        Some (List.find (
 +          function
 +          | Checksums.SHA512 _ -> true
 +          | _ -> false
 +        ) csums)
 +      with Not_found -> None 
This is still a bit difficult to understand.  How about this explicit
loop?
  let checksums_get_sha512 = function
    | None -> None
    | Some csums ->
        let rec loop = function
        | [] -> None
        | Checksums.SHA512 csum :: _ -> Some csum
        | _ :: rest -> loop rest
        in
        loop csums
 +let osinfo_ids = ref None
 +
 +let rec osinfo_get_short_ids () =
 +  match !osinfo_ids with
 +  | Some ids -> ids
 +  | None ->
 +    osinfo_ids :=
 +      Some (
 +        Osinfo.fold (
 +          fun set filepath ->
 +            let doc = Xml.parse_file filepath in
 +            let xpathctx = Xml.xpath_new_context doc in
 +            let nodes = xpath_get_nodes xpathctx "/libosinfo/os/short-id" in
 +            List.fold_left (
 +              fun set node ->
 +                let id = Xml.node_as_string node in
 +                StringSet.add id set
 +            ) set nodes
 +        ) StringSet.empty
 +      );
 +    osinfo_get_short_ids () 
It doesn't really matter for this, but there is a nice way to write a
generic "memoize" higher-order function:
  
https://stackoverflow.com/questions/14454981/memoization-in-ocaml
(You wouldn't need the recursive variant here)
 +let compress_to file outdir =
 +  let outimg = outdir // (Filename.basename file) ^ ".xz" in 
Don't need parens around function application.
 +  info "Compressing ...%!"; 
You don't need %! here because the ‘info’ function calls
‘print_newline’ from stdlib and ‘print_newline’ calls ‘flush stdout’:
 
https://github.com/libguestfs/libguestfs/blob/a88385add653c4fc2592639d72b...
 
https://github.com/ocaml/ocaml/blob/c5fe6932b2151d0e4426072b4df3510318bc4...
 +  let cmd = [ "xz"; "-f"; "--best";
"--block-size=16777216"; "-c"; file ] in
 +  let file_flags = [ Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC; ] in
 +  let outfd = Unix.openfile outimg file_flags 0o666 in
 +  let res = run_command cmd ~stdout_chan:outfd in 
I read the code of run_command but I don't think it closes outfd, and
so outfd would be leaked here.
Also it'd be nice to use with_openfile, if it was upstream.
  
https://www.redhat.com/archives/libguestfs/2017-November/msg00028.html
Pino ^ x 2 ?
 +  if res <> 0 then
 +    error (f_"‘xz’ command failed");
 +  outimg
 +
 +let get_mime_type filepath =
 +  let file_cmd = "file --mime-type --brief " ^ (quote filepath) in 
Don't need parens.
 +let cmp a b =
 +  let string_of_arch = function Index.Arch s -> s | Index.GuessedArch s -> s in
 +  (string_of_arch a) = (string_of_arch b) 
You don't need parens here.
If you define Index.string_of_arch in the earlier then you can remove
the definition of string_of_arch here as well.
 +
 +let has_entry id arch index =
 +  List.exists (
 +    fun (item_id, { Index.arch = item_arch }) ->
 +      item_id = id && (cmp item_arch arch) 
Don't need parens.
 +let process_image acc_entries filename repo tmprepo index
interactive
 +                  compression sigchecker =
 +  message (f_"Preparing %s") filename;
 +
 +  let filepath = repo // filename in
 +  let { format = format; size = size } = get_disk_image_info filepath in 
Since commit c7651744da45 you can now write this as:
  let { format; size } = get_disk_image_info filepath in
 +  let ask ?default ?values message =
 +    let default_str = match default with
 +    | None -> ""
 +    | Some x -> sprintf " [%s] " x in
 +    let list_str = match values with
 +    | None -> ""
 +    | Some x ->
 +      sprintf (f_"Choose one from the list below:\n %s\n")
 +              (String.concat "\n " x) in
 +
 +    printf "%s%s%s%!" message default_str list_str; 
Seems quite a long way of writing:
  let ask ?default ?values message =
    printf "%s" message;
    (match default with
    | None -> ()
    | Some x -> printf " [%s] " x);
    (match values with
    | None -> ()
    | Some x -> printf (f_"Choose ... etc ..));
    let value = read_line () in
    ...
I don't think you need to flush the channel because stdout is line
buffered, and read_line will flush stdout anyway.
 +      if not (Str.string_match (Str.regexp
"[a-zA-Z0-9-_.]+") id 0) then ( 
It's not wrong to use Str, but you might want to use PCRE instead (see
common/mlpcre) since Perl regexps are more familiar for most people.
This could be written as:
    let re_valid_id = PCRE.compile ~anchored:true "[-a-zA-Z0-9_.]+"
    ...
    if not (PCRE.matches re_valid_id id) then (
      etc.
Note that Str regexps are anchored by default but PCRE regexps are not.
 +        warning (f_"Allowed characters are letters, digits, - _
and .");
 +        ask_id default
 +      ) else
 +        id in
 +
 +  let rec ask_arch guess =
 +    let arches = [ "x86_64"; "aarch64"; "armv7l";
"i686"; "ppc64"; "ppc64le"; "s390x" ] in
 +    match (ask (s_"Architecture: ") ~default:guess ~values:arches) with
 +    | None -> ask_arch guess
 +    | Some x ->
 +      if x = "" then
 +        ask_arch guess 
It seems as if the ‘ask’ function returns ‘default’ if the user just
hits return already.
 +  let extract_entry_data ?entry () =
 +    message (f_"Extracting data from the image...");
 +    let g = new Guestfs.guestfs () in 
Use open_guestfs from Tools_utils instead of this line.
 +    let printable_name =
 +      match entry with
 +      | Some (_, { Index.printable_name = printable_name }) -> 
You can just write this with the same meaning:
      | Some (_, { Index.printable_name }) ->
 +    (id, { Index.printable_name = printable_name;
 +           osinfo = osinfo;
 +           file_uri = Filename.basename out_path; 
              [etc]
This can be written more concisely as:
    (id, { Index.printable_name;
           osinfo;
           file_uri = Filename.basename out_path;
           arch;
           signature_uri = None;
           checksums = Some [checksum];
           revision;
           [etc]
 +  let _, { Index.checksums = checksums } = file_entry in 
This can be written as:
  let _, { Index.checksums } = file_entry in
 +  let images =
 +    let is_supported_format file =
 +      let extension = last_part_of file '.' in
 +      match extension with
 +      | Some ext -> List.mem ext [ "qcow2"; "raw";
"img" ]
 +      | None ->
 +        match (get_mime_type file) with 
You don't need parens here.
 +        | None -> false
 +        | Some mime -> mime = "application/octet-stream" in
 +    let is_new file =
 +      try
 +        let _, { Index.checksums = checksums } = 
You can now write this as:
    let _, { Index.checksums } =
 +          List.find (
 +            fun (_, { Index.file_uri = file_uri }) -> 
and:
      fun (_, { Index.file_uri }) ->
 +    List.filter (
 +      fun file ->
 +        if is_supported_format (cmdline.repo // file) then
 +          is_new file
 +        else
 +          false 
How about:
  List.filter (
    fun file -> is_supported_format (cmdline.repo // file) && is_new file
  ) files
 +  let index_channel = open_out outindex_path in 
Be nice to use with_open_out here, if that patch was upstream.
Pino ^ ?
 +  (* Filter out entries for newly found images and entries
 +     without a corresponding image file or with empty arch *)
 +  let index = List.filter (
 +    fun (id, { Index.arch = arch;
 +               Index.file_uri = file_uri }) -> 
You can write:
   fun (id, { Index.arch; Index.file_uri }) ->
 +      not (has_entry id arch images_entries) &&
Sys.file_exists file_uri
 +  ) index in
 +
 +  (* Convert all URIs back to relative ones *)
 +  let index = List.map (
 +    fun (id, entry) ->
 +      let { Index.file_uri = file_uri } = entry in 
     let { Index.file_uri } = entry in
 +    (* Remove the index file since we have the signed version of it
*)
 +    if not cmdline.keep_unsigned then
 +      Sys.remove (tmprepo // "index") 
We normally keep the index files even if we have a signed version.
Not sure which is better actually.  Maybe we should delete them.
 diff --git a/builder/test-virt-builder-repository.sh
b/builder/test-virt-builder-repository.sh
 new file mode 100755
 index 000000000..5ff270bcc
 --- /dev/null
 +++ b/builder/test-virt-builder-repository.sh
 @@ -0,0 +1,98 @@
 +#!/bin/bash -
 +# libguestfs
 +# Copyright (C) 2017 SUSE Inc.
 +#
 +# This program is free software; you can redistribute it and/or modify
 +# it under the terms of the GNU General Public License as published by
 +# the Free Software Foundation; either version 2 of the License, or
 +# (at your option) any later version.
 +#
 +# This program is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write to the Free Software
 +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 +
 +set -e
 +
 +$TEST_FUNCTIONS
 +slow_test
 +skip_if_skipped "$script"
 +
 +test_data=repository-testdata
 +rm -rf $test_data
 +mkdir $test_data
 +
 +# Make a copy of the Fedora image
 +cp ../test-data/phony-guests/fedora.img $test_data
 +
 +# Create minimal index file
 +cat > $test_data/index << EOF
 +[fedora]
 +file=fedora.img
 +EOF
 +
 +# Run virt-builder-repository (no compression, interactive)
 +echo 'x86_64
 +Fedora Test Image
 +fedora14
 +/dev/sda1
 +/dev/VG/Root
 +' | virt-builder-repository -v -x --no-compression -i $test_data
 +
 +assert_config () {
 +    item=$1
 +    regex=$2
 +
 +    sed -n -e "/\[$item]/,/^$/p" $test_data/index | grep "$regex"
 +}
 +
 +# Check the generated index file
 +assert_config 'fedora' 'revision=1'
 +assert_config 'fedora' 'arch=x86_64'
 +assert_config 'fedora' 'name=Fedora Test Image'
 +assert_config 'fedora' 'osinfo=fedora14'
 +assert_config 'fedora' 'checksum'
 +assert_config 'fedora' 'format=raw'
 +assert_config 'fedora' '^size='
 +assert_config 'fedora' 'compressed_size='
 +assert_config 'fedora' 'expand=/dev/'
 +
 +
 +# Copy the debian image and add the minimal piece to index
 +cp ../test-data/phony-guests/debian.img $test_data
 +
 +cat >> $test_data/index << EOF
 +
 +[debian]
 +file=debian.img
 +EOF
 +
 +# Run virt-builder-repository again
 +echo 'x86_64
 +Debian Test Image
 +debian9
 +
 +' | virt-builder-repository --no-compression -i $test_data
 +
 +# Check that the new image is complete and the first one hasn't changed
 +assert_config 'fedora' 'revision=1'
 +
 +assert_config 'debian' 'revision=1'
 +assert_config 'debian' 'checksum'
 +
 +# Modify the fedora image
 +export EDITOR='echo newline >>'
 +virt-edit -a $test_data/fedora.img /etc/test3
 +
 +# Rerun the tool (with compression)
 +virt-builder-repository -i $test_data
 +
 +# Check that the revision, file and size have been updated
 +assert_config 'fedora' 'revision=2'
 +assert_config 'fedora' 'file=fedora.img.xz'
 +test -e $test_data/fedora.img.xz
 +! test -e $test_data/fedora.img 
The test should ‘rm’ any temporary files it created after a successful
run (but not on error paths).
 --- a/builder/utils.mli
 +++ b/builder/utils.mli
 @@ -32,3 +32,6 @@ val string_of_revision : revision -> string
  val get_image_infos : string -> Yajl.yajl_val
  (** [get_image_infos path] Run qemu-img info on the image pointed at
      path as YAJL tree. *)
 +
 +val increment_revision : revision -> revision
 +(** Add one to the version number *) 
s/version/revision/
Looks good in general.
Rich.
-- 
Richard Jones, Virtualization Group, Red Hat 
http://people.redhat.com/~rjones
Read my programming and virtualization blog: 
http://rwmj.wordpress.com
Fedora Windows cross-compiler. Compile Windows programs, test, and
build Windows installers. Over 100 libraries supported.
http://fedoraproject.org/wiki/MinGW