On Mon, Aug 09, 2010 at 05:08:06PM +0100, Matthew Booth wrote:
On 02/08/10 18:12, Richard W.M. Jones wrote:
>>From 647be56b2d8b171a33608b36e0b7557d94db3c96 Mon Sep 17 00:00:00 2001
>From: Richard Jones<rjones(a)redhat.com>
>Date: Wed, 28 Jul 2010 15:38:57 +0100
>Subject: [PATCH 1/5] New API: file-architecture
>
>This change simply converts the existing Perl-only function
>file_architecture into a core API call. The core API call is
>written in C and available in all languages and from guestfish.
>---
> README | 2 +
> configure.ac | 9 ++
> perl/lib/Sys/Guestfs/Lib.pm | 147 +-----------------------
> perl/t/510-lib-file-arch.t | 70 -----------
> po/POTFILES.in | 1 +
> src/Makefile.am | 3 +-
> src/generator.ml | 128 +++++++++++++++++++++
> src/inspect.c | 266 +++++++++++++++++++++++++++++++++++++++++++
> 8 files changed, 411 insertions(+), 215 deletions(-)
> delete mode 100644 perl/t/510-lib-file-arch.t
> create mode 100644 src/inspect.c
>
>diff --git a/src/inspect.c b/src/inspect.c
>new file mode 100644
>index 0000000..8e28d8a
>--- /dev/null
>+++ b/src/inspect.c
>@@ -0,0 +1,266 @@
>+/* libguestfs
>+ * Copyright (C) 2010 Red Hat Inc.
>+ *
>+ * This library is free software; you can redistribute it and/or
>+ * modify it under the terms of the GNU Lesser General Public
>+ * License as published by the Free Software Foundation; either
>+ * version 2 of the License, or (at your option) any later version.
>+ *
>+ * This library is distributed in the hope that it will be useful,
>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>+ * Lesser General Public License for more details.
>+ *
>+ * You should have received a copy of the GNU Lesser General Public
>+ * License along with this library; if not, write to the Free Software
>+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>+ */
>+
>+#include<config.h>
>+
>+#include<stdio.h>
>+#include<stdlib.h>
>+#include<stdint.h>
>+#include<inttypes.h>
>+#include<unistd.h>
>+#include<string.h>
>+#include<sys/stat.h>
>+
>+#include<pcre.h>
>+
>+#include "ignore-value.h"
>+
>+#include "guestfs.h"
>+#include "guestfs-internal.h"
>+#include "guestfs-internal-actions.h"
>+#include "guestfs_protocol.h"
>+
>+/* Compile all the regular expressions once when the shared library is
>+ * loaded. PCRE is thread safe so we're supposedly OK here if
>+ * multiple threads call into the libguestfs API functions below
>+ * simultaneously.
>+ */
>+static pcre *re_file_elf;
>+static pcre *re_file_win64;
>+static pcre *re_elf_ppc64;
>+
>+static void compile_regexps (void) __attribute__((constructor));
>+static void
>+compile_regexps (void)
>+{
>+ const char *err;
>+ int offset;
>+
>+#define COMPILE(re,pattern,options) \
>+ do { \
>+ re = pcre_compile ((pattern), (options),&err,&offset, NULL); \
>+ if (re == NULL) { \
>+ ignore_value (write (2, err, strlen (err))); \
>+ abort (); \
>+ } \
>+ } while (0)
>+
>+ COMPILE (re_file_elf,
>+ "ELF.*(?:executable|shared object|relocatable), (.+?),", 0);
>+ COMPILE (re_file_win64, "PE32\\+ executable", 0);
>+ COMPILE (re_elf_ppc64, "64.*PowerPC", 0);
>+}
>+
>+/* Match a regular expression which contains no captures. Returns
>+ * true if it matches or false if it doesn't.
>+ */
>+static int
>+match (const char *str, const pcre *re)
>+{
>+ size_t len = strlen (str);
>+ int vec[30], r;
>+
>+ r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);
You can ditch vec here and pass in NULL, 0. None of the above REs
use back references, so this shouldn't be an issue.
My reading of the pcre_exec docs suggested otherwise. However I will
need to try it when I get back from the conference.
Aside: using a portion of a passed-in vector for slack space under
certain circumstances is just hideous!
>+ if (r == PCRE_ERROR_NOMATCH)
>+ return 0;
>+ if (r != 1) {
>+ /* Internal error -- should not happen. */
>+ fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned
unexpected error code %d when matching against the string \"%s\"\n",
>+ __FILE__, __func__, r, str);
>+ return 0;
>+ }
>+
>+ return 1;
>+}
>+
>+/* Match a regular expression which contains exactly one capture. If
>+ * the string matches, return the capture, otherwise return NULL. The
>+ * caller must free the result.
>+ */
>+static char *
>+match1 (const char *str, const pcre *re)
>+{
>+ size_t len = strlen (str);
>+ int vec[30], r;
>+
>+ r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);
vec here could be of size 9 (need 0, 1, 2, 3; round up to multiple
of 3 == 6; * 1.5 = 9). Whatever you set it to, though, please don't
hard-code the size twice. Please use sizeof(vec)/sizeof(vec[0]) in
the function call.
Yes, good idea.
>+ if (r == PCRE_ERROR_NOMATCH)
>+ return NULL;
>+ if (r != 2) {
>+ /* Internal error -- should not happen. */
>+ fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned
unexpected error code %d when matching against the string \"%s\"\n",
>+ __FILE__, __func__, r, str);
>+ return NULL;
>+ }
>+
>+ return strndup (&str[vec[2]], vec[3]-vec[2]);
Is there a safe_ version of that? If not, you probably ought to
check for NULL return for consistency.
Urr yes you are right, I thought I'd caught all instances of this ...
>+}
>+
>+/* Convert output from 'file' command on ELF files to the canonical
>+ * architecture string. Caller must free the result.
>+ */
>+static char *
>+canonical_elf_arch (guestfs_h *g, const char *elf_arch)
>+{
>+ const char *r;
>+
>+ if (strstr (elf_arch, "Intel 80386"))
>+ r = "i386";
>+ else if (strstr (elf_arch, "Intel 80486"))
>+ r = "i486";
>+ else if (strstr (elf_arch, "x86-64"))
>+ r = "x86_64";
>+ else if (strstr (elf_arch, "AMD x86-64"))
>+ r = "x86_64";
>+ else if (strstr (elf_arch, "SPARC32"))
>+ r = "sparc";
>+ else if (strstr (elf_arch, "SPARC V9"))
>+ r = "sparc64";
>+ else if (strstr (elf_arch, "IA-64"))
>+ r = "ia64";
>+ else if (match (elf_arch, re_elf_ppc64))
>+ r = "ppc64";
>+ else if (strstr (elf_arch, "PowerPC"))
>+ r = "ppc";
>+ else
>+ r = elf_arch;
>+
>+ char *ret = safe_strdup (g, r);
>+ return ret;
>+}
>+
>+static int
>+is_regular_file (const char *filename)
>+{
>+ struct stat statbuf;
>+
>+ return lstat (filename,&statbuf) == 0&& S_ISREG (statbuf.st_mode);
>+}
>+
>+/* Download and uncompress the cpio file to find binaries within. */
>+#define INITRD_BINARIES1 "bin/ls bin/rm bin/modprobe sbin/modprobe bin/sh
bin/bash bin/dash bin/nash"
>+#define INITRD_BINARIES2 {"bin/ls", "bin/rm",
"bin/modprobe", "sbin/modprobe", "bin/sh",
"bin/bash", "bin/dash", "bin/nash"}
Eww.
Right, but it does make sense in context of the later code ...
If C had LISP-like macros ...
>+
>+static char *
>+cpio_arch (guestfs_h *g, const char *file, const char *path)
>+{
This function is really the architecture of an initrd. It should
probably be called initrd_arch.
Yup, although we can only handle cpio-format (see documentation).
Very old distros (RHEL 3 and earlier IIRC) used a compressed ext2 disk
image, and we cannot handle that at the moment.
>+ char *ret = NULL;
>+
>+ const char *method;
>+ if (strstr (file, "gzip"))
>+ method = "zcat";
>+ else if (strstr (file, "bzip2"))
>+ method = "bzcat";
>+ else
>+ method = "cat";
>+
>+ char dir[] = "/tmp/initrd.XXXXXX";
>+#define dir_len 18
This should be strlen(dir). The compiler will recognise this as a constant.
OK.
>+ if (mkdtemp (dir) == NULL) {
>+ perrorf (g, "mkdtemp");
>+ goto out;
>+ }
>+
>+ char dir_initrd[dir_len + 16];
>+ snprintf (dir_initrd, dir_len + 16, "%s/initrd", dir);
>+ if (guestfs_download (g, path, dir_initrd) == -1)
>+ goto out;
>+
>+ char cmd[dir_len + 256];
>+ snprintf (cmd, dir_len + 256,
>+ "cd %s&& %s initrd | cpio --quiet -id "
INITRD_BINARIES1,
>+ dir, method);
>+ int r = system (cmd);
>+ if (r == -1 || WEXITSTATUS (r) != 0) {
>+ perrorf (g, "cpio command failed");
>+ goto out;
>+ }
>+
>+ char bin[dir_len + 32];
You've placed a limit here of 31 bytes on the contents of any member
of INITRD_BINARIES2, which is defined above. You need a comment next
to the definition of INITRD_BINARIES2 about this. Or remove the
limit...
Yup.
>+ const char *bins[] = INITRD_BINARIES2;
>+ size_t i;
>+ for (i = 0; i< sizeof bins / sizeof bins[0]; ++i) {
>+ snprintf (bin, dir_len + 32, "%s/%s", dir, bins[i]);
Why not just use asprintf, or whatever the equivalent is in gnulib?
Because we'd have to free it up. I think there is an alloca-variant
in gnulib, but I think the static array should be OK if we document
it.
>+
>+ if (is_regular_file (bin)) {
>+ snprintf (cmd, dir_len + 256, "file %s", bin);
Reusing a magic number from above in a different context. Magic
numbers are getting a bit hard to track at this point. Could do with
cmd_len for the size of cmd.
Stepping back slightly, you could more cleanly use libmagic here
instead of invoking file. It would be less code, and wouldn't encode
length limits which aren't defined anywhere.
Yes, that would make a lot of sense.
>+ FILE *fp = popen (cmd, "r");
>+ if (!fp) {
>+ perrorf (g, "popen: %s", cmd);
>+ goto out;
>+ }
>+
>+ char line[1024];
>+ if (!fgets (line, sizeof line, fp)) {
>+ perrorf (g, "fgets");
>+ fclose (fp);
>+ goto out;
>+ }
>+ pclose (fp);
^^^ This chunk could go.
>+
>+ char *elf_arch;
>+ if ((elf_arch = match1 (line, re_file_elf)) != NULL) {
>+ ret = canonical_elf_arch (g, elf_arch);
>+ free (elf_arch);
>+ goto out;
>+ }
>+ }
>+ }
>+ error (g, "file_architecture: could not determine architecture of cpio
archive");
>+
>+ out:
>+ /* Free up the temporary directory. Note the directory name cannot
>+ * contain shell meta-characters because of the way it was
>+ * constructed above.
>+ */
>+ snprintf (cmd, dir_len + 256, "rm -rf %s", dir);
cmd_len
Is there really no widely available C implementation of this?
Not that I'm aware of ...?
>+ ignore_value (system (cmd));
>+
>+ return ret;
>+#undef dir_len
>+}
>+
>+char *
>+guestfs__file_architecture (guestfs_h *g, const char *path)
>+{
>+ char *file = NULL;
>+ char *elf_arch = NULL;
>+ char *ret = NULL;
>+
>+ /* Get the output of the "file" command. Note that because this
>+ * runs in the daemon, LANG=C so it's in English.
>+ */
>+ file = guestfs_file (g, path);
>+ if (file == NULL)
>+ return NULL;
>+
>+ if ((elf_arch = match1 (file, re_file_elf)) != NULL)
>+ ret = canonical_elf_arch (g, elf_arch);
>+ else if (strstr (file, "PE32 executable"))
>+ ret = safe_strdup (g, "i386");
>+ else if (match (file, re_file_win64))
Why not strstr(file, "PE32+ executable") ?
Yes, that's a mistake.
>+ ret = safe_strdup (g, "x86_64");
>+ else if (strstr (file, "cpio archive"))
>+ ret = cpio_arch (g, file, path);
>+ else
>+ error (g, "file_architecture: unknown architecture: %s", path);
>+
>+ free (file);
>+ free (elf_arch);
>+ return ret; /* caller frees */
>+}
>-- 1.7.1
API is fine. I think we need to think hard, though, about how to do
safe string manipulation in libguestfs. In the above code, it's hard
to be sure that you're not leaking or overflowing anything. Perhaps
using a string library would be in order.
Use OCaml ...
Rich.
--
Richard Jones, Virtualization Group, Red Hat
http://people.redhat.com/~rjones
virt-top is 'top' for virtual machines. Tiny program with many
powerful monitoring features, net stats, disk stats, logging, etc.
http://et.redhat.com/~rjones/virt-top