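Allow the appliance root filesystem to be placed on a virtual NVDIMM
so that the guest kernel can access it directly (DAX).
DAX is only attempted on x86_64, when qemu is at least version 2.6 and
supports the nvdimm device. When DAX is used, the appliance is booted
without acpi=off.
This requires corresponding changes in supermin.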
---
src/guestfs-internal.h | 1 +
src/launch-direct.c | 68 ++++++++++++++++++++++++++++++++++++++++----------
src/launch.c | 8 +++++-
3 files changed, 63 insertions(+), 14 deletions(-)
diff --git a/src/guestfs-internal.h b/src/guestfs-internal.h
index d325f50..3655219 100644
--- a/src/guestfs-internal.h
+++ b/src/guestfs-internal.h
@@ -782,6 +782,7 @@ extern int64_t guestfs_int_timeval_diff (const struct timeval *x,
const struct t
extern void guestfs_int_launch_send_progress (guestfs_h *g, int perdozen);
extern char *guestfs_int_appliance_command_line (guestfs_h *g, const char *appliance_dev,
int flags);
#define APPLIANCE_COMMAND_LINE_IS_TCG 1
+#define APPLIANCE_COMMAND_LINE_USE_ACPI 2
const char *guestfs_int_get_cpu_model (int kvm);
int guestfs_int_create_socketname (guestfs_h *g, const char *filename, char
(*sockname)[UNIX_PATH_MAX]);
extern void guestfs_int_register_backend (const char *name, const struct backend_ops *);
diff --git a/src/launch-direct.c b/src/launch-direct.c
index 01b7e07..a005bda 100644
--- a/src/launch-direct.c
+++ b/src/launch-direct.c
@@ -234,6 +234,7 @@ launch_direct (guestfs_h *g, void *datav, const char *arg)
struct hv_param *hp;
bool has_kvm;
int force_tcg;
+ bool dax;
const char *cpu_model;
/* At present you must add drives before starting the appliance. In
@@ -371,15 +372,29 @@ launch_direct (guestfs_h *g, void *datav, const char *arg)
warning (g, "qemu debugging is enabled, connect gdb to tcp::1234 to
begin");
}
+ /* Can we use DAX? */
+#ifdef __x86_64__
+ dax = guestfs_int_qemu_version_ge (data->qemu_data, 2, 6) &&
+ guestfs_int_qemu_supports_device (g, data->qemu_data, "nvdimm");
+#else
+ dax = false;
+#endif
+
ADD_CMDLINE ("-machine");
ADD_CMDLINE_PRINTF (
#ifdef MACHINE_TYPE
MACHINE_TYPE ","
+ "%s"
+#elif __x86_64__
+ "pc,%s"
+#else
+ "%s"
#endif
#ifdef __aarch64__
"gic-version=host,"
#endif
"accel=%s",
+ dax ? "nvdimm," : "",
!force_tcg ? "kvm:tcg" : "tcg");
cpu_model = guestfs_int_get_cpu_model (has_kvm && !force_tcg);
@@ -394,7 +409,10 @@ launch_direct (guestfs_h *g, void *datav, const char *arg)
}
ADD_CMDLINE ("-m");
- ADD_CMDLINE_PRINTF ("%d", g->memsize);
+ if (dax)
+ ADD_CMDLINE_PRINTF ("%d,maxmem=32G,slots=32", g->memsize);
+ else
+ ADD_CMDLINE_PRINTF ("%d", g->memsize);
/* Force exit instead of reboot on panic */
ADD_CMDLINE ("-no-reboot");
@@ -541,21 +559,43 @@ launch_direct (guestfs_h *g, void *datav, const char *arg)
/* Add the ext2 appliance drive (after all the drives). */
if (has_appliance_drive) {
- ADD_CMDLINE ("-drive");
- ADD_CMDLINE_PRINTF ("file=%s,snapshot=on,id=appliance,"
- "cache=unsafe,if=none,format=raw",
- appliance);
+ if (dax) {
+ struct stat statbuf;
- if (virtio_scsi) {
- ADD_CMDLINE ("-device");
- ADD_CMDLINE ("scsi-hd,drive=appliance");
- }
- else {
+ if (stat (appliance, &statbuf) == -1) {
+ perrorf (g, "stat: %s", appliance);
+ goto cleanup0;
+ }
+
+ ADD_CMDLINE ("-object");
+ /* share=off corresponds to mmap MAP_PRIVATE inside qemu, so
+ * writes should not affect the underlying file. In other words,
+ * parallel access to the appliance file should be fine.
+ */
+ ADD_CMDLINE_PRINTF ("memory-backend-file,id=mem1,share=off,"
+ "mem-path=%s,size=%" PRIu64 "b",
+ appliance, (uint64_t) statbuf.st_size);
ADD_CMDLINE ("-device");
- ADD_CMDLINE (VIRTIO_BLK ",drive=appliance");
- }
+ ADD_CMDLINE ("nvdimm,memdev=mem1,id=nv1");
- appliance_dev = make_appliance_dev (g, virtio_scsi);
+ appliance_dev = safe_strdup (g, "/dev/pmem0");
+ } else {
+ ADD_CMDLINE ("-drive");
+ ADD_CMDLINE_PRINTF ("file=%s,snapshot=on,id=appliance,"
+ "cache=unsafe,if=none,format=raw",
+ appliance);
+
+ if (virtio_scsi) {
+ ADD_CMDLINE ("-device");
+ ADD_CMDLINE ("scsi-hd,drive=appliance");
+ }
+ else {
+ ADD_CMDLINE ("-device");
+ ADD_CMDLINE (VIRTIO_BLK ",drive=appliance");
+ }
+
+ appliance_dev = make_appliance_dev (g, virtio_scsi);
+ }
}
/* Create the virtio serial bus. */
@@ -597,6 +637,8 @@ launch_direct (guestfs_h *g, void *datav, const char *arg)
flags = 0;
if (!has_kvm || force_tcg)
flags |= APPLIANCE_COMMAND_LINE_IS_TCG;
+ if (dax)
+ flags |= APPLIANCE_COMMAND_LINE_USE_ACPI;
ADD_CMDLINE_STRING_NODUP
(guestfs_int_appliance_command_line (g, appliance_dev, flags));
diff --git a/src/launch.c b/src/launch.c
index 72a8b29..49f0455 100644
--- a/src/launch.c
+++ b/src/launch.c
@@ -318,6 +318,10 @@ guestfs_impl_config (guestfs_h *g,
* If we are launching a qemu TCG guest (ie. KVM is known to be
* disabled or unavailable). If you don't know, don't pass this flag.
*
+ * =item C<APPLIANCE_COMMAND_LINE_USE_ACPI>
+ *
+ * Use ACPI in the appliance. Normally disabled because it is slow.
+ *
* =back
*
* Note that this function returns a newly allocated buffer which must
@@ -331,6 +335,7 @@ guestfs_int_appliance_command_line (guestfs_h *g, const char
*appliance_dev,
char *term = getenv ("TERM");
char *ret;
bool tcg = flags & APPLIANCE_COMMAND_LINE_IS_TCG;
+ bool use_acpi = flags & APPLIANCE_COMMAND_LINE_USE_ACPI;
char lpj_s[64] = "";
if (appliance_dev)
@@ -367,7 +372,7 @@ guestfs_int_appliance_command_line (guestfs_h *g, const char
*appliance_dev,
" udev.event-timeout=6000" /* for newer udevd */
" no_timer_check" /* fix for RHBZ#502058 */
"%s" /* lpj */
- " acpi=off" /* ACPI is slow - 150-200ms extra on my laptop */
+ "%s" /* acpi=off: ACPI is slow, 150-200ms on my laptop */
" printk.time=1" /* display timestamp before kernel messages */
" cgroup_disable=memory" /* saves us about 5 MB of RAM */
" usbcore.nousb" /* disable USB, only saves about 1ms */
@@ -386,6 +391,7 @@ guestfs_int_appliance_command_line (guestfs_h *g, const char
*appliance_dev,
g->memsize,
#endif
lpj_s,
+ !use_acpi ? " acpi=off" : "",
root,
g->selinux ? "selinux=1 enforcing=0" : "selinux=0",
g->verbose ? "guestfs_verbose=1" : "quiet",
--
2.7.4