Linux from around 5.6 now enumerates individual disks in any order
(whereas previously it enumerated only drivers in parallel). This
means that /dev/sdX ordering is no longer stable - in particular we
cannot be sure that /dev/sda inside the guest is the first disk that
was attached to the appliance, /dev/sdb the second disk and so on.
However we can still use SCSI PCI device numbering as found in
/dev/disk/by-path. Use this to translate device names in and out of
the appliance.
Thanks: Vitaly Kuznetsov, Paolo Bonzini, Dan Berrangé.
---
daemon/daemon.h | 2 +
daemon/device-name-translation.c | 182 +++++++++++++++++++++++++++++--
daemon/guestfsd.c | 5 +
lib/canonical-name.c | 5 +
4 files changed, 184 insertions(+), 10 deletions(-)
diff --git a/daemon/daemon.h b/daemon/daemon.h
index 170fb2537..24cf8585d 100644
--- a/daemon/daemon.h
+++ b/daemon/daemon.h
@@ -215,6 +215,8 @@ extern void notify_progress_no_ratelimit (uint64_t position, uint64_t
total, con
/* device-name-translation.c */
extern char *device_name_translation (const char *device);
+extern void device_name_translation_init (void);
+extern void device_name_translation_free (void);
extern char *reverse_device_name_translation (const char *device);
/* stubs.c (auto-generated) */
diff --git a/daemon/device-name-translation.c b/daemon/device-name-translation.c
index ed826bbae..5383d7ccb 100644
--- a/daemon/device-name-translation.c
+++ b/daemon/device-name-translation.c
@@ -27,12 +27,116 @@
#include <dirent.h>
#include <limits.h>
#include <sys/stat.h>
+#include <errno.h>
+#include <error.h>
+
+#include "c-ctype.h"
#include "daemon.h"
+static char **cache;
+static size_t cache_size;
+
+/**
+ * Cache daemon disk mapping.
+ *
+ * When the daemon starts up, populate a cache with the contents
+ * of /dev/disk/by-path. It's easiest to use C<ls -lv> here
+ * since the names are sorted awkwardly.
+ */
+void
+device_name_translation_init (void)
+{
+ const char *by_path = "/dev/disk/by-path";
+ CLEANUP_FREE char *out = NULL, *err = NULL;
+ CLEANUP_FREE_STRING_LIST char **lines = NULL;
+ size_t i, n;
+ int r;
+
+ device_name_translation_free ();
+
+ r = command (&out, &err, "ls", "-1v", by_path, NULL);
+ if (r == -1)
+ error (EXIT_FAILURE, 0,
+ "failed to initialize device name translation cache: %s", err);
+
+ lines = split_lines (out);
+ if (lines == NULL)
+ error (EXIT_FAILURE, errno, "split_lines");
+
+ /* Delete entries for partitions. */
+ for (i = 0; lines[i] != NULL; ++i) {
+ if (strstr (lines[i], "-part")) {
+ n = guestfs_int_count_strings (&lines[i+1]);
+ memmove (&lines[i], &lines[i+1], (n+1) * sizeof (char *));
+ i--;
+ }
+ }
+
+ cache_size = guestfs_int_count_strings (lines);
+ cache = calloc (cache_size, sizeof (char *));
+ if (cache == NULL)
+ error (EXIT_FAILURE, errno, "calloc");
+
+ /* Look up each device name. It should be a symlink to /dev/sdX. */
+ for (i = 0; lines[i] != NULL; ++i) {
+ CLEANUP_FREE char *full;
+ char *device;
+
+ if (asprintf (&full, "%s/%s", by_path, lines[i]) == -1)
+ error (EXIT_FAILURE, errno, "asprintf");
+
+ device = realpath (full, NULL);
+ if (device == NULL)
+ error (EXIT_FAILURE, errno, "realpath: %s", full);
+ cache[i] = device;
+ }
+}
+
+void
+device_name_translation_free (void)
+{
+ size_t i;
+
+ for (i = 0; i < cache_size; ++i)
+ free (cache[i]);
+ free (cache);
+ cache = NULL;
+ cache_size = 0;
+}
+
/**
* Perform device name translation.
*
+ * Libguestfs defines a few standard formats for device names.
+ * (see also L<guestfs(3)/BLOCK DEVICE NAMING> and
+ * L<guestfs(3)/guestfs_canonical_device_name>). They are:
+ *
+ * =over 4
+ *
+ * =item F</dev/sdX[N]>
+ *
+ * =item F</dev/hdX[N]>
+ *
+ * =item F</dev/vdX[N]>
+ *
+ * These mean the Nth partition on the Xth device. Because
+ * Linux no longer enumerates devices in the order they are
+ * passed to qemu, we must translate these by looking up
+ * the actual device using /dev/disk/by-path/
+ *
+ * =item F</dev/mdX>
+ *
+ * =item F</dev/VG/LV>
+ *
+ * =item F</dev/mapper/...>
+ *
+ * =item F</dev/dm-N>
+ *
+ * These are not translated here.
+ *
+ * =back
+ *
* It returns a newly allocated string which the caller must free.
*
* It returns C<NULL> on error. B<Note> it does I<not> call
@@ -45,18 +149,58 @@ char *
device_name_translation (const char *device)
{
int fd;
- char *ret;
+ char *ret = NULL;
+ size_t len;
- fd = open (device, O_RDONLY|O_CLOEXEC);
+ /* /dev/sdX[N] and aliases like /dev/vdX[N]. */
+ if (STRPREFIX (device, "/dev/") &&
+ strchr (device+5, '/') == NULL && /* not an LV name */
+ device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
+ ((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
+ ssize_t i;
+ const char *start;
+ char dev[16];
+
+ /* Translate to a disk index in /dev/disk/by-path sorted numerically. */
+ start = &device[5+len+1];
+ len = strspn (start, "abcdefghijklmnopqrstuvwxyz");
+ if (len >= sizeof dev - 1) {
+ fprintf (stderr, "unparseable device name: %s\n", device);
+ return NULL;
+ }
+ strcpy (dev, start);
+
+ i = guestfs_int_drive_index (dev);
+ if (i >= 0 && i < (ssize_t) cache_size) {
+ /* Append the partition name if present. */
+ if (asprintf (&ret, "%s%s", cache[i], start+len) == -1)
+ return NULL;
+ }
+ }
+
+ /* If we didn't translate it above, continue with the same name. */
+ if (ret == NULL) {
+ ret = strdup (device);
+ if (ret == NULL)
+ return NULL;
+ }
+
+ /* Now check the device is openable. */
+ fd = open (ret, O_RDONLY|O_CLOEXEC);
if (fd >= 0) {
close (fd);
- return strdup (device);
+ return ret;
}
- if (errno != ENXIO && errno != ENOENT)
+ if (errno != ENXIO && errno != ENOENT) {
+ perror (ret);
+ free (ret);
return NULL;
+ }
- /* If the name begins with "/dev/sd" then try the alternatives. */
+ free (ret);
+
+ /* If the original name begins with "/dev/sd" then try the alternatives. */
if (!STRPREFIX (device, "/dev/sd"))
return NULL;
device += 7; /* device == "a1" etc. */
@@ -97,13 +241,31 @@ device_name_translation (const char *device)
char *
reverse_device_name_translation (const char *device)
{
- char *ret;
+ char *ret = NULL;
+ size_t i;
+
+ /* Look it up in the cache, and if found return the canonical name.
+ * If not found return a copy of the original string.
+ */
+ for (i = 0; i < cache_size; ++i) {
+ const size_t len = strlen (cache[i]);
+
+ if (STREQ (device, cache[i]) ||
+ (STRPREFIX (device, cache[i]) && c_isdigit (device[len]))) {
+ if (asprintf (&ret, "%s%s", cache[i], &device[len]) == -1) {
+ reply_with_perror ("asprintf");
+ return NULL;
+ }
+ break;
+ }
+ }
- /* Currently a no-op. */
- ret = strdup (device);
if (ret == NULL) {
- reply_with_perror ("strdup");
- return NULL;
+ ret = strdup (device);
+ if (ret == NULL) {
+ reply_with_perror ("strdup");
+ return NULL;
+ }
}
return ret;
diff --git a/daemon/guestfsd.c b/daemon/guestfsd.c
index 5400adf64..fd87f6520 100644
--- a/daemon/guestfsd.c
+++ b/daemon/guestfsd.c
@@ -233,6 +233,9 @@ main (int argc, char *argv[])
_umask (0);
#endif
+ /* Initialize device name translations cache. */
+ device_name_translation_init ();
+
/* Connect to virtio-serial channel. */
if (!channel)
channel = VIRTIO_SERIAL_CHANNEL;
@@ -322,6 +325,8 @@ main (int argc, char *argv[])
/* Enter the main loop, reading and performing actions. */
main_loop (sock);
+ device_name_translation_free ();
+
exit (EXIT_SUCCESS);
}
diff --git a/lib/canonical-name.c b/lib/canonical-name.c
index 42a0fd2a6..efe45c5f1 100644
--- a/lib/canonical-name.c
+++ b/lib/canonical-name.c
@@ -37,6 +37,11 @@ guestfs_impl_canonical_device_name (guestfs_h *g, const char *device)
strchr (device+5, '/') == NULL && /* not an LV name */
device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
+ /* NB! These do not need to be translated by
+ * device_name_translation. They will be translated if necessary
+ * when the caller uses them in APIs which go through to the
+ * daemon.
+ */
ret = safe_asprintf (g, "/dev/sd%s", &device[5+len+1]);
}
else if (STRPREFIX (device, "/dev/mapper/") ||
--
2.25.0