Linux from around 5.6 now enumerates individual disks in any order
(whereas previously it enumerated only drivers in parallel). This
means that /dev/sdX ordering is no longer stable - in particular we
cannot be sure that /dev/sda inside the guest is the first disk that
was attached to the appliance, /dev/sdb the second disk and so on.
However we can still use SCSI PCI device numbering as found in
/dev/disk/by-path. Use this to translate device names in and out of
the appliance.
Thanks: Vitaly Kuznetsov, Paolo Bonzini, Dan Berrangé.
---
TODO | 13 +++
daemon/daemon.h | 2 +
daemon/device-name-translation.c | 195 +++++++++++++++++++++++++++++--
daemon/devsparts.ml | 23 +++-
daemon/guestfsd.c | 5 +
daemon/stubs-macros.h | 10 +-
lib/canonical-name.c | 5 +
7 files changed, 235 insertions(+), 18 deletions(-)
diff --git a/TODO b/TODO
index 066f633d4..064386ac2 100644
--- a/TODO
+++ b/TODO
@@ -189,6 +189,19 @@ might be replaced by direct use of the library (if this is easier).
But it is very hard to be compatible between RHEL6 and RHEL5 when
using the library directly.
+Properly fix device name translation
+------------------------------------
+
+Currently for Device parameters we pass a string name like "/dev/sda"
+or "/dev/sdb2" or "/dev/VG/LV" to the daemon. Since Linux broke
+device enumeration (RHBZ#1804207) this works less well, requiring
+non-trivial translation within the daemon.
+
+It would be far better if in the generator we mapped "/dev/XXX" to a
+proper structure. Device index + partition | LV | MD | ...
+This would be then used everywhere inside the daemon and mean we would
+no longer need device name translation at all.
+
Visualization
-------------
diff --git a/daemon/daemon.h b/daemon/daemon.h
index 170fb2537..24cf8585d 100644
--- a/daemon/daemon.h
+++ b/daemon/daemon.h
@@ -215,6 +215,8 @@ extern void notify_progress_no_ratelimit (uint64_t position, uint64_t
total, con
/* device-name-translation.c */
extern char *device_name_translation (const char *device);
+extern void device_name_translation_init (void);
+extern void device_name_translation_free (void);
extern char *reverse_device_name_translation (const char *device);
/* stubs.c (auto-generated) */
diff --git a/daemon/device-name-translation.c b/daemon/device-name-translation.c
index ed826bbae..30173f5c2 100644
--- a/daemon/device-name-translation.c
+++ b/daemon/device-name-translation.c
@@ -27,12 +27,125 @@
#include <dirent.h>
#include <limits.h>
#include <sys/stat.h>
+#include <errno.h>
+#include <error.h>
+
+#include "c-ctype.h"
#include "daemon.h"
+static char **cache;
+static size_t cache_size;
+
+/**
+ * Cache daemon disk mapping.
+ *
+ * When the daemon starts up, populate a cache with the contents
+ * of /dev/disk/by-path. It's easiest to use C<ls -lv> here
+ * since the names are sorted awkwardly.
+ */
+void
+device_name_translation_init (void)
+{
+ const char *by_path = "/dev/disk/by-path";
+ CLEANUP_FREE char *out = NULL, *err = NULL;
+ CLEANUP_FREE_STRING_LIST char **lines = NULL;
+ size_t i, j;
+ int r;
+
+ device_name_translation_free ();
+
+ r = command (&out, &err, "ls", "-1v", by_path, NULL);
+ if (r == -1)
+ error (EXIT_FAILURE, 0,
+ "failed to initialize device name translation cache: %s", err);
+
+ lines = split_lines (out);
+ if (lines == NULL)
+ error (EXIT_FAILURE, errno, "split_lines");
+
+ cache_size = guestfs_int_count_strings (lines);
+ cache = calloc (cache_size, sizeof (char *));
+ if (cache == NULL)
+ error (EXIT_FAILURE, errno, "calloc");
+
+ /* Delete entries for partitions. (Mark them as NULL in the array
+ * and skip them below.)
+ */
+ for (i = 0; i < cache_size; ++i) {
+ if (strstr (lines[i], "-part"))
+ lines[i] = NULL;
+ }
+
+ /* Look up each device name. It should be a symlink to /dev/sdX. */
+ for (i = j = 0; i < cache_size; ++i) {
+ if (lines[i] != NULL) {
+ CLEANUP_FREE char *full;
+ char *device;
+
+ if (asprintf (&full, "%s/%s", by_path, lines[i]) == -1)
+ error (EXIT_FAILURE, errno, "asprintf");
+
+ device = realpath (full, NULL);
+ if (device == NULL)
+ error (EXIT_FAILURE, errno, "realpath: %s", full);
+
+ /* Don't add the root device to the cache. */
+ if (is_root_device (device))
+ continue;
+
+ cache[j++] = device;
+ }
+ }
+
+ /* This is the final cache size because we deleted entries above. */
+ cache_size = j;
+}
+
+void
+device_name_translation_free (void)
+{
+ size_t i;
+
+ for (i = 0; i < cache_size; ++i)
+ free (cache[i]);
+ free (cache);
+ cache = NULL;
+ cache_size = 0;
+}
+
/**
* Perform device name translation.
*
+ * Libguestfs defines a few standard formats for device names.
+ * (see also L<guestfs(3)/BLOCK DEVICE NAMING> and
+ * L<guestfs(3)/guestfs_canonical_device_name>). They are:
+ *
+ * =over 4
+ *
+ * =item F</dev/sdX[N]>
+ *
+ * =item F</dev/hdX[N]>
+ *
+ * =item F</dev/vdX[N]>
+ *
+ * These mean the Nth partition on the Xth device. Because
+ * Linux no longer enumerates devices in the order they are
+ * passed to qemu, we must translate these by looking up
+ * the actual device using /dev/disk/by-path/
+ *
+ * =item F</dev/mdX>
+ *
+ * =item F</dev/VG/LV>
+ *
+ * =item F</dev/mapper/...>
+ *
+ * =item F</dev/dm-N>
+ *
+ * These are not translated here.
+ *
+ * =back
+ *
* It returns a newly allocated string which the caller must free.
*
* It returns C<NULL> on error. B<Note> it does I<not> call
@@ -45,18 +158,59 @@ char *
device_name_translation (const char *device)
{
int fd;
- char *ret;
+ char *ret = NULL;
+ size_t len;
- fd = open (device, O_RDONLY|O_CLOEXEC);
+ /* /dev/sdX[N] and aliases like /dev/vdX[N]. */
+ if (STRPREFIX (device, "/dev/") &&
+ strchr (device+5, '/') == NULL && /* not an LV name */
+ device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
+ ((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
+ ssize_t i;
+ const char *start;
+ char dev[16];
+
+ /* Translate to a disk index in /dev/disk/by-path sorted numerically. */
+ start = &device[5+len+1];
+ len = strspn (start, "abcdefghijklmnopqrstuvwxyz");
+ if (len >= sizeof dev - 1) {
+ fprintf (stderr, "unparseable device name: %s\n", device);
+ return NULL;
+ }
+ strcpy (dev, start);
+ dev[len] = '\0';
+
+ i = guestfs_int_drive_index (dev);
+ if (i >= 0 && i < (ssize_t) cache_size) {
+ /* Append the partition name if present. */
+ if (asprintf (&ret, "%s%s", cache[i], start+len) == -1)
+ return NULL;
+ }
+ }
+
+ /* If we didn't translate it above, continue with the same name. */
+ if (ret == NULL) {
+ ret = strdup (device);
+ if (ret == NULL)
+ return NULL;
+ }
+
+ /* Now check the device is openable. */
+ fd = open (ret, O_RDONLY|O_CLOEXEC);
if (fd >= 0) {
close (fd);
- return strdup (device);
+ return ret;
}
- if (errno != ENXIO && errno != ENOENT)
+ if (errno != ENXIO && errno != ENOENT) {
+ perror (ret);
+ free (ret);
return NULL;
+ }
- /* If the name begins with "/dev/sd" then try the alternatives. */
+ free (ret);
+
+ /* If the original name begins with "/dev/sd" then try the alternatives. */
if (!STRPREFIX (device, "/dev/sd"))
return NULL;
device += 7; /* device == "a1" etc. */
@@ -97,13 +251,34 @@ device_name_translation (const char *device)
char *
reverse_device_name_translation (const char *device)
{
- char *ret;
+ char *ret = NULL;
+ size_t i;
+
+ /* Look it up in the cache, and if found return the canonical name.
+ * If not found return a copy of the original string.
+ */
+ for (i = 0; i < cache_size; ++i) {
+ const size_t len = strlen (cache[i]);
+
+ if (STREQ (device, cache[i]) ||
+ (STRPREFIX (device, cache[i]) && c_isdigit (device[len]))) {
+ char drv[16];
+
+ guestfs_int_drive_name (i, drv);
+ if (asprintf (&ret, "/dev/sd%s%s", drv, &device[len]) == -1) {
+ reply_with_perror ("asprintf");
+ return NULL;
+ }
+ break;
+ }
+ }
- /* Currently a no-op. */
- ret = strdup (device);
if (ret == NULL) {
- reply_with_perror ("strdup");
- return NULL;
+ ret = strdup (device);
+ if (ret == NULL) {
+ reply_with_perror ("strdup");
+ return NULL;
+ }
}
return ret;
diff --git a/daemon/devsparts.ml b/daemon/devsparts.ml
index 59e66e82e..b0703d3ab 100644
--- a/daemon/devsparts.ml
+++ b/daemon/devsparts.ml
@@ -23,9 +23,26 @@ open Std_utils
open Utils
-let map_block_devices ~return_md f =
- let devs = Sys.readdir "/sys/block" in
- let devs = Array.to_list devs in
+let rec map_block_devices ~return_md f =
+ (* We need to get the devices in translated order. Use the
+ * same command that we use in device-name-translation.c.
+ *)
+ let path = "/dev/disk/by-path" in
+ let devs = command "ls" ["-1v"; path] in
+ let devs = String.trimr devs in
+ let devs = String.nsplit "\n" devs in
+ (* Delete entries for partitions. *)
+ let devs = List.filter (fun dev -> String.find dev "-part" = (-1)) devs
in
+ let devs =
+ List.filter_map (
+ fun file ->
+ let dev = Unix_utils.Realpath.realpath (sprintf "%s/%s" path file) in
+ (* Ignore non-/dev devices, and return without /dev/ prefix. *)
+ if String.is_prefix dev "/dev/" then
+ Some (String.sub dev 5 (String.length dev - 5))
+ else
+ None
+ ) devs in
let devs = List.filter (
fun dev ->
String.is_prefix dev "sd" ||
diff --git a/daemon/guestfsd.c b/daemon/guestfsd.c
index 5400adf64..fd87f6520 100644
--- a/daemon/guestfsd.c
+++ b/daemon/guestfsd.c
@@ -233,6 +233,9 @@ main (int argc, char *argv[])
_umask (0);
#endif
+ /* Initialize device name translations cache. */
+ device_name_translation_init ();
+
/* Connect to virtio-serial channel. */
if (!channel)
channel = VIRTIO_SERIAL_CHANNEL;
@@ -322,6 +325,8 @@ main (int argc, char *argv[])
/* Enter the main loop, reading and performing actions. */
main_loop (sock);
+ device_name_translation_free ();
+
exit (EXIT_SUCCESS);
}
diff --git a/daemon/stubs-macros.h b/daemon/stubs-macros.h
index e8b6c500b..235e79472 100644
--- a/daemon/stubs-macros.h
+++ b/daemon/stubs-macros.h
@@ -30,11 +30,6 @@
*/
#define RESOLVE_DEVICE(path,path_out,is_filein) \
do { \
- if (!is_device_parameter ((path))) { \
- if (is_filein) cancel_receive (); \
- reply_with_error ("%s: %s: expecting a device name", __func__, (path));
\
- return; \
- } \
(path_out) = device_name_translation ((path)); \
if ((path_out) == NULL) { \
const int err = errno; \
@@ -43,6 +38,11 @@
reply_with_perror ("%s: %s", __func__, path); \
return; \
} \
+ if (!is_device_parameter ((path_out))) { \
+ if (is_filein) cancel_receive (); \
+ reply_with_error ("%s: %s: expecting a device name", __func__, (path));
\
+ return; \
+ } \
} while (0)
/* All functions that take a mountable argument must call this macro.
diff --git a/lib/canonical-name.c b/lib/canonical-name.c
index 42a0fd2a6..efe45c5f1 100644
--- a/lib/canonical-name.c
+++ b/lib/canonical-name.c
@@ -37,6 +37,11 @@ guestfs_impl_canonical_device_name (guestfs_h *g, const char *device)
strchr (device+5, '/') == NULL && /* not an LV name */
device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
+ /* NB! These do not need to be translated by
+ * device_name_translation. They will be translated if necessary
+ * when the caller uses them in APIs which go through to the
+ * daemon.
+ */
ret = safe_asprintf (g, "/dev/sd%s", &device[5+len+1]);
}
else if (STRPREFIX (device, "/dev/mapper/") ||
--
2.25.0