On 6/28/20 8:02 AM, Richard W.M. Jones wrote:
---
plugins/tar/{tar.pl => nbdkit-tar-plugin.pod} | 145 ++-------
configure.ac | 2 -
plugins/tar/Makefile.am | 41 +--
tests/Makefile.am | 14 +-
plugins/tar/tar.c | 286 ++++++++++++++++++
tests/test-dump-plugin.sh | 2 +-
tests/test-help-plugin.sh | 2 +-
tests/test-tar-info.sh | 67 ++++
tests/test-tar.sh | 22 +-
tests/test-version-plugin.sh | 2 +-
wrapper.c | 2 +-
.gitignore | 1 -
README | 4 +-
13 files changed, 427 insertions(+), 163 deletions(-)
- nbdkit tar tar=FILENAME.tar file=PATH_INSIDE_TAR
+ nbdkit tar [tar=]FILENAME.tar file=PATH_INSIDE_TAR
I'm considering a followup patch to allow file=exportname as a magic
filename, similarly to how we did in the ext2 filter.
+=item B<file=>PATH_INSIDE_TAR
+
+The path of the file inside the tarball to serve. This parameter is
+required. It must exactly match the name stored in the tarball, so
+use S<C<tar tf filename.tar>>
However, I'm a bit worried about how it would work if the tarfile itself
includes a file named 'exportname' in the top directory of the tarfile.
A quick test confirms my worry:
$ cd /tmp
$ touch exportname
$ tar cf f.tar exportname
$ tar tf f.tar
exportname
$ LANG=C tar --no-auto-compress -tRvf f.tar exportname
block 0: -rw-rw-r-- eblake/eblake 0 2020-07-06 13:37 exportname
block 1: ** Block of NULs **
$ LANG=C tar --no-auto-compress -tRvf f.tar ./exportname
block 1: ** Block of NULs **
tar: ./exportname: Not found in archive
tar: Exiting with failure status due to previous errors
so if we like the idea, we'd have to allow the user to specify
mutually-exclusive config parameters: either file= to something within
the file, or exportname=on to allow the client to choose, where we then
validate that exactly one of those two options is configured.
+++ b/plugins/tar/Makefile.am
@@ -1,5 +1,5 @@
# nbdkit
-# Copyright (C) 2013-2020 Red Hat Inc.
+# Copyright (C) 2018-2020 Red Hat Inc.
Interesting change in dates.
+++ b/plugins/tar/tar.c
@@ -0,0 +1,286 @@
+/* nbdkit
+ * Copyright (C) 2018-2020 Red Hat Inc.
+ *
+static int
+tar_config_complete (void)
+{
+ if (tarfile == NULL || file == NULL) {
+ nbdkit_error ("you must supply the tar=<TARFILE> and
file=<FILENAME> "
+ "parameters");
+ return -1;
+ }
+
+ return 0;
+}
+
+#define tar_config_help \
+ "[tar=]<TARBALL> (required) The name of the tar file.\n" \
+ "file=<FILENAME> The path inside the tar file to
server."
Should '(required)' be listed on both lines? (Not necessarily on the
second, if we go with the exportname=on option)
+
+static int
+tar_get_ready (void)
+{
+ FILE *fp;
+ CLEANUP_FREE char *cmd = NULL;
+ size_t len = 0;
+ bool scanned_ok;
+ char s[256];
+
+ /* Construct the tar command to examine the tar file. */
+ fp = open_memstream (&cmd, &len);
+ if (fp == NULL) {
+ nbdkit_error ("open_memstream: %m");
+ return -1;
+ }
+ fprintf (fp, "LANG=C tar --no-auto-compress -tRvf ");
Use of --no-auto-compress is specific to GNU tar, do we care?
Should we allow a 'tar=/path/to/tar' parameter during .config to allow a
user to point to an alternative tar?
+ shell_quote (tarfile, fp);
+ fputc (' ', fp);
+ shell_quote (file, fp);
+ if (fclose (fp) == EOF) {
+ nbdkit_error ("memstream failed: %m");
+ return -1;
+ }
+
+ /* Run the command and read the first line of the output. */
+ nbdkit_debug ("%s", cmd);
+ fp = popen (cmd, "r");
+ if (fp == NULL) {
+ nbdkit_error ("tar: %m");
+ return -1;
+ }
+ scanned_ok = fscanf (fp, "block %" SCNu64 ": %*s %*s %" SCNu64,
+ &offset, &size) == 2;
fscanf() parsing an integer is subject to undefined behavior on
overflow. (Yes, I know it is a pre-existing and prevalent issue...)
+ /* We have to now read and discard the rest of the output until
EOF. */
+ while (fread (s, sizeof s, 1, fp) > 0)
+ ;
+ if (pclose (fp) != 0) {
+ nbdkit_error ("tar subcommand failed, "
+ "check that the file really exists in the tarball");
+ return -1;
+ }
If we add exportname support, we'll have to defer checking for file
existence until during .open.
+
+ if (!scanned_ok) {
+ nbdkit_error ("unexpected output from the tar subcommand");
+ return -1;
+ }
+
+ /* Adjust the offset: Add 1 for the tar header, then multiply by the
+ * block size.
+ */
+ offset = (offset+1) * 512;
Is 512 always correct, or can a tar created with -b > 1 cause issues?
+
+ nbdkit_debug ("tar: offset %" PRIu64 ", size %" PRIu64, offset,
size);
+
+ /* Check it looks sensible. XXX We ought to check it doesn't exceed
+ * the size of the tar file.
+ */
+ if (offset >= INT64_MAX || size >= INT64_MAX) {
+ nbdkit_error ("internal error: calculated offset and size are wrong");
+ return -1;
+ }
+
+ return 0;
+}
+
+struct handle {
+ int fd;
+};
+
+static void *
+tar_open (int readonly)
+{
+ struct handle *h;
+
+ assert (offset > 0); /* Cannot be zero because of tar header. */
+
+ h = calloc (1, sizeof *h);
+ if (h == NULL) {
+ nbdkit_error ("calloc: %m");
+ return NULL;
+ }
+ h->fd = open (tarfile, (readonly ? O_RDONLY : O_RDWR) | O_CLOEXEC);
Should we really be opening a different fd per client, or can we get
away with opening only one fd during .get_ready and sharing it among all
clients?
The unguarded use of O_CLOEXEC makes sense, but may cause compilation
issues on Haiku.
+ if (h->fd == -1) {
+ nbdkit_error ("%s: %m", tarfile);
+ free (h);
+ return NULL;
+ }
+
+ return h;
+}
+
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
+
+/* Get the file size. */
+static int64_t
+tar_get_size (void *handle)
+{
+ return size;
+}
+
+/* Serves the same data over multiple connections. */
+static int
+tar_can_multi_conn (void *handle)
+{
+ return 1;
+}
Needs a tweak if we add exportname=on
+
+static int
+tar_can_cache (void *handle)
+{
+ /* Let nbdkit call pread to populate the file system cache. */
+ return NBDKIT_CACHE_EMULATE;
+}
+
+/* Read data from the file. */
+static int
+tar_pread (void *handle, void *buf, uint32_t count, uint64_t offs)
+{
+ struct handle *h = handle;
+
+ offs += offset;
+
+ while (count > 0) {
+ ssize_t r = pread (h->fd, buf, count, offs);
+ if (r == -1) {
+ nbdkit_error ("pread: %m");
+ return -1;
+ }
+ if (r == 0) {
+ nbdkit_error ("pread: unexpected end of file");
+ return -1;
+ }
+ buf += r;
+ count -= r;
+ offs += r;
+ }
+
+ return 0;
+}
+
+/* Write data to the file. */
+static int
+tar_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offs)
+{
+ struct handle *h = handle;
+
+ offs += offset;
+
+ while (count > 0) {
+ ssize_t r = pwrite (h->fd, buf, count, offs);
Does this always work even when the tar file was created with sparse
file support? For that matter, if the tar file was created with sparse
file contents, are we even guaranteed that a contiguous offset of the
tar file is blindly usable as the data to serve?
+ if (r == -1) {
+ nbdkit_error ("pwrite: %m");
+ return -1;
+ }
+ buf += r;
+ count -= r;
+ offs += r;
+ }
+
+ return 0;
+}
+
+static struct nbdkit_plugin plugin = {
+ .name = "tar",
+ .longname = "nbdkit tar plugin",
+ .version = PACKAGE_VERSION,
+ .unload = tar_unload,
+ .config = tar_config,
+ .config_complete = tar_config_complete,
+ .config_help = tar_config_help,
+ .magic_config_key = "tar",
+ .get_ready = tar_get_ready,
+ .open = tar_open,
+ .get_size = tar_get_size,
+ .can_multi_conn = tar_can_multi_conn,
+ .can_cache = tar_can_cache,
+ .pread = tar_pread,
+ .pwrite = tar_pwrite,
+ .errno_is_preserved = 1,
No .extents makes sense if the tar file does not record sparseness, but
may be needed if we support sparse tar files.
$ truncate --size=1M large
$ echo 'hi' >> large
$ tar cSf f.tar large
$ ls -l large f.tar
-rw-rw-r--. 1 eblake eblake 10240 Jul 6 13:57 f.tar
-rw-rw-r--. 1 eblake eblake 1048579 Jul 6 13:48 large
$ LANG=C tar --no-auto-compress -tRvf f.tar large
block 0: -rw-rw-r-- eblake/eblake 1048579 2020-07-06 13:48 large
block 2: ** Block of NULs **
Yep, we need to special-case sparse tar files :(
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3226
Virtualization:
qemu.org |
libvirt.org