On Wed, Jun 29, 2022 at 04:56:20PM +0200, Laszlo Ersek wrote:
On 06/29/22 13:03, Richard W.M. Jones wrote:
> This will be used in a subsequent commit. At the moment the preferred
> block size for all sources / destinations is simply calculated and
> stored.
> ---
> copy/file-ops.c | 9 ++++++++-
> copy/main.c | 9 ++++++---
> copy/nbd-ops.c | 9 +++++++++
> copy/nbdcopy.h | 4 +++-
> copy/null-ops.c | 1 +
> copy/pipe-ops.c | 1 +
> 6 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/copy/file-ops.c b/copy/file-ops.c
> index ab3787545c..3b901bcdb8 100644
> --- a/copy/file-ops.c
> +++ b/copy/file-ops.c
> @@ -241,7 +241,8 @@ seek_hole_supported (int fd)
>
> struct rw *
> file_create (const char *name, int fd,
> - off_t st_size, bool is_block, direction d)
> + off_t st_size, blksize_t st_blksize,
> + bool is_block, direction d)
> {
> struct rw_file *rwf = calloc (1, sizeof *rwf);
> if (rwf == NULL) { perror ("calloc"); exit (EXIT_FAILURE); }
> @@ -262,6 +263,11 @@ file_create (const char *name, int fd,
> perror ("lseek");
> exit (EXIT_FAILURE);
> }
> + rwf->rw.preferred = 4096;
> +#ifdef BLKIOOPT
> + if (ioctl (fd, BLKIOOPT, &rwf->rw.preferred))
> + fprintf (stderr, "warning: cannot get optimal I/O size: %s: %m",
name);
> +#endif
>From my reading of the kernel, this ioctl stores an "unsigned int",
which (in practice) will not set all bytes in the "preferred" uint64_t
field.
Additionally, if the ioctl fails, I'm not sure if we have a guarantee
that the destination will be left unchanged. I suggest:
#ifdef BLKIOOPT
unsigned int blkioopt;
if (ioctl (fd, BLKIOOPT, &blkioopt))
rwf->rw.preferred = blkioopt;
else {
fprintf (stderr, "warning: cannot get optimal I/O size: %s: %m",
name);
rwf->rw.preferred = 4096;
}
#else
rwf->rw.preferred = 4096;
#endif
Ouch yes, and there's a similar bug in existing code too :-(
... however, I also suggest doing this *elsewhere* (not here). I
think
this function should take a plain uint64_t "preferred" param, and simply
assign that to "rwf->rw.preferred", independently of "is_block".
(There
is one call site only, so moving the logic out there does not lead to
code duplication.) I'll explain the reason below.
> rwf->seek_hole_supported = seek_hole_supported (fd);
> rwf->sector_size = 4096;
> #ifdef BLKSSZGET
> @@ -282,6 +288,7 @@ file_create (const char *name, int fd,
> else {
> /* Regular file. */
> rwf->rw.size = st_size;
> + rwf->rw.preferred = st_blksize;
> rwf->seek_hole_supported = seek_hole_supported (fd);
> /* Possible efficient zero methods for regular file. */
> #ifdef FALLOC_FL_PUNCH_HOLE
> diff --git a/copy/main.c b/copy/main.c
> index cc379e98bc..9b551de664 100644
> --- a/copy/main.c
> +++ b/copy/main.c
> @@ -513,7 +513,9 @@ open_local (const char *filename, direction d)
> exit (EXIT_FAILURE);
> }
> if (S_ISBLK (stat.st_mode) || S_ISREG (stat.st_mode))
> - return file_create (filename, fd, stat.st_size, S_ISBLK (stat.st_mode), d);
> + return file_create (filename, fd,
> + stat.st_size, stat.st_blksize, S_ISBLK (stat.st_mode),
> + d);
> else {
> /* Probably stdin/stdout, a pipe or a socket. */
> synchronous = true; /* Force synchronous mode for pipes. */
I couldn't find any info on whether fstat() fills in "stat.st_blksize"
at all for block devices. I understand that with "is_block" set, the
potential garbage will be ignored in file_create() anyway, but passing
garbage *itself* is not great.
So I suggest to split
S_ISBLK (stat.st_mode) || S_ISREG (stat.st_mode)
to two separate branche. Pass stat.st_blksize (as "uint64_t preferred")
on the S_ISREG branch. Do the following on the S_ISBLK branch:
unsigned int blkioopt;
#ifdef BLKIOOPT
if (ioctl (fd, BLKIOOPT, &blkioopt) == -1) {
fprintf (stderr, "warning: cannot get optimal I/O size: %s: %m",
filename);
blkioopt = 4096;
}
#else
blkioopt = 4096;
#endif
return file_create (filename, fd, stat.st_size, blkioopt, false, d);
Yup, good idea. The existing code has "evolved" and was never very
well structured in the first place ...
> @@ -528,8 +530,9 @@ print_rw (struct rw *rw, const char *prefix, FILE *fp)
> char buf[HUMAN_SIZE_LONGEST];
>
> fprintf (fp, "%s: %s \"%s\"\n", prefix,
rw->ops->ops_name, rw->name);
> - fprintf (fp, "%s: size=%" PRIi64 " (%s)\n",
> - prefix, rw->size, human_size (buf, rw->size, NULL));
> + fprintf (fp, "%s: size=%" PRIi64 " (%s), preferred block
size=%" PRIu64 "\n",
> + prefix, rw->size, human_size (buf, rw->size, NULL),
> + rw->preferred);
> }
>
> /* Default implementation of rw->ops->get_extents for backends which
> diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
> index 3bc26ba613..6c5d59c578 100644
> --- a/copy/nbd-ops.c
> +++ b/copy/nbd-ops.c
> @@ -113,11 +113,20 @@ open_one_nbd_handle (struct rw_nbd *rwn)
> */
> if (rwn->handles.len == 0) {
> rwn->can_zero = nbd_can_zero (nbd) > 0;
> +
> rwn->rw.size = nbd_get_size (nbd);
> if (rwn->rw.size == -1) {
This existent code is fine, as nbd_get_size returns int64_t, and
"rwn->rw.size" also has type int64_t...
> fprintf (stderr, "%s: %s: %s\n", prog, rwn->rw.name,
nbd_get_error ());
> exit (EXIT_FAILURE);
> }
> +
> + rwn->rw.preferred = nbd_get_block_size (nbd, LIBNBD_SIZE_PREFERRED);
> + if (rwn->rw.preferred == -1) {
... but this is hard to read (albeit correct in practice).
In case nbd_get_block_size() fails, it will return (int64_t)-1, and we
end up assigning "rwn->rw.preferred" UINT64_MAX. In the comparison on
the next line, the (int)-1 will in practice be converted to uint64_t, so
we do the comparison against UINT64_MAX. Correct, but hard to read.
I'd use a separate "int64_t block_size" local variable for this:
block_size = nbd_get_block_size (...);
if (block_size == -1) {
fprintf (...);
exit (...);
}
rwn->rw.preferred = block_size == 0 ? 4096 : block_size;
But, replacing -1 with UINT64_MAX, or else (uint64_t)-1, would be
informative too.
OK
> + fprintf (stderr, "%s: %s: %s\n", prog,
rwn->rw.name, nbd_get_error ());
> + exit (EXIT_FAILURE);
> + }
> + if (rwn->rw.preferred == 0)
> + rwn->rw.preferred = 4096;
> }
>
> if (handles_append (&rwn->handles, nbd) == -1) {
> diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
> index 19797dfd66..9b02ddf270 100644
> --- a/copy/nbdcopy.h
> +++ b/copy/nbdcopy.h
> @@ -43,6 +43,7 @@ struct rw {
> struct rw_ops *ops; /* Operations. */
> const char *name; /* Printable name, for error messages etc. */
> int64_t size; /* May be -1 for streams. */
> + uint64_t preferred; /* Preferred block size. */
> /* Followed by private data for the particular subtype. */
> };
>
> @@ -53,7 +54,8 @@ typedef enum { READING, WRITING } direction;
>
> /* Create subtypes. */
> extern struct rw *file_create (const char *name, int fd,
> - off_t st_size, bool is_block, direction d);
> + off_t st_size, blksize_t st_blksize,
> + bool is_block, direction d);
> extern struct rw *nbd_rw_create_uri (const char *name,
> const char *uri, direction d);
> extern struct rw *nbd_rw_create_subprocess (const char **argv, size_t argc,
> diff --git a/copy/null-ops.c b/copy/null-ops.c
> index 1218a623e2..854e291613 100644
> --- a/copy/null-ops.c
> +++ b/copy/null-ops.c
> @@ -45,6 +45,7 @@ null_create (const char *name)
> rw->rw.ops = &null_ops;
> rw->rw.name = name;
> rw->rw.size = INT64_MAX;
> + rw->rw.preferred = 4 * 1024 * 1024;
> return &rw->rw;
> }
>
> diff --git a/copy/pipe-ops.c b/copy/pipe-ops.c
> index 3c8b6c2b39..3815f824fc 100644
> --- a/copy/pipe-ops.c
> +++ b/copy/pipe-ops.c
> @@ -43,6 +43,7 @@ pipe_create (const char *name, int fd)
> rwp->rw.ops = &pipe_ops;
> rwp->rw.name = name;
> rwp->rw.size = -1;
> + rwp->rw.preferred = 4096;
> rwp->fd = fd;
> return &rwp->rw;
> }
>
Thanks,
Laszlo
Rich.
--
Richard Jones, Virtualization Group, Red Hat
http://people.redhat.com/~rjones
Read my programming and virtualization blog:
http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines. Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v