We have extended the collectd virt plugin to extract disk usage information
from a libvirt domain using libguestfs.
We have had several issues with it which were raised here in 2018 by Peter
Dimitrov.
Currently the collectd plugin works fine and retrieves the required
statistics. The current collectd configuration sets the statistics read
interval (the interval at which all plugin read functions are called) to
50 seconds. After a certain period of time (i.e. a certain number of calls
of the plugin read functions - about 490 calls), collectd is terminated with
signal SIGABRT and the following backtrace:
(gdb) bt
#0 0x00007ffff71f2e97 in raise () from /lib/x86_64-linux-gnu/libc.so.6
#1 0x00007ffff71f4801 in abort () from /lib/x86_64-linux-gnu/libc.so.6
#2 0x00007ffff723d897 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
#3 0x00007ffff72e8cff in ?? () from /lib/x86_64-linux-gnu/libc.so.6
#4 0x00007ffff72e8d21 in __fortify_fail () from
/lib/x86_64-linux-gnu/libc.so.6
#5 0x00007ffff72e6a10 in __chk_fail () from /lib/x86_64-linux-gnu/libc.so.6
#6 0x00007ffff72e8c0a in __fdelt_warn () from
/lib/x86_64-linux-gnu/libc.so.6
#7 0x00007ffff47ed8ba in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#8 0x00007ffff47ee2f5 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#9 0x00007ffff47efefc in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#10 0x00007ffff4794ca5 in guestfs_disk_create_argv () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#11 0x00007ffff4807b18 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#12 0x00007ffff47f0b44 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#13 0x00007ffff47f0d7b in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#14 0x00007ffff47f1c55 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#15 0x00007ffff4784927 in guestfs_add_drive_opts_argv () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#16 0x00007ffff48128e0 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#17 0x00007ffff4813cd6 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#18 0x00007ffff47ab2c3 in guestfs_add_libvirt_dom_argv () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#19 0x00007ffff4812cf6 in ?? () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#20 0x00007ffff4760368 in guestfs_add_domain_argv () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#21 0x00007ffff47dfc38 in guestfs_add_domain_va () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#22 0x00007ffff47dfee4 in guestfs_add_domain () from
/usr/lib/x86_64-linux-gnu/libguestfs.so.0
#23 0x00007ffff4a78bec in refresh_lists (inst=inst@entry=0x7ffff4c7f940
<lv_read_user_data>) at src/virt.c:2049
#24 0x00007ffff4a7a327 in lv_read (ud=<optimized out>) at src/virt.c:1656
#25 0x0000555555564a1c in plugin_read_thread (args=<optimized out>) at
src/daemon/plugin.c:540
#26 0x00007ffff79b66db in start_thread () from
/lib/x86_64-linux-gnu/libpthread.so.0
#27 0x00007ffff72d588f in clone () from /lib/x86_64-linux-gnu/libc.so.6
The libguestfs code that runs every time the virt plugin read function is
invoked is given below. Note that the code presented here lacks proper
cleanup.
/* guestfs_extend start */
/* get FS stats using libguestfs */
/* Filesystems. */
guestfs_h *g = NULL;
int ret = 0;
int j = 0;
int cnt_drives = 0;
char **fses = NULL;
struct guestfs_statvfs *fs_stats = NULL;
struct fs_info *fs = NULL;
/* Work around collectd bug with waitpid() after fork() */
signal (SIGCHLD, SIG_DFL);
g = guestfs_create();
if (g == NULL) {
ERROR(PLUGIN_NAME " plugin: failed to create libguestfs handle");
goto cont; //exit(EXIT_FAILURE);
}
guestfs_set_trace(g,1);
//guestfs_set_verbose(g,1);
if ( 0 != guestfs_set_backend (g, "direct") ) {
ERROR(PLUGIN_NAME " plugin: guestfs_set_backend failed");
}
cnt_drives = guestfs_add_domain (g, name,
GUESTFS_ADD_DOMAIN_READONLY, 1, -1);
if (cnt_drives == -1) {
ERROR(PLUGIN_NAME " plugin: failed to get guestfs domain handle.
errno %d, guestfs _last_errno %d ", errno, guestfs_last_errno(g));
guestfs_close(g);
goto cont; //exit(EXIT_FAILURE);
}
ret = guestfs_launch(g);
if(ret == -1) {
ERROR(PLUGIN_NAME " plugin: failed to guestfs-launch domain");
guestfs_close(g);
goto cont; //exit(EXIT_FAILURE);
}
fses = guestfs_list_filesystems(g);
if(fses == NULL) {
ERROR(PLUGIN_NAME " plugin: failed to get filesystems!");
guestfs_close(g);
goto cont; //exit(EXIT_FAILURE);
}
j = 0;
while(fses[j] != NULL) {
if(strcmp(fses[j+1], "") != 0 &&
strcmp(fses[j+1], "swap") != 0 &&
strcmp(fses[j+1], "unknown") != 0 &&
/* skip CD-ROMs */
strcmp(fses[j+1], "iso9660") != 0 && !
/* If CD-ROM is bootable and has efi.img, libguestfs will mount
it - skip that case */
( strcmp(fses[j+1], "vfat") == 0 &&
j > 2 && /* so next line is valid*/
strcmp(fses[j-1], "iso9660") == 0) )
{
/* the code below is not executed for the sake of test */
/* the code below is not executed for the sake of test */
if ( 0 && (guestfs_mount_ro (g, (const char *) fses[j], "/")
==
0)) {
fs_stats = guestfs_statvfs(g, "/");
if(fs_stats == NULL) {
ERROR(PLUGIN_NAME " plugin: Failed guestfs_statvfs for
filesystem %s", fses[i]);
continue; //exit(EXIT_FAILURE);
}
guestfs_umount_all (g);
fs = malloc(sizeof(struct fs_info));
if (fs == NULL) {
ERROR(PLUGIN_NAME " plugin: Failed malloc for struct fs_info
");
continue; //exit(EXIT_FAILURE);
}
fs->fs_name = strdup(fses[j]);
if (fs->fs_name == NULL) {
ERROR(PLUGIN_NAME " plugin: Failed strdup for filesystem %s",
fses[i]);
continue; //exit(EXIT_FAILURE);
}
fs->dom = dom;
fs->usage_percent = (unsigned int) ceil(100. - 100. *
fs_stats->bavail / fs_stats->blocks);
fs->size_total = fs_stats->bsize * fs_stats->blocks;
fs->size_free = fs_stats->bsize * fs_stats->bavail;
fs->size_used = fs->size_total - fs->size_free;
guestfs_free_statvfs(fs_stats);
add_filesystem(state, fs);
sfree(fs->fs_name);
sfree(fs);
fs = NULL;
} // if fs mount succeeds
} //if Filesystem is eligible for stats
j += 2;
} //while there are more Filesystems
j = 0;
while(fses[j] != NULL) {
free(fses[j]);
j++;
}
free(fses);
guestfs_shutdown (g);
guestfs_close(g);
/* guestfs_extend end */
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: set_backend
"direct"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: set_backend = 0
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: add_domain
"tve50:00000013" "readonly:true"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: add_libvirt_dom
(virDomainPtr)0x7fffa002be60 "readonly:true"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace:
clear_backend_setting "internal_libvirt_norelabel_disks"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace:
clear_backend_setting = 0
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: add_drive
"/var/lib/nova/instances/5ca86029-d296-4261-9a67-908bdd6c4eab/disk"
"readonly:true" "format:qcow2"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_tmpdir
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_tmpdir = "/tmp"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: disk_create
"/tmp/libguestfs4mMxbv/overlay1.qcow2" "qcow2" -1
"backingfile:/var/lib/nova/instances/5ca86029-d296-4261-9a67-908bdd6c4eab/disk"
"backingformat:qcow2"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: disk_create = 0
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: add_drive = 0
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: add_libvirt_dom = 1
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: add_domain = 1
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: launch
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace:
get_backend_setting "force_tcg"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace:
get_backend_setting = NULL (error)
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_cachedir
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_cachedir =
"/var/tmp"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_cachedir
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_cachedir =
"/var/tmp"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_sockdir
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace: get_sockdir =
"/tmp"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace:
get_backend_setting "gdb"
Feb 20 15:58:08 tve50 collectd[4689]: libguestfs: trace:
get_backend_setting = NULL (error)
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: launch = 0
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_filesystems
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: feature_available
"lvm2"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace:
internal_feature_available "lvm2"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace:
internal_feature_available = 0
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: feature_available
= 1
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: feature_available
"ldm"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace:
internal_feature_available "ldm"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace:
internal_feature_available = 0
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: feature_available
= 1
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_devices
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_devices =
["/dev/sda"]
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_partitions
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_partitions =
["/dev/sda1", "/dev/sda15"]
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_md_devices
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_md_devices =
[]
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev
"/dev/sda1"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev =
"/dev/sda"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev
"/dev/sda15"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev =
"/dev/sda"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_partnum
"/dev/sda1"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_partnum = 1
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev
"/dev/sda1"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev =
"/dev/sda"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_get_mbr_id
"/dev/sda" 1
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_get_mbr_id =
264650159
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: vfs_type
"/dev/sda1"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: vfs_type = "ext3"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_partnum
"/dev/sda15"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_partnum =
15
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev
"/dev/sda15"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_to_dev =
"/dev/sda"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_get_mbr_id
"/dev/sda" 15
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: part_get_mbr_id =
-1054182616
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: vfs_type
"/dev/sda15"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: vfs_type = "vfat"
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: lvs
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: lvs = []
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_ldm_volumes
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_ldm_volumes =
[]
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_ldm_partitions
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace:
list_ldm_partitions = []
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: list_filesystems =
["/dev/sda1", "ext3", "/dev/sda15", "vfat"]
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: shutdown
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: internal_autosync
Feb 20 15:58:11 tve50 collectd[4689]: libguestfs: trace: internal_autosync
= 0
Feb 20 15:58:12 tve50 collectd[4689]: libguestfs: trace: shutdown = 0
Feb 20 15:58:12 tve50 collectd[4689]: libguestfs: trace: close
When the problem happens, after several hours, we have the following trace:
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: set_backend
"direct"
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: set_backend = 0
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: add_domain
"tve50:00000013" "readonly:true"
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: add_libvirt_dom
(virDomainPtr)0x7fffb0037b90 "readonly:true"
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace:
clear_backend_setting "internal_libvirt_norelabel_disks"
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace:
clear_backend_setting = 0
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: add_drive
"/var/lib/nova/instances/5ca86029-d296-4261-9a67-908bdd6c4eab/disk"
"readonly:true" "format:qcow2"
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: get_tmpdir
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: get_tmpdir =
"/tmp"
Feb 20 15:09:36 tve50 collectd[17720]: libguestfs: trace: disk_create
"/tmp/libguestfsPMoTz7/overlay1.qcow2" "qcow2" -1
"backingfile:/var/lib/nova/instances/5ca86029-d296-4261-9a67-908
Feb 20 15:09:36 tve50 collectd[17720]: *** buffer overflow detected ***:
/usr/sbin/collectd terminated
Feb 20 15:09:37 tve50 systemd[1]: collectd.service: Main process exited,
code=killed, status=6/ABRT
Feb 20 15:09:37 tve50 systemd[1]: collectd.service: Failed with result
'signal'.
--
*Veselin Kozhuharski** |* Software Engineer
Direct: +359 2 439 2590 ext. 3912 *|* Mobile: +359 887 412116 |
veselin_k*(a)telco.com
<mzabaruk(a)telco.com>*
*Telco Systems | **www.telco.com <
http://www.telco.com/>*
Follow us: *LinkedIn <
http://www.linkedin.com/company/telco-systems>*
| *Twitter
<
http://twitter.com/TelcoSystems>* | *Facebook
<
https://www.facebook.com/TelcoSystems>* | *YouTube
<
http://www.youtube.com/TelcoSystems>* | *Blog <
http://www.telco.com/blog>*
|