It was brought to my attention that dumping a registry hive causes a
lot of time spent in disk I/O activity because iconv_open() and
iconv_close() are called for every key. Every iconv_open() call causes
/usr/lib/.../gconv/$ENCODING.so to be opened and mapped.
The iconv_t handles are now cached in the hive_h struct; they are
opened on-demand and re-used.
On my ~10 year old Lenovo T60, I have seen a 57% reduction in the overall
runtime of running
hivexregedit --export windows-8-enterprise-software.hive '\\'
---
bootstrap | 1 +
configure.ac | 2 ++
lib/Makefile.am | 2 ++
lib/handle.c | 42 +++++++++++++++++++++++++++++++++++++++++-
lib/hivex-internal.h | 31 ++++++++++++++++++++++---------
lib/node.c | 6 +++---
lib/utf16.c | 38 ++++++++++++++++----------------------
lib/value.c | 10 +++++-----
lib/write.c | 4 ++--
m4/.gitignore | 2 ++
10 files changed, 96 insertions(+), 42 deletions(-)
diff --git a/bootstrap b/bootstrap
index bd82477..373fad8 100755
--- a/bootstrap
+++ b/bootstrap
@@ -75,6 +75,7 @@ vc-list-files
warnings
xstrtol
xstrtoll
+threadlib
'
$gnulib_tool \
diff --git a/configure.ac b/configure.ac
index 547fb0d..8405774 100644
--- a/configure.ac
+++ b/configure.ac
@@ -38,7 +38,9 @@ AC_DEFINE([PACKAGE_VERSION_RELEASE],[hivex_release],[Release number])
AC_DEFINE([PACKAGE_VERSION_EXTRA],["hivex_extra"],[Extra version string])
gl_EARLY
+gl_THREADLIB_EARLY
gl_INIT
+gl_THREADLIB
AM_PROG_LIBTOOL
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 4a7cea1..62cdf35 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -38,6 +38,8 @@ libhivex_la_SOURCES = \
visit.c \
write.c
+libhivex_la_SOURCES += $(top_srcdir)/gnulib/lib/glthread/threadlib.c
+
libhivex_la_LIBADD = ../gnulib/lib/libgnu.la $(LTLIBOBJS)
libhivex_la_LDFLAGS = \
-version-info 0:0:0 \
diff --git a/lib/handle.c b/lib/handle.c
index 9dcf81d..01b8d80 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -30,6 +30,8 @@
#include <sys/stat.h>
#include <errno.h>
#include <assert.h>
+#include <iconv.h>
+#include <glthread/lock.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
@@ -62,6 +64,32 @@ header_checksum (const hive_h *h)
#define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
+/* Return the cached iconv descriptor for conversion T, opening it on first
+ * use.  The slot's mutex is taken here and is still held on return, even on
+ * failure, so the caller must always call _hivex_release_iconv (h, t).
+ * Returns (iconv_t) -1 if iconv_open() fails; the failure is not cached. */
+iconv_t *
+_hivex_get_iconv (hive_h *h, recode_type t)
+{
+  static const char *const codes[][2] = {  /* recode_type -> { to, from } */
+    { "LATIN1", "UTF-8" }, { "UTF-8", "LATIN1" },
+    { "UTF-16LE", "UTF-8" }, { "UTF-8", "UTF-16LE" },
+  };
+  glthread_lock_lock (&h->iconv_cache[t].mutex);
+  iconv_t ic = h->iconv_cache[t].handle;
+  if (ic != NULL)
+    iconv (ic, NULL, 0, NULL, 0);    /* re-used: reset the conversion state */
+  else if ((ic = iconv_open (codes[t][0], codes[t][1])) != (iconv_t) -1)
+    h->iconv_cache[t].handle = ic;   /* cache only a usable descriptor */
+  return ic;
+}
+
+void
+_hivex_release_iconv (hive_h *h, recode_type t)
+{
+ glthread_lock_unlock (&h->iconv_cache[t].mutex); /* locked in _hivex_get_iconv */
+}
+
hive_h *
hivex_open (const char *filename, int flags)
{
@@ -164,11 +192,17 @@ hivex_open (const char *filename, int flags)
goto error;
}
+  for (size_t t=0; t < sizeof h->iconv_cache / sizeof h->iconv_cache[0]; t++) {
+    glthread_lock_init (&h->iconv_cache[t].mutex);  /* every slot, incl. utf16le_to_utf8 */
+    h->iconv_cache[t].handle = NULL;  /* opened lazily by _hivex_get_iconv */
+  }
+
/* Last modified time. */
h->last_modified = le64toh ((int64_t) h->hdr->last_modified);
if (h->msglvl >= 2) {
- char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64);
+ char *name = _hivex_recode (h, utf16le_to_utf8,
+ h->hdr->name, 64, NULL);
fprintf (stderr,
"hivex_open: header fields:\n"
@@ -424,6 +458,12 @@ hivex_close (hive_h *h)
else
r = 0;
free (h->filename);
+  for (size_t t=0; t < sizeof h->iconv_cache / sizeof h->iconv_cache[0]; t++) {
+    if (h->iconv_cache[t].handle != NULL) {  /* slot was opened on demand */
+      iconv_close (h->iconv_cache[t].handle);
+      h->iconv_cache[t].handle = NULL;
+    }
+  }
free (h);
return r;
diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
index 9a497ed..d04ae3c 100644
--- a/lib/hivex-internal.h
+++ b/lib/hivex-internal.h
@@ -22,6 +22,8 @@
#include <stdarg.h>
#include <stddef.h>
#include <string.h>
+#include <iconv.h>
+#include <glthread/lock.h>
#include "byte_conversions.h"
@@ -35,6 +37,13 @@
#define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0)
#define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0)
+typedef enum { /* conversion direction, named <from>_to_<to> */
+ utf8_to_latin1 = 0, /* values are used as indexes into hive_h.iconv_cache[] */
+ latin1_to_utf8,
+ utf8_to_utf16le,
+ utf16le_to_utf8,
+} recode_type;
+
struct hive_h {
char *filename;
int fd;
@@ -79,6 +88,11 @@ struct hive_h {
/* Internal data for mmap replacement */
void *p_winmap;
#endif
+
+ struct {
+ gl_lock_t mutex;
+ iconv_t *handle;
+ } iconv_cache[4];
};
/* Format of registry blocks. NB. All fields are little endian. */
@@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list);
extern size_t * _hivex_return_offset_list (offset_list *list);
extern void _hivex_print_offset_list (offset_list *list, FILE *fp);
+/* handle.c */
+extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r);
+extern void _hivex_release_iconv (hive_h *h, recode_type r);
+
/* utf16.c */
-extern char * _hivex_recode (const char *input_encoding,
- const char *input, size_t input_len,
- const char *output_encoding, size_t *output_len);
-#define _hivex_windows_utf16_to_utf8(_input, _len) \
- _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL)
-#define _hivex_windows_latin1_to_utf8(_input, _len) \
- _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL)
-extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16);
+extern char * _hivex_recode (hive_h *h, recode_type r,
+ const char *input, size_t input_len, size_t *output_len);
+extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int
*utf16);
extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len);
-extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16);
+extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16);
/* util.c */
extern void _hivex_free_strings (char **argv);
diff --git a/lib/node.c b/lib/node.c
index 36e61c4..21cd127 100644
--- a/lib/node.c
+++ b/lib/node.c
@@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node)
}
size_t flags = le16toh (nk->flags);
if (flags & 0x20) {
- return _hivex_windows_latin1_to_utf8 (nk->name, len);
+ return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL);
} else {
- return _hivex_windows_utf16_to_utf8 (nk->name, len);
+ return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL);
}
}
@@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node)
return 0;
}
- return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20));
+ return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) &
0x20));
}
diff --git a/lib/utf16.c b/lib/utf16.c
index 238f40a..c0f0b05 100644
--- a/lib/utf16.c
+++ b/lib/utf16.c
@@ -30,24 +30,21 @@
#include "hivex-internal.h"
char *
-_hivex_recode (const char *input_encoding, const char *input, size_t input_len,
- const char *output_encoding, size_t *output_len)
+_hivex_recode (hive_h *h, recode_type t,
+ const char *input, size_t input_len, size_t *output_len)
{
- iconv_t ic = iconv_open (output_encoding, input_encoding);
- if (ic == (iconv_t) -1)
- return NULL;
-
/* iconv(3) has an insane interface ... */
size_t outalloc = input_len;
+ iconv_t *ic = _hivex_get_iconv (h, t);
again:;
size_t inlen = input_len;
size_t outlen = outalloc;
char *out = malloc (outlen + 1);
if (out == NULL) {
int err = errno;
- iconv_close (ic);
+ _hivex_release_iconv (h, t);
errno = err;
return NULL;
}
@@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t
input_len,
size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp,
&outlen);
if (r == (size_t) -1) {
+ int err = errno;
if (errno == E2BIG) {
- int err = errno;
/* Reset errno here because we don't want to accidentally
* return E2BIG to a library caller.
*/
- errno = 0;
size_t prev = outalloc;
/* Try again with a larger output buffer. */
free (out);
outalloc *= 2;
if (outalloc < prev) {
- iconv_close (ic);
+ _hivex_release_iconv (h, t);
errno = err;
return NULL;
}
@@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t
input_len,
}
else {
/* Else some conversion failure, eg. EILSEQ, EINVAL. */
- int err = errno;
- iconv_close (ic);
+ _hivex_release_iconv (h, t);
free (out);
errno = err;
return NULL;
}
}
+ _hivex_release_iconv (h, t);
*outp = '\0';
- iconv_close (ic);
if (output_len != NULL)
*output_len = outp - out;
-
return out;
}
@@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t
input_len,
* storing in the hive file, as needed.
*/
char*
-_hivex_encode_string(const char *str, size_t *size, int *utf16)
+_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16)
{
char* outstr;
*utf16 = 0;
- outstr = _hivex_recode ("UTF-8", str, strlen(str),
- "LATIN1", size);
+ outstr = _hivex_recode (h, utf8_to_latin1,
+ str, strlen(str), size);
if (outstr != NULL)
return outstr;
*utf16 = 1;
- outstr = _hivex_recode ("UTF-8", str, strlen(str),
- "UTF-16LE", size);
+ outstr = _hivex_recode (h, utf8_to_utf16le,
+ str, strlen(str), size);
return outstr;
}
@@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len)
}
size_t
-_hivex_utf8_strlen (const char* str, size_t len, int utf16)
+_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16)
{
- const char *encoding = utf16 ? "UTF-16LE" : "LATIN1";
+ recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8;
size_t ret = 0;
- char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret);
+ char *buf = _hivex_recode (h, t, str, len, &ret);
free(buf);
return ret;
}
diff --git a/lib/value.c b/lib/value.c
index 2dfe006..3257b53 100644
--- a/lib/value.c
+++ b/lib/value.c
@@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value)
SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len);
return 0;
}
- return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01));
+ return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) &
0x01));
}
char *
@@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value)
return NULL;
}
if (flags & 0x01) {
- return _hivex_windows_latin1_to_utf8 (vk->name, len);
+ return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL);
} else {
- return _hivex_windows_utf16_to_utf8 (vk->name, len);
+ return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL);
}
}
@@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value)
if (slen < len)
len = slen;
- char *ret = _hivex_windows_utf16_to_utf8 (data, len);
+ char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL);
free (data);
if (ret == NULL)
return NULL;
@@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value)
}
ret = ret2;
- ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen);
+ ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL);
ret[nr_strings] = NULL;
if (ret[nr_strings-1] == NULL) {
_hivex_free_strings (ret);
diff --git a/lib/write.c b/lib/write.c
index 33b64e4..70105c9 100644
--- a/lib/write.c
+++ b/lib/write.c
@@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char
*name)
size_t recoded_name_len;
int use_utf16 = 0;
char *recoded_name =
- _hivex_encode_string (name, &recoded_name_len, &use_utf16);
+ _hivex_encode_string (h, name, &recoded_name_len, &use_utf16);
if (recoded_name == NULL) {
SET_ERRNO (EINVAL, "malformed name");
return 0;
@@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node,
static const char vk_id[2] = { 'v', 'k' };
size_t recoded_name_len;
int use_utf16;
- char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len,
+ char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len,
&use_utf16);
seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len;
size_t vk_offs = allocate_block (h, seg_len, vk_id);
diff --git a/m4/.gitignore b/m4/.gitignore
index 05ca27c..a19035c 100644
--- a/m4/.gitignore
+++ b/m4/.gitignore
@@ -138,3 +138,5 @@
/xalloc.m4
/xsize.m4
/xstrtol.m4
+/thread.m4
+/yield.m4
--
2.11.0