Decode registry node and value names to UTF-8 instead of assuming ASCII.

A flag bit selects how a name is stored: 0x20 in nk->flags and 0x01 in
vk->flags mean latin1, otherwise the name is UTF-16LE.  Both encodings
are converted with iconv through a shared helper.

Note: the leading whitespace in this patch was reconstructed, so apply
it with a whitespace-tolerant option (e.g. "patch -l").

diff --git a/lib/hivex.c b/lib/hivex.c
--- a/lib/hivex.c
+++ b/lib/hivex.c
@@ -62,6 +62,7 @@
 #define HIVEX_MAX_ALLOCATION 1000000
 
 static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len);
+static char *windows_latin1_to_utf8 (/* const */ char *input, size_t len);
 static size_t utf16_string_len_in_bytes (const char *str);
 static size_t utf16_string_len_in_bytes_max (const char *str, size_t len);
 
@@ -177,7 +178,8 @@ block_len (hive_h *h, size_t blkoff, int *used)
 struct ntreg_nk_record {
   int32_t seg_len;              /* length (always -ve because used) */
   char id[2];                   /* "nk" */
-  uint16_t flags;
+  uint16_t flags;               /* bit 5 (0x20) set: name is latin1
+                                   bit 5 (0x20) clr: name is UTF-16LE */
   char timestamp[8];
   uint32_t unknown1;
   uint32_t parent;              /* offset of owner/parent */
@@ -571,11 +573,6 @@ hivex_node_name (hive_h *h, hive_node_h node)
 
   struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
 
-  /* AFAIK the node name is always plain ASCII, so no conversion
-   * to UTF-8 is necessary. However we do need to nul-terminate
-   * the string.
-   */
-
   /* nk->name_len is unsigned, 16 bit, so this is safe ... However
    * we have to make sure the length doesn't exceed the block length.
    */
@@ -589,11 +586,13 @@ hivex_node_name (hive_h *h, hive_node_h node)
     return NULL;
   }
 
-  char *ret = malloc (len + 1);
-  if (ret == NULL)
-    return NULL;
-  memcpy (ret, nk->name, len);
-  ret[len] = '\0';
+  /* The helpers return a malloc'd UTF-8 string, or NULL with errno set. */
+  char *ret;
+  if (le16toh (nk->flags) & 0x20) {
+    ret = windows_latin1_to_utf8 (nk->name, len);
+  } else {
+    ret = windows_utf16_to_utf8 (nk->name, len);
+  }
   return ret;
 }
 
@@ -1120,10 +1119,6 @@ hivex_value_key (hive_h *h, hive_value_h value)
 
   struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
 
-  /* AFAIK the key is always plain ASCII, so no conversion to UTF-8 is
-   * necessary. However we do need to nul-terminate the string.
-   */
-
   /* vk->name_len is unsigned, 16 bit, so this is safe ... However
    * we have to make sure the length doesn't exceed the block length.
    */
@@ -1137,11 +1132,13 @@ hivex_value_key (hive_h *h, hive_value_h value)
     return NULL;
   }
 
-  char *ret = malloc (len + 1);
-  if (ret == NULL)
-    return NULL;
-  memcpy (ret, vk->name, len);
-  ret[len] = '\0';
+  /* The helpers return a malloc'd UTF-8 string, or NULL with errno set. */
+  char *ret;
+  if (le16toh (vk->flags) & 0x01) {
+    ret = windows_latin1_to_utf8 (vk->name, len);
+  } else {
+    ret = windows_utf16_to_utf8 (vk->name, len);
+  }
   return ret;
 }
 
@@ -1250,57 +1247,91 @@ hivex_value_value (hive_h *h, hive_value_h value,
 }
 
+/* Convert 'len' bytes at 'input' to UTF-8 using the open conversion
+ * descriptor 'ic'.  Returns a malloc'd, nul-terminated string, or NULL
+ * with errno set.  'ic' is left open for the caller to close.
+ */
 static char *
-windows_utf16_to_utf8 (/* const */ char *input, size_t len)
-{
-  iconv_t ic = iconv_open ("UTF-8", "UTF-16");
-  if (ic == (iconv_t) -1)
-    return NULL;
-
-  /* iconv(3) has an insane interface ... */
-
-  /* Mostly UTF-8 will be smaller, so this is a good initial guess. */
-  size_t outalloc = len;
-
- again:;
-  size_t inlen = len;
-  size_t outlen = outalloc;
-  char *out = malloc (outlen + 1);
-  if (out == NULL) {
-    int err = errno;
-    iconv_close (ic);
-    errno = err;
-    return NULL;
-  }
-  char *inp = input;
-  char *outp = out;
-
-  size_t r = iconv (ic, &inp, &inlen, &outp, &outlen);
-  if (r == (size_t) -1) {
-    if (errno == E2BIG) {
-      int err = errno;
-      size_t prev = outalloc;
-      /* Try again with a larger output buffer. */
-      free (out);
-      outalloc *= 2;
-      if (outalloc < prev) {
-        iconv_close (ic);
-        errno = err;
-        return NULL;
-      }
-      goto again;
-    }
-    else {
-      /* Else some conversion failure, eg. EILSEQ, EINVAL. */
-      int err = errno;
-      iconv_close (ic);
-      free (out);
-      errno = err;
-      return NULL;
-    }
-  }
-
-  *outp = '\0';
-  iconv_close (ic);
+iconv_wrapper (iconv_t ic, char *input, size_t len)
+{
+  /* iconv(3) cannot report the output size in advance, so start with a
+   * guess and double the buffer each time it reports E2BIG.
+   */
+  size_t outalloc = len;
+
+  for (;;) {
+    size_t inlen = len;
+    size_t outlen = outalloc;
+    char *out = malloc (outlen + 1);
+    if (out == NULL)
+      return NULL;
+    char *inp = input;
+    char *outp = out;
+
+    size_t r = iconv (ic, &inp, &inlen, &outp, &outlen);
+    if (r != (size_t) -1) {
+      *outp = '\0';
+      return out;
+    }
+
+    /* Save errno before free, which may clobber it. */
+    int err = errno;
+    free (out);
+    if (err != E2BIG) {
+      /* Some conversion failure, eg. EILSEQ, EINVAL. */
+      errno = err;
+      return NULL;
+    }
+
+    /* Try again with a larger output buffer. */
+    size_t prev = outalloc;
+    outalloc *= 2;
+    if (outalloc <= prev) {
+      /* Size overflowed, or was zero and cannot grow. */
+      errno = err;
+      return NULL;
+    }
+  }
+}
+
+/* Convert a latin1 (ISO-8859-1) string of 'len' bytes to UTF-8.  Returns
+ * a malloc'd, nul-terminated string, or NULL with errno set.
+ */
+static char *
+windows_latin1_to_utf8 (/* const */ char *input, size_t len)
+{
+  iconv_t ic = iconv_open ("UTF-8", "ISO-8859-1");
+  if (ic == (iconv_t) -1)
+    return NULL;
+
+  char *out = iconv_wrapper (ic, input, len);
+
+  /* Keep the conversion's errno: iconv_close may overwrite it. */
+  int err = errno;
+  iconv_close (ic);
+  errno = err;
+
+  return out;
+}
+
+/* Convert a UTF-16LE string of 'len' bytes to UTF-8.  Returns a malloc'd,
+ * nul-terminated string, or NULL with errno set.
+ */
+static char *
+windows_utf16_to_utf8 (/* const */ char *input, size_t len)
+{
+  /* Name the byte order explicitly; with plain "UTF-16" iconv has to
+   * guess it when the input carries no byte order mark.
+   */
+  iconv_t ic = iconv_open ("UTF-8", "UTF-16LE");
+  if (ic == (iconv_t) -1)
+    return NULL;
+
+  char *out = iconv_wrapper (ic, input, len);
+
+  /* Keep the conversion's errno: iconv_close may overwrite it. */
+  int err = errno;
+  iconv_close (ic);
+  errno = err;
 
   return out;
 }