Basically, if any character in a string passed to a hivexml.c function isn't
isprint()-able, the whole string is base64-encoded.
Which characters get passed depends on the iconv library: whatever comes out of
iconv() processing in "UTF-8 or UTF-16" mode is what's passed to hivexml. I checked
all the UTF-processing points and have extensive notes in the postscript, but
essentially:
* Key and value names aren't run through iconv.
* Value string data and the hive header name are run through iconv.
* The patch below applies a stricter sanitization than iconv by also running everything
through isprint.
--Alex
Notes:
* The hive's header-embedded name is only checked as UTF-16; it is not passed on to
hivexml (yet) (lib/hivex.c:hivex_open). The name is converted with
lib/hivex.c:windows_utf16_to_utf8, which relies on the iconv library, so ASCII going in
should be ASCII coming out.
* For node and value names, there is no UTF-n check (lib/hivex.c:hivex_node_name,
hivex_value_key), because nobody developing hivex has seen anything besides plain ASCII
in the names yet. I can probably find an example where this isn't the case, but
it'd take some machine-hours. So far, the only non-ASCII character I've seen in one
corpus is a Registered sign, 0xae.
* String values in general are treated like they could be UTF-16. All are passed to
windows_utf16_to_utf8 (lib/hivex.c:hivex_value_string,
lib/hivex.c:hivex_value_multiple_strings).
* If the hivex_value_string or hivex_value_multiple_strings function fails to process the
data, the value_string_invalid_utf16 function supplied by the hivex library's caller is
called (lib/hivex.c:hivex__visit_node), which in hivexml's case just base64-encodes the
output (xml/hivexml.c:value_string_invalid_utf16).
On Sep 17, 2011, at 03:17 , Simson Garfinkel wrote:
What does this do when there is UTF8 in the hive?
Sent from my iPad
On Sep 17, 2011, at 12:30 AM, Alex Nelson <ajnelson(a)cs.ucsc.edu> wrote:
> Some of the data in names and string values were being unsafely printed,
> causing some types of XML processors to fail (e.g. Python's Expat).
> This patch checks for printability of each character and outputs base64
> with an encoding attribute for unsafe data.
> ---
> xml/hivexml.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++--------
> 1 files changed, 64 insertions(+), 11 deletions(-)
>
> diff --git a/xml/hivexml.c b/xml/hivexml.c
> index cf11676..110c8fb 100644
> --- a/xml/hivexml.c
> +++ b/xml/hivexml.c
> @@ -27,6 +27,7 @@
> #include <errno.h>
> #include <time.h>
> #include <locale.h>
> +#include <ctype.h>
>
> #ifdef HAVE_LIBINTL_H
> #include <libintl.h>
> @@ -201,6 +202,52 @@ filetime_to_8601 (int64_t windows_ticks)
> return ret;
> }
>
> +/* Caller need not free return value afterwards. */
> +static char *
> +encoding_recommendation (const char *data)
> +{
> + /* Note that this function assumes data is null-terminated. */
> + //See if the data are printable
> + int is_printable = 0;
> + size_t i;
> + size_t data_len = strlen(data);
> +
> + for (i=0; i < data_len; i++) {
> + is_printable = isprint (data[i]);
> + if (!is_printable) {
> + fprintf (stderr, "encoding_recommendation: Non-printable character found
at data index %zu (c=%i)\n", i, data[i]);
> + break;
> + }
> + }
> +
> + return is_printable ? "none" : "base64";
> +}
> +
> +static int
> +safe_print_string_attribute (hive_h *h, void *writer_v, const char *attr_name, const
char *attr_encoding, const char *attr_data)
> +{
> + int ret = 0;
> + char *encoding_to_use = NULL;
> + if (attr_name && attr_data && attr_encoding) {
> + xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
> + encoding_to_use = encoding_recommendation (attr_data);
> +
> + if (strcmp (encoding_to_use, "none") == 0)
> + XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_name, BAD_CAST
attr_data));
> + else if (strcmp (encoding_to_use, "base64") == 0) {
> + XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_encoding,
BAD_CAST "base64"));
> + XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST attr_name));
> + XML_CHECK (xmlTextWriterWriteBase64, (writer, BAD_CAST attr_data, 0,
strlen(attr_data)));
> + XML_CHECK (xmlTextWriterEndAttribute, (writer));
> + } else {
> + fprintf (stderr, "safe_print_string_attribute: Unexpected encoding to use
(won't print here).\n");
> + ret = -1;
> + }
> + } else
> + ret = -1;
> + return ret;
> +}
> +
> static int
> node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
> {
> @@ -210,7 +257,10 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const
char *name)
>
> xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
> XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
> - XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "name",
BAD_CAST name));
> +
> + ret = safe_print_string_attribute (h, writer_v, "name",
"name_encoding", name);
> + if (ret)
> + fprintf (stderr, "Warning: node_start: safe_print_string_attribute failed,
but we're continuing.\n");
>
> if (node == hivex_root (h)) {
> XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "root",
BAD_CAST "1"));
> @@ -227,7 +277,7 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const
char *name)
> }
> }
>
> - return 0;
> + return ret;
> }
>
> static int
> @@ -242,13 +292,16 @@ static void
> start_value (xmlTextWriterPtr writer,
> const char *key, const char *type, const char *encoding)
> {
> + int ret = 0;
> XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "value"));
> XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "type", BAD_CAST
type));
> if (encoding)
> XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "encoding",
BAD_CAST encoding));
> - if (*key)
> - XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "key",
BAD_CAST key));
> - else /* default key */
> + if (*key) {
> + ret = safe_print_string_attribute (NULL, writer, "key",
"key_encoding", key);
> + if (ret)
> + fprintf (stderr, "Warning: start_value: safe_print_string_attribute
failed, but we're continuing.\n");
> + } else /* default key */
> XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "default",
BAD_CAST "1"));
> }
>
> @@ -264,6 +317,7 @@ value_string (hive_h *h, void *writer_v, hive_node_h node,
hive_value_h value,
> {
> xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
> const char *type;
> + int ret = 0;
>
> switch (t) {
> case hive_t_string: type = "string"; break;
> @@ -286,11 +340,9 @@ value_string (hive_h *h, void *writer_v, hive_node_h node,
hive_value_h value,
> }
>
> start_value (writer, key, type, NULL);
> - XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
> - XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
> - XML_CHECK (xmlTextWriterEndAttribute, (writer));
> + ret = safe_print_string_attribute (h, writer_v, "value",
"value_encoding", str);
> end_value (writer);
> - return 0;
> + return ret;
> }
>
> static int
> @@ -299,17 +351,18 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h
node,
> const char *key, char **argv)
> {
> xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
> + int ret = 0;
> start_value (writer, key, "string-list", NULL);
>
> size_t i;
> for (i = 0; argv[i] != NULL; ++i) {
> XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
> - XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
> + ret = safe_print_string_attribute (h, writer_v, "value",
"value_encoding", argv[i]);
> XML_CHECK (xmlTextWriterEndElement, (writer));
> }
>
> end_value (writer);
> - return 0;
> + return ret;
> }
>
> static int
> --
> 1.7.4.4
>
>