What does this do when there is UTF8 in the hive?
Sent from my iPad
On Sep 17, 2011, at 12:30 AM, Alex Nelson <ajnelson(a)cs.ucsc.edu> wrote:
Some of the data in names and string values were being unsafely
printed,
causing some types of XML processors to fail (e.g. Python's Expat).
This patch checks for printability of each character and outputs base64
with an encoding attribute for unsafe data.
---
xml/hivexml.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/xml/hivexml.c b/xml/hivexml.c
index cf11676..110c8fb 100644
--- a/xml/hivexml.c
+++ b/xml/hivexml.c
@@ -27,6 +27,7 @@
#include <errno.h>
#include <time.h>
#include <locale.h>
+#include <ctype.h>
#ifdef HAVE_LIBINTL_H
#include <libintl.h>
@@ -201,6 +202,52 @@ filetime_to_8601 (int64_t windows_ticks)
return ret;
}
+/* Caller need not free return value afterwards. */
+static char *
+encoding_recommendation (const char *data)
+{
+ /* Note that this function assumes data is null-terminated. */
+ //See if the data are printable
+ int is_printable = 0;
+ size_t i;
+ size_t data_len = strlen(data);
+
+ for (i=0; i < data_len; i++) {
+ is_printable = isprint (data[i]);
+ if (!is_printable) {
+ fprintf (stderr, "encoding_recommendation: Non-printable character found at
data index %zu (c=%i)\n", i, data[i]);
+ break;
+ }
+ }
+
+ return is_printable ? "none" : "base64";
+}
+
+static int
+safe_print_string_attribute (hive_h *h, void *writer_v, const char *attr_name, const
char *attr_encoding, const char *attr_data)
+{
+ int ret = 0;
+ char *encoding_to_use = NULL;
+ if (attr_name && attr_data && attr_encoding) {
+ xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+ encoding_to_use = encoding_recommendation (attr_data);
+
+ if (strcmp (encoding_to_use, "none") == 0)
+ XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_name, BAD_CAST
attr_data));
+ else if (strcmp (encoding_to_use, "base64") == 0) {
+ XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_encoding, BAD_CAST
"base64"));
+ XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST attr_name));
+ XML_CHECK (xmlTextWriterWriteBase64, (writer, BAD_CAST attr_data, 0,
strlen(attr_data)));
+ XML_CHECK (xmlTextWriterEndAttribute, (writer));
+ } else {
+ fprintf (stderr, "safe_print_string_attribute: Unexpected encoding to use
(won't print here).\n");
+ ret = -1;
+ }
+ } else
+ ret = -1;
+ return ret;
+}
+
static int
node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
{
@@ -210,7 +257,10 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char
*name)
xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
- XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "name", BAD_CAST
name));
+
+ ret = safe_print_string_attribute (h, writer_v, "name",
"name_encoding", name);
+ if (ret)
+ fprintf (stderr, "Warning: node_start: safe_print_string_attribute failed, but
we're continuing.\n");
if (node == hivex_root (h)) {
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "root", BAD_CAST
"1"));
@@ -227,7 +277,7 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char
*name)
}
}
- return 0;
+ return ret;
}
static int
@@ -242,13 +292,16 @@ static void
start_value (xmlTextWriterPtr writer,
const char *key, const char *type, const char *encoding)
{
+ int ret = 0;
XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "value"));
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "type", BAD_CAST
type));
if (encoding)
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "encoding",
BAD_CAST encoding));
- if (*key)
- XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "key", BAD_CAST
key));
- else /* default key */
+ if (*key) {
+ ret = safe_print_string_attribute (NULL, writer, "key",
"key_encoding", key);
+ if (ret)
+ fprintf (stderr, "Warning: start_value: safe_print_string_attribute failed,
but we're continuing.\n");
+ } else /* default key */
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "default",
BAD_CAST "1"));
}
@@ -264,6 +317,7 @@ value_string (hive_h *h, void *writer_v, hive_node_h node,
hive_value_h value,
{
xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
const char *type;
+ int ret = 0;
switch (t) {
case hive_t_string: type = "string"; break;
@@ -286,11 +340,9 @@ value_string (hive_h *h, void *writer_v, hive_node_h node,
hive_value_h value,
}
start_value (writer, key, type, NULL);
- XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
- XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
- XML_CHECK (xmlTextWriterEndAttribute, (writer));
+ ret = safe_print_string_attribute (h, writer_v, "value",
"value_encoding", str);
end_value (writer);
- return 0;
+ return ret;
}
static int
@@ -299,17 +351,18 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h
node,
const char *key, char **argv)
{
xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+ int ret = 0;
start_value (writer, key, "string-list", NULL);
size_t i;
for (i = 0; argv[i] != NULL; ++i) {
XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
- XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
+ ret = safe_print_string_attribute (h, writer_v, "value",
"value_encoding", argv[i]);
XML_CHECK (xmlTextWriterEndElement, (writer));
}
end_value (writer);
- return 0;
+ return ret;
}
static int
--
1.7.4.4