On Fri, Sep 16, 2011 at 09:30:01PM -0700, Alex Nelson wrote:
Some of the data in names and string values were being unsafely
printed,
causing some types of XML processors to fail (e.g. Python's Expat).
This patch checks for printability of each character and outputs base64
with an encoding attribute for unsafe data.
---
xml/hivexml.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/xml/hivexml.c b/xml/hivexml.c
index cf11676..110c8fb 100644
--- a/xml/hivexml.c
+++ b/xml/hivexml.c
@@ -27,6 +27,7 @@
#include <errno.h>
#include <time.h>
#include <locale.h>
+#include <ctype.h>
#ifdef HAVE_LIBINTL_H
#include <libintl.h>
@@ -201,6 +202,52 @@ filetime_to_8601 (int64_t windows_ticks)
return ret;
}
+/* Caller need not free return value afterwards. */
+static char *
+encoding_recommendation (const char *data)
+{
+ /* Note that this function assumes data is null-terminated. */
+ //See if the data are printable
+ int is_printable = 0;
+ size_t i;
+ size_t data_len = strlen(data);
+
+ for (i=0; i < data_len; i++) {
+ is_printable = isprint (data[i]);
isprint is a bit of a minefield because the output changes depending
on the user's locale. You might want to use c_isprint (from gnulib)
instead, since it always checks printability against the C locale.
BTW I'm told that there are more "unprintable" characters in XML
than just !isprint. An XML parser may reject any codepoint not in:
Char ::= I#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
+ if (!is_printable) {
+ fprintf (stderr, "encoding_recommendation: Non-printable character found at
data index %zu (c=%i)\n", i, data[i]);
+ break;
+ }
+ }
+
+ return is_printable ? "none" : "base64";
enum perhaps? You don't actually use the "base64" string later, and
enum makes more sense in a C program.
+}
+
+static int
+safe_print_string_attribute (hive_h *h, void *writer_v, const char *attr_name, const
char *attr_encoding, const char *attr_data)
+{
+ int ret = 0;
+ char *encoding_to_use = NULL;
+ if (attr_name && attr_data && attr_encoding) {
+ xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+ encoding_to_use = encoding_recommendation (attr_data);
+
+ if (strcmp (encoding_to_use, "none") == 0)
+ XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_name, BAD_CAST
attr_data));
+ else if (strcmp (encoding_to_use, "base64") == 0) {
+ XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_encoding, BAD_CAST
"base64"));
Instead of key_encoding, name_encoding etc. what's wrong with just
<name encoding="base64">? It seems strange to make the encoding
attribute name depend on the element name.
+ XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST
attr_name));
+ XML_CHECK (xmlTextWriterWriteBase64, (writer, BAD_CAST attr_data, 0,
strlen(attr_data)));
+ XML_CHECK (xmlTextWriterEndAttribute, (writer));
+ } else {
+ fprintf (stderr, "safe_print_string_attribute: Unexpected encoding to use
(won't print here).\n");
+ ret = -1;
Is there some place where we expect this to fail? How about making it
always work, or making it exit on failure?
+ }
+ } else
+ ret = -1;
+ return ret;
+}
+
static int
node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
{
@@ -210,7 +257,10 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char
*name)
xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
- XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "name", BAD_CAST
name));
+
+ ret = safe_print_string_attribute (h, writer_v, "name",
"name_encoding", name);
+ if (ret)
+ fprintf (stderr, "Warning: node_start: safe_print_string_attribute failed, but
we're continuing.\n");
if (node == hivex_root (h)) {
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "root", BAD_CAST
"1"));
@@ -227,7 +277,7 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char
*name)
}
}
- return 0;
+ return ret;
}
static int
@@ -242,13 +292,16 @@ static void
start_value (xmlTextWriterPtr writer,
const char *key, const char *type, const char *encoding)
{
+ int ret = 0;
XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "value"));
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "type", BAD_CAST
type));
if (encoding)
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "encoding",
BAD_CAST encoding));
- if (*key)
- XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "key", BAD_CAST
key));
- else /* default key */
+ if (*key) {
+ ret = safe_print_string_attribute (NULL, writer, "key",
"key_encoding", key);
+ if (ret)
+ fprintf (stderr, "Warning: start_value: safe_print_string_attribute failed,
but we're continuing.\n");
+ } else /* default key */
XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "default",
BAD_CAST "1"));
}
@@ -264,6 +317,7 @@ value_string (hive_h *h, void *writer_v, hive_node_h node,
hive_value_h value,
{
xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
const char *type;
+ int ret = 0;
switch (t) {
case hive_t_string: type = "string"; break;
@@ -286,11 +340,9 @@ value_string (hive_h *h, void *writer_v, hive_node_h node,
hive_value_h value,
}
start_value (writer, key, type, NULL);
- XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
- XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
- XML_CHECK (xmlTextWriterEndAttribute, (writer));
+ ret = safe_print_string_attribute (h, writer_v, "value",
"value_encoding", str);
end_value (writer);
- return 0;
+ return ret;
}
static int
@@ -299,17 +351,18 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h
node,
const char *key, char **argv)
{
xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+ int ret = 0;
start_value (writer, key, "string-list", NULL);
size_t i;
for (i = 0; argv[i] != NULL; ++i) {
XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
- XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
+ ret = safe_print_string_attribute (h, writer_v, "value",
"value_encoding", argv[i]);
XML_CHECK (xmlTextWriterEndElement, (writer));
}
end_value (writer);
- return 0;
+ return ret;
}
Rich.
--
Richard Jones, Virtualization Group, Red Hat
http://people.redhat.com/~rjones
Read my programming blog:
http://rwmj.wordpress.com
Fedora now supports 80 OCaml packages (the OPEN alternative to F#)
http://cocan.org/getting_started_with_ocaml_on_red_hat_and_fedora