>From e16d1cc51c234eb9f3e7db3b8f77f76198ffee73 Mon Sep 17 00:00:00 2001 From: Richard W.M. Jones Date: Tue, 17 May 2011 11:16:10 +0100 Subject: [PATCH] hivexregedit: Add --unsafe-printable-strings option. --- perl/lib/Win/Hivex/Regedit.pm | 40 ++++++++++++++++++++++++++------- regedit/hivexregedit | 48 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/perl/lib/Win/Hivex/Regedit.pm b/perl/lib/Win/Hivex/Regedit.pm index c69fda8..9b6a5d5 100644 --- a/perl/lib/Win/Hivex/Regedit.pm +++ b/perl/lib/Win/Hivex/Regedit.pm @@ -1,5 +1,5 @@ # Win::Hivex::Regedit -# Copyright (C) 2009-2010 Red Hat Inc. +# Copyright (C) 2009-2011 Red Hat Inc. # Derived from code by Petter Nordahl-Hagen under a compatible license: # Copyright (c) 1997-2007 Petter Nordahl-Hagen. # Derived from code by Markus Stephany under a compatible license: @@ -68,7 +68,7 @@ use strict; use warnings; use Carp qw(croak confess); -use Encode qw(encode); +use Encode qw(encode decode); require Exporter; @@ -433,7 +433,9 @@ sub _parse_error =head2 reg_export - reg_export ($h, $key, $fh, [prefix => $prefix]); + reg_export ($h, $key, $fh, + [prefix => $prefix], + [unsafe_printable_strings => 1]); This function exports the registry keys starting at the root C<$key> and recursively downwards into the file handle C<$fh>. @@ -451,11 +453,22 @@ C, would be written as: "Key 1"=... "Key 2"=... -The output is written as pure 7 bit ASCII, with line endings which are -the default for the local host. You may need to convert the file's -encoding using L<iconv(1)> and line endings using L<unix2dos(1)> if -sending to a Windows user. Strings are always encoded as hex bytes. -See L</ENCODING STRINGS> below. +If C<unsafe_printable_strings> is not given or is false, then the +output is written as pure 7 bit ASCII, with line endings which are the +default for the local host. Strings are always encoded as hex bytes. +This is safe because it preserves the original content and encoding of +strings. See L</ENCODING STRINGS> below. 
+ +If C<unsafe_printable_strings> is true, then strings are assumed to be +UTF-16LE and are converted to UTF-8 for output. The final zero +codepoint in the string is removed if there is one. This is unsafe +because it does not preserve the fidelity of the strings in the +Registry and because the content type of strings is not always +UTF-16LE. However it is useful if you just want to display strings +for quick hacking and debugging. + +You may need to convert the file's encoding using L<iconv(1)> and line +endings using L<unix2dos(1)> if sending to a Windows user. Nodes and keys are sorted alphabetically in the output. @@ -514,6 +527,8 @@ sub reg_export_node print $fh $path; print $fh "]\n"; + my $unsafe_printable_strings = $params{unsafe_printable_strings}; + # Get the values. my @values = $h->node_values ($node); @@ -542,6 +557,13 @@ sub reg_export_node if ($type eq 4 && length ($data) == 4) { # only handle dword specially my $dword = unpack ("V", $data); printf $fh "dword:%08x\n", $dword + } elsif ($unsafe_printable_strings && ($type eq 1 || $type eq 2)) { + # Guess that the encoding is UTF-16LE. Convert it to UTF-8 + # for printing. + $data = decode ("utf16le", $data); + $data =~ s/\x{0}$//; # remove final zero codepoint + $data =~ s/"/\\"/g; # XXX more quoting needed? + printf $fh "str(%x):\"%s\"\n", $type, $data; } else { # Encode everything else as hex, see encoding section below. printf $fh "hex(%x):", $type; @@ -643,7 +665,7 @@ read back in. =head1 COPYRIGHT -Copyright (C) 2010 Red Hat Inc. +Copyright (C) 2010-2011 Red Hat Inc. =head1 LICENSE diff --git a/regedit/hivexregedit b/regedit/hivexregedit index 966f725..b2e84de 100755 --- a/regedit/hivexregedit +++ b/regedit/hivexregedit @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2010 Red Hat Inc. +# Copyright (C) 2010-2011 Red Hat Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -207,6 +207,47 @@ L. 
The default is to use UTF-16LE, which should work with recent versions of Windows. +=cut + +my $unsafe_printable_strings; + +=item B<--unsafe-printable-strings> + +When exporting (only), assume strings are UTF-16LE and print them as +strings instead of hex sequences. Remove the final zero codepoint +from strings if present. + +This is unsafe and does not preserve the fidelity of strings in the +original hive for various reasons: + +=over 4 + +=item * + +Assumes the original encoding is UTF-16LE. ASCII strings and strings +in other encodings will be corrupted by this transformation. + +=item * + +Assumes that everything which has type 1 or 2 is really a string +and that everything else is not a string, but the type field in +real hives is not reliable. + +=item * + +Loses information about whether a zero codepoint followed the string +in the hive or not. + +=back + +This all happens because the hive itself contains no information about +how strings are encoded (see +L<Win::Hivex::Regedit(3)/ENCODING STRINGS>). + +You should only use this option for quick hacking and debugging of the +hive contents, and I<never> use it if the output is going to be passed +into another program or stored in another hive. + =back =cut @@ -217,6 +258,7 @@ GetOptions ("help|?" => \$help, "export" => \$export, "prefix=s" => \$prefix, "encoding=s" => \$encoding, + "unsafe-printable-strings" => \$unsafe_printable_strings, ) or pod2usage (2); pod2usage (1) if $help; @@ -274,7 +316,9 @@ if ($merge) { # --merge (reg_import) print "Windows Registry Editor Version 5.00\n\n"; - reg_export ($h, $key, \*STDOUT, prefix => $prefix); + reg_export ($h, $key, \*STDOUT, + prefix => $prefix, + unsafe_printable_strings => $unsafe_printable_strings); } =head1 SEE ALSO -- 1.7.5