Re: [Libguestfs] [PATCH 9/12] hivex: Begin implementation of writing to hives.

Thursday, 4 February 2010

On 03/02/10 18:34, Richard W.M. Jones wrote:
...
 Subject: [PATCH 09/12] hivex: Begin implementation of writing to
hives.

 This implements hivex_node_set_values which is used to
 delete the (key, value) pairs at a node and optionally
 replace them with a new set.

 This also implements hivex_commit which is used to commit
 changes to hives back to disk.
 ---
  hivex/hivex.c   |  384 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
  hivex/hivex.h   |   12 ++
  hivex/hivex.pod |  125 ++++++++++++++++++
  3 files changed, 521 insertions(+), 0 deletions(-)

This looks generally ok. However, it is *extremely* dense. I've
indicated a few places where I feel there are insufficient comments, or
magic numbers making the code less clear than it could be.

...
 +/* Allocate an hbin (page), extending the malloc'd space if
necessary,
 + * and updating the hive handle fields (but NOT the hive disk header
 + * -- the hive disk header is updated when we commit).  This function
 + * also extends the bitmap if necessary.
 + *
 + * 'allocation_hint' is the size of the block allocation we would like
 + * to make.  Normally registry blocks are very small (avg 50 bytes)
 + * and are contained in standard-sized pages (4KB), but the registry
 + * can support blocks which are larger than a standard page, in which
 + * case it creates a page of 8KB, 12KB etc.
 + *
 + * Returns:
 + * > 0 : offset of first usable byte of new page (after page header)
 + * 0   : error (errno set)
 + */
 +static size_t
 +allocate_page (hive_h *h, size_t allocation_hint)
 +{ 
Every usage of nr_4k_pages in this function multiplies it by 4096. It
would be more readable to choose a different name which reflects that
it's simply a rounding.

...
 +  /* In almost all cases this will be 1. */
 +  size_t nr_4k_pages =
 +    1 + (allocation_hint + sizeof (struct ntreg_hbin_page) - 1) / 4096;
 +  assert (nr_4k_pages >= 1); 
I think this assert is impossible to trigger.

...
 +  /* 'extend' is the number of bytes to extend the file by. 
Note that
 +   * hives found in the wild often contain slack between 'endpages'
 +   * and the actual end of the file, so we don't always need to make
 +   * the file larger.
 +   */
 +  ssize_t extend = h->endpages + nr_4k_pages * 4096 - h->size;
 +
 +  if (h->msglvl >= 2) {
 +    fprintf (stderr, "allocate_page: current endpages = 0x%zx, current size =
0x%zx\n",
 +             h->endpages, h->size);
 +    fprintf (stderr, "allocate_page: extending file by %zd bytes (<= 0 if no
extension)\n",
 +             extend);
 +  }
 +
 +  if (extend > 0) {
 +    size_t oldsize = h->size;
 +    size_t newsize = h->size + extend; 
In general, are we worrying about overflow?

...
 +    char *newaddr = realloc (h->addr, newsize);
 +    if (newaddr == NULL)
 +      return 0;
 +
 +    size_t oldbitmapsize = 1 + oldsize / 32;
 +    size_t newbitmapsize = 1 + newsize / 32;
 +    char *newbitmap = realloc (h->bitmap, newbitmapsize);
 +    if (newbitmap == NULL) {
 +      free (newaddr);
 +      return 0;
 +    }
 +
 +    h->addr = newaddr;
 +    h->size = newsize;
 +    h->bitmap = newbitmap;
 +
 +    memset (h->addr + oldsize, 0, newsize - oldsize);
 +    memset (h->bitmap + oldbitmapsize, 0, newbitmapsize - oldbitmapsize);
 +  }
 +
 +  size_t offset = h->endpages;
 +  h->endpages += nr_4k_pages * 4096;
 +
 +  if (h->msglvl >= 2)
 +    fprintf (stderr, "allocate_page: new endpages = 0x%zx, new size =
0x%zx\n",
 +             h->endpages, h->size);
 +
 +  /* Write the hbin header. */
 +  struct ntreg_hbin_page *page =
 +    (struct ntreg_hbin_page *) (h->addr + offset);
 +  page->magic[0] = 'h';
 +  page->magic[1] = 'b';
 +  page->magic[2] = 'i';
 +  page->magic[3] = 'n';
 +  page->offset_first = htole32 (offset - 0x1000); 
What's 0x1000? Can you comment this?

...
 +  page->page_size = htole32 (nr_4k_pages * 4096);
 +  memset (page->unknown, 0, sizeof (page->unknown));
 +
 +  if (h->msglvl >= 2)
 +    fprintf (stderr, "allocate_page: new page at 0x%zx\n", offset);
 +
 +  /* Offset of first usable byte after the header. */
 +  return offset + sizeof (struct ntreg_hbin_page);
 +}
 +
 +/* Allocate a single block, first allocating an hbin (page) at the end
 + * of the current file if necessary.  NB. To keep the implementation
 + * simple and more likely to be correct, we do not reuse existing free
 + * blocks.
 + *
 + * seg_len is the size of the block (this INCLUDES the block header).
 + * The header of the block is initialized to -seg_len (negative to
 + * indicate used).  id[2] is the block ID (type), eg. "nk" for nk-
 + * record.  The block bitmap is updated to show this block as valid.
 + * The rest of the contents of the block will be zero.
 + *
 + * Returns:
 + * > 0 : offset of new block
 + * 0   : error (errno set)
 + */
 +static size_t
 +allocate_block (hive_h *h, size_t seg_len, const char id[2])
 +{
 +  if (!h->writable) {
 +    errno = EROFS;
 +    return 0;
 +  }
 +
 +  if (seg_len < 4) { 
Can you replace this '4' with a sizeof(ntreg_hbin_block.seg_len)?

...
 +    /* The caller probably forgot to include the header.  Note that
 +     * value lists have no ID field, so seg_len == 4 would be possible
 +     * for them, albeit unusual.
 +     */
 +    if (h->msglvl >= 2)
 +      fprintf (stderr, "allocate_block: refusing too small allocation (%zu),
returning ERANGE\n",
 +               seg_len);
 +    errno = ERANGE;
 +    return 0;
 +  }
 +
 +  /* Refuse really large allocations. */
 +  if (seg_len > 1000000) { 
Can you replace 1000000 with a #define?

...
 +    if (h->msglvl >= 2)
 +      fprintf (stderr, "allocate_block: refusing large allocation (%zu), returning
ERANGE\n",
 +               seg_len);
 +    errno = ERANGE;
 +    return 0;
 +  }
 +
 +  /* Round up allocation to multiple of 8 bytes.  All blocks must be
 +   * on an 8 byte boundary.
 +   */
 +  seg_len = (seg_len + 7) & ~7;
 +
 +  /* Allocate a new page if necessary. */
 +  if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
 +    size_t newendblocks = allocate_page (h, seg_len);
 +    if (newendblocks == 0)
 +      return 0;
 +    h->endblocks = newendblocks;
 +  }
 +
 +  size_t offset = h->endblocks;
 +
 +  if (h->msglvl >= 2)
 +    fprintf (stderr, "allocate_block: new block at 0x%zx, size %zu\n",
 +             offset, seg_len);
 +
 +  struct ntreg_hbin_block *blockhdr =
 +    (struct ntreg_hbin_block *) (h->addr + offset);
 +
 +  blockhdr->seg_len = htole32 (- (int32_t) seg_len);
 +  if (id[0] && id[1] && seg_len >= 6) { 
Can you replace 6 with sizeof(ntreg_hbin_block)?

...
 +    blockhdr->id[0] = id[0];
 +    blockhdr->id[1] = id[1];
 +  }
 +
 +  h->endblocks += seg_len;
 +
 +  /* If there is space after the last block in the last page, then we
 +   * have to put a dummy free block header here to mark the rest of
 +   * the page as free.
 +   */
 +  ssize_t rem = h->endpages - h->endblocks;
 +  if (rem > 0) {
 +    if (h->msglvl >= 2)
 +      fprintf (stderr, "allocate_block: marking remainder of page free starting at
0x%zx, size %zd\n",
 +               h->endblocks, rem);
 +
 +    assert (rem >= 4); 
This assertion doesn't look safe. Why must this be the case? In the
event that rem < 4, would you allocate a new page for the dummy free block?

...
 +    blockhdr = (struct ntreg_hbin_block *) (h->addr +
h->endblocks);
 +    blockhdr->seg_len = htole32 ((int32_t) rem); 
Will blockhdr->id ever be read here? Is 0x00 valid?
...
 +  }
 +
 +  return offset;
 +}
 + 
...
 +
 +/* Delete all existing values at this node. */
 +static int
 +delete_values (hive_h *h, hive_node_h node)
 +{
 +  assert (h->writable);
 +
 +  hive_value_h *values;
 +  size_t *blocks;
 +  if (get_values (h, node, &values, &blocks) == -1)
 +    return -1;
 +
 +  size_t i;
 +  for (i = 0; blocks[i] != 0; ++i)
 +    mark_block_unused (h, blocks[i]);
 +
 +  free (blocks);
 +
 +  for (i = 0; values[i] != 0; ++i) {
 +    struct ntreg_vk_record *vk =
 +      (struct ntreg_vk_record *) (h->addr + values[i]);
 +
 +    size_t len;
 +    len = le32toh (vk->data_len);
 +    if (len == 0x80000000)      /* special case */
 +      len = 4;
 +    len &= 0x7fffffff; 
Comment, please. What's going on with this record length?

...
 +
 +    if (len > 4) {              /* non-inline, so remove data block */
 +      size_t data_offset = le32toh (vk->data_offset);
 +      data_offset += 0x1000;
 +      mark_block_unused (h, data_offset); 
More magic numbers :) Why is this necessary?

...
 +    }
 +
 +    /* remove vk record */
 +    mark_block_unused (h, values[i]);
 +  }
 +
 +  free (values);
 +
 +  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
 +  nk->nr_values = htole32 (0);
 +  nk->vallist = htole32 (0xffffffff);
 +
 +  return 0;
 +}
 +
 +int
 +hivex_commit (hive_h *h, const char *filename, int flags)
 +{
 +  if (flags != 0) {
 +    errno = EINVAL;
 +    return -1;
 +  } 
What are you intending to use the flags for?

...
 +  if (!h->writable) {
 +    errno = EROFS;
 +    return -1;
 +  }
 +
 +  filename = filename ? : h->filename;
 +  int fd = open (filename, O_WRONLY|O_CREAT|O_TRUNC|O_NOCTTY, 0666);
 +  if (fd == -1)
 +    return -1;
 +
 +  /* Update the header fields. */
 +  uint32_t sequence = le32toh (h->hdr->sequence1);
 +  sequence++;
 +  h->hdr->sequence1 = htole32 (sequence);
 +  h->hdr->sequence2 = htole32 (sequence);
 +  /* XXX Ought to update h->hdr->last_modified. */
 +  h->hdr->blocks = htole32 (h->endpages - 0x1000);
 +
 +  /* Recompute header checksum. */
 +  uint32_t sum = header_checksum (h);
 +  h->hdr->csum = htole32 (sum);
 +
 +  if (h->msglvl >= 2)
 +    fprintf (stderr, "hivex_commit: new header checksum: 0x%x\n", sum);
 +
 +  if (full_write (fd, h->addr, h->size) != h->size) {
 +    int err = errno;
 +    close (fd);
 +    errno = err;
 +    return -1;
 +  }
 +
 +  if (close (fd) == -1)
 +    return -1;
 +
 +  return 0;
 +}
 +
 +int
 +hivex_node_set_values (hive_h *h, hive_node_h node,
 +                       size_t nr_values, const hive_set_value *values,
 +                       int flags)
 +{
 +  if (!h->writable) {
 +    errno = EROFS;
 +    return -1;
 +  }
 +
 +  if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
 +    errno = EINVAL;
 +    return -1; 
This would be an internal inconsistency error, surely. If so, you
probably want an assert here instead.

...
 +  }
 +
 +  /* Delete all existing values. */
 +  if (delete_values (h, node) == -1)
 +    return -1;
 +
 +  if (nr_values == 0)
 +    return 0;
 +
 +  /* Allocate value list node.  Value lists have no id field. */
 +  static const char nul_id[2] = { 0, 0 };
 +  size_t seg_len =
 +    sizeof (struct ntreg_value_list) + (nr_values - 1) * sizeof (uint32_t); 
Why are you subtracting 1 from nr_values above?

...
 +  size_t vallist_offs = allocate_block (h, seg_len, nul_id);
 +  if (vallist_offs == 0)
 +    return -1;
 +
 +  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
 +  nk->nr_values = htole32 (nr_values);
 +  nk->vallist = htole32 (vallist_offs - 0x1000); 
Whatever the purpose of adding or subtracting 0x1000 to offsets is, it
could probably do with a descriptively-named macro, which also does
htole32 or le32toh as appropriate.

...
 +  struct ntreg_value_list *vallist =
 +    (struct ntreg_value_list *) (h->addr + vallist_offs);
 +
 +  size_t i;
 +  for (i = 0; i < nr_values; ++i) {
 +    /* Allocate vk record to store this (key, value) pair. */
 +    static const char vk_id[2] = { 'v', 'k' };
 +    seg_len = sizeof (struct ntreg_vk_record) + strlen (values[i].key);
 +    size_t vk_offs = allocate_block (h, seg_len, vk_id);
 +    if (vk_offs == 0)
 +      return -1;
 +
 +    vallist->offset[i] = htole32 (vk_offs - 0x1000);
 +
 +    struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + vk_offs);
 +    size_t name_len = strlen (values[i].key); 
If you did this further up you could re-use the result of strlen.

...
 +    vk->name_len = htole16 (name_len);
 +    strcpy (vk->name, values[i].key);
 +    vk->data_type = htole32 (values[i].t);
 +    vk->data_len = htole16 (values[i].len);
 +    vk->flags = name_len == 0 ? 0 : 1;
 +
 +    if (values[i].len <= 4)     /* Store data inline. */ 
sizeof.

...
 +      memcpy (&vk->data_offset, values[i].value,
values[i].len);
 +    else {
 +      size_t offs = allocate_block (h, values[i].len + 4, nul_id); 
sizeof.

...
 +      if (offs == 0)
 +        return -1;
 +      memcpy (h->addr + offs + 4, values[i].value, values[i].len); 
sizeof, or the address of a struct member.

...
 +      vk->data_offset = htole32 (offs - 0x1000);
 +    }
 +
 +    if (name_len * 2 > le32toh (nk->max_vk_name_len))
 +      nk->max_vk_name_len = htole32 (name_len * 2); 
Why is name_len multiplied by 2? Comment, please.

...
 +    if (values[i].len > le32toh (nk->max_vk_data_len))
 +      nk->max_vk_data_len = htole32 (values[i].len);
 +  }
 +
 +  return 0;
 +}
 diff --git a/hivex/hivex.h b/hivex/hivex.h
 index 56718b4..6a3cb3a 100644
 --- a/hivex/hivex.h
 +++ b/hivex/hivex.h
 @@ -110,6 +110,18 @@ struct hivex_visitor {
  extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void
*opaque, int flags);
  extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor
*visitor, size_t len, void *opaque, int flags);

 +extern int hivex_commit (hive_h *h, const char *filename, int flags);
 +
 +struct hive_set_value {
 +  char *key;
 +  hive_type t;
 +  size_t len;
 +  char *value;
 +};
 +typedef struct hive_set_value hive_set_value;
 +
 +extern int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const
hive_set_value *values, int flags);
 +
  #ifdef __cplusplus
  }
  #endif
 diff --git a/hivex/hivex.pod b/hivex/hivex.pod
 index 5a58144..5df75aa 100644
 --- a/hivex/hivex.pod
 +++ b/hivex/hivex.pod
 @@ -326,6 +326,127 @@ starts at C<node>.

  =back

 +=head2 WRITING TO HIVE FILES
 +
 +The hivex library supports making limited modifications to hive files.
 +We have tried to implement this very conservatively in order to reduce
 +the chance of corrupting your registry.  However you should be careful
 +and take back-ups, since Microsoft has never documented the hive
 +format, and so it is possible there are nuances in the
 +reverse-engineered format that we do not understand.
 +
 +To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE>
 +flag to C<hivex_open>, otherwise any write operation will return with
 +errno C<EROFS>.
 +
 +The write operations shown below do not modify the on-disk file
 +immediately.  You must call C<hivex_commit> in order to write the
 +changes to disk.  If you call C<hivex_close> without committing then
 +any writes are discarded.
 +
 +Hive files internally consist of a "memory dump" of binary blocks
 +(like the C heap), and some of these blocks can be unused.  The hivex
 +library never reuses these unused blocks.  Instead, to ensure
 +robustness in the face of the partially understood on-disk format,
 +hivex only allocates new blocks after the end of the file, and makes
 +minimal modifications to existing structures in the file to point to
 +these new blocks.  This makes hivex slightly less disk-efficient than
 +it could be, but disk is cheap, and registry modifications tend to be
 +very small.
 +
 +When deleting nodes, it is possible that this library may leave
 +unreachable live blocks in the hive.  This is because certain parts of
 +the hive disk format such as security (sk) records and big data (db)
 +records and classname fields are not well understood (and not
 +documented at all) and we play it safe by not attempting to modify
 +them.  Apart from wasting a little bit of disk space, it is not
 +thought that unreachable blocks are a problem.
 +
 +=over 4
 +
 +=item int hivex_commit (hive_h *h, const char *filename, int flags);
 +
 +Commit (write) any changes which have been made.
 +
 +C<filename> is the new file to write.  If C<filename == NULL> then we
 +overwrite the original file (ie. the file name that was passed to
 +C<hivex_open>).  C<flags> is not used, always pass 0.
 +
 +Returns 0 on success.  On error this returns -1 and sets errno.
 +
 +Note this does not close the hive handle.  You can perform further
 +operations on the hive after committing, including making more
 +modifications.  If you no longer wish to use the hive, call
 +C<hivex_close> after this.
 +
 +=item hive_set_value
 +
 +The typedef C<hive_set_value> is used in conjunction with the
 +C<hivex_node_set_values> call described below.
 +
 + struct hive_set_value {
 +   char *key;     /* key - a UTF-8 encoded ASCIIZ string */
 +   hive_type t;   /* type of value field */
 +   size_t len;    /* length of value field in bytes */
 +   char *value;   /* value field */
 + };
 + typedef struct hive_set_value hive_set_value;
 +
 +To set the default value for a node, you have to pass C<key = "">.
 +
 +Note that the C<value> field is just treated as a list of bytes, and
 +is stored directly in the hive.  The caller has to ensure correct
 +encoding and endianness, for example converting dwords to little
 +endian.
 +
 +The correct type and encoding for values depends on the node and key
 +in the registry, the version of Windows, and sometimes even changes
 +between versions of Windows for the same key.  We don't document it
 +here.  Often it's not documented at all.
 +
 +=item int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const
hive_set_value *values, int flags);
 +
 +This call can be used to set all the (key, value) pairs stored in C<node>.
 +
 +C<node> is the node to modify.  C<values> is an array of (key, value)
 +pairs.  There should be C<nr_values> elements in this array.  C<flags>
 +is not used, always pass 0.
 +
 +Any existing values stored at the node are discarded, and their
 +C<hive_value_h> handles become invalid.  Thus you can remove all
 +values stored at C<node> by passing C<nr_values = 0>.
 +
 +Returns 0 on success.  On error this returns -1 and sets errno.
 +
 +Note that this library does not offer a way to modify just a single
 +key at a node.  We don't implement a way to do this efficiently.
 +
 +=back
 +
 +=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED
 +
 +=over 4
 +
 +=item *
 +
 +Changing the root node.
 +
 +=item *
 +
 +Creating a new hive file from scratch.  This is impossible at present
 +because not all fields in the header are understood.
 +
 +=item *
 +
 +Modifying or deleting single values at a node.
 +
 +=item *
 +
 +Modifying security key (sk) records or classnames.  These are not
 +well understood.
 +
 +=back
 +
  =head1 THE STRUCTURE OF THE WINDOWS REGISTRY

  Note: To understand the relationship between hives and the common
 @@ -452,6 +573,10 @@ Registry contains cycles.

  Field in the registry out of range.

 +=item EROFS
 +
 +Tried to write to a registry which is not opened for writing.
 +
  =back

  =head1 ENVIRONMENT VARIABLES
 -- 1.6.5.2 

-- 
Matthew Booth, RHCA, RHCSS
Red Hat Engineering, Virtualisation Team

M:       +44 (0)7977 267231
GPG ID:  D33C3490
GPG FPR: 3733 612D 2D05 5458 8A8A 1600 3441 EA19 D33C 3490

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

Re: [Libguestfs] [PATCH 9/12] hivex: Begin implementation of writing to hives.