Re: [Libguestfs] [PATCH 1/2] Add type checking, support integers as value

Tuesday, 12 August 2014

On Tuesday 12 August 2014 10:45:07 Richard W.M. Jones wrote:
[..]
...
 > diff --git a/generator/generator.ml b/generator/generator.ml
 > index bcf1966..0aeb24e 100755
 > --- a/generator/generator.ml
 > +++ b/generator/generator.ml
 > @@ -2837,45 +2837,90 @@ put_handle (hive_h *h)
 >   * not be freed.
 >   */
 >  static int
 > -get_value (PyObject *v, hive_set_value *ret)
 > +get_value (PyObject *v, hive_set_value *ret, uint64_t *word)
 >  {
 >    PyObject *obj;
 > -#ifndef HAVE_PYSTRING_ASSTRING
 >    PyObject *bytes;
 > -#endif
 > +
 > +  if (!PyDict_Check (v)) {
 > +    PyErr_SetString (PyExc_TypeError, \"expected dictionary type for
value\");
 > +    return -1;
 > +  }

 This on its own is fine, and an example of a single change which
 should go in a separate patch. 
OK, one patch for type checking of the key and type.

...
 >    obj = PyDict_GetItemString (v, \"key\");
 >    if (!obj) {
 > -    PyErr_SetString (PyExc_RuntimeError, \"no 'key' element in
dictionary\");
 > +    PyErr_SetString (PyExc_KeyError, \"no 'key' element in
dictionary\");

 This is an unrelated change.  I don't really know if it's a good or
 bad change, but it needs to be reviewed in a separate patch, along
 with other identical changes to the exception type below. 
OK, another patch for changing types. RuntimeError is too generic for my taste,
this better captures the actual meaning of the error.

...
 > +    return -1;
 > +  }
 > +  if (PyUnicode_Check (obj)) {
 > +    bytes = PyUnicode_AsUTF8String (obj);
 > +    if (bytes == NULL) {
 > +      return -1;

 Don't we need to set PyErr_SetString here? 
PyUnicode_AsUTF8String should set the error code when it fails. I have not
triggered that case yet, but maybe it is better to be more explicit and mention
the source of the error.

...
 > +    }
 > +  } else if (PyBytes_Check (obj)) {
 > +    bytes = obj;
 > +  } else {
 > +    PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for
'key'\");
 >      return -1;
 >    }
 > -#ifdef HAVE_PYSTRING_ASSTRING
 > -  ret->key = PyString_AsString (obj);
 > -#else
 > -  bytes = PyUnicode_AsUTF8String (obj);
 >    ret->key = PyBytes_AS_STRING (bytes);
 > -#endif
 >  
 >    obj = PyDict_GetItemString (v, \"t\");
 >    if (!obj) {
 > -    PyErr_SetString (PyExc_RuntimeError, \"no 't' element in
dictionary\");
 > +    PyErr_SetString (PyExc_KeyError, \"no 't' element in
dictionary\");
 >      return -1;
 >    }
 >    ret->t = PyLong_AsLong (obj);
 > +  if (PyErr_Occurred ()) {
 > +    PyErr_SetString (PyExc_TypeError, \"expected int type for
't'\");
 > +    return -1;
 > +  }
 >  
 >    obj = PyDict_GetItemString (v, \"value\");
 >    if (!obj) {
 > -    PyErr_SetString (PyExc_RuntimeError, \"no 'value' element in
dictionary\");
 > +    PyErr_SetString (PyExc_KeyError, \"no 'value' element in
dictionary\");
 > +    return -1;
 > +  }
 > +  /* Support bytes and unicode strings. For DWORD and QWORD types, long and
 > +   * integers are also supported. Caller must ensure sanity of byte buffer
 > +   * lengths */
 > +  if (PyUnicode_Check (obj)) {
 > +    bytes = PyUnicode_AsUTF8String (obj);
 > +    if (bytes == NULL)
 > +      return -1;
 > +    ret->len = PyBytes_GET_SIZE (bytes);
 > +    ret->value = PyBytes_AS_STRING (bytes); 
I think we should outright reject Unicode strings. (u"" in Python 2 + 3,
"" in
Python 3.) The previous Python 3 code and the one in this patch tries to be too
smart and assumes that the value can be a non-NUL terminated UTF-8 string.

...
 > +  } else if (PyBytes_Check (obj)) {
 > +    ret->len = PyBytes_GET_SIZE (obj);
 > +    ret->value = PyBytes_AS_STRING (obj);
 > +  } else if (ret->t == hive_t_REG_DWORD ||
 > +             ret->t == hive_t_REG_DWORD_BIG_ENDIAN) {
 > +    uint32_t d = PyLong_AsLong (obj);
 > +    if (PyErr_Occurred ()) {
 > +      PyErr_SetString (PyExc_TypeError, \"expected int type for DWORD
value\");
 > +      return -1;
 > +    }

 This is where things start to go wrong.  There's no way you can trust
 the type field (even though it comes from the caller, not the registry
 in this case).  The type field is only a hint, it doesn't reflect the
 actual type stored. 
Indeed, type is a hint, but if you really insist in pushing a QWORD in a DWORD,
you can still use the byte type. In C, you cannot detect the type from a
pointer, but this is Python where it is possible so this is why I added it for
convenience.

To make this API better follow the C API, what about dropping support for
everything but bytes? If conversion from UTF-8 to nul-terminated UTF-16 is
wanted, or conversion from an int to a DWORD, a second API could be created
instead (that just uses the thin C <-> Python bindings).

...
 TBH I'm not sure why you're using PyLong_AsLong{,Long} here
at all. 
To convert a PyObject to a long.

...
 > +    ret->len = sizeof (d);
 > +    ret->value = (char *) word;
 > +    if (ret->t == hive_t_REG_DWORD)
 > +      *(uint32_t *) ret->value = htole32 (d);
 > +    else
 > +      *(uint32_t *) ret->value = htobe32 (d);
 > +  } else if (ret->t == hive_t_REG_QWORD) {
 > +    uint64_t l = PyLong_AsLongLong (obj);
 > +    if (PyErr_Occurred ()) {
 > +      PyErr_SetString (PyExc_TypeError, \"expected int type for QWORD
value\");
 > +      return -1;
 > +    }
 > +
 > +    ret->len = sizeof (l);
 > +    ret->value = (char *) word;
 > +    *(uint64_t *) ret->value = htole64 (l);
 > +  } else {
 > +    PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for
'value'\");
 >      return -1;
 >    }
 > -#ifdef HAVE_PYSTRING_ASSTRING
 > -  ret->value = PyString_AsString (obj);
 > -  ret->len = PyString_Size (obj);
 > -#else
 > -  bytes = PyUnicode_AsUTF8String (obj);
 > -  ret->value = PyBytes_AS_STRING (bytes);
 > -  ret->len = PyBytes_GET_SIZE (bytes);
 > -#endif
 >  
 >    return 0;
 >  }
 > @@ -2883,6 +2928,7 @@ get_value (PyObject *v, hive_set_value *ret)
 >  typedef struct py_set_values {
 >    size_t nr_values;
 >    hive_set_value *values;
 > +  uint64_t *words;
 >  } py_set_values;
 >  
 >  static int
 > @@ -2905,13 +2951,21 @@ get_values (PyObject *v, py_set_values *ret)
 >    ret->nr_values = len;
 >    ret->values = malloc (len * sizeof (hive_set_value));
 >    if (!ret->values) {
 > -    PyErr_SetString (PyExc_RuntimeError, strerror (errno));
 > +    PyErr_NoMemory ();
 > +    return -1;
 > +  }
 > +  /* if the value is a dword/qword, it will be stored here */
 > +  ret->words = malloc (len * sizeof (uint64_t));
 > +  if (!ret->words) {
 > +    free (ret->values);
 > +    PyErr_NoMemory ();
 >      return -1;
 >    }
 >  
 >    for (i = 0; i < len; ++i) {
 > -    if (get_value (PyList_GetItem (v, i), &(ret->values[i])) == -1) {
 > +    if (get_value (PyList_GetItem (v, i), &(ret->values[i]),
&(ret->words[i])) == -1) {
 >        free (ret->values);
 > +      free (ret->words);
 >        return -1;
 >      }
 >    }
 > @@ -3070,9 +3124,10 @@ put_val_type (char *val, size_t len, hive_type t)
 >  	| ASetValues ->
 >  	    pr "  py_set_values values;\n";
 >  	    pr "  PyObject *py_values;\n"
 > -	| ASetValue ->
 > -	    pr "  hive_set_value val;\n";
 > -	    pr "  PyObject *py_val;\n"
 > +        | ASetValue ->
 > +            pr "  hive_set_value val;\n";
 > +            pr "  PyObject *py_val;\n";
 > +            pr "  uint64_t word;"
 >        ) (snd style);
 >  
 >        pr "\n";
 > @@ -3136,8 +3191,8 @@ put_val_type (char *val, size_t len, hive_type t)
 >  	| ASetValues ->
 >              pr "  if (get_values (py_values, &values) == -1)\n";
 >              pr "    return NULL;\n"
 > -	| ASetValue ->
 > -            pr "  if (get_value (py_val, &val) == -1)\n";
 > +        | ASetValue ->
 > +            pr "  if (get_value (py_val, &val, &word) == -1)\n";
 >              pr "    return NULL;\n"
 >        ) (snd style);
 >  
 > @@ -3151,8 +3206,9 @@ put_val_type (char *val, size_t len, hive_type t)
 >          | AString _ | AStringNullable _
 >  	| AOpenFlags | AUnusedFlags -> ()
 >  	| ASetValues ->
 > -	    pr "  free (values.values);\n"
 > -	| ASetValue -> ()
 > +            pr "  free (values.values);\n";
 > +            pr "  free (values.words);\n"
 > +        | ASetValue -> ()
 >        ) (snd style);
 >  
 >        (* Check for errors from C library. *)
 > diff --git a/python/t/300-setvalue-types.py b/python/t/300-setvalue-types.py
 > new file mode 100644
 > index 0000000..6d72a59
 > --- /dev/null
 > +++ b/python/t/300-setvalue-types.py
 > @@ -0,0 +1,76 @@
 > +# Test various possible types for assignment to setvalue
 > +# Copyright (C) 2014 Peter Wu <peter(a)lekensteyn.nl&gt;
 > +#
 > +# This program is free software; you can redistribute it and/or modify
 > +# it under the terms of the GNU General Public License as published by
 > +# the Free Software Foundation; either version 2 of the License, or
 > +# (at your option) any later version.
 > +#
 > +# This program is distributed in the hope that it will be useful,
 > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 > +# GNU General Public License for more details.
 > +#
 > +# You should have received a copy of the GNU General Public License
 > +# along with this program; if not, write to the Free Software
 > +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 > +
 > +import hivex
 > +import os
 > +from sys import getrefcount
 > +
 > +srcdir = "."
 > +if "srcdir" in os.environ and os.environ["srcdir"]:
 > +    srcdir = os.environ["srcdir"]
 > +
 > +h = hivex.Hivex ("%s/../images/minimal" % srcdir,
 > +                 write = True)
 > +
 > +REG_SZ = 1
 > +REG_BINARY = 3
 > +REG_DWORD = 4
 > +REG_DWORD_BIG_ENDIAN = 5
 > +REG_QWORD = 11
 > +
 > +def set_value (key="test key", t=REG_BINARY, value=b'Val'):
 > +    global h
 > +    h.node_set_value (h.root (), {
 > +        "key": key,
 > +        "t": t,
 > +        "value": value
 > +    })
 > +
 > +def test_pass (t, value, exp_bytes):
 > +    global h
 > +    key = "test_key"
 > +    set_value (key, t, value)
 > +    val = h.node_get_value (h.root (), key)
 > +    ret_type, ret_value = h.value_value (val)
 > +    assert t == ret_type, \
 > +        "expected type {0}, got {1}".format(t, ret_type)
 > +    assert exp_bytes == ret_value, \
 > +        "expected value {0}, got {1}".format(exp_bytes, ret_value)
 > +
 > +def test_exception (exception_type, **kwargs):
 > +    try:
 > +        set_value (**kwargs)
 > +        raise AssertionError("expected {0}".format(exception_type))
 > +    except exception_type:
 > +        pass
 > +
 > +# Good weather tests
 > +test_pass (REG_BINARY,          b"\x01\x02",       
b"\x01\x02")
 > +test_pass (REG_SZ,              u"Val",            
b"\x56\x61\x6c")
 > +test_pass (REG_DWORD,           0xabcdef,           b'\xef\xcd\xab\x00')
 > +test_pass (REG_DWORD_BIG_ENDIAN, 0xabcdef,          b'\x00\xab\xcd\xef')
 > +test_pass (REG_QWORD,           0x0123456789abcdef,
 > +        b'\xef\xcd\xab\x89\x67\x45\x23\x01')
 > +
 > +# *WORDs must still accept bytes (the length is not checked)
 > +test_pass (REG_DWORD,           b'\xaa\xbb\xcc',   
b'\xaa\xbb\xcc')
 > +
 > +# Bad weather tests
 > +test_exception (TypeError, key = 1)
 > +test_exception (TypeError, t = "meh")
 > +test_exception (TypeError, t = REG_SZ, value = 1)
 > +test_exception (TypeError, t = REG_DWORD, value = None) 
I will come later with follow-up patches, thanks for your review!

-- 
Kind regards,
Peter
https://lekensteyn.nl

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

Re: [Libguestfs] [PATCH 1/2] Add type checking, support integers as value