The Py_BuildValue "y#" format copies data; this is because Python
wants to allow our C memory to go out of scope without worrying about
whether the user's python callback has stashed off a longer-living
reference to its incoming parameter. But it is inefficient; we can do
better by utilizing Python's memoryview for a zero-copy exposure to
the callback's C buffer, as well as a .release method that we can
utilize just before our C memory goes out of scope. Now, if the user
stashes away a reference, they will get a clean Python error if they
try to access the memory after the fact. This IS an API change (code
previously expecting a stashed copy to be long-lived will break), but
we never promised Python API stability, and anyone writing a callback
that saves off data was already risky (neither libnbd nor nbdkit's
testsuite had such a case). For a demonstration of the new error,
where the old code succeeded:
$ ./run nbdsh
nbd> h.connect_command(["nbdkit", "-s", "memory",
"10"])
nbd> copy=None
nbd> def f(b,o,s,e):
... global copy
... copy = b
... print(b[0])
...
nbd> print(copy)
None
nbd> h.pread_structured(10,0,f)
0
bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
nbd> copy
<released memory at 0x7ff97410de40>
nbd> copy[0]
Traceback (most recent call last):
File "/usr/lib64/python3.10/code.py", line 90, in runcode
exec(code, self.locals)
File "<console>", line 1, in <module>
ValueError: operation forbidden on released memoryview object
To demonstrate the speedup, I tested:
$ export script='
def f(b,o,s,e):
pass
m=1024*1024
size=h.get_size()
for i in range(size // m):
buf = h.pread_structured(m, m*i, f)
'
$ time ./run nbdkit -U - memory 10G --run 'nbdsh -u "$uri" -c
"$script"'
On my machine, this took 9.1s pre-patch, and 3.0s post-patch, for an
approximate 65% speedup.
The corresponding diff to the generated code is:
| --- python/methods.c.bak 2022-05-31 07:57:25.256293091 -0500
| +++ python/methods.c 2022-05-31 08:14:09.570567858 -0500
| @@ -73,8 +73,12 @@ chunk_wrapper (void *user_data, const vo
|
| PyGILState_STATE py_save = PyGILState_UNLOCKED;
| PyObject *py_args, *py_ret;
| + PyObject *py_subbuf = NULL;
| PyObject *py_error = NULL;
|
| + /* Cast away const */
| + py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ);
| + if (!py_subbuf) { PyErr_PrintEx (0); goto out; }
| PyObject *py_error_modname = PyUnicode_FromString ("ctypes");
| if (!py_error_modname) { PyErr_PrintEx (0); goto out; }
| PyObject *py_error_mod = PyImport_Import (py_error_modname);
| @@ -84,7 +88,7 @@
| Py_DECREF (py_error_mod);
| if (!py_error) { PyErr_PrintEx (0); goto out; }
|
| - py_args = Py_BuildValue ("(" "y#" "K" "I"
"O" ")", subbuf, (int) count, offset, status, py_error);
| + py_args = Py_BuildValue ("(" "O" "K" "I"
"O" ")", py_subbuf, offset, status, py_error);
| if (!py_args) { PyErr_PrintEx (0); goto out; }
|
| py_save = PyGILState_Ensure ();
| @@ -111,6 +115,11 @@ chunk_wrapper (void *user_data, const vo
| };
|
| out:
| + if (py_subbuf) {
| + PyObject *tmp = PyObject_CallMethod(py_subbuf, "release", NULL);
| + Py_XDECREF (tmp);
| + Py_DECREF (py_subbuf);
| + }
| if (py_error) {
| PyObject *py_error_ret = PyObject_GetAttrString (py_error, "value");
| *error = PyLong_AsLong (py_error_ret);
---
generator/Python.ml | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/generator/Python.ml b/generator/Python.ml
index 1c4446e..0191f79 100644
--- a/generator/Python.ml
+++ b/generator/Python.ml
@@ -169,8 +169,7 @@ let
pr " PyObject *py_args, *py_ret;\n";
List.iter (
function
- | CBArrayAndLen (UInt32 n, _) ->
- pr " PyObject *py_%s = NULL;\n" n
+ | CBArrayAndLen (UInt32 n, _) | CBBytesIn (n, _)
| CBMutable (Int n) ->
pr " PyObject *py_%s = NULL;\n" n
| _ -> ()
@@ -187,7 +186,10 @@ let
pr " if (!py_e_%s) { PyErr_PrintEx (0); goto out; }\n" n;
pr " PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n;
pr " }\n"
- | CBBytesIn _
+ | CBBytesIn (n, len) ->
+ pr " /* Cast away const */\n";
+ pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n"
n n len;
+ pr " if (!py_%s) { PyErr_PrintEx (0); goto out; }\n" n
| CBInt _
| CBInt64 _ -> ()
| CBMutable (Int n) ->
@@ -210,7 +212,7 @@ let
List.iter (
function
| CBArrayAndLen (UInt32 n, len) -> pr " \"O\""
- | CBBytesIn (n, len) -> pr " \"y#\""
+ | CBBytesIn (n, _) -> pr " \"O\""
| CBInt n -> pr " \"i\""
| CBInt64 n -> pr " \"L\""
| CBMutable (Int n) -> pr " \"O\""
@@ -223,7 +225,7 @@ let
List.iter (
function
| CBArrayAndLen (UInt32 n, _) -> pr ", py_%s" n
- | CBBytesIn (n, len) -> pr ", %s, (int) %s" n len
+ | CBBytesIn (n, _) -> pr ", py_%s" n
| CBMutable (Int n) -> pr ", py_%s" n
| CBInt n | CBInt64 n
| CBString n
@@ -268,7 +270,13 @@ let
pr " Py_DECREF (py_%s_ret);\n" n;
pr " Py_DECREF (py_%s);\n" n;
pr " }\n"
- | CBBytesIn _
+ | CBBytesIn (n, _) ->
+ (*
https://stackoverflow.com/questions/58870010/how-to-call-release-on-a-mem...
*)
+ pr " if (py_%s) {\n" n;
+ pr " PyObject *tmp = PyObject_CallMethod(py_%s, \"release\",
NULL);\n" n;
+ pr " Py_XDECREF (tmp);\n";
+ pr " Py_DECREF (py_%s);\n" n;
+ pr " }\n"
| CBInt _ | CBInt64 _
| CBString _
| CBUInt _ | CBUInt64 _ -> ()
--
2.36.1