|
1 | 1 | ==============================
|
2 |
| -15.3 一个操作数组的扩展函数 |
| 2 | +15.3 编写扩展函数操作数组 |
3 | 3 | ==============================
|
4 | 4 |
|
5 | 5 | ----------
|
6 | 6 | 问题
|
7 | 7 | ----------
|
8 |
| -You want to write a C extension function that operates on contiguous arrays of data, as |
9 |
| -might be created by the array module or libraries like NumPy. However, you would like |
10 |
| -your function to be general purpose and not specific to any one array library. |
| 8 | +你想编写一个C扩展函数来操作连续的数组数据,这些数组可能是由array模块或类似NumPy的库所创建的。 |
| 9 | +不过,你想让你的函数更加通用,而不是针对某个特定的库所生成的数组。 |
11 | 10 |
|
12 | 11 | |
|
13 | 12 |
|
14 | 13 | ----------
|
15 | 14 | 解决方案
|
16 | 15 | ----------
|
17 |
| -To receive and process arrays in a portable manner, you should write code that uses the |
18 |
| -Buffer Protocol. Here is an example of a handwritten C extension function that receives |
19 |
| -array data and calls the avg(double *buf, int len) function from this chapter’s in‐ |
20 |
| -troduction: |
21 |
| -
|
22 |
| -/* Call double avg(double *, int) */ |
23 |
| -static PyObject *py_avg(PyObject *self, PyObject *args) { |
24 |
| - PyObject *bufobj; |
25 |
| - Py_buffer view; |
26 |
| - double result; |
27 |
| - /* Get the passed Python object */ |
28 |
| - if (!PyArg_ParseTuple(args, "O", &bufobj)) { |
29 |
| - return NULL; |
30 |
| - } |
31 |
| -
|
32 |
| - /* Attempt to extract buffer information from it */ |
33 |
| -
|
34 |
| - if (PyObject_GetBuffer(bufobj, &view, |
35 |
| - PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT) == -1) { |
36 |
| - return NULL; |
37 |
| - } |
38 |
| - |
39 |
| - if (view.ndim != 1) { |
40 |
| - PyErr_SetString(PyExc_TypeError, "Expected a 1-dimensional array"); |
41 |
| - PyBuffer_Release(&view); |
42 |
| - return NULL; |
43 |
| - } |
44 |
| - |
45 |
| - /* Check the type of items in the array */ |
46 |
| - if (strcmp(view.format,"d") != 0) { |
47 |
| - PyErr_SetString(PyExc_TypeError, "Expected an array of doubles"); |
48 |
| - PyBuffer_Release(&view); |
49 |
| - return NULL; |
50 |
| - } |
51 |
| -
|
52 |
| - /* Pass the raw buffer and size to the C function */ |
53 |
| - result = avg(view.buf, view.shape[0]); |
54 |
| -
|
55 |
| - /* Indicate we're done working with the buffer */ |
56 |
| - PyBuffer_Release(&view); |
57 |
| - return Py_BuildValue("d", result); |
58 |
| -} |
59 |
| -
|
60 |
| -Here is an example that shows how this extension function works: |
61 |
| - |
62 |
| ->>> import array |
63 |
| ->>> avg(array.array('d',[1,2,3])) |
64 |
| -2.0 |
65 |
| ->>> import numpy |
66 |
| ->>> avg(numpy.array([1.0,2.0,3.0])) |
67 |
| -2.0 |
68 |
| ->>> avg([1,2,3]) |
69 |
| -Traceback (most recent call last): |
70 |
| - File "<stdin>", line 1, in <module> |
71 |
| -TypeError: 'list' does not support the buffer interface |
72 |
| ->>> avg(b'Hello') |
73 |
| -Traceback (most recent call last): |
74 |
| - File "<stdin>", line 1, in <module> |
75 |
| -TypeError: Expected an array of doubles |
76 |
| ->>> a = numpy.array([[1.,2.,3.],[4.,5.,6.]]) |
77 |
| ->>> avg(a[:,2]) |
78 |
| -Traceback (most recent call last): |
79 |
| - File "<stdin>", line 1, in <module> |
80 |
| -ValueError: ndarray is not contiguous |
81 |
| ->>> sample.avg(a) |
82 |
| -Traceback (most recent call last): |
83 |
| - File "<stdin>", line 1, in <module> |
84 |
| -TypeError: Expected a 1-dimensional array |
85 |
| ->>> sample.avg(a[0]) |
86 |
| - |
87 |
| -2.0 |
88 |
| ->>> |
| 16 | +为了能以可移植的方式接受和处理数组,你需要使用缓冲区协议(Buffer Protocol)。 |
| 17 | +下面是一个手写的C扩展函数例子, |
| 18 | +用来接受数组数据并调用本章开篇部分的 ``avg(double *buf, int len)`` 函数: |
| 19 | + |
| 20 | +:: |
| 21 | + |
| 22 | + /* Call double avg(double *, int) */ |
| 23 | + static PyObject *py_avg(PyObject *self, PyObject *args) { |
| 24 | + PyObject *bufobj; |
| 25 | + Py_buffer view; |
| 26 | + double result; |
| 27 | + /* Get the passed Python object */ |
| 28 | + if (!PyArg_ParseTuple(args, "O", &bufobj)) { |
| 29 | + return NULL; |
| 30 | + } |
| 31 | + |
| 32 | + /* Attempt to extract buffer information from it */ |
| 33 | + |
| 34 | + if (PyObject_GetBuffer(bufobj, &view, |
| 35 | + PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT) == -1) { |
| 36 | + return NULL; |
| 37 | + } |
| 38 | + |
| 39 | + if (view.ndim != 1) { |
| 40 | + PyErr_SetString(PyExc_TypeError, "Expected a 1-dimensional array"); |
| 41 | + PyBuffer_Release(&view); |
| 42 | + return NULL; |
| 43 | + } |
| 44 | + |
| 45 | + /* Check the type of items in the array */ |
| 46 | + if (strcmp(view.format,"d") != 0) { |
| 47 | + PyErr_SetString(PyExc_TypeError, "Expected an array of doubles"); |
| 48 | + PyBuffer_Release(&view); |
| 49 | + return NULL; |
| 50 | + } |
| 51 | + |
| 52 | + /* Pass the raw buffer and size to the C function */ |
| 53 | + result = avg(view.buf, view.shape[0]); |
| 54 | + |
| 55 | + /* Indicate we're done working with the buffer */ |
| 56 | + PyBuffer_Release(&view); |
| 57 | + return Py_BuildValue("d", result); |
| 58 | + } |
| 59 | + |
| 60 | +下面我们演示下这个扩展函数是如何工作的: |
| 61 | + |
| 62 | +:: |
| 63 | + |
| 64 | + >>> import array |
| 65 | + >>> avg(array.array('d',[1,2,3])) |
| 66 | + 2.0 |
| 67 | + >>> import numpy |
| 68 | + >>> avg(numpy.array([1.0,2.0,3.0])) |
| 69 | + 2.0 |
| 70 | + >>> avg([1,2,3]) |
| 71 | + Traceback (most recent call last): |
| 72 | + File "<stdin>", line 1, in <module> |
| 73 | + TypeError: 'list' does not support the buffer interface |
| 74 | + >>> avg(b'Hello') |
| 75 | + Traceback (most recent call last): |
| 76 | + File "<stdin>", line 1, in <module> |
| 77 | + TypeError: Expected an array of doubles |
| 78 | + >>> a = numpy.array([[1.,2.,3.],[4.,5.,6.]]) |
| 79 | + >>> avg(a[:,2]) |
| 80 | + Traceback (most recent call last): |
| 81 | + File "<stdin>", line 1, in <module> |
| 82 | + ValueError: ndarray is not contiguous |
| 83 | + >>> sample.avg(a) |
| 84 | + Traceback (most recent call last): |
| 85 | + File "<stdin>", line 1, in <module> |
| 86 | + TypeError: Expected a 1-dimensional array |
| 87 | + >>> sample.avg(a[0]) |
| 88 | + |
| 89 | + 2.0 |
| 90 | + >>> |
89 | 91 |
|
90 | 92 | |
|
91 | 93 |
|
92 | 94 | ----------
|
93 | 95 | 讨论
|
94 | 96 | ----------
|
95 |
| -Passing array objects to C functions might be one of the most common things you would |
96 |
| -want to do with a extension function. A large number of Python applications, ranging |
97 |
| -from image processing to scientific computing, are based on high-performance array |
98 |
| -processing. By writing code that can accept and operate on arrays, you can write cus‐ |
99 |
| -tomized code that plays nicely with those applications as opposed to having some sort |
100 |
| -of custom solution that only works with your own code. |
101 |
| -The key to this code is the PyBuffer_GetBuffer() function. Given an arbitrary Python |
102 |
| -object, it tries to obtain information about the underlying memory representation. If |
103 |
| -it’s not possible, as is the case with most normal Python objects, it simply raises an |
104 |
| -exception and returns -1. The special flags passed to PyBuffer_GetBuffer() give |
105 |
| -additional hints about the kind of memory buffer that is requested. For example, |
106 |
| -PyBUF_ANY_CONTIGUOUS specifies that a contiguous region of memory is required. |
107 |
| -For arrays, byte strings, and other similar objects, a Py_buffer structure is filled with |
108 |
| -information about the underlying memory. This includes a pointer to the memory, size, |
109 |
| -itemsize, format, and other details. Here is the definition of this structure: |
110 |
| - |
111 |
| -typedef struct bufferinfo { |
112 |
| - void *buf; /* Pointer to buffer memory */ |
113 |
| - PyObject *obj; /* Python object that is the owner */ |
114 |
| - Py_ssize_t len; /* Total size in bytes */ |
115 |
| - Py_ssize_t itemsize; /* Size in bytes of a single item */ |
116 |
| - int readonly; /* Read-only access flag */ |
117 |
| - int ndim; /* Number of dimensions */ |
118 |
| - char *format; /* struct code of a single item */ |
119 |
| - Py_ssize_t *shape; /* Array containing dimensions */ |
120 |
| - Py_ssize_t *strides; /* Array containing strides */ |
121 |
| - Py_ssize_t *suboffsets; /* Array containing suboffsets */ |
122 |
| -} Py_buffer; |
123 |
| -
|
124 |
| -In this recipe, we are simply concerned with receiving a contiguous array of doubles. |
125 |
| -To check if items are a double, the format attribute is checked to see if the string is |
126 |
| -"d". This is the same code that the struct module uses when encoding binary values. |
127 |
| -As a general rule, format could be any format string that’s compatible with the struct |
128 |
| -module and might include multiple items in the case of arrays containing C structures. |
129 |
| -Once we have verified the underlying buffer information, we simply pass it to the C |
130 |
| -function, which treats it as a normal C array. For all practical purposes, it is not con‐ |
131 |
| -cerned with what kind of array it is or what library created it. This is how the function |
132 |
| -is able to work with arrays created by the array module or by numpy. |
133 |
| - |
134 |
| -Before returning a final result, the underlying buffer view must be released using |
135 |
| -PyBuffer_Release(). This step is required to properly manage reference counts of |
136 |
| -objects. |
137 |
| -Again, this recipe only shows a tiny fragment of code that receives an array. If working |
138 |
| -with arrays, you might run into issues with multidimensional data, strided data, different |
139 |
| -data types, and more that will require study. Make sure you consult the official docu‐ |
140 |
| -mentation to get more details. |
141 |
| -If you need to write many extensions involving array handling, you may find it easier |
142 |
| -to implement the code in Cython. See Recipe 15.11. |
| 97 | +将一个数组对象传给C函数可能是一个扩展函数做的最常见的事。 |
| 98 | +很多Python应用程序,从图像处理到科学计算,都是基于高性能的数组处理。 |
| 99 | +通过编写能接受并操作数组的代码,你可以编写很好的兼容这些应用程序的自定义代码, |
| 100 | +而不是只能兼容你自己的代码。 |
| 101 | + |
| 102 | +代码的关键点在于 ``PyObject_GetBuffer()`` 函数。 |
| 103 | +给定一个任意的Python对象,它会试着去获取底层内存表示的相关信息。如果无法获取(大多数普通Python对象都是这种情况),它只是简单地抛出一个异常并返回-1. |
| 104 | +传给 ``PyObject_GetBuffer()`` 的特殊标志给出了所需的内存缓冲类型。 |
| 105 | +例如,``PyBUF_ANY_CONTIGUOUS`` 表示需要的是一个连续的内存区域。 |
| 106 | + |
| 107 | +对于数组、字节字符串和其他类似对象而言,一个 ``Py_buffer`` 结构体包含了所有底层内存的信息。 |
| 108 | +它包含指向内存的指针、总大小、元素大小、格式以及其他细节。下面是这个结构体的定义: |
| 109 | + |
| 110 | +:: |
| 111 | + |
| 112 | + typedef struct bufferinfo { |
| 113 | + void *buf; /* Pointer to buffer memory */ |
| 114 | + PyObject *obj; /* Python object that is the owner */ |
| 115 | + Py_ssize_t len; /* Total size in bytes */ |
| 116 | + Py_ssize_t itemsize; /* Size in bytes of a single item */ |
| 117 | + int readonly; /* Read-only access flag */ |
| 118 | + int ndim; /* Number of dimensions */ |
| 119 | + char *format; /* struct code of a single item */ |
| 120 | + Py_ssize_t *shape; /* Array containing dimensions */ |
| 121 | + Py_ssize_t *strides; /* Array containing strides */ |
| 122 | + Py_ssize_t *suboffsets; /* Array containing suboffsets */ |
| 123 | + } Py_buffer; |
| 124 | + |
| 125 | +本节中,我们只关注接受一个双精度浮点数数组作为参数。 |
| 126 | +要检查元素是否是一个双精度浮点数,只需验证 ``format`` 属性是不是字符串"d". |
| 127 | +这个也是 ``struct`` 模块用来编码二进制数据的。 |
| 128 | +通常来讲,``format`` 可以是任何兼容 ``struct`` 模块的格式化字符串, |
| 129 | +并且如果数组包含了C结构的话它可以包含多个值。 |
| 130 | +一旦我们确认了底层的缓冲区信息,只需要简单地将它传给C函数,C函数会把它当做一个普通的C数组来处理。 |
| 131 | +实际上,我们不必担心是怎样的数组类型或者它是被什么库创建出来的。 |
| 132 | +这也是为什么这个函数能兼容 ``array`` 模块也能兼容 ``numpy`` 模块中的数组了。 |
| 133 | + |
| 134 | +在返回最终结果之前,底层的缓冲区视图必须使用 ``PyBuffer_Release()`` 释放掉。 |
| 135 | +之所以要这一步是为了能正确的管理对象的引用计数。 |
| 136 | + |
| 137 | +同样,本节也仅仅只是演示了接受数组的一个小的代码片段。 |
| 138 | +如果你真的要处理数组,你可能会碰到多维数据、带步长(strided)的数据、不同的数据类型等等问题, |
| 139 | +那么就得去学更高级的东西了。你需要参考官方文档来获取更多详细的细节。 |
| 140 | + |
| 141 | +如果你需要编写很多涉及数组处理的扩展,那么通过Cython来实现会更容易一些。参考15.11节。
0 commit comments