diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index bca78a9c369385..1240e8e3e4a69e 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -201,3 +201,52 @@ called with a non-bytes parameter. reallocation fails, the original bytes object at *\*bytes* is deallocated, *\*bytes* is set to ``NULL``, :exc:`MemoryError` is set, and ``-1`` is returned. + +PyBytesWriter +^^^^^^^^^^^^^ + +The :c:type:`PyBytesWriter` API can be used to create a Python :class:`bytes` +object. + +.. versionadded:: 3.14 + +.. c:type:: PyBytesWriter + + A bytes writer instance. + + The instance must be destroyed by :c:func:`PyBytesWriter_Finish` on + success, or :c:func:`PyBytesWriter_Discard` on error. + +.. c:function:: PyBytesWriter* PyBytesWriter_Create(Py_ssize_t size, char **str) + + Create a bytes writer instance. + Preallocate *size* bytes. + + On success, set *\*str* and return a new writer. + On error, set an exception and return ``NULL``. + +.. c:function:: PyObject* PyBytesWriter_Finish(PyBytesWriter *writer, char *str) + + Return the final Python :class:`bytes` object and destroy the writer + instance. + + On success, return a bytes object. + On error, set an exception and return ``NULL``. + +.. c:function:: void PyBytesWriter_Discard(PyBytesWriter *writer) + + Discard the internal bytes buffer and destroy the writer instance. + +.. c:function:: int PyBytesWriter_Prepare(PyBytesWriter *writer, char **str, Py_ssize_t size) + + Allocate *size* bytes to prepare writing *size* bytes into *writer*. + + On success, update *\*str* and return ``0``. + On error, set an exception and return ``-1``. + +.. c:function:: int PyBytesWriter_WriteBytes(PyBytesWriter *writer, char **str, const void *bytes, Py_ssize_t size) + + Write the bytes string *bytes* of *size* bytes into *writer*. + + On success, update *\*str* and return ``0``. + On error, set an exception and return ``-1``. 
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 958fafd47ac81b..3f9ddd03627a97 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1546,6 +1546,7 @@ object. .. c:function:: PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length) Create a Unicode writer instance. + Preallocate *length* characters. Set an exception and return ``NULL`` on error. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7450597e8597ad..1f125dd341dba9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -405,6 +405,17 @@ New Features (Contributed by Victor Stinner in :gh:`119182`.) +* Add a new :c:type:`PyBytesWriter` API to create a Python :class:`bytes` + object: + + * :c:func:`PyBytesWriter_Create`; + * :c:func:`PyBytesWriter_Finish`; + * :c:func:`PyBytesWriter_Discard`; + * :c:func:`PyBytesWriter_Prepare`; + * :c:func:`PyBytesWriter_WriteBytes`. + + (Contributed by Victor Stinner in :gh:`121710`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 41537210b748a1..1824970c3054a5 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -35,3 +35,25 @@ static inline Py_ssize_t PyBytes_GET_SIZE(PyObject *op) { /* _PyBytes_Join(sep, x) is like sep.join(x). sep must be PyBytesObject*, x must be an iterable object. 
*/ PyAPI_FUNC(PyObject*) _PyBytes_Join(PyObject *sep, PyObject *x); + +/* --- PyBytesWriter ------------------------------------------------------ */ + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(PyBytesWriter*) PyBytesWriter_Create( + Py_ssize_t size, + char **str); +PyAPI_FUNC(PyObject *) PyBytesWriter_Finish( + PyBytesWriter *writer, + char *str); +PyAPI_FUNC(void) PyBytesWriter_Discard(PyBytesWriter *writer); + +PyAPI_FUNC(int) PyBytesWriter_Prepare( + PyBytesWriter *writer, + char **str, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + char **str, + const void *bytes, + Py_ssize_t size); diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a39e..86ba74eea8e2c3 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -82,9 +82,9 @@ typedef struct { This flag must be zero if use_bytearray is non-zero. */ int overallocate; - /* Stack buffer */ + /* Small buffer: smaller than pymalloc 512 bytes threshold */ int use_small_buffer; - char small_buffer[512]; + char small_buffer[256]; } _PyBytesWriter; /* Initialize a bytes writer @@ -120,21 +120,6 @@ PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size); -/* Resize the buffer to make it larger. - The new buffer may be larger than size bytes because of overallocation. - Return the updated current pointer inside the buffer. - Raise an exception and return NULL on error. - - Note: size must be greater than the number of allocated bytes in the writer. - - This function doesn't use the writer minimum size (min_size attribute). - - See also _PyBytesWriter_Prepare(). - */ -PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer, - void *str, - Py_ssize_t size); - /* Write bytes. Raise an exception and return NULL on error. 
*/ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, diff --git a/Misc/NEWS.d/next/C_API/2024-07-13-21-49-12.gh-issue-121710.j-9Vhk.rst b/Misc/NEWS.d/next/C_API/2024-07-13-21-49-12.gh-issue-121710.j-9Vhk.rst new file mode 100644 index 00000000000000..017614d732eec6 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-13-21-49-12.gh-issue-121710.j-9Vhk.rst @@ -0,0 +1,10 @@ +Add a new :c:type:`PyBytesWriter` API to create a Python :class:`bytes` +object: + +* :c:func:`PyBytesWriter_Create`; +* :c:func:`PyBytesWriter_Finish`; +* :c:func:`PyBytesWriter_Discard`; +* :c:func:`PyBytesWriter_Prepare`; +* :c:func:`PyBytesWriter_WriteBytes`. + +Patch by Victor Stinner. diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 02294d8887abb7..030715b3659ab2 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -37,8 +37,140 @@ bytes_resize(PyObject *Py_UNUSED(module), PyObject *args) } +static int +bytes_equal(PyObject *obj, const char *str) +{ + return (PyBytes_Size(obj) == (Py_ssize_t)strlen(str) + && strcmp(PyBytes_AsString(obj), str) == 0); +} + + +/* Test PyBytesWriter API */ +static PyObject * +test_byteswriter(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(3, &str); + if (writer == NULL) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(writer, &str, "abc", 3) < 0) { + goto error; + } + + // write empty string + if (PyBytesWriter_WriteBytes(writer, &str, "", 0) < 0) { + goto error; + } + + PyObject *obj = PyBytesWriter_Finish(writer, str); + if (obj == NULL) { + return NULL; + } + + assert(bytes_equal(obj, "abc")); + Py_DECREF(obj); + + Py_RETURN_NONE; + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +/* Test PyBytesWriter_Discard() */ +static PyObject * +test_byteswriter_discard(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(3, &str); + if (writer == NULL) 
{ + return NULL; + } + assert(PyBytesWriter_WriteBytes(writer, &str, "abc", 3) == 0); + + PyBytesWriter_Discard(writer); + Py_RETURN_NONE; +} + + +/* Test PyBytesWriter_WriteBytes() */ +static PyObject * +test_byteswriter_writebytes(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(0, &str); + if (writer == NULL) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(writer, &str, "abc", 3) < 0) { + goto error; + } + if (PyBytesWriter_WriteBytes(writer, &str, "def", 3) < 0) { + goto error; + } + + PyObject *obj = PyBytesWriter_Finish(writer, str); + if (obj == NULL) { + return NULL; + } + + assert(bytes_equal(obj, "abcdef")); + Py_DECREF(obj); + + Py_RETURN_NONE; + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +/* Test PyBytesWriter_Prepare() */ +static PyObject * +test_byteswriter_prepare(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(0, &str); + if (writer == NULL) { + return NULL; + } + + // test error on purpose (negative size) + assert(PyBytesWriter_Prepare(writer, &str, -3) < 0); + assert(PyErr_ExceptionMatches(PyExc_ValueError)); + PyErr_Clear(); + + if (PyBytesWriter_Prepare(writer, &str, 3) < 0) { + PyBytesWriter_Discard(writer); + return NULL; + } + + // Write "abc" + memcpy(str, "abc", 3); + str += 3; + + PyObject *obj = PyBytesWriter_Finish(writer, str); + if (obj == NULL) { + return NULL; + } + + assert(bytes_equal(obj, "abc")); + Py_DECREF(obj); + + Py_RETURN_NONE; +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, + {"test_byteswriter", test_byteswriter, METH_NOARGS}, + {"test_byteswriter_discard", test_byteswriter_discard, METH_NOARGS}, + {"test_byteswriter_writebytes", test_byteswriter_writebytes, METH_NOARGS}, + {"test_byteswriter_prepare", test_byteswriter_prepare, METH_NOARGS}, {NULL}, }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 
459df6ceacf3a8..f9429a2231d328 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -34,6 +34,8 @@ class bytes "PyBytesObject *" "&PyBytes_Type" /* Forward declaration */ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); +static void* _PyBytesWriter_Resize(_PyBytesWriter *writer, + void *str, Py_ssize_t size); #define CHARACTERS _Py_SINGLETON(bytes_characters) @@ -2818,8 +2820,9 @@ _PyBytes_FromList(PyObject *x) if (i >= size) { str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; + if (str == NULL) { + goto error; + } size = writer.allocated; } *str++ = (char) value; @@ -2913,8 +2916,9 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) /* Append the byte */ if (i >= size) { str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; + if (str == NULL) { + goto error; + } size = writer.allocated; } *str++ = (char) value; @@ -3379,12 +3383,45 @@ _PyBytesWriter_Init(_PyBytesWriter *writer) #endif } + +PyBytesWriter* PyBytesWriter_Create(Py_ssize_t size, char **pstr) +{ + _PyBytesWriter *writer = PyMem_Malloc(sizeof(_PyBytesWriter)); + if (writer == NULL) { + PyErr_NoMemory(); + return NULL; + } + _PyBytesWriter_Init(writer); + + char *str = _PyBytesWriter_Alloc(writer, size); + if (str == NULL) { + PyBytesWriter_Discard((PyBytesWriter*)writer); + return NULL; + } + + // Always enable overallocation + writer->overallocate = 1; + + *pstr = str; + return (PyBytesWriter*)writer; +} + + void _PyBytesWriter_Dealloc(_PyBytesWriter *writer) { Py_CLEAR(writer->buffer); } + +void +PyBytesWriter_Discard(PyBytesWriter *writer) +{ + _PyBytesWriter_Dealloc((_PyBytesWriter*)writer); + PyMem_Free(writer); +} + + Py_LOCAL_INLINE(char*) _PyBytesWriter_AsString(_PyBytesWriter *writer) { @@ -3449,26 +3486,37 @@ _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str) } #endif -void* + +/* Resize the buffer to make it larger. 
+ The new buffer may be larger than size bytes because of overallocation. + Return the updated current pointer inside the buffer. + Raise an exception and return NULL on error. + + Note: size must be greater than the number of allocated bytes in the writer. + + This function doesn't use the writer minimum size (min_size attribute). + + See also _PyBytesWriter_Prepare(). +*/ +static void* _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size) { - Py_ssize_t allocated, pos; - assert(_PyBytesWriter_CheckConsistency(writer, str)); assert(writer->allocated < size); - allocated = size; + Py_ssize_t allocated = size; if (writer->overallocate && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) { /* overallocate to limit the number of realloc() */ allocated += allocated / OVERALLOCATE_FACTOR; } - pos = _PyBytesWriter_GetSize(writer, str); + Py_ssize_t pos = _PyBytesWriter_GetSize(writer, str); if (!writer->use_small_buffer) { if (writer->use_bytearray) { - if (PyByteArray_Resize(writer->buffer, allocated)) - goto error; + if (PyByteArray_Resize(writer->buffer, allocated)) { + return NULL; + } /* writer->allocated can be smaller than writer->buffer->ob_alloc, but we cannot use ob_alloc because bytes may need to be moved to use the whole buffer. bytearray uses an internal optimization @@ -3476,8 +3524,9 @@ _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size) beginning (ex: del bytearray[:1]). 
*/ } else { - if (_PyBytes_Resize(&writer->buffer, allocated)) - goto error; + if (_PyBytes_Resize(&writer->buffer, allocated)) { + return NULL; + } } } else { @@ -3488,8 +3537,9 @@ _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size) writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated); else writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); - if (writer->buffer == NULL) - goto error; + if (writer->buffer == NULL) { + return NULL; + } if (pos != 0) { char *dest; @@ -3513,17 +3563,11 @@ _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size) str = _PyBytesWriter_AsString(writer) + pos; assert(_PyBytesWriter_CheckConsistency(writer, str)); return str; - -error: - _PyBytesWriter_Dealloc(writer); - return NULL; } void* _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) { - Py_ssize_t new_min_size; - assert(_PyBytesWriter_CheckConsistency(writer, str)); assert(size >= 0); @@ -3534,18 +3578,39 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) if (writer->min_size > PY_SSIZE_T_MAX - size) { PyErr_NoMemory(); - _PyBytesWriter_Dealloc(writer); return NULL; } - new_min_size = writer->min_size + size; + Py_ssize_t new_min_size = writer->min_size + size; - if (new_min_size > writer->allocated) + if (new_min_size > writer->allocated) { str = _PyBytesWriter_Resize(writer, str, new_min_size); + if (str == NULL) { + return NULL; + } + } writer->min_size = new_min_size; return str; } + +int +PyBytesWriter_Prepare(PyBytesWriter *writer, char **str, Py_ssize_t size) +{ + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "size must be positive"); + return -1; + } + + char *str2 = _PyBytesWriter_Prepare((_PyBytesWriter*)writer, *str, size); + if (str2 == NULL) { + return -1; + } + *str = str2; + return 0; +} + + /* Allocate the buffer to write size bytes. Return the pointer to the beginning of buffer data. Raise an exception and return NULL on error. 
*/ @@ -3623,6 +3688,16 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) return result; } + +PyObject * +PyBytesWriter_Finish(PyBytesWriter *writer, char *str) +{ + PyObject *res = _PyBytesWriter_Finish((_PyBytesWriter*)writer, str); + PyMem_Free(writer); + return res; +} + + void* _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, const void *bytes, Py_ssize_t size) @@ -3640,6 +3715,21 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, } +int +PyBytesWriter_WriteBytes(PyBytesWriter *writer, char **str, + const void *bytes, Py_ssize_t size) +{ + char *str2 = _PyBytesWriter_WriteBytes((_PyBytesWriter *)writer, *str, + bytes, size); + if (str2 == NULL) { + return -1; + } + + *str = str2; + return 0; +} + + void _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, const char* src, Py_ssize_t len_src)