From 9ea12ad89fab75c6c6be66cd778a99b780caf575 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 10 May 2025 16:26:25 +0200 Subject: [PATCH 1/9] Use private unicode writer for json --- Modules/_json.c | 87 +++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 50 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..f714b3a331a285 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -93,11 +93,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); static int -encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache); +encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache); static int -encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache); +encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache); static int -encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache); +encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache); static PyObject * _encoded_const(PyObject *obj); static void @@ -360,13 +360,6 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } -static inline int -_PyUnicodeWriter_IsEmpty(PyUnicodeWriter *writer_pub) -{ - _PyUnicodeWriter *writer = (_PyUnicodeWriter*)writer_pub; - return (writer->pos == 0); -} - static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) { @@ -385,10 +378,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next const void *buf; int kind; - PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); - if (writer == NULL) { - goto bail; - } + _PyUnicodeWriter writer; + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; len = PyUnicode_GET_LENGTH(pystr); buf = PyUnicode_DATA(pystr); @@ -419,12 +411,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (c == '"') { // Fast path for simple case. - if (_PyUnicodeWriter_IsEmpty(writer)) { + if (writer.buffer == NULL) { PyObject *ret = PyUnicode_Substring(pystr, end, next); if (ret == NULL) { goto bail; } - PyUnicodeWriter_Discard(writer); *next_end_ptr = next + 1;; return ret; } @@ -436,7 +427,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next /* Pick up this chunk if it's not zero length */ if (next != end) { - if (PyUnicodeWriter_WriteSubstring(writer, pystr, end, next) < 0) { + if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) { goto bail; } } @@ -527,18 +518,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next end -= 6; } } - if (PyUnicodeWriter_WriteChar(writer, c) < 0) { + if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) { goto bail; } } - rval = PyUnicodeWriter_Finish(writer); + rval = _PyUnicodeWriter_Finish(&writer); *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; - PyUnicodeWriter_Discard(writer); + _PyUnicodeWriter_Dealloc(&writer); return NULL; } @@ -1356,33 +1347,33 @@ encoder_call(PyObject *op, PyObject *args, PyObject *kwds) static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; Py_ssize_t indent_level; + _PyUnicodeWriter writer; PyEncoderObject *self = PyEncoderObject_CAST(op); if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, &obj, &indent_level)) return NULL; - PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); - if (writer == NULL) { - return NULL; - } + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + PyObject *indent_cache = NULL; if (self->indent != Py_None) { indent_cache = create_indent_cache(self, indent_level); if (indent_cache == NULL) { - PyUnicodeWriter_Discard(writer); + _PyUnicodeWriter_Dealloc(&writer); return NULL; } } - if (encoder_listencode_obj(self, writer, obj, indent_level, indent_cache)) { - PyUnicodeWriter_Discard(writer); + if (encoder_listencode_obj(self, &writer, obj, indent_level, indent_cache)) { + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(indent_cache); return NULL; } Py_XDECREF(indent_cache); - PyObject *str = PyUnicodeWriter_Finish(writer); + PyObject *str = _PyUnicodeWriter_Finish(&writer); if (str == NULL) { return NULL; } @@ -1458,16 +1449,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = PyUnicodeWriter_WriteStr(writer, stolen); + int rval = _PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache) { @@ -1476,13 +1467,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return PyUnicodeWriter_WriteUTF8(writer, "null", 4); + return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); } else if (obj == Py_True) { - return PyUnicodeWriter_WriteUTF8(writer, "true", 4); + return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); } else if (obj == Py_False) { - return PyUnicodeWriter_WriteUTF8(writer, "false", 5); + return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1491,10 +1482,6 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { - if (PyLong_CheckExact(obj)) { - // Fast-path for exact integers - return PyUnicodeWriter_WriteRepr(writer, obj); - } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; @@ -1571,7 +1558,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, Py_ssize_t indent_level, PyObject *indent_cache, PyObject *item_separator) @@ -1611,7 +1598,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs *first = false; } else { - if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1626,7 +1613,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) { @@ -1637,7 +1624,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs } static int -encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache) { @@ -1649,7 +1636,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ - return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); } if (s->markers != Py_None) { @@ -1668,7 +1655,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, } } - if (PyUnicodeWriter_WriteChar(writer, '{')) { + if (_PyUnicodeWriter_WriteChar(writer, '{')) { goto bail; } @@ -1727,7 +1714,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, } } - if (PyUnicodeWriter_WriteChar(writer, '}')) { + if (_PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; } return 0; @@ -1739,7 +1726,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache) { @@ -1753,7 +1740,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); + return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); } if (s->markers != Py_None) { @@ -1772,7 +1759,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, } } - if (PyUnicodeWriter_WriteChar(writer, '[')) { + if (_PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; } @@ -1789,7 +1776,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { @@ -1810,7 +1797,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, } } - if (PyUnicodeWriter_WriteChar(writer, ']')) { + if (_PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; } Py_DECREF(s_fast); From 46df04fe87c85d83f13bc4d72c4a40975d62035f Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 10 May 2025 17:00:51 +0200 Subject: [PATCH 2/9] Part 2 --- Modules/_json.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index f714b3a331a285..aafdb738640cce 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1332,11 +1332,11 @@ get_item_separator(PyEncoderObject *s, } static int -write_newline_indent(PyUnicodeWriter *writer, +write_newline_indent(_PyUnicodeWriter *writer, Py_ssize_t indent_level, PyObject *indent_cache) { PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2); - return PyUnicodeWriter_WriteStr(writer, newline_indent); + return _PyUnicodeWriter_WriteStr(writer, newline_indent); } From ab1aa4256faa93675c28dc79f64d43830f7056a7 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 10 May 2025 18:05:42 +0200 Subject: [PATCH 3/9] Restore fast path for integers --- Modules/_json.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/_json.c b/Modules/_json.c index aafdb738640cce..0fdfcec9dc5e43 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1482,6 +1482,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { + if (PyLong_CheckExact(obj)) { + return _PyLong_FormatWriter(writer, obj, 10, 0); + } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; From d18c455e12f7bcc83210d20b63accab90583e354 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 10 May 2025 18:07:12 +0200 Subject: [PATCH 4/9] Reduce diff --- Modules/_json.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_json.c b/Modules/_json.c index 0fdfcec9dc5e43..907b2442c64394 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1483,6 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } else if (PyLong_Check(obj)) { if (PyLong_CheckExact(obj)) { + // Fast-path for exact integers return _PyLong_FormatWriter(writer, obj, 10, 0); } PyObject *encoded = PyLong_Type.tp_repr(obj); From 51c760f9507422edd317c67301781ba98d1a2ca9 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 10 May 2025 18:11:42 +0200 Subject: [PATCH 5/9] Include necessary headers --- Modules/_json.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_json.c b/Modules/_json.c index 907b2442c64394..21eb10076361ed 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -11,6 +11,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "pycore_global_strings.h" // _Py_ID() +#include "pycore_long.h" // _PyLong_FormatWriter() #include "pycore_pyerrors.h" // _PyErr_FormatNote #include "pycore_runtime.h" // _PyRuntime #include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency() From 72ae3d000cf69ccda477aa738f2ec0c67da4c8c1 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 10 May 2025 18:56:10 +0200 Subject: [PATCH 6/9] Use PyUnicodeWriter_WriteRepr --- Modules/_json.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 21eb10076361ed..49be842f3835f6 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -11,7 +11,6 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "pycore_global_strings.h" // _Py_ID() -#include "pycore_long.h" // _PyLong_FormatWriter() #include "pycore_pyerrors.h" // _PyErr_FormatNote #include "pycore_runtime.h" // _PyRuntime #include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency() @@ -1485,7 +1484,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, else if (PyLong_Check(obj)) { if (PyLong_CheckExact(obj)) { // Fast-path for exact integers - return _PyLong_FormatWriter(writer, obj, 10, 0); + return PyUnicodeWriter_WriteRepr((PyUnicodeWriter*)writer, obj); } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) From 2a6ec43104776cb142a4acaf8301e534aece6249 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Tue, 13 May 2025 09:02:13 +0200 Subject: [PATCH 7/9] Reduce diff --- Modules/_json.c | 89 +++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 49be842f3835f6..5efbb104bd20ee 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -93,11 +93,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache); +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache); static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache); +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache); static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache); +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache); static PyObject * _encoded_const(PyObject *obj); static void @@ -360,6 +360,13 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } +static inline int +_PyUnicodeWriter_IsEmpty(PyUnicodeWriter *writer_pub) +{ + _PyUnicodeWriter *writer = (_PyUnicodeWriter*)writer_pub; + return (writer->pos == 0); +} + static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) { @@ -378,9 +385,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next const void *buf; int kind; - _PyUnicodeWriter writer; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + goto bail; + } len = PyUnicode_GET_LENGTH(pystr); buf = PyUnicode_DATA(pystr); @@ -411,11 +419,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (c == '"') { // Fast path for simple case. - if (writer.buffer == NULL) { + if (_PyUnicodeWriter_IsEmpty(writer)) { PyObject *ret = PyUnicode_Substring(pystr, end, next); if (ret == NULL) { goto bail; } + PyUnicodeWriter_Discard(writer); *next_end_ptr = next + 1;; return ret; } @@ -427,7 +436,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next /* Pick up this chunk if it's not zero length */ if (next != end) { - if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) { + if (PyUnicodeWriter_WriteSubstring(writer, pystr, end, next) < 0) { goto bail; } } @@ -518,18 +527,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next end -= 6; } } - if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) { + if (PyUnicodeWriter_WriteChar(writer, c) < 0) { goto bail; } } - rval = _PyUnicodeWriter_Finish(&writer); + rval = PyUnicodeWriter_Finish(writer); *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } @@ -1332,11 +1341,11 @@ get_item_separator(PyEncoderObject *s, } static int -write_newline_indent(_PyUnicodeWriter *writer, +write_newline_indent(PyUnicodeWriter *writer, Py_ssize_t indent_level, PyObject *indent_cache) { PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2); - return _PyUnicodeWriter_WriteStr(writer, newline_indent); + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, newline_indent); } @@ -1347,33 +1356,33 @@ encoder_call(PyObject *op, PyObject *args, PyObject *kwds) static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; Py_ssize_t indent_level; - _PyUnicodeWriter writer; PyEncoderObject *self = PyEncoderObject_CAST(op); if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, &obj, &indent_level)) return NULL; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } PyObject *indent_cache = NULL; if (self->indent != Py_None) { indent_cache = create_indent_cache(self, indent_level); if (indent_cache == NULL) { - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } } - if (encoder_listencode_obj(self, &writer, obj, indent_level, indent_cache)) { - _PyUnicodeWriter_Dealloc(&writer); + if (encoder_listencode_obj(self, writer, obj, indent_level, indent_cache)) { + PyUnicodeWriter_Discard(writer); Py_XDECREF(indent_cache); return NULL; } Py_XDECREF(indent_cache); - PyObject *str = _PyUnicodeWriter_Finish(&writer); + PyObject *str = PyUnicodeWriter_Finish(writer); if (str == NULL) { return NULL; } @@ -1449,16 +1458,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr(writer, stolen); + int rval = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache) { @@ -1467,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); + return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)writer, "null", 4); } else if (obj == Py_True) { - return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); + return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)writer, "true", 4); } else if (obj == Py_False) { - return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); + return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1484,7 +1493,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, else if (PyLong_Check(obj)) { if (PyLong_CheckExact(obj)) { // Fast-path for exact integers - return PyUnicodeWriter_WriteRepr((PyUnicodeWriter*)writer, obj); + return PyUnicodeWriter_WriteRepr(writer, obj); } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) @@ -1562,7 +1571,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, Py_ssize_t indent_level, PyObject *indent_cache, PyObject *item_separator) @@ -1602,7 +1611,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir *first = false; } else { - if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1617,7 +1626,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) { @@ -1628,7 +1637,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir } static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache) { @@ -1640,7 +1649,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)writer, "{}", 2); } if (s->markers != Py_None) { @@ -1659,7 +1668,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '{')) { + if (PyUnicodeWriter_WriteChar(writer, '{')) { goto bail; } @@ -1718,7 +1727,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '}')) { + if (PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; } return 0; @@ -1730,7 +1739,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache) { @@ -1744,7 +1753,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); + return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)writer, "[]", 2); } if (s->markers != Py_None) { @@ -1763,7 +1772,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '[')) { + if (PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; } @@ -1780,7 +1789,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { @@ -1801,7 +1810,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, ']')) { + if (PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; } Py_DECREF(s_fast); From 822ea8626036a4572387819b3abf921a49750852 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Tue, 13 May 2025 16:04:30 +0200 Subject: [PATCH 8/9] Reduce diff --- Modules/_json.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 5efbb104bd20ee..6b1abbcb38bc29 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1345,7 +1345,7 @@ write_newline_indent(PyUnicodeWriter *writer, Py_ssize_t indent_level, PyObject *indent_cache) { PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2); - return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, newline_indent); + return PyUnicodeWriter_WriteStr(writer, newline_indent); } @@ -1461,7 +1461,7 @@ static int _steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, stolen); + int rval = PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } @@ -1611,7 +1611,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs *first = false; } else { - if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, item_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1626,7 +1626,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, s->key_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) { @@ -1789,7 +1789,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { From 01c45a95ba4422961afe597e7f0fd87e206530ff Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Thu, 15 May 2025 10:44:01 +0200 Subject: [PATCH 9/9] Avoid heap allocation --- Modules/_json.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 6b1abbcb38bc29..074115a355a1f4 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -360,13 +360,6 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } -static inline int -_PyUnicodeWriter_IsEmpty(PyUnicodeWriter *writer_pub) -{ - _PyUnicodeWriter *writer = (_PyUnicodeWriter*)writer_pub; - return (writer->pos == 0); -} - static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) { @@ -385,10 +378,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next const void *buf; int kind; - PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); - if (writer == NULL) { - goto bail; - } + PyUnicodeWriter *writer = NULL; len = PyUnicode_GET_LENGTH(pystr); buf = PyUnicode_DATA(pystr); @@ -419,12 +409,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (c == '"') { // Fast path for simple case. - if (_PyUnicodeWriter_IsEmpty(writer)) { + if (writer == NULL) { PyObject *ret = PyUnicode_Substring(pystr, end, next); if (ret == NULL) { goto bail; } - PyUnicodeWriter_Discard(writer); *next_end_ptr = next + 1;; return ret; } @@ -432,6 +421,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next else if (c != '\\') { raise_errmsg("Unterminated string starting at", pystr, begin); goto bail; + } else if (writer == NULL) { + writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + goto bail; + } } /* Pick up this chunk if it's not zero length */