From 8f956aa989893952d40b5c7393254b38a61d0456 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:24:34 +0200 Subject: [PATCH] gh-125196: Use PyUnicodeWriter for JSON encoder Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. --- Modules/_json.c | 87 ++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 9e29de0f22465f..ce0093ab431d05 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyEncoderObject *self); static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); static PyObject * _encoded_const(PyObject *obj); static void @@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj, *result; + PyObject *obj; Py_ssize_t indent_level; - _PyUnicodeWriter writer; if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, - &obj, &indent_level)) + &obj, &indent_level)) return NULL; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } PyObject *newline_indent = NULL; if (self->indent != Py_None) { newline_indent = _create_newline_indent(self->indent, indent_level); if (newline_indent == NULL) { - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } } - if (encoder_listencode_obj(self, &writer, obj, newline_indent)) { - _PyUnicodeWriter_Dealloc(&writer); + if (encoder_listencode_obj(self, writer, obj, newline_indent)) { + PyUnicodeWriter_Discard(writer); Py_XDECREF(newline_indent); return NULL; } Py_XDECREF(newline_indent); - result = PyTuple_New(1); - if (result == NULL || - PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) { - Py_XDECREF(result); + PyObject *str = PyUnicodeWriter_Finish(writer); + if (str == NULL) { return NULL; } + PyObject *result = PyTuple_Pack(1, str); + Py_DECREF(str); return result; } @@ -1370,16 +1371,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr(writer, stolen); + int rval = PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent) { /* Encode Python object obj to a JSON term */ @@ -1387,13 +1388,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); + return PyUnicodeWriter_WriteUTF8(writer, "null", 4); } else if (obj == Py_True) { - return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); + return PyUnicodeWriter_WriteUTF8(writer, "true", 4); } else if (obj == Py_False) { - return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); + return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { + if (PyLong_CheckExact(obj)) { + // Fast-path for exact integers + return PyUnicodeWriter_WriteRepr(writer, obj); + } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; @@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, PyObject *newline_indent, PyObject *item_separator) @@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir *first = false; } else { - if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { @@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir } static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent) { /* Encode Python dict dct a JSON term */ @@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *new_newline_indent = NULL; PyObject *separator_indent = NULL; - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + if (PyDict_GET_SIZE(dct) == 0) { + /* Fast path */ + return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + } if (s->markers != Py_None) { int has_key; @@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '{')) + if (PyUnicodeWriter_WriteChar(writer, '{')) { goto bail; + } PyObject *current_item_separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } // update item separator with a borrowed reference current_item_separator = separator_indent; - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } } @@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, '}')) + if (PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; + } return 0; bail: @@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent) { PyObject *ident = NULL; @@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); + return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); } if (s->markers != Py_None) { @@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '[')) + if (PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; + } PyObject *separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, goto bail; } - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } @@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { @@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->indent != Py_None) { Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, ']')) + if (PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; + } Py_DECREF(s_fast); return 0;