Skip to content

gh-125196: Use PyUnicodeWriter for JSON encoder #125249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 10, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 49 additions & 38 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self);
static int
encoder_clear(PyEncoderObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
static PyObject *
_encoded_const(PyObject *obj);
static void
Expand Down Expand Up @@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
{
/* Python callable interface to encode_listencode_obj */
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
PyObject *obj, *result;
PyObject *obj;
Py_ssize_t indent_level;
_PyUnicodeWriter writer;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
&obj, &indent_level))
&obj, &indent_level))
return NULL;

_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
if (writer == NULL) {
return NULL;
}

PyObject *newline_indent = NULL;
if (self->indent != Py_None) {
newline_indent = _create_newline_indent(self->indent, indent_level);
if (newline_indent == NULL) {
_PyUnicodeWriter_Dealloc(&writer);
PyUnicodeWriter_Discard(writer);
return NULL;
}
}
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
_PyUnicodeWriter_Dealloc(&writer);
if (encoder_listencode_obj(self, writer, obj, newline_indent)) {
PyUnicodeWriter_Discard(writer);
Py_XDECREF(newline_indent);
return NULL;
}
Py_XDECREF(newline_indent);

result = PyTuple_New(1);
if (result == NULL ||
PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
Py_XDECREF(result);
PyObject *str = PyUnicodeWriter_Finish(writer);
if (str == NULL) {
return NULL;
}
PyObject *result = PyTuple_Pack(1, str);
Copy link
Contributor

@eendebakpt eendebakpt Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this new code is cleaner than the old code. But PyTuple_Pack is a bit slow (see #118222), so we should measure whether this change makes an impact. Since this is not in one of the inner loops, I suspect the impact is really small.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment below. It's not faster, it's not slower. The change has no significant effect on performance.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that PyTuple_Pack() is the performance bottleneck of this function.

I chose to use PyTuple_Pack() to avoid creating an incomplete tuple and then fill it. I prefer an atomic API which creates directly valid tuple.

Py_DECREF(str);
return result;
}

Expand Down Expand Up @@ -1370,30 +1371,30 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
}

static int
_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
{
/* Append stolen and then decrement its reference count */
int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
Py_DECREF(stolen);
return rval;
}

static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *obj, PyObject *newline_indent)
{
/* Encode Python object obj to a JSON term */
PyObject *newobj;
int rv;

if (obj == Py_None) {
return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
}
else if (obj == Py_True) {
return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
}
else if (obj == Py_False) {
return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
}
else if (PyUnicode_Check(obj)) {
PyObject *encoded = encoder_encode_string(s, obj);
Expand All @@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
return _steal_accumulate(writer, encoded);
}
else if (PyLong_Check(obj)) {
if (PyLong_CheckExact(obj)) {
// Fast-path for exact integers
return PyUnicodeWriter_WriteRepr(writer, obj);
}
PyObject *encoded = PyLong_Type.tp_repr(obj);
if (encoded == NULL)
return -1;
Expand Down Expand Up @@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
}

static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first,
PyObject *dct, PyObject *key, PyObject *value,
PyObject *newline_indent,
PyObject *item_separator)
Expand Down Expand Up @@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
*first = false;
}
else {
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
Py_DECREF(keystr);
return -1;
}
Expand All @@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
if (_steal_accumulate(writer, encoded) < 0) {
return -1;
}
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
return -1;
}
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
Expand All @@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
}

static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *dct, PyObject *newline_indent)
{
/* Encode Python dict dct a JSON term */
Expand All @@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *new_newline_indent = NULL;
PyObject *separator_indent = NULL;

if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
if (PyDict_GET_SIZE(dct) == 0) {
/* Fast path */
return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
}

if (s->markers != Py_None) {
int has_key;
Expand All @@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
}
}

if (_PyUnicodeWriter_WriteChar(writer, '{'))
if (PyUnicodeWriter_WriteChar(writer, '{')) {
goto bail;
}

PyObject *current_item_separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
Expand All @@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
}
// update item separator with a borrowed reference
current_item_separator = separator_indent;
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}
}
Expand Down Expand Up @@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);

if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}

if (_PyUnicodeWriter_WriteChar(writer, '}'))
if (PyUnicodeWriter_WriteChar(writer, '}')) {
goto bail;
}
return 0;

bail:
Expand All @@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
}

static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *seq, PyObject *newline_indent)
{
PyObject *ident = NULL;
Expand All @@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
return -1;
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Py_DECREF(s_fast);
return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
}

if (s->markers != Py_None) {
Expand All @@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
}
}

if (_PyUnicodeWriter_WriteChar(writer, '['))
if (PyUnicodeWriter_WriteChar(writer, '[')) {
goto bail;
}

PyObject *separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
Expand All @@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
goto bail;
}

if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
goto bail;
}

Expand All @@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) {
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
Expand All @@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
if (s->indent != Py_None) {
Py_CLEAR(new_newline_indent);
Py_CLEAR(separator_indent);
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
goto bail;
}
}

if (_PyUnicodeWriter_WriteChar(writer, ']'))
if (PyUnicodeWriter_WriteChar(writer, ']')) {
goto bail;
}
Py_DECREF(s_fast);
return 0;

Expand Down
Loading