Skip to content

Commit 5c8fcf9

Browse files
committed
use tmp buffer
1 parent d026be3 commit 5c8fcf9

File tree

1 file changed

+97
-23
lines changed

1 file changed

+97
-23
lines changed

Modules/_json.c

+97-23
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,10 @@ typedef struct _PyEncoderObject {
4848
PyObject *indent;
4949
PyObject *key_separator;
5050
PyObject *item_separator;
51+
int (*fast_encode)(PyUnicodeWriter *, PyObject *);
5152
bool sort_keys;
5253
bool skipkeys;
5354
bool allow_nan;
54-
bool fast_encode;
55-
bool ensure_ascii; /* used only when fast_encode == true */
5655
} PyEncoderObject;
5756

5857
#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op))
@@ -304,18 +303,20 @@ escape_unicode(PyObject *pystr)
304303
return rval;
305304
}
306305

307-
// Take a PyUnicode pystr and write an escaped string to writer.
306+
#define ESCAPE_BUF_SIZE 200
307+
308+
// Take a PyUnicode pystr and write an escaped string to writer (ensure_ascii variant).
308309
static int
309-
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
310+
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
310311
{
311312
Py_ssize_t i;
312313
Py_ssize_t input_chars;
313-
Py_ssize_t chars;
314-
Py_ssize_t copy_len = 0;
314+
Py_ssize_t buf_len;
315315
const void *input;
316+
Py_UCS4 c = 0;
316317
int kind;
317318
int ret;
318-
unsigned char buf[12];
319+
char buf[ESCAPE_BUF_SIZE]; // avoid overhead of PyUnicodeWriter APIs
319320

320321
input_chars = PyUnicode_GET_LENGTH(pystr);
321322
input = PyUnicode_DATA(pystr);
@@ -324,27 +325,102 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
324325
ret = PyUnicodeWriter_WriteChar(writer, '"');
325326
if (ret) return ret;
326327

328+
// Fast path for strings that don't need escaping at all, e.g. "id", "name"
327329
for (i = 0; i < input_chars; i++) {
330+
c = PyUnicode_READ(kind, input, i);
331+
if (!S_CHAR(c)) {
332+
break;
333+
}
334+
}
335+
if (i > 0) {
336+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i);
337+
if (ret) return ret;
338+
}
339+
if (i == input_chars) {
340+
return PyUnicodeWriter_WriteChar(writer, '"');
341+
}
342+
343+
buf_len = ascii_escape_unichar(c, (unsigned char*)buf, 0);
344+
345+
for (i++ ; i < input_chars; i++) {
328346
Py_UCS4 c = PyUnicode_READ(kind, input, i);
329-
if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) {
330-
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
331-
if (ret) return ret;
332-
copy_len = 0;
347+
if (S_CHAR(c)) {
348+
buf[buf_len++] = c;
349+
}
350+
else {
351+
buf_len = ascii_escape_unichar(c, (unsigned char*)buf, buf_len);
352+
}
333353

334-
chars = ascii_escape_unichar(c, buf, 0);
335-
ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
354+
if (buf_len + 12 > ESCAPE_BUF_SIZE) {
355+
ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
336356
if (ret) return ret;
357+
buf_len = 0;
358+
}
359+
}
360+
361+
assert(buf_len < ESCAPE_BUF_SIZE);
362+
buf[buf_len++] = '"';
363+
return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
364+
}
365+
366+
static int
367+
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
368+
{
369+
Py_ssize_t i;
370+
Py_ssize_t input_size;
371+
Py_ssize_t buf_len;
372+
const unsigned char *input;
373+
int ret;
374+
unsigned char c;
375+
char buf[ESCAPE_BUF_SIZE];
376+
377+
// We don't need to escape non-ASCII chars.
378+
// So we just copy UTF-8 from pystr to buf.
379+
input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size);
380+
381+
ret = PyUnicodeWriter_WriteChar(writer, '"');
382+
if (ret) return ret;
383+
384+
// Fast path for strings that don't need escaping at all, e.g. "id", "name"
385+
for (i = 0; i < input_size; i++) {
386+
c = input[i];
387+
if (c <= 0x1f || c == '\\' || c == '"') {
388+
break;
389+
}
390+
}
391+
if (i > 0) {
392+
ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i);
393+
if (ret) return ret;
394+
}
395+
if (i == input_size) {
396+
return PyUnicodeWriter_WriteChar(writer, '"');
397+
}
398+
399+
buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0);
400+
401+
for (i++; i < input_size; i++) {
402+
c = input[i];
403+
if (c <= 0x1f || c == '\\' || c == '"') {
404+
buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len);
337405
}
338406
else {
339-
copy_len++;
407+
buf[buf_len++] = c;
408+
}
409+
410+
if (buf_len + 6 > ESCAPE_BUF_SIZE) {
411+
ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
412+
if (ret) return ret;
413+
buf_len = 0;
340414
}
341415
}
342416

343-
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
344-
if (ret) return ret;
345-
return PyUnicodeWriter_WriteChar(writer, '"');
417+
assert(buf_len < ESCAPE_BUF_SIZE);
418+
buf[buf_len++] = '"';
419+
return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
346420
}
347421

422+
#undef ESCAPE_BUF_SIZE
423+
348424
static void
349425
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
350426
{
@@ -1293,17 +1369,15 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12931369
s->sort_keys = sort_keys;
12941370
s->skipkeys = skipkeys;
12951371
s->allow_nan = allow_nan;
1296-
s->fast_encode = false;
1297-
s->ensure_ascii = false;
1372+
s->fast_encode = NULL;
12981373

12991374
if (PyCFunction_Check(s->encoder)) {
13001375
PyCFunction f = PyCFunction_GetFunction(s->encoder);
13011376
if (f == py_encode_basestring_ascii){
1302-
s->fast_encode = true;
1303-
s->ensure_ascii = true;
1377+
s->fast_encode = write_escaped_ascii;
13041378
}
13051379
else if (f == py_encode_basestring) {
1306-
s->fast_encode = true;
1380+
s->fast_encode = write_escaped_unicode;
13071381
}
13081382
}
13091383

@@ -1497,7 +1571,7 @@ static int
14971571
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
14981572
{
14991573
if (s->fast_encode) {
1500-
return write_escaped_unicode(writer, obj, s->ensure_ascii);
1574+
return s->fast_encode(writer, obj);
15011575
}
15021576

15031577
/* Return the JSON representation of a string */

0 commit comments

Comments
 (0)