Skip to content

Commit 0a8de93

Browse files
committed
Speed up lexing of long JSON strings
Use an optimized linear search when looking ahead for end quotes, backslashes, and non-printable characters. This results in nearly 40% faster JSON parsing on x86-64 when most values are long strings, and all platforms should see some improvement.

Reviewed by Andres Freund and Nathan Bossart.

Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
1 parent 0551912 commit 0a8de93

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

src/common/jsonapi.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "common/jsonapi.h"
2121
#include "mb/pg_wchar.h"
22+
#include "port/pg_lfind.h"
2223

2324
#ifndef FRONTEND
2425
#include "miscadmin.h"
@@ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex)
844845
}
845846
else
846847
{
847-
char *p;
848+
char *p = s;
848849

849850
if (hi_surrogate != -1)
850851
return JSON_UNICODE_LOW_SURROGATE;
@@ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex)
853854
* Skip to the first byte that requires special handling, so we
854855
* can batch calls to appendBinaryStringInfo.
855856
*/
856-
for (p = s; p < end; p++)
857+
while (p < end - sizeof(Vector8) &&
858+
!pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
859+
!pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
860+
!pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
861+
p += sizeof(Vector8);
862+
863+
for (; p < end; p++)
857864
{
858865
if (*p == '\\' || *p == '"')
859866
break;
860-
else if ((unsigned char) *p < 32)
867+
else if ((unsigned char) *p <= 31)
861868
{
862869
/* Per RFC4627, these characters MUST be escaped. */
863870
/*

src/test/regress/expected/json.out

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ LINE 1: SELECT '"\v"'::json;
4242
^
4343
DETAIL: Escape sequence "\v" is invalid.
4444
CONTEXT: JSON data, line 1: "\v...
45+
-- Check fast path for longer strings (at least 16 bytes long)
46+
SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
47+
json
48+
-------------------
49+
"............abc"
50+
(1 row)
51+
52+
SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
53+
json
54+
---------------------
55+
"............abc\n"
56+
(1 row)
57+
4558
-- see json_encoding test for input with unicode escapes
4659
-- Numbers.
4760
SELECT '1'::json; -- OK

src/test/regress/sql/json.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ SELECT '"abc
77
def"'::json; -- ERROR, unescaped newline in string constant
88
SELECT '"\n\"\\"'::json; -- OK, legal escapes
99
SELECT '"\v"'::json; -- ERROR, not a valid JSON escape
10+
11+
-- Check fast path for longer strings (at least 16 bytes long)
12+
SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
13+
SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
14+
1015
-- see json_encoding test for input with unicode escapes
1116

1217
-- Numbers.

0 commit comments

Comments
 (0)