Skip to content

Commit 0ad1a81

Browse files
committed
Do not escape a unicode sequence when escaping JSON text.
Previously, any backslash in text being escaped for JSON was doubled so that the result was still valid JSON. However, this led to some perverse results in the case of Unicode sequences. These are now detected and the initial backslash is no longer escaped. All other backslashes are still escaped. No validity check is performed; all that is looked for is \uXXXX where X is a hexadecimal digit. This is a change from the 9.2 and 9.3 behaviour as noted in the Release notes. Per complaint from Teodor Sigaev.
1 parent f30015b commit 0ad1a81

File tree

7 files changed

+77
-7
lines changed

7 files changed

+77
-7
lines changed

doc/src/sgml/release-9.4.sgml

+15
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,21 @@
180180
</para>
181181
</listitem>
182182

183+
<listitem>
184+
<para>
185+
Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link>
186+
text values are no longer rendered with the backslash escaped.
187+
(Andrew Dunstan)
188+
</para>
189+
190+
<para>
191+
Previously all backslashes in text values being formed into JSON were
192+
escaped. Now a backslash followed by "u" and four hexadecimal digits is
193+
not escaped, as this is a legal sequence in a JSON string value, and
194+
escaping the backslash led to some perverse results.
195+
</para>
196+
</listitem>
197+
183198
<listitem>
184199
<para>
185200
Rename <link linkend="SQL-EXPLAIN"><command>EXPLAIN

src/backend/utils/adt/json.c

+20-1
Original file line numberDiff line numberDiff line change
@@ -2315,7 +2315,26 @@ escape_json(StringInfo buf, const char *str)
23152315
appendStringInfoString(buf, "\\\"");
23162316
break;
23172317
case '\\':
2318-
appendStringInfoString(buf, "\\\\");
2318+
/*
2319+
* Unicode escapes are passed through as is. There is no
2320+
* requirement that they denote a valid character in the
2321+
* server encoding - indeed that is a big part of their
2322+
* usefulness.
2323+
*
2324+
* All we require is that they consist of \uXXXX where
2325+
* the Xs are hexadecimal digits. It is the responsibility
2326+
* of the caller of, say, to_json() to make sure that the
2327+
* unicode escape is valid.
2328+
*
2329+
* In the case of a jsonb string value being escaped, the
2330+
* only unicode escape that should be present is \u0000,
2331+
* all the other unicode escapes will have been resolved.
2332+
*/
2333+
if (p[1] == 'u' && isxdigit(p[2]) && isxdigit(p[3])
2334+
&& isxdigit(p[4]) && isxdigit(p[5]))
2335+
appendStringInfoCharMacro(buf, *p);
2336+
else
2337+
appendStringInfoString(buf, "\\\\");
23192338
break;
23202339
default:
23212340
if ((unsigned char) *p < ' ')

src/test/regress/expected/json.out

+14
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
426426
(1 row)
427427

428428
COMMIT;
429+
-- unicode escape - backslash is not escaped
430+
select to_json(text '\uabcd');
431+
to_json
432+
----------
433+
"\uabcd"
434+
(1 row)
435+
436+
-- any other backslash is escaped
437+
select to_json(text '\abcd');
438+
to_json
439+
----------
440+
"\\abcd"
441+
(1 row)
442+
429443
--json_agg
430444
SELECT json_agg(q)
431445
FROM ( SELECT $$a$$ || x AS b, y AS c,

src/test/regress/expected/json_1.out

+14
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
426426
(1 row)
427427

428428
COMMIT;
429+
-- unicode escape - backslash is not escaped
430+
select to_json(text '\uabcd');
431+
to_json
432+
----------
433+
"\uabcd"
434+
(1 row)
435+
436+
-- any other backslash is escaped
437+
select to_json(text '\abcd');
438+
to_json
439+
----------
440+
"\\abcd"
441+
(1 row)
442+
429443
--json_agg
430444
SELECT json_agg(q)
431445
FROM ( SELECT $$a$$ || x AS b, y AS c,

src/test/regress/expected/jsonb.out

+3-3
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ LINE 1: SELECT '"\u000g"'::jsonb;
6161
DETAIL: "\u" must be followed by four hexadecimal digits.
6262
CONTEXT: JSON data, line 1: "\u000g...
6363
SELECT '"\u0000"'::jsonb; -- OK, legal escape
64-
jsonb
65-
-----------
66-
"\\u0000"
64+
jsonb
65+
----------
66+
"\u0000"
6767
(1 row)
6868

6969
-- use octet_length here so we don't get an odd unicode char in the

src/test/regress/expected/jsonb_1.out

+3-3
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ LINE 1: SELECT '"\u000g"'::jsonb;
6161
DETAIL: "\u" must be followed by four hexadecimal digits.
6262
CONTEXT: JSON data, line 1: "\u000g...
6363
SELECT '"\u0000"'::jsonb; -- OK, legal escape
64-
jsonb
65-
-----------
66-
"\\u0000"
64+
jsonb
65+
----------
66+
"\u0000"
6767
(1 row)
6868

6969
-- use octet_length here so we don't get an odd unicode char in the

src/test/regress/sql/json.sql

+8
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,14 @@ SET LOCAL TIME ZONE -8;
111111
select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
112112
COMMIT;
113113

114+
-- unicode escape - backslash is not escaped
115+
116+
select to_json(text '\uabcd');
117+
118+
-- any other backslash is escaped
119+
120+
select to_json(text '\abcd');
121+
114122
--json_agg
115123

116124
SELECT json_agg(q)

0 commit comments

Comments
 (0)