Skip to content

Commit edf1de6

Browse files
committed
hstore: Tighten key/value parsing check for whitespaces
isspace() can be locale-sensitive depending on the platform, causing hstore to treat as whitespace some characters that it should not. For example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would have its 0x85 byte interpreted as whitespace and discarded from the given input. This problem is similar to 9ae2661, though it was missed at the time that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace(). This problem has existed for a long time, so backpatch all the way down. Author: Evan Jones Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com Backpatch-through: 11
1 parent 9920552 commit edf1de6

File tree

5 files changed

+69
-5
lines changed

5 files changed

+69
-5
lines changed

contrib/hstore/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ PGFILEDESC = "hstore - key/value pair data type"
1515

1616
HEADERS = hstore.h
1717

18-
REGRESS = hstore
18+
REGRESS = hstore hstore_utf8
1919

2020
ifdef USE_PGXS
2121
PG_CONFIG = pg_config
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding,
3+
* because other encodings don't support all the characters used.
4+
*/
5+
SELECT getdatabaseencoding() <> 'UTF8'
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit
9+
\endif
10+
SET client_encoding = utf8;
11+
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
12+
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
13+
SELECT E'key\u0105=>value\u0105'::hstore;
14+
hstore
15+
------------------
16+
"keyą"=>"valueą"
17+
(1 row)
18+
19+
SELECT 'keyą=>valueą'::hstore;
20+
hstore
21+
------------------
22+
"keyą"=>"valueą"
23+
(1 row)
24+
25+
SELECT 'ą=>ą'::hstore;
26+
hstore
27+
----------
28+
"ą"=>"ą"
29+
(1 row)
30+
31+
SELECT 'keyąfoo=>valueą'::hstore;
32+
hstore
33+
---------------------
34+
"keyąfoo"=>"valueą"
35+
(1 row)
36+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding,
3+
* because other encodings don't support all the characters used.
4+
*/
5+
SELECT getdatabaseencoding() <> 'UTF8'
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit

contrib/hstore/hstore_io.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "funcapi.h"
1111
#include "lib/stringinfo.h"
1212
#include "libpq/pqformat.h"
13+
#include "parser/scansup.h"
1314
#include "utils/builtins.h"
1415
#include "utils/json.h"
1516
#include "utils/jsonapi.h"
@@ -87,7 +88,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
8788
{
8889
st = GV_WAITESCIN;
8990
}
90-
else if (!isspace((unsigned char) *(state->ptr)))
91+
else if (!scanner_isspace((unsigned char) *(state->ptr)))
9192
{
9293
*(state->cur) = *(state->ptr);
9394
state->cur++;
@@ -110,7 +111,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
110111
state->ptr--;
111112
return true;
112113
}
113-
else if (isspace((unsigned char) *(state->ptr)))
114+
else if (scanner_isspace((unsigned char) *(state->ptr)))
114115
{
115116
return true;
116117
}
@@ -218,7 +219,7 @@ parse_hstore(HSParser *state)
218219
{
219220
elog(ERROR, "Unexpected end of string");
220221
}
221-
else if (!isspace((unsigned char) *(state->ptr)))
222+
else if (!scanner_isspace((unsigned char) *(state->ptr)))
222223
{
223224
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
224225
}
@@ -266,7 +267,7 @@ parse_hstore(HSParser *state)
266267
{
267268
return;
268269
}
269-
else if (!isspace((unsigned char) *(state->ptr)))
270+
else if (!scanner_isspace((unsigned char) *(state->ptr)))
270271
{
271272
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
272273
}

contrib/hstore/sql/hstore_utf8.sql

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding,
3+
* because other encodings don't support all the characters used.
4+
*/
5+
6+
SELECT getdatabaseencoding() <> 'UTF8'
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit
10+
\endif
11+
12+
SET client_encoding = utf8;
13+
14+
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
15+
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
16+
SELECT E'key\u0105=>value\u0105'::hstore;
17+
SELECT 'keyą=>valueą'::hstore;
18+
SELECT 'ą=>ą'::hstore;
19+
SELECT 'keyąfoo=>valueą'::hstore;

0 commit comments

Comments
 (0)