Skip to content

Commit e0e6829

Browse files
committed
hstore: Tighten key/value parsing check for whitespace
isspace() can be locale-sensitive depending on the platform, causing hstore to treat as whitespace some characters that it should not. For example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would have its 0x85 byte treated as whitespace and discarded from the given input. This problem is similar to 9ae2661, though it was missed at the time that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace(). This problem has existed for a long time, so backpatch all the way down.

Author: Evan Jones
Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com
Backpatch-through: 11
1 parent c6043fc commit e0e6829

File tree

5 files changed

+69
-5
lines changed

5 files changed

+69
-5
lines changed

contrib/hstore/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ PGFILEDESC = "hstore - key/value pair data type"
2222

2323
HEADERS = hstore.h
2424

25-
REGRESS = hstore
25+
REGRESS = hstore hstore_utf8
2626

2727
ifdef USE_PGXS
2828
PG_CONFIG = pg_config
contrib/hstore/expected/hstore_utf8.out

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding,
3+
* because other encodings don't support all the characters used.
4+
*/
5+
SELECT getdatabaseencoding() <> 'UTF8'
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit
9+
\endif
10+
SET client_encoding = utf8;
11+
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
12+
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
13+
SELECT E'key\u0105=>value\u0105'::hstore;
14+
hstore
15+
------------------
16+
"keyą"=>"valueą"
17+
(1 row)
18+
19+
SELECT 'keyą=>valueą'::hstore;
20+
hstore
21+
------------------
22+
"keyą"=>"valueą"
23+
(1 row)
24+
25+
SELECT 'ą=>ą'::hstore;
26+
hstore
27+
----------
28+
"ą"=>"ą"
29+
(1 row)
30+
31+
SELECT 'keyąfoo=>valueą'::hstore;
32+
hstore
33+
---------------------
34+
"keyąfoo"=>"valueą"
35+
(1 row)
36+
contrib/hstore/expected/hstore_utf8_1.out

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding,
3+
* because other encodings don't support all the characters used.
4+
*/
5+
SELECT getdatabaseencoding() <> 'UTF8'
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit

contrib/hstore/hstore_io.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "hstore.h"
1313
#include "lib/stringinfo.h"
1414
#include "libpq/pqformat.h"
15+
#include "parser/scansup.h"
1516
#include "utils/builtins.h"
1617
#include "utils/json.h"
1718
#include "utils/jsonb.h"
@@ -88,7 +89,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
8889
{
8990
st = GV_WAITESCIN;
9091
}
91-
else if (!isspace((unsigned char) *(state->ptr)))
92+
else if (!scanner_isspace((unsigned char) *(state->ptr)))
9293
{
9394
*(state->cur) = *(state->ptr);
9495
state->cur++;
@@ -111,7 +112,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
111112
state->ptr--;
112113
return true;
113114
}
114-
else if (isspace((unsigned char) *(state->ptr)))
115+
else if (scanner_isspace((unsigned char) *(state->ptr)))
115116
{
116117
return true;
117118
}
@@ -219,7 +220,7 @@ parse_hstore(HSParser *state)
219220
{
220221
elog(ERROR, "Unexpected end of string");
221222
}
222-
else if (!isspace((unsigned char) *(state->ptr)))
223+
else if (!scanner_isspace((unsigned char) *(state->ptr)))
223224
{
224225
elog(ERROR, "Syntax error near \"%.*s\" at position %d",
225226
pg_mblen(state->ptr), state->ptr,
@@ -271,7 +272,7 @@ parse_hstore(HSParser *state)
271272
{
272273
return;
273274
}
274-
else if (!isspace((unsigned char) *(state->ptr)))
275+
else if (!scanner_isspace((unsigned char) *(state->ptr)))
275276
{
276277
elog(ERROR, "Syntax error near \"%.*s\" at position %d",
277278
pg_mblen(state->ptr), state->ptr,

contrib/hstore/sql/hstore_utf8.sql

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* This test must be run in a database with UTF-8 encoding,
3+
* because other encodings don't support all the characters used.
4+
*/
5+
6+
SELECT getdatabaseencoding() <> 'UTF8'
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit
10+
\endif
11+
12+
SET client_encoding = utf8;
13+
14+
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
15+
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
16+
SELECT E'key\u0105=>value\u0105'::hstore;
17+
SELECT 'keyą=>valueą'::hstore;
18+
SELECT 'ą=>ą'::hstore;
19+
SELECT 'keyąfoo=>valueą'::hstore;

0 commit comments

Comments
 (0)