Skip to content

Commit 971a158

Browse files
committed
Tighten checks for whitespace in functions that parse identifiers etc.
This patch replaces isspace() calls with scanner_isspace() in functions that are likely to be presented with non-ASCII input. isspace() has the small advantage that it will correctly recognize no-break space in single-byte encodings (such as LATIN1); but it cannot work successfully for any multibyte character, and depending on platform it might return false positive results for some fragments of multibyte characters. That's disastrous for functions that are trying to discard whitespace between valid strings, as noted in bug #14662 from Justin Muise. Even treating no-break space as whitespace is pretty questionable for the usages touched here, because the core scanner would think it is an identifier character.

Affected functions are parse_ident(), parseNameAndArgTypes (underlying regprocedurein() and siblings), SplitIdentifierString (used for parsing GUCs and options that are qualified names or lists of names), and SplitDirectoriesString (used for parsing GUCs that are lists of directories).

All the functions adjusted here are parsing SQL identifiers and similar constructs, so it's reasonable to insist that their definition of whitespace match the core scanner. So we can hope that this won't cause many backwards-compatibility problems. I've left alone isspace() calls in places that aren't really expecting any non-ASCII input characters, such as float8in().

Back-patch to all supported branches.

Discussion: https://postgr.es/m/10129.1495302480@sss.pgh.pa.us
1 parent beaa3b1 commit 971a158

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

src/backend/utils/adt/regproc.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "lib/stringinfo.h"
3636
#include "miscadmin.h"
3737
#include "parser/parse_type.h"
38+
#include "parser/scansup.h"
3839
#include "utils/builtins.h"
3940
#include "utils/fmgroids.h"
4041
#include "utils/lsyscache.h"
@@ -1436,7 +1437,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
14361437
ptr2 = ptr + strlen(ptr);
14371438
while (--ptr2 > ptr)
14381439
{
1439-
if (!isspace((unsigned char) *ptr2))
1440+
if (!scanner_isspace(*ptr2))
14401441
break;
14411442
}
14421443
if (*ptr2 != ')')
@@ -1453,7 +1454,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
14531454
for (;;)
14541455
{
14551456
/* allow leading whitespace */
1456-
while (isspace((unsigned char) *ptr))
1457+
while (scanner_isspace(*ptr))
14571458
ptr++;
14581459
if (*ptr == '\0')
14591460
{
@@ -1509,7 +1510,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
15091510
/* Lop off trailing whitespace */
15101511
while (--ptr2 >= typename)
15111512
{
1512-
if (!isspace((unsigned char) *ptr2))
1513+
if (!scanner_isspace(*ptr2))
15131514
break;
15141515
*ptr2 = '\0';
15151516
}

src/backend/utils/adt/varlena.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2362,7 +2362,7 @@ SplitIdentifierString(char *rawstring, char separator,
23622362

23632363
*namelist = NIL;
23642364

2365-
while (isspace((unsigned char) *nextp))
2365+
while (scanner_isspace(*nextp))
23662366
nextp++; /* skip leading whitespace */
23672367

23682368
if (*nextp == '\0')
@@ -2400,7 +2400,7 @@ SplitIdentifierString(char *rawstring, char separator,
24002400

24012401
curname = nextp;
24022402
while (*nextp && *nextp != separator &&
2403-
!isspace((unsigned char) *nextp))
2403+
!scanner_isspace(*nextp))
24042404
nextp++;
24052405
endp = nextp;
24062406
if (curname == nextp)
@@ -2422,13 +2422,13 @@ SplitIdentifierString(char *rawstring, char separator,
24222422
pfree(downname);
24232423
}
24242424

2425-
while (isspace((unsigned char) *nextp))
2425+
while (scanner_isspace(*nextp))
24262426
nextp++; /* skip trailing whitespace */
24272427

24282428
if (*nextp == separator)
24292429
{
24302430
nextp++;
2431-
while (isspace((unsigned char) *nextp))
2431+
while (scanner_isspace(*nextp))
24322432
nextp++; /* skip leading whitespace for next */
24332433
/* we expect another name, so done remains false */
24342434
}
@@ -2487,7 +2487,7 @@ SplitDirectoriesString(char *rawstring, char separator,
24872487

24882488
*namelist = NIL;
24892489

2490-
while (isspace((unsigned char) *nextp))
2490+
while (scanner_isspace(*nextp))
24912491
nextp++; /* skip leading whitespace */
24922492

24932493
if (*nextp == '\0')
@@ -2524,21 +2524,21 @@ SplitDirectoriesString(char *rawstring, char separator,
25242524
while (*nextp && *nextp != separator)
25252525
{
25262526
/* trailing whitespace should not be included in name */
2527-
if (!isspace((unsigned char) *nextp))
2527+
if (!scanner_isspace(*nextp))
25282528
endp = nextp + 1;
25292529
nextp++;
25302530
}
25312531
if (curname == endp)
25322532
return false; /* empty unquoted name not allowed */
25332533
}
25342534

2535-
while (isspace((unsigned char) *nextp))
2535+
while (scanner_isspace(*nextp))
25362536
nextp++; /* skip trailing whitespace */
25372537

25382538
if (*nextp == separator)
25392539
{
25402540
nextp++;
2541-
while (isspace((unsigned char) *nextp))
2541+
while (scanner_isspace(*nextp))
25422542
nextp++; /* skip leading whitespace for next */
25432543
/* we expect another name, so done remains false */
25442544
}

0 commit comments

Comments
 (0)