Skip to content

Commit d12e5bb

Browse files
committed
Code and docs review for commit 3187d6d.
Fix up check for high-bit-set characters, which provoked "comparison is always true due to limited range of data type" warnings on some compilers, and was unlike the way we do it elsewhere anyway. Fix omission of "$" from the set of valid identifier continuation characters. Get rid of sanitize_text(), which was utterly inconsistent with any other error report anywhere in the system, and wasn't even well designed on its own terms (double-quoting the result string without escaping contained double quotes doesn't seem very well thought out). Fix up error messages, which didn't follow the message style guidelines very well, and were overly specific in situations where the actual mistake might not be what they said. Improve documentation. (I started out just intending to fix the compiler warning, but the more I looked at the patch the less I liked it.)
1 parent 499a505 commit d12e5bb

File tree

3 files changed

+103
-152
lines changed

3 files changed

+103
-152
lines changed

doc/src/sgml/func.sgml

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,25 +1823,22 @@
18231823
<indexterm>
18241824
<primary>parse_ident</primary>
18251825
</indexterm>
1826-
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
1827-
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
1826+
<literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
1827+
[, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
18281828
</entry>
18291829
<entry><type>text[]</type></entry>
1830-
<entry>Split <parameter>qualified identifier</parameter> into array
1831-
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
1832-
false, extra characters after the identifier are ignored. This is useful
1833-
for parsing identifiers for objects like functions and arrays that may
1834-
have trailing characters. By default, extra characters after the last
1835-
identifier are considered an error, but if the second parameter is false,
1836-
then the characters after the last identifier are ignored. Note that this
1837-
function does not truncate quoted identifiers. If you care about that
1838-
you should cast the result of this function to name[]. Non-printable
1839-
characters (like 0 to 31) are always displayed as hexadecimal codes,
1840-
which can be different from PostgreSQL internal SQL identifiers
1841-
processing, when the original escaped value is displayed.
1830+
<entry>
1831+
Split <parameter>qualified_identifier</parameter> into an array of
1832+
identifiers, removing any quoting of individual identifiers. By
1833+
default, extra characters after the last identifier are considered an
1834+
error; but if the second parameter is <literal>false</>, then such
1835+
extra characters are ignored. (This behavior is useful for parsing
1836+
names for objects like functions.) Note that this function does not
1837+
truncate over-length identifiers. If you want truncation you can cast
1838+
the result to <type>name[]</>.
18421839
</entry>
18431840
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
1844-
<entry><literal>"SomeSchema,sometable"</literal></entry>
1841+
<entry><literal>{SomeSchema,sometable}</literal></entry>
18451842
</row>
18461843

18471844
<row>

src/backend/utils/adt/misc.c

Lines changed: 74 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
723723

724724

725725
/*
726-
* This simple parser utility are compatible with lexer implementation,
727-
* used only in parse_ident function
726+
* Is character a valid identifier start?
727+
* Must match scan.l's {ident_start} character class.
728728
*/
729729
static bool
730730
is_ident_start(unsigned char c)
731731
{
732+
/* Underscores and ASCII letters are OK */
732733
if (c == '_')
733734
return true;
734735
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
735736
return true;
736-
737-
if (c >= 0200 && c <= 0377)
737+
/* Any high-bit-set character is OK (might be part of a multibyte char) */
738+
if (IS_HIGHBIT_SET(c))
738739
return true;
739-
740740
return false;
741741
}
742742

743+
/*
744+
* Is character a valid identifier continuation?
745+
* Must match scan.l's {ident_cont} character class.
746+
*/
743747
static bool
744748
is_ident_cont(unsigned char c)
745749
{
746-
if (c >= '0' && c <= '9')
750+
/* Can be digit or dollar sign ... */
751+
if ((c >= '0' && c <= '9') || c == '$')
747752
return true;
748-
753+
/* ... or an identifier start character */
749754
return is_ident_start(c);
750755
}
751756

752757
/*
753-
* Sanitize SQL string for using in error message.
754-
*/
755-
static char *
756-
sanitize_text(text *t)
757-
{
758-
int len = VARSIZE_ANY_EXHDR(t);
759-
const char *p = VARDATA_ANY(t);
760-
StringInfo dstr;
761-
762-
dstr = makeStringInfo();
763-
764-
appendStringInfoChar(dstr, '"');
765-
766-
while (len--)
767-
{
768-
switch (*p)
769-
{
770-
case '\b':
771-
appendStringInfoString(dstr, "\\b");
772-
break;
773-
case '\f':
774-
appendStringInfoString(dstr, "\\f");
775-
break;
776-
case '\n':
777-
appendStringInfoString(dstr, "\\n");
778-
break;
779-
case '\r':
780-
appendStringInfoString(dstr, "\\r");
781-
break;
782-
case '\t':
783-
appendStringInfoString(dstr, "\\t");
784-
break;
785-
case '\'':
786-
appendStringInfoString(dstr, "''");
787-
break;
788-
case '\\':
789-
appendStringInfoString(dstr, "\\\\");
790-
break;
791-
default:
792-
if ((unsigned char) *p < ' ')
793-
appendStringInfo(dstr, "\\u%04x", (int) *p);
794-
else
795-
appendStringInfoCharMacro(dstr, *p);
796-
break;
797-
}
798-
p++;
799-
}
800-
801-
appendStringInfoChar(dstr, '"');
802-
803-
return dstr->data;
804-
}
805-
806-
/*
807-
* parse_ident - parse SQL composed identifier to separate identifiers.
758+
* parse_ident - parse a SQL qualified identifier into separate identifiers.
808759
* When strict mode is active (second parameter), then any chars after
809-
* last identifiers are disallowed.
760+
* the last identifier are disallowed.
810761
*/
811762
Datum
812763
parse_ident(PG_FUNCTION_ARGS)
813764
{
814-
text *qualname;
815-
char *qualname_str;
816-
bool strict;
765+
text *qualname = PG_GETARG_TEXT_PP(0);
766+
bool strict = PG_GETARG_BOOL(1);
767+
char *qualname_str = text_to_cstring(qualname);
768+
ArrayBuildState *astate = NULL;
817769
char *nextp;
818770
bool after_dot = false;
819-
ArrayBuildState *astate = NULL;
820-
821-
qualname = PG_GETARG_TEXT_PP(0);
822-
qualname_str = text_to_cstring(qualname);
823-
strict = PG_GETARG_BOOL(1);
824771

772+
/*
773+
* The code below scribbles on qualname_str in some cases, so we should
774+
* reconvert qualname if we need to show the original string in error
775+
* messages.
776+
*/
825777
nextp = qualname_str;
826778

827779
/* skip leading whitespace */
@@ -830,90 +782,87 @@ parse_ident(PG_FUNCTION_ARGS)
830782

831783
for (;;)
832784
{
833-
char *curname;
834-
char *endp;
835-
bool missing_ident;
836-
837-
missing_ident = true;
785+
char *curname;
786+
bool missing_ident = true;
838787

839-
if (*nextp == '\"')
788+
if (*nextp == '"')
840789
{
790+
char *endp;
791+
841792
curname = nextp + 1;
842793
for (;;)
843794
{
844-
endp = strchr(nextp + 1, '\"');
795+
endp = strchr(nextp + 1, '"');
845796
if (endp == NULL)
846797
ereport(ERROR,
847-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
848-
errmsg("unclosed double quotes"),
849-
errdetail("string %s is not valid identifier",
850-
sanitize_text(qualname))));
851-
if (endp[1] != '\"')
798+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
799+
errmsg("string is not a valid identifier: \"%s\"",
800+
text_to_cstring(qualname)),
801+
errdetail("String has unclosed double quotes.")));
802+
if (endp[1] != '"')
852803
break;
853804
memmove(endp, endp + 1, strlen(endp));
854805
nextp = endp;
855806
}
856807
nextp = endp + 1;
857808
*endp = '\0';
858809

859-
/* Show complete input string in this case. */
860810
if (endp - curname == 0)
861811
ereport(ERROR,
862-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
863-
errmsg("identifier should not be empty: %s",
864-
sanitize_text(qualname))));
812+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
813+
errmsg("string is not a valid identifier: \"%s\"",
814+
text_to_cstring(qualname)),
815+
errdetail("Quoted identifier must not be empty.")));
865816

866817
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
867818
false, TEXTOID, CurrentMemoryContext);
868819
missing_ident = false;
869820
}
870-
else
821+
else if (is_ident_start((unsigned char) *nextp))
871822
{
872-
if (is_ident_start((unsigned char) *nextp))
873-
{
874-
char *downname;
875-
int len;
876-
text *part;
877-
878-
curname = nextp++;
879-
while (is_ident_cont((unsigned char) *nextp))
880-
nextp++;
881-
882-
len = nextp - curname;
883-
884-
/*
885-
* Unlike name, we don't implicitly truncate identifiers. This
886-
* is useful for allowing the user to check for specific parts
887-
* of the identifier being too long. It's easy enough for the
888-
* user to get the truncated names by casting our output to
889-
* name[].
890-
*/
891-
downname = downcase_identifier(curname, len, false, false);
892-
part = cstring_to_text_with_len(downname, len);
893-
astate = accumArrayResult(astate, PointerGetDatum(part), false,
894-
TEXTOID, CurrentMemoryContext);
895-
missing_ident = false;
896-
}
823+
char *downname;
824+
int len;
825+
text *part;
826+
827+
curname = nextp++;
828+
while (is_ident_cont((unsigned char) *nextp))
829+
nextp++;
830+
831+
len = nextp - curname;
832+
833+
/*
834+
* We don't implicitly truncate identifiers. This is useful for
835+
* allowing the user to check for specific parts of the identifier
836+
* being too long. It's easy enough for the user to get the
837+
* truncated names by casting our output to name[].
838+
*/
839+
downname = downcase_identifier(curname, len, false, false);
840+
part = cstring_to_text_with_len(downname, len);
841+
astate = accumArrayResult(astate, PointerGetDatum(part), false,
842+
TEXTOID, CurrentMemoryContext);
843+
missing_ident = false;
897844
}
898845

899846
if (missing_ident)
900847
{
901848
/* Different error messages based on where we failed. */
902849
if (*nextp == '.')
903850
ereport(ERROR,
904-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
905-
errmsg("missing valid identifier before \".\" symbol: %s",
906-
sanitize_text(qualname))));
851+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
852+
errmsg("string is not a valid identifier: \"%s\"",
853+
text_to_cstring(qualname)),
854+
errdetail("No valid identifier before \".\" symbol.")));
907855
else if (after_dot)
908856
ereport(ERROR,
909-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
910-
errmsg("missing valid identifier after \".\" symbol: %s",
911-
sanitize_text(qualname))));
857+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
858+
errmsg("string is not a valid identifier: \"%s\"",
859+
text_to_cstring(qualname)),
860+
errdetail("No valid identifier after \".\" symbol.")));
912861
else
913862
ereport(ERROR,
914-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
915-
errmsg("missing valid identifier: %s",
916-
sanitize_text(qualname))));
863+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
864+
errmsg("string is not a valid identifier: \"%s\"",
865+
text_to_cstring(qualname))));
917866
}
918867

919868
while (isspace((unsigned char) *nextp))
@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
934883
{
935884
if (strict)
936885
ereport(ERROR,
937-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
938-
errmsg("identifier contains disallowed characters: %s",
939-
sanitize_text(qualname))));
886+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
887+
errmsg("string is not a valid identifier: \"%s\"",
888+
text_to_cstring(qualname))));
940889
break;
941890
}
942891
}

src/test/regress/expected/name.out

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
142142
(1 row)
143143

144144
SELECT parse_ident('foo.boo[]'); -- should fail
145-
ERROR: identifier contains disallowed characters: "foo.boo[]"
145+
ERROR: string is not a valid identifier: "foo.boo[]"
146146
SELECT parse_ident('foo.boo[]', strict => false); -- ok
147147
parse_ident
148148
-------------
@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
151151

152152
-- should fail
153153
SELECT parse_ident(' ');
154-
ERROR: missing valid identifier: " "
154+
ERROR: string is not a valid identifier: " "
155155
SELECT parse_ident(' .aaa');
156-
ERROR: missing valid identifier before "." symbol: " .aaa"
156+
ERROR: string is not a valid identifier: " .aaa"
157+
DETAIL: No valid identifier before "." symbol.
157158
SELECT parse_ident(' aaa . ');
158-
ERROR: missing valid identifier after "." symbol: " aaa . "
159+
ERROR: string is not a valid identifier: " aaa . "
160+
DETAIL: No valid identifier after "." symbol.
159161
SELECT parse_ident('aaa.a%b');
160-
ERROR: identifier contains disallowed characters: "aaa.a%b"
162+
ERROR: string is not a valid identifier: "aaa.a%b"
161163
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
162-
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
164+
ERROR: string is not a valid identifier: "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
163165
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
164166
length | length
165167
--------+--------
@@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66)
179181
(1 row)
180182

181183
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
182-
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
184+
ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX"
183185
SELECT parse_ident('1020');
184-
ERROR: missing valid identifier: "1020"
186+
ERROR: string is not a valid identifier: "1020"
185187
SELECT parse_ident('10.20');
186-
ERROR: missing valid identifier: "10.20"
188+
ERROR: string is not a valid identifier: "10.20"
187189
SELECT parse_ident('.');
188-
ERROR: missing valid identifier before "." symbol: "."
190+
ERROR: string is not a valid identifier: "."
191+
DETAIL: No valid identifier before "." symbol.
189192
SELECT parse_ident('.1020');
190-
ERROR: missing valid identifier before "." symbol: ".1020"
193+
ERROR: string is not a valid identifier: ".1020"
194+
DETAIL: No valid identifier before "." symbol.
191195
SELECT parse_ident('xxx.1020');
192-
ERROR: missing valid identifier after "." symbol: "xxx.1020"
196+
ERROR: string is not a valid identifier: "xxx.1020"
197+
DETAIL: No valid identifier after "." symbol.

0 commit comments

Comments
 (0)