Skip to content

Commit 2c8726c

Browse files
committed
Factor pattern-construction logic out of processSQLNamePattern.
The logic for converting the shell-glob-like syntax supported by utilities like psql and pg_dump to a regular expression is extracted into a new function patternToSQLRegex. The existing function processSQLNamePattern now uses this function as a subroutine. patternToSQLRegex is a little more general than what is required by processSQLNamePattern. That function is only interested in patterns that can have up to 2 parts, a schema and a relation; but patternToSQLRegex can limit the maximum number of parts to between 1 and 3, so that patterns can look like either "database.schema.relation", "schema.relation", or "relation" depending on how it's invoked and what the user specifies. processSQLNamePattern only passes two buffers, so it works exactly the same as before, always interpreting the pattern as either a "schema.relation" pattern or a "relation" pattern. But, future callers can use this function in other ways. Mark Dilger, reviewed by me. The larger patch series of which this is a part has also had review from Peter Geoghegan, Andres Freund, Álvaro Herrera, Michael Paquier, and Amul Sul, but I don't know whether any of them have reviewed this bit specifically. Discussion: http://postgr.es/m/12ED3DA8-25F0-4B68-937D-D907CFBF08E7@enterprisedb.com Discussion: http://postgr.es/m/5F743835-3399-419C-8324-2D424237E999@enterprisedb.com Discussion: http://postgr.es/m/70655DF3-33CE-4527-9A4D-DDEB582B6BA0@enterprisedb.com
1 parent ba0faf8 commit 2c8726c

File tree

2 files changed

+167
-97
lines changed

2 files changed

+167
-97
lines changed

src/fe_utils/string_utils.c

Lines changed: 163 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -831,10 +831,6 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
831831
{
832832
PQExpBufferData schemabuf;
833833
PQExpBufferData namebuf;
834-
int encoding = PQclientEncoding(conn);
835-
bool inquotes;
836-
const char *cp;
837-
int i;
838834
bool added_clause = false;
839835

840836
#define WHEREAND() \
@@ -856,98 +852,12 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
856852
initPQExpBuffer(&namebuf);
857853

858854
/*
859-
* Parse the pattern, converting quotes and lower-casing unquoted letters.
860-
* Also, adjust shell-style wildcard characters into regexp notation.
861-
*
862-
* We surround the pattern with "^(...)$" to force it to match the whole
863-
* string, as per SQL practice. We have to have parens in case the string
864-
* contains "|", else the "^" and "$" will be bound into the first and
865-
* last alternatives which is not what we want.
866-
*
867-
* Note: the result of this pass is the actual regexp pattern(s) we want
868-
* to execute. Quoting/escaping into SQL literal format will be done
869-
* below using appendStringLiteralConn().
855+
* Convert shell-style 'pattern' into the regular expression(s) we want to
856+
* execute. Quoting/escaping into SQL literal format will be done below
857+
* using appendStringLiteralConn().
870858
*/
871-
appendPQExpBufferStr(&namebuf, "^(");
872-
873-
inquotes = false;
874-
cp = pattern;
875-
876-
while (*cp)
877-
{
878-
char ch = *cp;
879-
880-
if (ch == '"')
881-
{
882-
if (inquotes && cp[1] == '"')
883-
{
884-
/* emit one quote, stay in inquotes mode */
885-
appendPQExpBufferChar(&namebuf, '"');
886-
cp++;
887-
}
888-
else
889-
inquotes = !inquotes;
890-
cp++;
891-
}
892-
else if (!inquotes && isupper((unsigned char) ch))
893-
{
894-
appendPQExpBufferChar(&namebuf,
895-
pg_tolower((unsigned char) ch));
896-
cp++;
897-
}
898-
else if (!inquotes && ch == '*')
899-
{
900-
appendPQExpBufferStr(&namebuf, ".*");
901-
cp++;
902-
}
903-
else if (!inquotes && ch == '?')
904-
{
905-
appendPQExpBufferChar(&namebuf, '.');
906-
cp++;
907-
}
908-
else if (!inquotes && ch == '.')
909-
{
910-
/* Found schema/name separator, move current pattern to schema */
911-
resetPQExpBuffer(&schemabuf);
912-
appendPQExpBufferStr(&schemabuf, namebuf.data);
913-
resetPQExpBuffer(&namebuf);
914-
appendPQExpBufferStr(&namebuf, "^(");
915-
cp++;
916-
}
917-
else if (ch == '$')
918-
{
919-
/*
920-
* Dollar is always quoted, whether inside quotes or not. The
921-
* reason is that it's allowed in SQL identifiers, so there's a
922-
* significant use-case for treating it literally, while because
923-
* we anchor the pattern automatically there is no use-case for
924-
* having it possess its regexp meaning.
925-
*/
926-
appendPQExpBufferStr(&namebuf, "\\$");
927-
cp++;
928-
}
929-
else
930-
{
931-
/*
932-
* Ordinary data character, transfer to pattern
933-
*
934-
* Inside double quotes, or at all times if force_escape is true,
935-
* quote regexp special characters with a backslash to avoid
936-
* regexp errors. Outside quotes, however, let them pass through
937-
* as-is; this lets knowledgeable users build regexp expressions
938-
* that are more powerful than shell-style patterns.
939-
*/
940-
if ((inquotes || force_escape) &&
941-
strchr("|*+?()[]{}.^$\\", ch))
942-
appendPQExpBufferChar(&namebuf, '\\');
943-
i = PQmblen(cp, encoding);
944-
while (i-- && *cp)
945-
{
946-
appendPQExpBufferChar(&namebuf, *cp);
947-
cp++;
948-
}
949-
}
950-
}
859+
patternToSQLRegex(PQclientEncoding(conn), NULL, &schemabuf, &namebuf,
860+
pattern, force_escape);
951861

952862
/*
953863
* Now decide what we need to emit. We may run under a hostile
@@ -964,7 +874,6 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
964874
{
965875
/* We have a name pattern, so constrain the namevar(s) */
966876

967-
appendPQExpBufferStr(&namebuf, ")$");
968877
/* Optimize away a "*" pattern */
969878
if (strcmp(namebuf.data, "^(.*)$") != 0)
970879
{
@@ -999,7 +908,6 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
999908
{
1000909
/* We have a schema pattern, so constrain the schemavar */
1001910

1002-
appendPQExpBufferStr(&schemabuf, ")$");
1003911
/* Optimize away a "*" pattern */
1004912
if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
1005913
{
@@ -1027,3 +935,161 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
1027935
return added_clause;
1028936
#undef WHEREAND
1029937
}
938+
939+
/*
940+
* Transform a possibly qualified shell-style object name pattern into up to
941+
* three SQL-style regular expressions, converting quotes, lower-casing
942+
* unquoted letters, and adjusting shell-style wildcard characters into regexp
943+
* notation.
944+
*
945+
* If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
946+
* contains two or more dbname/schema/name separators, we parse the portions of
947+
* the pattern prior to the first and second separators into dbnamebuf and
948+
* schemabuf, and the rest into namebuf. (Additional dots in the name portion
949+
* are not treated as special.)
950+
*
951+
* If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
952+
* least one separator, we parse the first portion into schemabuf and the rest
953+
* into namebuf.
954+
*
955+
* Otherwise, we parse all the pattern into namebuf.
956+
*
957+
* We surround the regexps with "^(...)$" to force them to match whole strings,
958+
* as per SQL practice. We have to have parens in case strings contain "|",
959+
* else the "^" and "$" will be bound into the first and last alternatives
960+
* which is not what we want.
961+
*
962+
* The regexps we parse into the buffers are appended to the data (if any)
963+
* already present. If we parse fewer fields than the number of buffers we
964+
* were given, the extra buffers are unaltered.
965+
*/
966+
void
967+
patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
968+
PQExpBuffer namebuf, const char *pattern, bool force_escape)
969+
{
970+
/*
 * Scratch buffers, one per pattern part.  They are filled in the order the
 * parts appear in the pattern; only the ones actually reached are
 * initialized, and each is terminated once copied to the caller's buffer.
 */
PQExpBufferData buf[3];
971+
PQExpBuffer curbuf;
972+
PQExpBuffer maxbuf;
973+
int i;
974+
bool inquotes;
975+
const char *cp;
976+
977+
Assert(pattern != NULL);
978+
Assert(namebuf != NULL);
979+
980+
/* callers should never expect "dbname.relname" format */
981+
Assert(dbnamebuf == NULL || schemabuf != NULL);
982+
983+
inquotes = false;
984+
cp = pattern;
985+
986+
/*
 * maxbuf is the last scratch buffer we are allowed to advance to.  It caps
 * the number of separators treated specially at the number of optional
 * output buffers the caller supplied (two, one, or none).
 */
if (dbnamebuf != NULL)
987+
maxbuf = &buf[2];
988+
else if (schemabuf != NULL)
989+
maxbuf = &buf[1];
990+
else
991+
maxbuf = &buf[0];
992+
993+
curbuf = &buf[0];
994+
initPQExpBuffer(curbuf);
995+
appendPQExpBufferStr(curbuf, "^(");
996+
while (*cp)
997+
{
998+
char ch = *cp;
999+
1000+
if (ch == '"')
1001+
{
1002+
if (inquotes && cp[1] == '"')
1003+
{
1004+
/* emit one quote, stay in inquotes mode */
1005+
appendPQExpBufferChar(curbuf, '"');
1006+
cp++;
1007+
}
1008+
else
1009+
inquotes = !inquotes;
1010+
cp++;
1011+
}
1012+
else if (!inquotes && isupper((unsigned char) ch))
1013+
{
1014+
appendPQExpBufferChar(curbuf,
1015+
pg_tolower((unsigned char) ch));
1016+
cp++;
1017+
}
1018+
else if (!inquotes && ch == '*')
1019+
{
1020+
appendPQExpBufferStr(curbuf, ".*");
1021+
cp++;
1022+
}
1023+
else if (!inquotes && ch == '?')
1024+
{
1025+
appendPQExpBufferChar(curbuf, '.');
1026+
cp++;
1027+
}
1028+
1029+
/*
1030+
* When we find a dbname/schema/name separator, we treat it specially
1031+
* only if the caller requested more patterns to be parsed than we
1032+
* have already parsed from the pattern. Otherwise, dot characters
1033+
* are not special.
1034+
*/
1035+
else if (!inquotes && ch == '.' && curbuf < maxbuf)
1036+
{
1037+
/* close the regexp for the finished part, then start the next one */
appendPQExpBufferStr(curbuf, ")$");
1038+
curbuf++;
1039+
initPQExpBuffer(curbuf);
1040+
appendPQExpBufferStr(curbuf, "^(");
1041+
cp++;
1042+
}
1043+
else if (ch == '$')
1044+
{
1045+
/*
1046+
* Dollar is always quoted, whether inside quotes or not. The
1047+
* reason is that it's allowed in SQL identifiers, so there's a
1048+
* significant use-case for treating it literally, while because
1049+
* we anchor the pattern automatically there is no use-case for
1050+
* having it possess its regexp meaning.
1051+
*/
1052+
appendPQExpBufferStr(curbuf, "\\$");
1053+
cp++;
1054+
}
1055+
else
1056+
{
1057+
/*
1058+
* Ordinary data character, transfer to pattern
1059+
*
1060+
* Inside double quotes, or at all times if force_escape is true,
1061+
* quote regexp special characters with a backslash to avoid
1062+
* regexp errors. Outside quotes, however, let them pass through
1063+
* as-is; this lets knowledgeable users build regexp expressions
1064+
* that are more powerful than shell-style patterns.
1065+
*/
1066+
if ((inquotes || force_escape) &&
1067+
strchr("|*+?()[]{}.^$\\", ch))
1068+
appendPQExpBufferChar(curbuf, '\\');
1069+
/*
 * Copy as many bytes as PQmblen reports for the character at cp, so a
 * multibyte character is transferred whole; the *cp test stops the copy
 * at the string terminator if fewer bytes than reported remain.
 */
i = PQmblen(cp, encoding);
1070+
while (i-- && *cp)
1071+
{
1072+
appendPQExpBufferChar(curbuf, *cp);
1073+
cp++;
1074+
}
1075+
}
1076+
}
1077+
appendPQExpBufferStr(curbuf, ")$");
1078+
1079+
/*
 * Hand the results back in reverse order.  curbuf is the last part parsed,
 * which is always the name; the part before it (if any) is the schema, and
 * the one before that (if any) is the database name.  curbuf can only have
 * moved past buf[0] when schemabuf is non-NULL, and past buf[1] when
 * dbnamebuf is non-NULL, because of how maxbuf was chosen above.
 */
appendPQExpBufferStr(namebuf, curbuf->data);
1080+
termPQExpBuffer(curbuf);
1081+
1082+
if (curbuf > buf)
1083+
{
1084+
curbuf--;
1085+
appendPQExpBufferStr(schemabuf, curbuf->data);
1086+
termPQExpBuffer(curbuf);
1087+
1088+
if (curbuf > buf)
1089+
{
1090+
curbuf--;
1091+
appendPQExpBufferStr(dbnamebuf, curbuf->data);
1092+
termPQExpBuffer(curbuf);
1093+
}
1094+
}
1095+
}

src/include/fe_utils/string_utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,8 @@ extern bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf,
5656
const char *schemavar, const char *namevar,
5757
const char *altnamevar, const char *visibilityrule);
5858

59+
extern void patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf,
60+
PQExpBuffer schemabuf, PQExpBuffer namebuf,
61+
const char *pattern, bool force_escape);
62+
5963
#endif /* STRING_UTILS_H */

0 commit comments

Comments
 (0)