Skip to content

Commit dbde97c

Browse files
committed
Rewrite LIKE's %-followed-by-_ optimization so it really works (this time
for sure ;-)). It now also optimizes more cases, such as %_%_. Improve comments too. Per bug #5478. In passing, also rename the TCHAR macro to GETCHAR, because pgindent is messing with the formatting of the former (apparently it now thinks TCHAR is a typedef name). Back-patch to 8.3, where the bug was introduced.
1 parent e54b0cb commit dbde97c

File tree

3 files changed

+74
-70
lines changed

3 files changed

+74
-70
lines changed

src/backend/utils/adt/like_match.c

Lines changed: 64 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
* NextChar
1515
* MatchText - to name of function wanted
1616
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17-
* MATCH_LOWER - define iff using to_lower on text chars
17+
* MATCH_LOWER - define for case (4), using to_lower on single-byte chars
1818
*
1919
* Copyright (c) 1996-2010, PostgreSQL Global Development Group
2020
*
2121
* IDENTIFICATION
22-
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.27 2010/01/02 16:57:54 momjian Exp $
22+
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.28 2010/05/28 17:35:23 tgl Exp $
2323
*
2424
*-------------------------------------------------------------------------
2525
*/
@@ -70,9 +70,9 @@
7070
*/
7171

7272
#ifdef MATCH_LOWER
73-
#define TCHAR(t) ((char) tolower((unsigned char) (t)))
73+
#define GETCHAR(t) ((char) tolower((unsigned char) (t)))
7474
#else
75-
#define TCHAR(t) (t)
75+
#define GETCHAR(t) (t)
7676
#endif
7777

7878
static int
@@ -101,85 +101,80 @@ MatchText(char *t, int tlen, char *p, int plen)
101101
ereport(ERROR,
102102
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
103103
errmsg("LIKE pattern must not end with escape character")));
104-
if (TCHAR (*p) != TCHAR (*t))
104+
if (GETCHAR(*p) != GETCHAR(*t))
105105
return LIKE_FALSE;
106106
}
107107
else if (*p == '%')
108108
{
109+
char firstpat;
110+
109111
/*
110-
* % processing is essentially a search for a match for what
111-
* follows the %, plus a recursive match of the remainder. We
112-
* succeed if and only if both conditions are met.
112+
* % processing is essentially a search for a text position at
113+
* which the remainder of the text matches the remainder of the
114+
* pattern, using a recursive call to check each potential match.
115+
*
116+
* If there are wildcards immediately following the %, we can skip
117+
* over them first, using the idea that any sequence of N _'s and
118+
* one or more %'s is equivalent to N _'s and one % (ie, it will
119+
* match any sequence of at least N text characters). In this
120+
* way we will always run the recursive search loop using a
121+
* pattern fragment that begins with a literal character-to-match,
122+
* thereby not recursing more than we have to.
113123
*/
124+
NextByte(p, plen);
114125

115-
/* %% is the same as % according to the SQL standard */
116-
/* Advance past all %'s */
117-
while (plen > 0 && *p == '%')
118-
NextByte(p, plen);
119-
/* Trailing percent matches everything. */
126+
while (plen > 0)
127+
{
128+
if (*p == '%')
129+
NextByte(p, plen);
130+
else if (*p == '_')
131+
{
132+
/* If not enough text left to match the pattern, ABORT */
133+
if (tlen <= 0)
134+
return LIKE_ABORT;
135+
NextChar(t, tlen);
136+
NextByte(p, plen);
137+
}
138+
else
139+
break; /* Reached a non-wildcard pattern char */
140+
}
141+
142+
/*
143+
* If we're at end of pattern, match: we have a trailing % which
144+
* matches any remaining text string.
145+
*/
120146
if (plen <= 0)
121147
return LIKE_TRUE;
122148

123149
/*
124150
* Otherwise, scan for a text position at which we can match the
125-
* rest of the pattern.
151+
* rest of the pattern. The first remaining pattern char is known
152+
* to be a regular or escaped literal character, so we can compare
153+
* the first pattern byte to each text byte to avoid recursing
154+
* more than we have to. This fact also guarantees that we don't
155+
* have to consider a match to the zero-length substring at the
156+
* end of the text.
126157
*/
127-
if (*p == '_')
158+
if (*p == '\\')
128159
{
129-
/* %_ is the same as _% - avoid matching _ repeatedly */
160+
if (plen < 2)
161+
return LIKE_FALSE; /* XXX should throw error */
162+
firstpat = GETCHAR(p[1]);
163+
}
164+
else
165+
firstpat = GETCHAR(*p);
130166

131-
do
132-
{
133-
NextChar(t, tlen);
134-
NextByte(p, plen);
135-
} while (tlen > 0 && plen > 0 && *p == '_');
136-
137-
/*
138-
* If we are at the end of the pattern, succeed: % followed by
139-
* n _'s matches any string of at least n characters, and we
140-
* have now found there are at least n characters.
141-
*/
142-
if (plen <= 0)
143-
return LIKE_TRUE;
144-
145-
/* Look for a place that matches the rest of the pattern */
146-
while (tlen > 0)
167+
while (tlen > 0)
168+
{
169+
if (GETCHAR(*t) == firstpat)
147170
{
148171
int matched = MatchText(t, tlen, p, plen);
149172

150173
if (matched != LIKE_FALSE)
151-
return matched; /* TRUE or ABORT */
152-
153-
NextChar(t, tlen);
174+
return matched; /* TRUE or ABORT */
154175
}
155-
}
156-
else
157-
{
158-
char firstpat = TCHAR (*p);
159176

160-
if (*p == '\\')
161-
{
162-
if (plen < 2)
163-
return LIKE_FALSE;
164-
firstpat = TCHAR (p[1]);
165-
}
166-
167-
while (tlen > 0)
168-
{
169-
/*
170-
* Optimization to prevent most recursion: don't recurse
171-
* unless first pattern byte matches first text byte.
172-
*/
173-
if (TCHAR (*t) == firstpat)
174-
{
175-
int matched = MatchText(t, tlen, p, plen);
176-
177-
if (matched != LIKE_FALSE)
178-
return matched; /* TRUE or ABORT */
179-
}
180-
181-
NextChar(t, tlen);
182-
}
177+
NextChar(t, tlen);
183178
}
184179

185180
/*
@@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen)
195190
NextByte(p, plen);
196191
continue;
197192
}
198-
else if (TCHAR (*p) != TCHAR (*t))
193+
else if (GETCHAR(*p) != GETCHAR(*t))
199194
{
200195
/* non-wildcard pattern char fails to match text char */
201196
return LIKE_FALSE;
@@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen)
220215
if (tlen > 0)
221216
return LIKE_FALSE; /* end of pattern, but not of text */
222217

223-
/* End of text string. Do we have matching pattern remaining? */
224-
while (plen > 0 && *p == '%') /* allow multiple %'s at end of
225-
* pattern */
218+
/*
219+
* End of text, but perhaps not of pattern. Match iff the remaining
220+
* pattern can match a zero-length string, ie, it's zero or more %'s.
221+
*/
222+
while (plen > 0 && *p == '%')
226223
NextByte(p, plen);
227-
228224
if (plen <= 0)
229225
return LIKE_TRUE;
230226

@@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc)
348344
#undef do_like_escape
349345
#endif
350346

351-
#undef TCHAR
347+
#undef GETCHAR
352348

353349
#ifdef MATCH_LOWER
354350
#undef MATCH_LOWER

src/test/regress/expected/strings.out

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -996,7 +996,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
996996
(1 row)
997997

998998
--
999-
-- test %/_ combination cases, cf bug #4821
999+
-- test %/_ combination cases, cf bugs #4821 and #5478
10001000
--
10011001
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
10021002
t | t | f
@@ -1022,6 +1022,12 @@ SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
10221022
t | t | f
10231023
(1 row)
10241024

1025+
SELECT 'jack' LIKE '%____%' AS t;
1026+
t
1027+
---
1028+
t
1029+
(1 row)
1030+
10251031
--
10261032
-- test implicit type conversion
10271033
--

src/test/regress/sql/strings.sql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ SELECT 'Hawkeye' ILIKE 'h%' AS "true";
301301
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
302302

303303
--
304-
-- test %/_ combination cases, cf bug #4821
304+
-- test %/_ combination cases, cf bugs #4821 and #5478
305305
--
306306

307307
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
@@ -310,6 +310,8 @@ SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
310310
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
311311
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
312312

313+
SELECT 'jack' LIKE '%____%' AS t;
314+
313315

314316
--
315317
-- test implicit type conversion

0 commit comments

Comments
 (0)