Skip to content

Commit 1925ac2

Browse files
committed
Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex.
If the regex compiler can see that a regex is unsatisfiable (for example, '$foo') then it may emit an NFA having no arcs. pg_trgm's packGraph function did the wrong thing in this case; it would access off the end of a work array, and with bad luck could produce a corrupted output data structure causing more problems later. This could end with wrong answers or crashes in queries using a pg_trgm GIN or GiST index with such a regex. Fix by not trying to de-duplicate if there aren't at least 2 arcs.

Per bug #17830 from Alexander Lakhin. Back-patch to all supported branches.

Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
1 parent d66bb04 commit 1925ac2

File tree

3 files changed: 25 additions (+25) and 10 deletions (-10).

contrib/pg_trgm/expected/pg_word_trgm.out

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,3 +1044,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban
10441044
Waikala | 0.3
10451045
(89 rows)
10461046

1047+
-- test unsatisfiable pattern
1048+
select * from test_trgm2 where t ~ '.*$x';
1049+
t
1050+
---
1051+
(0 rows)
1052+

contrib/pg_trgm/sql/pg_word_trgm.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t
4343
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
4444
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
4545
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
46+
47+
-- test unsatisfiable pattern
48+
select * from test_trgm2 where t ~ '.*$x';

contrib/pg_trgm/trgm_regexp.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1947,9 +1947,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
19471947
arcsCount;
19481948
HASH_SEQ_STATUS scan_status;
19491949
TrgmState *state;
1950-
TrgmPackArcInfo *arcs,
1951-
*p1,
1952-
*p2;
1950+
TrgmPackArcInfo *arcs;
19531951
TrgmPackedArc *packedArcs;
19541952
TrgmPackedGraph *result;
19551953
int i,
@@ -2021,17 +2019,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
20212019
qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
20222020

20232021
/* We could have duplicates because states were merged. Remove them. */
2024-
/* p1 is probe point, p2 is last known non-duplicate. */
2025-
p2 = arcs;
2026-
for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2022+
if (arcIndex > 1)
20272023
{
2028-
if (packArcInfoCmp(p1, p2) > 0)
2024+
/* p1 is probe point, p2 is last known non-duplicate. */
2025+
TrgmPackArcInfo *p1,
2026+
*p2;
2027+
2028+
p2 = arcs;
2029+
for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
20292030
{
2030-
p2++;
2031-
*p2 = *p1;
2031+
if (packArcInfoCmp(p1, p2) > 0)
2032+
{
2033+
p2++;
2034+
*p2 = *p1;
2035+
}
20322036
}
2037+
arcsCount = (p2 - arcs) + 1;
20332038
}
2034-
arcsCount = (p2 - arcs) + 1;
2039+
else
2040+
arcsCount = arcIndex;
20352041

20362042
/* Create packed representation */
20372043
result = (TrgmPackedGraph *)

0 commit comments

Comments
 (0)