Skip to content

Commit 7865280

Browse files
committed
Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex.
If the regex compiler can see that a regex is unsatisfiable (for example, '$foo') then it may emit an NFA having no arcs. pg_trgm's packGraph function did the wrong thing in this case; it would access off the end of a work array, and with bad luck could produce a corrupted output data structure causing more problems later. This could end with wrong answers or crashes in queries using a pg_trgm GIN or GiST index with such a regex. Fix by not trying to de-duplicate if there aren't at least 2 arcs.

Per bug #17830 from Alexander Lakhin. Back-patch to all supported branches.

Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
1 parent 53a53ea commit 7865280

File tree

3 files changed

+25
-10
lines changed

3 files changed

+25
-10
lines changed

contrib/pg_trgm/expected/pg_word_trgm.out

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1044,3 +1044,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban
10441044
Waikala | 0.3
10451045
(89 rows)
10461046

1047+
-- test unsatisfiable pattern
1048+
select * from test_trgm2 where t ~ '.*$x';
1049+
t
1050+
---
1051+
(0 rows)
1052+

contrib/pg_trgm/sql/pg_word_trgm.sql

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t
4343
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
4444
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
4545
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
46+
47+
-- test unsatisfiable pattern
48+
select * from test_trgm2 where t ~ '.*$x';

contrib/pg_trgm/trgm_regexp.c

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1944,9 +1944,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
19441944
arcsCount;
19451945
HASH_SEQ_STATUS scan_status;
19461946
TrgmState *state;
1947-
TrgmPackArcInfo *arcs,
1948-
*p1,
1949-
*p2;
1947+
TrgmPackArcInfo *arcs;
19501948
TrgmPackedArc *packedArcs;
19511949
TrgmPackedGraph *result;
19521950
int i,
@@ -2018,17 +2016,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
20182016
qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
20192017

20202018
/* We could have duplicates because states were merged. Remove them. */
2021-
/* p1 is probe point, p2 is last known non-duplicate. */
2022-
p2 = arcs;
2023-
for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2019+
if (arcIndex > 1)
20242020
{
2025-
if (packArcInfoCmp(p1, p2) > 0)
2021+
/* p1 is probe point, p2 is last known non-duplicate. */
2022+
TrgmPackArcInfo *p1,
2023+
*p2;
2024+
2025+
p2 = arcs;
2026+
for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
20262027
{
2027-
p2++;
2028-
*p2 = *p1;
2028+
if (packArcInfoCmp(p1, p2) > 0)
2029+
{
2030+
p2++;
2031+
*p2 = *p1;
2032+
}
20292033
}
2034+
arcsCount = (p2 - arcs) + 1;
20302035
}
2031-
arcsCount = (p2 - arcs) + 1;
2036+
else
2037+
arcsCount = arcIndex;
20322038

20332039
/* Create packed representation */
20342040
result = (TrgmPackedGraph *)

0 commit comments

Comments
 (0)