Skip to content

Commit 029dea8

Browse files
committed
Fix ts_headline() edge cases for empty query and empty search text.
tsquery's GETQUERY() macro is only safe to apply to a tsquery that is known non-empty; otherwise it gives a pointer to garbage. Before commit 5a617d7, ts_headline() avoided this pitfall, but only in a very indirect, nonobvious way. (hlCover could not reach its TS_execute call, because if the query contains no lexemes then hlFirstIndex would surely return -1.) After that commit, it fell into the trap, resulting in weird errors such as "unrecognized operator" and/or valgrind complaints.

In HEAD, fix this by not calling TS_execute_locations() at all for an empty query. In the back branches, add a defensive check to hlCover() --- that's not fixing any live bug, but I judge the code a bit too fragile as-is.

Also, both mark_hl_fragments() and mark_hl_words() were careless about the possibility of empty search text: in the cases where no match has been found, they'd end up telling mark_fragment() to mark from word indexes 0 to 0 inclusive, even when there is no word 0. This is harmless since we over-allocated the prs->words array, but it does annoy valgrind. Fix so that the end index is -1 and thus mark_fragment() will do nothing in such cases.

Bottom line is that this fixes a live bug in HEAD, but in the back branches it's only getting rid of a valgrind nitpick. Back-patch anyway.

Per report from Alexander Lakhin.

Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com
1 parent 18103b7 commit 029dea8

File tree

3 files changed

+41
-7
lines changed

3 files changed

+41
-7
lines changed

src/backend/tsearch/wparser_def.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2417,7 +2417,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations,
24172417
/* show the first min_words words if we have not marked anything */
24182418
if (num_f <= 0)
24192419
{
2420-
startpos = endpos = curlen = 0;
2420+
startpos = curlen = 0;
2421+
endpos = -1;
24212422
for (i = 0; i < prs->curwords && curlen < min_words; i++)
24222423
{
24232424
if (!NONWORDTOKEN(prs->words[i].type))
@@ -2571,7 +2572,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations,
25712572
if (bestlen < 0)
25722573
{
25732574
curlen = 0;
2574-
pose = 0;
2575+
pose = -1;
25752576
for (i = 0; i < prs->curwords && curlen < min_words; i++)
25762577
{
25772578
if (!NONWORDTOKEN(prs->words[i].type))
@@ -2601,7 +2602,6 @@ prsd_headline(PG_FUNCTION_ARGS)
26012602
HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
26022603
List *prsoptions = (List *) PG_GETARG_POINTER(1);
26032604
TSQuery query = PG_GETARG_TSQUERY(2);
2604-
hlCheck ch;
26052605
List *locations;
26062606

26072607
/* default option values: */
@@ -2671,10 +2671,17 @@ prsd_headline(PG_FUNCTION_ARGS)
26712671
}
26722672

26732673
/* Locate words and phrases matching the query */
2674-
ch.words = prs->words;
2675-
ch.len = prs->curwords;
2676-
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
2677-
checkcondition_HL);
2674+
if (query->size > 0)
2675+
{
2676+
hlCheck ch;
2677+
2678+
ch.words = prs->words;
2679+
ch.len = prs->curwords;
2680+
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
2681+
checkcondition_HL);
2682+
}
2683+
else
2684+
locations = NIL; /* empty query matches nothing */
26782685

26792686
/* Apply appropriate headline selector */
26802687
if (max_fragments == 0)

src/test/regress/expected/tsearch.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2127,6 +2127,27 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
21272127
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
21282128
(1 row)
21292129

2130+
-- Edge cases with empty query
2131+
SELECT ts_headline('english',
2132+
'', ''::tsquery);
2133+
NOTICE: text-search query doesn't contain lexemes: ""
2134+
LINE 2: '', ''::tsquery);
2135+
^
2136+
ts_headline
2137+
-------------
2138+
2139+
(1 row)
2140+
2141+
SELECT ts_headline('english',
2142+
'foo bar', ''::tsquery);
2143+
NOTICE: text-search query doesn't contain lexemes: ""
2144+
LINE 2: 'foo bar', ''::tsquery);
2145+
^
2146+
ts_headline
2147+
-------------
2148+
foo bar
2149+
(1 row)
2150+
21302151
--Rewrite sub system
21312152
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
21322153
\set ECHO none

src/test/regress/sql/tsearch.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,12 @@ SELECT ts_headline('english',
640640
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
641641
'MaxFragments=100, MaxWords=100, MinWords=1');
642642

643+
-- Edge cases with empty query
644+
SELECT ts_headline('english',
645+
'', ''::tsquery);
646+
SELECT ts_headline('english',
647+
'foo bar', ''::tsquery);
648+
643649
--Rewrite sub system
644650

645651
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);

0 commit comments

Comments
 (0)