Skip to content

Commit 5912bf7

Browse files
committed
Fix parsing of ignored operators in websearch_to_tsquery().
The manual says clearly that punctuation in the input of websearch_to_tsquery() is ignored, except for the special cases of dashes and quotes. However, this failed for cases like "(foo bar) or something", or in general an ISOPERATOR character in front of the "or". We'd switch back to WAITOPERAND state, then ignore the operator character while remaining in that state, and then reach the "or" in WAITOPERAND state which (intentionally) makes us treat it as data.

The fix is simple enough: if we see an ISOPERATOR character while in WAITOPERATOR state, we have to skip it while staying in that state. (We don't need to worry about other punctuation characters: those will be consumed as though they were words, but then rejected by lexizing.)

In v14 and up (since commit eb08605) we can simplify the code a bit more too, because there is no longer a reason for the WAITOPERAND state to distinguish between quoted and unquoted operands.

Per bug #18479 from Manos Emmanouilidis. Back-patch to all supported branches.

Discussion: https://postgr.es/m/18479-d9b46e2fc242c33e@postgresql.org
1 parent 1450db7 commit 5912bf7

File tree

3 files changed

+19
-13
lines changed

3 files changed

+19
-13
lines changed

src/backend/utils/adt/tsquery.c

+9-13
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
420420
}
421421
else if (ISOPERATOR(state->buf))
422422
{
423-
/* or else gettoken_tsvector() will raise an error */
423+
/* ignore, else gettoken_tsvector() will raise an error */
424424
state->buf++;
425425
state->state = WAITOPERAND;
426426
continue;
@@ -452,31 +452,27 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
452452
break;
453453

454454
case WAITOPERATOR:
455-
if (t_iseq(state->buf, '"'))
455+
if (*state->buf == '\0')
456456
{
457-
/*
458-
* put implicit AND after an operand and handle this quote
459-
* in WAITOPERAND
460-
*/
461-
state->state = WAITOPERAND;
462-
*operator = OP_AND;
463-
return PT_OPR;
457+
return PT_END;
464458
}
465459
else if (parse_or_operator(state))
466460
{
467461
state->state = WAITOPERAND;
468462
*operator = OP_OR;
469463
return PT_OPR;
470464
}
471-
else if (*state->buf == '\0')
465+
else if (ISOPERATOR(state->buf))
472466
{
473-
return PT_END;
467+
/* ignore other operators in this state too */
468+
state->buf++;
469+
continue;
474470
}
475471
else if (!t_isspace(state->buf))
476472
{
477-
/* put implicit AND after an operand */
478-
*operator = OP_AND;
473+
/* insert implicit AND between operands */
479474
state->state = WAITOPERAND;
475+
*operator = OP_AND;
480476
return PT_OPR;
481477
}
482478
break;

src/test/regress/expected/tsearch.out

+7
Original file line numberDiff line numberDiff line change
@@ -2539,12 +2539,19 @@ select websearch_to_tsquery('simple', 'abc <-> def');
25392539
'abc' & 'def'
25402540
(1 row)
25412541

2542+
-- parens are ignored, too
25422543
select websearch_to_tsquery('simple', 'abc (pg or class)');
25432544
websearch_to_tsquery
25442545
------------------------
25452546
'abc' & 'pg' | 'class'
25462547
(1 row)
25472548

2549+
select websearch_to_tsquery('simple', '(foo bar) or (ding dong)');
2550+
websearch_to_tsquery
2551+
---------------------------------
2552+
'foo' & 'bar' | 'ding' & 'dong'
2553+
(1 row)
2554+
25482555
-- NOT is ignored in quotes
25492556
select websearch_to_tsquery('english', 'My brand new smartphone');
25502557
websearch_to_tsquery

src/test/regress/sql/tsearch.sql

+3
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,10 @@ select websearch_to_tsquery('simple', ':');
727727
select websearch_to_tsquery('simple', 'abc & def');
728728
select websearch_to_tsquery('simple', 'abc | def');
729729
select websearch_to_tsquery('simple', 'abc <-> def');
730+
731+
-- parens are ignored, too
730732
select websearch_to_tsquery('simple', 'abc (pg or class)');
733+
select websearch_to_tsquery('simple', '(foo bar) or (ding dong)');
731734

732735
-- NOT is ignored in quotes
733736
select websearch_to_tsquery('english', 'My brand new smartphone');

0 commit comments

Comments
 (0)