Skip to content

Commit a40631a

Browse files
committed
Fix lexing of standard multi-character operators in edge cases.
Commits c6b3c93 (which fixed the precedence of >=, <=, <> operators) and 865f14a (which added support for the standard => notation for named arguments) created a class of lexer tokens which look like multi-character operators but which have their own token IDs distinct from Op. However, longest-match rules meant that following any of these tokens with another operator character, as in (1<>-1), would cause them to be incorrectly returned as Op.

The error here isn't immediately obvious, because the parser would usually still find the correct operator via the Op token, but there were more subtle problems:

1. If immediately followed by a comment or +-, >= <= <> would be given the old precedence of Op rather than the correct new precedence;

2. If followed by a comment, != would be returned as Op rather than as NOT_EQUAL, causing it not to be found at all;

3. If followed by a comment or +-, the => token for named arguments would be lexed as Op, causing the argument to be mis-parsed as a simple expression, usually causing an error.

Fix by explicitly checking for the operators in the {operator} code block in addition to all the existing special cases there.

Backpatch to 9.5 where the problem was introduced.

Analysis and patch by me; review by Tom Lane.

Discussion: https://postgr.es/m/87va851ppl.fsf@news-spur.riddles.org.uk
1 parent d4a63f8 commit a40631a

File tree

7 files changed

+221
-0
lines changed

7 files changed

+221
-0
lines changed

src/backend/parser/scan.l

+28
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,15 @@ identifier {ident_start}{ident_cont}*
339339
typecast "::"
340340
dot_dot \.\.
341341
colon_equals ":="
342+
343+
/*
344+
* These operator-like tokens (unlike the above ones) also match the {operator}
345+
* rule, which means that they might be overridden by a longer match if they
346+
* are followed by a comment start or a + or - character. Accordingly, if you
347+
* add to this list, you must also add corresponding code to the {operator}
348+
* block to return the correct token in such cases. (This is not needed in
349+
* psqlscan.l since the token value is ignored there.)
350+
*/
342351
equals_greater "=>"
343352
less_equals "<="
344353
greater_equals ">="
@@ -929,6 +938,25 @@ other .
929938
if (nchars == 1 &&
930939
strchr(",()[].;:+-*/%^<>=", yytext[0]))
931940
return yytext[0];
941+
/*
942+
* Likewise, if what we have left is two chars, and
943+
* those match the tokens ">=", "<=", "=>", "<>" or
944+
* "!=", then we must return the appropriate token
945+
* rather than the generic Op.
946+
*/
947+
if (nchars == 2)
948+
{
949+
if (yytext[0] == '=' && yytext[1] == '>')
950+
return EQUALS_GREATER;
951+
if (yytext[0] == '>' && yytext[1] == '=')
952+
return GREATER_EQUALS;
953+
if (yytext[0] == '<' && yytext[1] == '=')
954+
return LESS_EQUALS;
955+
if (yytext[0] == '<' && yytext[1] == '>')
956+
return NOT_EQUALS;
957+
if (yytext[0] == '!' && yytext[1] == '=')
958+
return NOT_EQUALS;
959+
}
932960
}
933961

934962
/*

src/fe_utils/psqlscan.l

+9
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,15 @@ identifier {ident_start}{ident_cont}*
298298
typecast "::"
299299
dot_dot \.\.
300300
colon_equals ":="
301+
302+
/*
303+
* These operator-like tokens (unlike the above ones) also match the {operator}
304+
* rule, which means that they might be overridden by a longer match if they
305+
* are followed by a comment start or a + or - character. Accordingly, if you
306+
* add to this list, you must also add corresponding code to the {operator}
307+
* block to return the correct token in such cases. (This is not needed in
308+
* psqlscan.l since the token value is ignored there.)
309+
*/
301310
equals_greater "=>"
302311
less_equals "<="
303312
greater_equals ">="

src/interfaces/ecpg/preproc/pgc.l

+28
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,15 @@ array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
245245
typecast "::"
246246
dot_dot \.\.
247247
colon_equals ":="
248+
249+
/*
250+
* These operator-like tokens (unlike the above ones) also match the {operator}
251+
* rule, which means that they might be overridden by a longer match if they
252+
* are followed by a comment start or a + or - character. Accordingly, if you
253+
* add to this list, you must also add corresponding code to the {operator}
254+
* block to return the correct token in such cases. (This is not needed in
255+
* psqlscan.l since the token value is ignored there.)
256+
*/
248257
equals_greater "=>"
249258
less_equals "<="
250259
greater_equals ">="
@@ -732,6 +741,25 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
732741
if (nchars == 1 &&
733742
strchr(",()[].;:+-*/%^<>=", yytext[0]))
734743
return yytext[0];
744+
/*
745+
* Likewise, if what we have left is two chars, and
746+
* those match the tokens ">=", "<=", "=>", "<>" or
747+
* "!=", then we must return the appropriate token
748+
* rather than the generic Op.
749+
*/
750+
if (nchars == 2)
751+
{
752+
if (yytext[0] == '=' && yytext[1] == '>')
753+
return EQUALS_GREATER;
754+
if (yytext[0] == '>' && yytext[1] == '=')
755+
return GREATER_EQUALS;
756+
if (yytext[0] == '<' && yytext[1] == '=')
757+
return LESS_EQUALS;
758+
if (yytext[0] == '<' && yytext[1] == '>')
759+
return NOT_EQUALS;
760+
if (yytext[0] == '!' && yytext[1] == '=')
761+
return NOT_EQUALS;
762+
}
735763
}
736764

737765
base_yylval.str = mm_strdup(yytext);

src/test/regress/expected/create_operator.out

+74
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,80 @@ CREATE OPERATOR => (
4545
ERROR: syntax error at or near "=>"
4646
LINE 1: CREATE OPERATOR => (
4747
^
48+
-- lexing of <=, >=, <>, != has a number of edge cases
49+
-- (=> is tested elsewhere)
50+
-- this is legal because ! is not allowed in sql ops
51+
CREATE OPERATOR !=- (
52+
leftarg = int8, -- right unary
53+
procedure = numeric_fac
54+
);
55+
SELECT 2 !=-;
56+
?column?
57+
----------
58+
2
59+
(1 row)
60+
61+
-- make sure lexer returns != as <> even in edge cases
62+
SELECT 2 !=/**/ 1, 2 !=/**/ 2;
63+
?column? | ?column?
64+
----------+----------
65+
t | f
66+
(1 row)
67+
68+
SELECT 2 !=-- comment to be removed by psql
69+
1;
70+
?column?
71+
----------
72+
t
73+
(1 row)
74+
75+
DO $$ -- use DO to protect -- from psql
76+
declare r boolean;
77+
begin
78+
execute $e$ select 2 !=-- comment
79+
1 $e$ into r;
80+
raise info 'r = %', r;
81+
end;
82+
$$;
83+
INFO: r = t
84+
-- check that <= etc. followed by more operator characters are returned
85+
-- as the correct token with correct precedence
86+
SELECT true<>-1 BETWEEN 1 AND 1; -- BETWEEN has prec. above <> but below Op
87+
?column?
88+
----------
89+
t
90+
(1 row)
91+
92+
SELECT false<>/**/1 BETWEEN 1 AND 1;
93+
?column?
94+
----------
95+
t
96+
(1 row)
97+
98+
SELECT false<=-1 BETWEEN 1 AND 1;
99+
?column?
100+
----------
101+
t
102+
(1 row)
103+
104+
SELECT false>=-1 BETWEEN 1 AND 1;
105+
?column?
106+
----------
107+
t
108+
(1 row)
109+
110+
SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
111+
?column? | ?column? | ?column?
112+
----------+----------+----------
113+
t | t | t
114+
(1 row)
115+
116+
SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
117+
?column? | ?column? | ?column?
118+
----------+----------+----------
119+
f | f | f
120+
(1 row)
121+
48122
-- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
49123
BEGIN TRANSACTION;
50124
CREATE ROLE regress_rol_op1;

src/test/regress/expected/polymorphism.out

+36
Original file line numberDiff line numberDiff line change
@@ -1478,6 +1478,42 @@ select dfunc('a'::text, 'b', flag => true); -- mixed notation
14781478
a
14791479
(1 row)
14801480

1481+
-- this tests lexer edge cases around =>
1482+
select dfunc(a =>-1);
1483+
dfunc
1484+
-------
1485+
-1
1486+
(1 row)
1487+
1488+
select dfunc(a =>+1);
1489+
dfunc
1490+
-------
1491+
1
1492+
(1 row)
1493+
1494+
select dfunc(a =>/**/1);
1495+
dfunc
1496+
-------
1497+
1
1498+
(1 row)
1499+
1500+
select dfunc(a =>--comment to be removed by psql
1501+
1);
1502+
dfunc
1503+
-------
1504+
1
1505+
(1 row)
1506+
1507+
-- need DO to protect the -- from psql
1508+
do $$
1509+
declare r integer;
1510+
begin
1511+
select dfunc(a=>-- comment
1512+
1) into r;
1513+
raise info 'r = %', r;
1514+
end;
1515+
$$;
1516+
INFO: r = 1
14811517
-- check reverse-listing of named-arg calls
14821518
CREATE VIEW dfview AS
14831519
SELECT q1, q2,

src/test/regress/sql/create_operator.sql

+31
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,37 @@ CREATE OPERATOR => (
4545
procedure = numeric_fac
4646
);
4747

48+
-- lexing of <=, >=, <>, != has a number of edge cases
49+
-- (=> is tested elsewhere)
50+
51+
-- this is legal because ! is not allowed in sql ops
52+
CREATE OPERATOR !=- (
53+
leftarg = int8, -- right unary
54+
procedure = numeric_fac
55+
);
56+
SELECT 2 !=-;
57+
-- make sure lexer returns != as <> even in edge cases
58+
SELECT 2 !=/**/ 1, 2 !=/**/ 2;
59+
SELECT 2 !=-- comment to be removed by psql
60+
1;
61+
DO $$ -- use DO to protect -- from psql
62+
declare r boolean;
63+
begin
64+
execute $e$ select 2 !=-- comment
65+
1 $e$ into r;
66+
raise info 'r = %', r;
67+
end;
68+
$$;
69+
70+
-- check that <= etc. followed by more operator characters are returned
71+
-- as the correct token with correct precedence
72+
SELECT true<>-1 BETWEEN 1 AND 1; -- BETWEEN has prec. above <> but below Op
73+
SELECT false<>/**/1 BETWEEN 1 AND 1;
74+
SELECT false<=-1 BETWEEN 1 AND 1;
75+
SELECT false>=-1 BETWEEN 1 AND 1;
76+
SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
77+
SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
78+
4879
-- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
4980
BEGIN TRANSACTION;
5081
CREATE ROLE regress_rol_op1;

src/test/regress/sql/polymorphism.sql

+15
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,21 @@ select dfunc('a'::text, 'b', flag => false); -- mixed notation
785785
select dfunc('a'::text, 'b', true); -- full positional notation
786786
select dfunc('a'::text, 'b', flag => true); -- mixed notation
787787

788+
-- this tests lexer edge cases around =>
789+
select dfunc(a =>-1);
790+
select dfunc(a =>+1);
791+
select dfunc(a =>/**/1);
792+
select dfunc(a =>--comment to be removed by psql
793+
1);
794+
-- need DO to protect the -- from psql
795+
do $$
796+
declare r integer;
797+
begin
798+
select dfunc(a=>-- comment
799+
1) into r;
800+
raise info 'r = %', r;
801+
end;
802+
$$;
788803

789804
-- check reverse-listing of named-arg calls
790805
CREATE VIEW dfview AS

0 commit comments

Comments
 (0)