Skip to content

Commit 48f216d

Browse files
committed
Fix plpgsql's handling of -- comments following expressions.
Up to now, read_sql_construct() has collected all the source text from the statement or expression's initial token up to the character just before the "until" token. It normally tries to strip trailing whitespace from that, largely for neatness. If there was a "-- text" comment after the expression, this resulted in removing the newline that terminates the comment, which creates a hazard if we try to paste the collected text into a larger SQL construct without inserting a newline after it. In particular this caused our handling of CASE constructs to fail if there's a comment after a WHEN expression. Commit 4adead1 noticed a similar problem with cursor arguments, and worked around it through the rather crude hack of suppressing the whitespace-trimming behavior for those. Rather than do that and leave the hazard open for future hackers to trip over, let's fix it properly. pl_scanner.c already has enough infrastructure to report the end location of the expression's last token, so we can copy up to that location and never collect any trailing whitespace or comment to begin with. Erik Wienhold and Tom Lane, per report from Michal Bartak. Back-patch to all supported branches. Discussion: https://postgr.es/m/CAAVzF_FjRoi8fOVuLCZhQJx6HATQ7MKm=aFOHWZODFnLmjX-xA@mail.gmail.com
1 parent f6dcddf commit 48f216d

File tree

7 files changed

+76
-36
lines changed

7 files changed

+76
-36
lines changed

src/pl/plpgsql/src/expected/plpgsql_control.out

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,3 +681,20 @@ select case_test(13);
681681
other
682682
(1 row)
683683

684+
-- test line comment between WHEN and THEN
685+
create or replace function case_comment(int) returns text as $$
686+
begin
687+
case $1
688+
when 1 -- comment before THEN
689+
then return 'one';
690+
else
691+
return 'other';
692+
end case;
693+
end;
694+
$$ language plpgsql immutable;
695+
select case_comment(1);
696+
case_comment
697+
--------------
698+
one
699+
(1 row)
700+

src/pl/plpgsql/src/pl_gram.y

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ static PLpgSQL_expr *read_sql_construct(int until,
6666
RawParseMode parsemode,
6767
bool isexpression,
6868
bool valid_sql,
69-
bool trim,
7069
int *startloc,
7170
int *endtoken);
7271
static PLpgSQL_expr *read_sql_expression(int until,
@@ -894,7 +893,7 @@ stmt_perform : K_PERFORM
894893
*/
895894
new->expr = read_sql_construct(';', 0, 0, ";",
896895
RAW_PARSE_DEFAULT,
897-
false, false, true,
896+
false, false,
898897
&startloc, NULL);
899898
/* overwrite "perform" ... */
900899
memcpy(new->expr->query, " SELECT", 7);
@@ -980,7 +979,7 @@ stmt_assign : T_DATUM
980979
plpgsql_push_back_token(T_DATUM);
981980
new->expr = read_sql_construct(';', 0, 0, ";",
982981
pmode,
983-
false, true, true,
982+
false, true,
984983
NULL, NULL);
985984

986985
$$ = (PLpgSQL_stmt *) new;
@@ -1473,7 +1472,6 @@ for_control : for_variable K_IN
14731472
RAW_PARSE_DEFAULT,
14741473
true,
14751474
false,
1476-
true,
14771475
&expr1loc,
14781476
&tok);
14791477

@@ -1878,7 +1876,7 @@ stmt_raise : K_RAISE
18781876
expr = read_sql_construct(',', ';', K_USING,
18791877
", or ; or USING",
18801878
RAW_PARSE_PLPGSQL_EXPR,
1881-
true, true, true,
1879+
true, true,
18821880
NULL, &tok);
18831881
new->params = lappend(new->params, expr);
18841882
}
@@ -2015,7 +2013,7 @@ stmt_dynexecute : K_EXECUTE
20152013
expr = read_sql_construct(K_INTO, K_USING, ';',
20162014
"INTO or USING or ;",
20172015
RAW_PARSE_PLPGSQL_EXPR,
2018-
true, true, true,
2016+
true, true,
20192017
NULL, &endtoken);
20202018

20212019
new = palloc(sizeof(PLpgSQL_stmt_dynexecute));
@@ -2054,7 +2052,7 @@ stmt_dynexecute : K_EXECUTE
20542052
expr = read_sql_construct(',', ';', K_INTO,
20552053
", or ; or INTO",
20562054
RAW_PARSE_PLPGSQL_EXPR,
2057-
true, true, true,
2055+
true, true,
20582056
NULL, &endtoken);
20592057
new->params = lappend(new->params, expr);
20602058
} while (endtoken == ',');
@@ -2639,7 +2637,7 @@ read_sql_expression(int until, const char *expected)
26392637
{
26402638
return read_sql_construct(until, 0, 0, expected,
26412639
RAW_PARSE_PLPGSQL_EXPR,
2642-
true, true, true, NULL, NULL);
2640+
true, true, NULL, NULL);
26432641
}
26442642

26452643
/* Convenience routine to read an expression with two possible terminators */
@@ -2649,7 +2647,7 @@ read_sql_expression2(int until, int until2, const char *expected,
26492647
{
26502648
return read_sql_construct(until, until2, 0, expected,
26512649
RAW_PARSE_PLPGSQL_EXPR,
2652-
true, true, true, NULL, endtoken);
2650+
true, true, NULL, endtoken);
26532651
}
26542652

26552653
/* Convenience routine to read a SQL statement that must end with ';' */
@@ -2658,7 +2656,7 @@ read_sql_stmt(void)
26582656
{
26592657
return read_sql_construct(';', 0, 0, ";",
26602658
RAW_PARSE_DEFAULT,
2661-
false, true, true, NULL, NULL);
2659+
false, true, NULL, NULL);
26622660
}
26632661

26642662
/*
@@ -2671,7 +2669,6 @@ read_sql_stmt(void)
26712669
* parsemode: raw_parser() mode to use
26722670
* isexpression: whether to say we're reading an "expression" or a "statement"
26732671
* valid_sql: whether to check the syntax of the expr
2674-
* trim: trim trailing whitespace
26752672
* startloc: if not NULL, location of first token is stored at *startloc
26762673
* endtoken: if not NULL, ending token is stored at *endtoken
26772674
* (this is only interesting if until2 or until3 isn't zero)
@@ -2684,14 +2681,14 @@ read_sql_construct(int until,
26842681
RawParseMode parsemode,
26852682
bool isexpression,
26862683
bool valid_sql,
2687-
bool trim,
26882684
int *startloc,
26892685
int *endtoken)
26902686
{
26912687
int tok;
26922688
StringInfoData ds;
26932689
IdentifierLookup save_IdentifierLookup;
26942690
int startlocation = -1;
2691+
int endlocation = -1;
26952692
int parenlevel = 0;
26962693
PLpgSQL_expr *expr;
26972694

@@ -2742,6 +2739,8 @@ read_sql_construct(int until,
27422739
expected),
27432740
parser_errposition(yylloc)));
27442741
}
2742+
/* Remember end+1 location of last accepted token */
2743+
endlocation = yylloc + plpgsql_token_length();
27452744
}
27462745

27472746
plpgsql_IdentifierLookup = save_IdentifierLookup;
@@ -2752,22 +2751,22 @@ read_sql_construct(int until,
27522751
*endtoken = tok;
27532752

27542753
/* give helpful complaint about empty input */
2755-
if (startlocation >= yylloc)
2754+
if (startlocation >= endlocation)
27562755
{
27572756
if (isexpression)
27582757
yyerror("missing expression");
27592758
else
27602759
yyerror("missing SQL statement");
27612760
}
27622761

2763-
plpgsql_append_source_text(&ds, startlocation, yylloc);
2764-
2765-
/* trim any trailing whitespace, for neatness */
2766-
if (trim)
2767-
{
2768-
while (ds.len > 0 && scanner_isspace(ds.data[ds.len - 1]))
2769-
ds.data[--ds.len] = '\0';
2770-
}
2762+
/*
2763+
* We save only the text from startlocation to endlocation-1. This
2764+
* suppresses the "until" token as well as any whitespace or comments
2765+
* following the last accepted token. (We used to strip such trailing
2766+
* whitespace by hand, but that causes problems if there's a "-- comment"
2767+
* in front of said whitespace.)
2768+
*/
2769+
plpgsql_append_source_text(&ds, startlocation, endlocation);
27712770

27722771
expr = palloc0(sizeof(PLpgSQL_expr));
27732772
expr->query = pstrdup(ds.data);
@@ -3908,16 +3907,12 @@ read_cursor_args(PLpgSQL_var *cursor, int until)
39083907
* Read the value expression. To provide the user with meaningful
39093908
* parse error positions, we check the syntax immediately, instead of
39103909
* checking the final expression that may have the arguments
3911-
* reordered. Trailing whitespace must not be trimmed, because
3912-
* otherwise input of the form (param -- comment\n, param) would be
3913-
* translated into a form where the second parameter is commented
3914-
* out.
3910+
* reordered.
39153911
*/
39163912
item = read_sql_construct(',', ')', 0,
39173913
",\" or \")",
39183914
RAW_PARSE_PLPGSQL_EXPR,
39193915
true, true,
3920-
false, /* do not trim */
39213916
NULL, &endtoken);
39223917

39233918
argv[argpos] = item->query;

src/pl/plpgsql/src/pl_scanner.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ plpgsql_yylex(void)
184184
tok1 = T_DATUM;
185185
else
186186
tok1 = T_CWORD;
187+
/* Adjust token length to include A.B.C */
188+
aux1.leng = aux5.lloc - aux1.lloc + aux5.leng;
187189
}
188190
else
189191
{
@@ -197,6 +199,8 @@ plpgsql_yylex(void)
197199
tok1 = T_DATUM;
198200
else
199201
tok1 = T_CWORD;
202+
/* Adjust token length to include A.B */
203+
aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
200204
}
201205
}
202206
else
@@ -210,6 +214,8 @@ plpgsql_yylex(void)
210214
tok1 = T_DATUM;
211215
else
212216
tok1 = T_CWORD;
217+
/* Adjust token length to include A.B */
218+
aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
213219
}
214220
}
215221
else
@@ -298,6 +304,17 @@ plpgsql_yylex(void)
298304
return tok1;
299305
}
300306

307+
/*
308+
* Return the length of the token last returned by plpgsql_yylex().
309+
*
310+
* In the case of compound tokens, the length includes all the parts.
311+
*/
312+
int
313+
plpgsql_token_length(void)
314+
{
315+
return plpgsql_yyleng;
316+
}
317+
301318
/*
302319
* Internal yylex function. This wraps the core lexer and adds one feature:
303320
* a token pushback stack. We also make a couple of trivial single-token

src/pl/plpgsql/src/plpgsql.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func);
13161316
*/
13171317
extern int plpgsql_base_yylex(void);
13181318
extern int plpgsql_yylex(void);
1319+
extern int plpgsql_token_length(void);
13191320
extern void plpgsql_push_back_token(int token);
13201321
extern bool plpgsql_token_is_unreserved_keyword(int token);
13211322
extern void plpgsql_append_source_text(StringInfo buf,

src/pl/plpgsql/src/sql/plpgsql_control.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,3 +486,17 @@ select case_test(1);
486486
select case_test(2);
487487
select case_test(12);
488488
select case_test(13);
489+
490+
-- test line comment between WHEN and THEN
491+
create or replace function case_comment(int) returns text as $$
492+
begin
493+
case $1
494+
when 1 -- comment before THEN
495+
then return 'one';
496+
else
497+
return 'other';
498+
end case;
499+
end;
500+
$$ language plpgsql immutable;
501+
502+
select case_comment(1);

src/test/regress/expected/plpgsql.out

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2390,11 +2390,9 @@ select namedparmcursor_test7();
23902390
ERROR: division by zero
23912391
CONTEXT: SQL expression "42/0 AS p1, 77 AS p2"
23922392
PL/pgSQL function namedparmcursor_test7() line 6 at OPEN
2393-
-- check that line comments work correctly within the argument list (there
2394-
-- is some special handling of this case in the code: the newline after the
2395-
-- comment must be preserved when the argument-evaluating query is
2396-
-- constructed, otherwise the comment effectively comments out the next
2397-
-- argument, too)
2393+
-- check that line comments work correctly within the argument list
2394+
-- (this used to require a special hack in the code; it no longer does,
2395+
-- but let's keep the test anyway)
23982396
create function namedparmcursor_test8() returns int4 as $$
23992397
declare
24002398
c1 cursor (p1 int, p2 int) for

src/test/regress/sql/plpgsql.sql

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2047,11 +2047,9 @@ begin
20472047
end $$ language plpgsql;
20482048
select namedparmcursor_test7();
20492049

2050-
-- check that line comments work correctly within the argument list (there
2051-
-- is some special handling of this case in the code: the newline after the
2052-
-- comment must be preserved when the argument-evaluating query is
2053-
-- constructed, otherwise the comment effectively comments out the next
2054-
-- argument, too)
2050+
-- check that line comments work correctly within the argument list
2051+
-- (this used to require a special hack in the code; it no longer does,
2052+
-- but let's keep the test anyway)
20552053
create function namedparmcursor_test8() returns int4 as $$
20562054
declare
20572055
c1 cursor (p1 int, p2 int) for

0 commit comments

Comments
 (0)