Skip to content

Commit 58fb156

Browse files
authored
bpo-42997: Improve error message for missing : before suites (GH-24292)
* Add a new directive ('&&') to the PEG generator that expects a token and hard-fails the parse if the token is not found. This makes it possible to quickly emit syntax errors for missing tokens. * Use the new grammar element to hard-fail if the ':' is missing before suites.
1 parent 802b645 commit 58fb156

File tree

11 files changed

+1269
-478
lines changed

11 files changed

+1269
-478
lines changed

Doc/tools/extensions/peg_highlight.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ class PEGLexer(RegexLexer):
2727
tokens = {
2828
"ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
2929
"lookaheads": [
30+
# Forced tokens
31+
(r"(&&)(?=\w+\s?)", bygroups(None)),
32+
(r"(&&)(?='.+'\s?)", bygroups(None)),
33+
(r'(&&)(?=".+"\s?)', bygroups(None)),
34+
(r"(&&)(?=\(.+\)\s?)", bygroups(None)),
35+
3036
(r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
3137
(r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
3238
(r'(?<=\|\s)(&".+"\s?)', bygroups(None)),

Grammar/python.gram

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -162,22 +162,22 @@ dotted_name[expr_ty]:
162162
| NAME
163163

164164
if_stmt[stmt_ty]:
165-
| 'if' a=named_expression ':' b=block c=elif_stmt {
165+
| 'if' a=named_expression &&':' b=block c=elif_stmt {
166166
_Py_If(a, b, CHECK(asdl_stmt_seq*, _PyPegen_singleton_seq(p, c)), EXTRA) }
167-
| 'if' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
167+
| 'if' a=named_expression &&':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
168168
elif_stmt[stmt_ty]:
169-
| 'elif' a=named_expression ':' b=block c=elif_stmt {
169+
| 'elif' a=named_expression &&':' b=block c=elif_stmt {
170170
_Py_If(a, b, CHECK(asdl_stmt_seq*, _PyPegen_singleton_seq(p, c)), EXTRA) }
171-
| 'elif' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
172-
else_block[asdl_stmt_seq*]: 'else' ':' b=block { b }
171+
| 'elif' a=named_expression &&':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
172+
else_block[asdl_stmt_seq*]: 'else' &&':' b=block { b }
173173

174174
while_stmt[stmt_ty]:
175-
| 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
175+
| 'while' a=named_expression &&':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
176176

177177
for_stmt[stmt_ty]:
178-
| 'for' t=star_targets 'in' ~ ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
178+
| 'for' t=star_targets 'in' ~ ex=star_expressions &&':' tc=[TYPE_COMMENT] b=block el=[else_block] {
179179
_Py_For(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) }
180-
| ASYNC 'for' t=star_targets 'in' ~ ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
180+
| ASYNC 'for' t=star_targets 'in' ~ ex=star_expressions &&':' tc=[TYPE_COMMENT] b=block el=[else_block] {
181181
CHECK_VERSION(stmt_ty, 5, "Async for loops are", _Py_AsyncFor(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
182182
| invalid_for_target
183183

@@ -190,18 +190,20 @@ with_stmt[stmt_ty]:
190190
CHECK_VERSION(stmt_ty, 5, "Async with statements are", _Py_AsyncWith(a, b, NULL, EXTRA)) }
191191
| ASYNC 'with' a[asdl_withitem_seq*]=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
192192
CHECK_VERSION(stmt_ty, 5, "Async with statements are", _Py_AsyncWith(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
193+
| invalid_with_stmt
194+
193195
with_item[withitem_ty]:
194196
| e=expression 'as' t=star_target &(',' | ')' | ':') { _Py_withitem(e, t, p->arena) }
195197
| invalid_with_item
196198
| e=expression { _Py_withitem(e, NULL, p->arena) }
197199

198200
try_stmt[stmt_ty]:
199-
| 'try' ':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
200-
| 'try' ':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
201+
| 'try' &&':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
202+
| 'try' &&':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
201203
except_block[excepthandler_ty]:
202-
| 'except' e=expression t=['as' z=NAME { z }] ':' b=block {
204+
| 'except' e=expression t=['as' z=NAME { z }] &&':' b=block {
203205
_Py_ExceptHandler(e, (t) ? ((expr_ty) t)->v.Name.id : NULL, b, EXTRA) }
204-
| 'except' ':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
206+
| 'except' &&':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
205207
finally_block[asdl_stmt_seq*]: 'finally' ':' a=block { a }
206208

207209
return_stmt[stmt_ty]:
@@ -216,11 +218,11 @@ function_def[stmt_ty]:
216218
| function_def_raw
217219

218220
function_def_raw[stmt_ty]:
219-
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
221+
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] &&':' tc=[func_type_comment] b=block {
220222
_Py_FunctionDef(n->v.Name.id,
221223
(params) ? params : CHECK(arguments_ty, _PyPegen_empty_arguments(p)),
222224
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA) }
223-
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
225+
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] &&':' tc=[func_type_comment] b=block {
224226
CHECK_VERSION(
225227
stmt_ty,
226228
5,
@@ -300,7 +302,7 @@ class_def[stmt_ty]:
300302
| a=decorators b=class_def_raw { _PyPegen_class_def_decorators(p, a, b) }
301303
| class_def_raw
302304
class_def_raw[stmt_ty]:
303-
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] ':' c=block {
305+
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] &&':' c=block {
304306
_Py_ClassDef(a->v.Name.id,
305307
(b) ? ((expr_ty) b)->v.Call.args : NULL,
306308
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
@@ -718,7 +720,7 @@ invalid_double_type_comments:
718720
| TYPE_COMMENT NEWLINE TYPE_COMMENT NEWLINE INDENT {
719721
RAISE_SYNTAX_ERROR("Cannot have two type comments on def") }
720722
invalid_with_item:
721-
| expression 'as' a=expression {
723+
| expression 'as' a=expression &(',' | ')' | ':') {
722724
RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
723725

724726
invalid_for_target:
@@ -731,3 +733,7 @@ invalid_group:
731733
invalid_import_from_targets:
732734
| import_from_as_names ',' {
733735
RAISE_SYNTAX_ERROR("trailing comma not allowed without surrounding parentheses") }
736+
737+
invalid_with_stmt:
738+
| [ASYNC] 'with' ','.(expression ['as' star_target])+ &&':'
739+
| [ASYNC] 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' &&':'

Lib/test/test_syntax.py

Lines changed: 104 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@
229229
230230
>>> with a as b
231231
Traceback (most recent call last):
232-
SyntaxError: invalid syntax
232+
SyntaxError: expected ':'
233233
234234
>>> p = p =
235235
Traceback (most recent call last):
@@ -331,7 +331,7 @@
331331
>>> class C(x for x in L):
332332
... pass
333333
Traceback (most recent call last):
334-
SyntaxError: invalid syntax
334+
SyntaxError: expected ':'
335335
336336
>>> def g(*args, **kwargs):
337337
... print(args, sorted(kwargs.items()))
@@ -708,6 +708,107 @@
708708
...
709709
SyntaxError: cannot assign to function call
710710
711+
Missing ':' before suites:
712+
713+
>>> def f()
714+
... pass
715+
Traceback (most recent call last):
716+
SyntaxError: expected ':'
717+
718+
>>> class A
719+
... pass
720+
Traceback (most recent call last):
721+
SyntaxError: expected ':'
722+
723+
>>> if 1
724+
... pass
725+
... elif 1:
726+
... pass
727+
... else:
728+
... x() = 1
729+
Traceback (most recent call last):
730+
SyntaxError: expected ':'
731+
732+
>>> if 1:
733+
... pass
734+
... elif 1
735+
... pass
736+
... else:
737+
... x() = 1
738+
Traceback (most recent call last):
739+
SyntaxError: expected ':'
740+
741+
>>> if 1:
742+
... pass
743+
... elif 1:
744+
... pass
745+
... else
746+
... x() = 1
747+
Traceback (most recent call last):
748+
SyntaxError: expected ':'
749+
750+
>>> for x in range(10)
751+
... pass
752+
Traceback (most recent call last):
753+
SyntaxError: expected ':'
754+
755+
>>> while True
756+
... pass
757+
Traceback (most recent call last):
758+
SyntaxError: expected ':'
759+
760+
>>> with blech as something
761+
... pass
762+
Traceback (most recent call last):
763+
SyntaxError: expected ':'
764+
765+
>>> with blech
766+
... pass
767+
Traceback (most recent call last):
768+
SyntaxError: expected ':'
769+
770+
>>> with blech, block as something
771+
... pass
772+
Traceback (most recent call last):
773+
SyntaxError: expected ':'
774+
775+
>>> with blech, block as something, bluch
776+
... pass
777+
Traceback (most recent call last):
778+
SyntaxError: expected ':'
779+
780+
>>> with (blech as something)
781+
... pass
782+
Traceback (most recent call last):
783+
SyntaxError: expected ':'
784+
785+
>>> with (blech)
786+
... pass
787+
Traceback (most recent call last):
788+
SyntaxError: expected ':'
789+
790+
>>> with (blech, block as something)
791+
... pass
792+
Traceback (most recent call last):
793+
SyntaxError: expected ':'
794+
795+
>>> with (blech, block as something, bluch)
796+
... pass
797+
Traceback (most recent call last):
798+
SyntaxError: expected ':'
799+
800+
>>> try
801+
... pass
802+
Traceback (most recent call last):
803+
SyntaxError: expected ':'
804+
805+
>>> try:
806+
... pass
807+
... except
808+
... pass
809+
Traceback (most recent call last):
810+
SyntaxError: expected ':'
811+
711812
Make sure that the old "raise X, Y[, Z]" form is gone:
712813
>>> raise X, Y
713814
Traceback (most recent call last):
@@ -992,7 +1093,7 @@ def func2():
9921093
finally:
9931094
pass
9941095
"""
995-
self._check_error(code, "invalid syntax")
1096+
self._check_error(code, "expected ':'")
9961097

9971098
def test_invalid_line_continuation_left_recursive(self):
9981099
# Check bpo-42218: SyntaxErrors following left-recursive rules
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve error message for missing ":" before blocks. Patch by Pablo Galindo.

0 commit comments

Comments
 (0)