Skip to content

Commit 1bfe659

Browse files
authored
[3.9] Backport GH-20370 and GH-20436: Soft keywords (GH-20458)
1 parent 788d7bf commit 1bfe659

File tree

4 files changed: 105 additions (+105) and 6 deletions (-6).

Lib/test/test_peg_generator/test_c_parser.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,45 @@ def test_error_in_rules(self) -> None:
402402
parse.parse_string("a", mode=0)
403403
"""
404404
self.run_test(grammar_source, test_source)
405+
406+
def test_no_soft_keywords(self) -> None:
407+
grammar_source = """
408+
start: expr+ NEWLINE? ENDMARKER
409+
expr: 'foo'
410+
"""
411+
grammar = parse_string(grammar_source, GrammarParser)
412+
parser_source = generate_c_parser_source(grammar)
413+
assert "expect_soft_keyword" not in parser_source
414+
415+
def test_soft_keywords(self) -> None:
416+
grammar_source = """
417+
start: expr+ NEWLINE? ENDMARKER
418+
expr: "foo"
419+
"""
420+
grammar = parse_string(grammar_source, GrammarParser)
421+
parser_source = generate_c_parser_source(grammar)
422+
assert "expect_soft_keyword" in parser_source
423+
424+
def test_soft_keywords_parse(self) -> None:
425+
grammar_source = """
426+
start: "if" expr '+' expr NEWLINE
427+
expr: NAME
428+
"""
429+
test_source = """
430+
valid_cases = ["if if + if"]
431+
invalid_cases = ["if if"]
432+
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
433+
"""
434+
self.run_test(grammar_source, test_source)
435+
436+
def test_soft_keywords_lookahead(self) -> None:
437+
grammar_source = """
438+
start: &"if" "if" expr '+' expr NEWLINE
439+
expr: NAME
440+
"""
441+
test_source = """
442+
valid_cases = ["if if + if"]
443+
invalid_cases = ["if if"]
444+
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
445+
"""
446+
self.run_test(grammar_source, test_source)

Parser/pegen/pegen.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,6 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
708708
return 0;
709709
}
710710

711-
712711
int
713712
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
714713
{
@@ -718,6 +717,15 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
718717
return (res != NULL) == positive;
719718
}
720719

720+
int
721+
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
722+
{
723+
int mark = p->mark;
724+
void *res = func(p, arg);
725+
p->mark = mark;
726+
return (res != NULL) == positive;
727+
}
728+
721729
int
722730
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
723731
{
@@ -753,6 +761,30 @@ _PyPegen_expect_token(Parser *p, int type)
753761
return t;
754762
}
755763

764+
expr_ty
765+
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
766+
{
767+
if (p->mark == p->fill) {
768+
if (_PyPegen_fill_token(p) < 0) {
769+
p->error_indicator = 1;
770+
return NULL;
771+
}
772+
}
773+
Token *t = p->tokens[p->mark];
774+
if (t->type != NAME) {
775+
return NULL;
776+
}
777+
char* s = PyBytes_AsString(t->bytes);
778+
if (!s) {
779+
p->error_indicator = 1;
780+
return NULL;
781+
}
782+
if (strcmp(s, keyword) != 0) {
783+
return NULL;
784+
}
785+
return _PyPegen_name_token(p);
786+
}
787+
756788
Token *
757789
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
758790
{

Parser/pegen/pegen.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,12 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
118118
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
119119

120120
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
121+
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
121122
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
122123
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
123124

124125
Token *_PyPegen_expect_token(Parser *p, int type);
126+
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
125127
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
126128
int _PyPegen_fill_token(Parser *p);
127129
expr_ty _PyPegen_name_token(Parser *p);

Tools/peg_generator/pegen/c_generator.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ class NodeTypes(Enum):
5858
STRING_TOKEN = 2
5959
GENERIC_TOKEN = 3
6060
KEYWORD = 4
61-
CUT_OPERATOR = 5
61+
SOFT_KEYWORD = 5
62+
CUT_OPERATOR = 6
6263

6364

6465
BASE_NODETYPES = {
@@ -117,6 +118,16 @@ def keyword_helper(self, keyword: str) -> FunctionCall:
117118
comment=f"token='{keyword}'",
118119
)
119120

121+
def soft_keyword_helper(self, value: str) -> FunctionCall:
122+
return FunctionCall(
123+
assigned_variable="_keyword",
124+
function="_PyPegen_expect_soft_keyword",
125+
arguments=["p", value],
126+
return_type="expr_ty",
127+
nodetype=NodeTypes.SOFT_KEYWORD,
128+
comment=f"soft_keyword='{value}'",
129+
)
130+
120131
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
121132
name = node.value
122133
if name in self.non_exact_tokens:
@@ -154,7 +165,10 @@ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
154165
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
155166
val = ast.literal_eval(node.value)
156167
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
157-
return self.keyword_helper(val)
168+
if node.value.endswith("'"):
169+
return self.keyword_helper(val)
170+
else:
171+
return self.soft_keyword_helper(node.value)
158172
else:
159173
assert val in self.exact_tokens, f"{node.value} is not a known literal"
160174
type = self.exact_tokens[val]
@@ -204,6 +218,12 @@ def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
204218
arguments=[positive, call.function, *call.arguments],
205219
return_type="int",
206220
)
221+
elif call.nodetype == NodeTypes.SOFT_KEYWORD:
222+
return FunctionCall(
223+
function=f"_PyPegen_lookahead_with_string",
224+
arguments=[positive, call.function, *call.arguments],
225+
return_type="int",
226+
)
207227
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
208228
return FunctionCall(
209229
function=f"_PyPegen_lookahead_with_int",
@@ -656,8 +676,9 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str])
656676
self.print("{")
657677
# We have parsed successfully all the conditions for the option.
658678
with self.indent():
679+
node_str = str(node).replace('"', '\\"')
659680
self.print(
660-
f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node}"));'
681+
f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
661682
)
662683
# Prepare to emit the rule action and do so
663684
if node.action and "EXTRA" in node.action:
@@ -710,8 +731,9 @@ def visit_Alt(
710731
self.print(f"{{ // {node}")
711732
with self.indent():
712733
self._check_for_errors()
734+
node_str = str(node).replace('"', '\\"')
713735
self.print(
714-
f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node}"));'
736+
f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
715737
)
716738
# Prepare variable declarations for the alternative
717739
vars = self.collect_vars(node)
@@ -733,9 +755,10 @@ def visit_Alt(
733755
self.handle_alt_normal(node, is_gather, rulename)
734756

735757
self.print("p->mark = _mark;")
758+
node_str = str(node).replace('"', '\\"')
736759
self.print(
737760
f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
738-
f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node}"));'
761+
f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
739762
)
740763
if "_cut_var" in vars:
741764
self.print("if (_cut_var) {")

0 commit comments

Comments
 (0)