Skip to content

Commit b45af1a

Browse files
authored
Add soft keywords (GH-20370)
Soft keywords are like keywords, but they only work in context; they are not reserved except when there is an exact match. This would enable things like match statements without reserving `match` (which would be bad for the `re.match()` function and probably lots of other places).

Automerge-Triggered-By: @gvanrossum
1 parent 578c395 commit b45af1a

File tree

4 files changed

+75
-4
lines changed

4 files changed

+75
-4
lines changed

Lib/test/test_peg_generator/test_c_parser.py

+30
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,33 @@ def test_error_in_rules(self) -> None:
402402
parse.parse_string("a", mode=0)
403403
"""
404404
self.run_test(grammar_source, test_source)
405+
406+
def test_no_soft_keywords(self) -> None:
    """A single-quoted keyword literal must not generate a soft-keyword call."""
    source = """
    start: expr+ NEWLINE? ENDMARKER
    expr: 'foo'
    """
    # Only the generated C text is inspected; nothing is compiled here.
    generated = generate_c_parser_source(parse_string(source, GrammarParser))
    assert "expect_soft_keyword" not in generated
414+
415+
def test_soft_keywords(self) -> None:
    """A double-quoted keyword literal must generate a soft-keyword call."""
    source = """
    start: expr+ NEWLINE? ENDMARKER
    expr: "foo"
    """
    # Only the generated C text is inspected; nothing is compiled here.
    generated = generate_c_parser_source(parse_string(source, GrammarParser))
    assert "expect_soft_keyword" in generated
423+
424+
def test_soft_keywords_parse(self) -> None:
    """Check that the text 'if' parses both as a soft keyword and as a NAME.

    The grammar uses a double-quoted "if" as its leading soft keyword, while
    ``expr`` accepts any NAME, so "if if + if" is expected to be valid and
    "if if" (missing the '+' operand pair) is expected to be rejected.
    """
    grammar = """
    start: "if" expr '+' expr NEWLINE
    expr: NAME
    """
    checks = """
    valid_cases = ["if if + if"]
    invalid_cases = ["if if"]
    self.check_input_strings_for_grammar(valid_cases, invalid_cases)
    """
    self.run_test(grammar, checks)

Parser/pegen/pegen.c

+24
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,30 @@ _PyPegen_expect_token(Parser *p, int type)
753753
return t;
754754
}
755755

756+
expr_ty
757+
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
758+
{
759+
if (p->mark == p->fill) {
760+
if (_PyPegen_fill_token(p) < 0) {
761+
p->error_indicator = 1;
762+
return NULL;
763+
}
764+
}
765+
Token *t = p->tokens[p->mark];
766+
if (t->type != NAME) {
767+
return NULL;
768+
}
769+
char* s = PyBytes_AsString(t->bytes);
770+
if (!s) {
771+
return NULL;
772+
}
773+
if (strcmp(s, keyword) != 0) {
774+
return NULL;
775+
}
776+
expr_ty res = _PyPegen_name_token(p);
777+
return res;
778+
}
779+
756780
Token *
757781
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
758782
{

Parser/pegen/pegen.h

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int
122122
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
123123

124124
Token *_PyPegen_expect_token(Parser *p, int type);
125+
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
125126
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
126127
int _PyPegen_fill_token(Parser *p);
127128
expr_ty _PyPegen_name_token(Parser *p);

Tools/peg_generator/pegen/c_generator.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,16 @@ def keyword_helper(self, keyword: str) -> FunctionCall:
117117
comment=f"token='{keyword}'",
118118
)
119119

120+
def soft_keyword_helper(self, value: str) -> FunctionCall:
    """Describe the generated C call that matches the soft keyword *value*.

    *value* is inserted as-is as the second argument of
    ``_PyPegen_expect_soft_keyword`` and into the call's comment.
    """
    call = FunctionCall(
        function="_PyPegen_expect_soft_keyword",
        arguments=["p", value],
        assigned_variable="_keyword",
        return_type="expr_ty",
        nodetype=NodeTypes.NAME_TOKEN,
        comment=f"soft_keyword='{value}'",
    )
    return call
129+
120130
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
121131
name = node.value
122132
if name in self.non_exact_tokens:
@@ -154,7 +164,10 @@ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
154164
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
155165
val = ast.literal_eval(node.value)
156166
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
157-
return self.keyword_helper(val)
167+
if node.value.endswith("'"):
168+
return self.keyword_helper(val)
169+
else:
170+
return self.soft_keyword_helper(node.value)
158171
else:
159172
assert val in self.exact_tokens, f"{node.value} is not a known literal"
160173
type = self.exact_tokens[val]
@@ -656,8 +669,9 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str])
656669
self.print("{")
657670
# We have parsed successfully all the conditions for the option.
658671
with self.indent():
672+
node_str = str(node).replace('"', '\\"')
659673
self.print(
660-
f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node}"));'
674+
f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
661675
)
662676
# Prepare to emit the rule action and do so
663677
if node.action and "EXTRA" in node.action:
@@ -710,8 +724,9 @@ def visit_Alt(
710724
self.print(f"{{ // {node}")
711725
with self.indent():
712726
self._check_for_errors()
727+
node_str = str(node).replace('"', '\\"')
713728
self.print(
714-
f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node}"));'
729+
f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
715730
)
716731
# Prepare variable declarations for the alternative
717732
vars = self.collect_vars(node)
@@ -733,9 +748,10 @@ def visit_Alt(
733748
self.handle_alt_normal(node, is_gather, rulename)
734749

735750
self.print("p->mark = _mark;")
751+
node_str = str(node).replace('"', '\\"')
736752
self.print(
737753
f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
738-
f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node}"));'
754+
f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
739755
)
740756
if "_cut_var" in vars:
741757
self.print("if (_cut_var) {")

0 commit comments

Comments
 (0)