Skip to content

bpo-46520: Handle identifiers that look like keywords in ast.unparse #31012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 30 additions & 18 deletions Lib/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from _ast import *
from contextlib import contextmanager, nullcontext
from enum import IntEnum, auto, _simple_enum
from keyword import iskeyword


def parse(source, filename='<unknown>', mode='exec', *,
Expand Down Expand Up @@ -668,6 +669,18 @@ def next(self):
return self


# Code-point distance from an ASCII lowercase letter to the corresponding
# letter in the run that starts at '𝐚' (U+1D41A).  NFKC normalization maps
# those letters back to plain ASCII.
_MANGLE_INCR = -ord('a') + ord('𝐚')


def _mangle_keyword(x):
    """Return *x* spelled so that it will not be parsed as a keyword.

    If *x* is a keyword according to ``keyword.iskeyword``, its first ASCII
    lowercase letter is replaced with a non-ASCII character that is
    equivalent under NFKC.  The result still normalizes to the same
    identifier but is no longer tokenized as the keyword.

    ``'True'``, ``'False'`` and ``'None'`` are returned unchanged, as is
    anything that is not a keyword (including the empty string).
    """
    if x in ('True', 'False', 'None') or not iskeyword(x):
        return x
    # Shift the first a-z letter rather than blindly shifting x[0]: the
    # offset is only meaningful for ASCII lowercase letters, so a keyword
    # starting with any other character (e.g. an underscore) would
    # otherwise get an unrelated character that does not normalize back.
    for index, char in enumerate(x):
        if 'a' <= char <= 'z':
            return x[:index] + chr(ord(char) + _MANGLE_INCR) + x[index + 1:]
    # No ASCII lowercase letter to substitute; leave the name untouched.
    return x


_SINGLE_QUOTES = ("'", '"')
_MULTI_QUOTES = ('"""', "'''")
_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)
Expand Down Expand Up @@ -854,7 +867,7 @@ def visit_ImportFrom(self, node):
self.fill("from ")
self.write("." * node.level)
if node.module:
self.write(node.module)
self.write(_mangle_keyword(node.module))
self.write(" import ")
self.interleave(lambda: self.write(", "), self.traverse, node.names)

Expand Down Expand Up @@ -910,13 +923,12 @@ def visit_Assert(self, node):
self.write(", ")
self.traverse(node.msg)

def visit_Global(self, node):
self.fill("global ")
self.interleave(lambda: self.write(", "), self.write, node.names)
def visit_Global(self, node, kw="global "):
self.fill(kw)
self.interleave(lambda: self.write(", "), self.write, map(_mangle_keyword, node.names))

def visit_Nonlocal(self, node):
self.fill("nonlocal ")
self.interleave(lambda: self.write(", "), self.write, node.names)
self.visit_Global(node, kw="nonlocal ")

def visit_Await(self, node):
with self.require_parens(_Precedence.AWAIT, node):
Expand Down Expand Up @@ -992,7 +1004,7 @@ def visit_ExceptHandler(self, node):
self.traverse(node.type)
if node.name:
self.write(" as ")
self.write(node.name)
self.write(_mangle_keyword(node.name))
with self.block():
self.traverse(node.body)

Expand All @@ -1001,7 +1013,7 @@ def visit_ClassDef(self, node):
for deco in node.decorator_list:
self.fill("@")
self.traverse(deco)
self.fill("class " + node.name)
self.fill("class " + _mangle_keyword(node.name))
with self.delimit_if("(", ")", condition = node.bases or node.keywords):
comma = False
for e in node.bases:
Expand Down Expand Up @@ -1031,7 +1043,7 @@ def _function_helper(self, node, fill_suffix):
for deco in node.decorator_list:
self.fill("@")
self.traverse(deco)
def_str = fill_suffix + " " + node.name
def_str = fill_suffix + " " + _mangle_keyword(node.name)
self.fill(def_str)
with self.delimit("(", ")"):
self.traverse(node.args)
Expand Down Expand Up @@ -1215,7 +1227,7 @@ def unparse_inner(inner):
self._write_fstring_inner(node.format_spec)

def visit_Name(self, node):
self.write(node.id)
self.write(_mangle_keyword(node.id))

def _write_docstring(self, node):
self.fill()
Expand Down Expand Up @@ -1455,7 +1467,7 @@ def visit_Attribute(self, node):
if isinstance(node.value, Constant) and isinstance(node.value.value, int):
self.write(" ")
self.write(".")
self.write(node.attr)
self.write(_mangle_keyword(node.attr))

def visit_Call(self, node):
self.set_precedence(_Precedence.ATOM, node.func)
Expand Down Expand Up @@ -1520,7 +1532,7 @@ def visit_Match(self, node):
self.traverse(case)

def visit_arg(self, node):
self.write(node.arg)
self.write(_mangle_keyword(node.arg))
if node.annotation:
self.write(": ")
self.traverse(node.annotation)
Expand Down Expand Up @@ -1551,7 +1563,7 @@ def visit_arguments(self, node):
self.write(", ")
self.write("*")
if node.vararg:
self.write(node.vararg.arg)
self.write(_mangle_keyword(node.vararg.arg))
if node.vararg.annotation:
self.write(": ")
self.traverse(node.vararg.annotation)
Expand All @@ -1571,7 +1583,7 @@ def visit_arguments(self, node):
first = False
else:
self.write(", ")
self.write("**" + node.kwarg.arg)
self.write("**" + _mangle_keyword(node.kwarg.arg))
if node.kwarg.annotation:
self.write(": ")
self.traverse(node.kwarg.annotation)
Expand All @@ -1580,7 +1592,7 @@ def visit_keyword(self, node):
if node.arg is None:
self.write("**")
else:
self.write(node.arg)
self.write(_mangle_keyword(node.arg))
self.write("=")
self.traverse(node.value)

Expand All @@ -1596,9 +1608,9 @@ def visit_Lambda(self, node):
self.traverse(node.body)

def visit_alias(self, node):
self.write(node.name)
self.write(_mangle_keyword(node.name))
if node.asname:
self.write(" as " + node.asname)
self.write(" as " + _mangle_keyword(node.asname))

def visit_withitem(self, node):
self.traverse(node.context_expr)
Expand Down Expand Up @@ -1687,7 +1699,7 @@ def visit_MatchAs(self, node):
with self.require_parens(_Precedence.TEST, node):
self.set_precedence(_Precedence.BOR, node.pattern)
self.traverse(node.pattern)
self.write(f" as {node.name}")
self.write(f" as {_mangle_keyword(node.name)}")

def visit_MatchOr(self, node):
with self.require_parens(_Precedence.BOR, node):
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,31 @@ def test_nonlocal(self):
def test_raise_from(self):
self.check_ast_roundtrip(raise_from)

def test_unicode_mangled_keywords(self):
    """Round-trip identifiers that are non-ASCII spellings of keywords."""
    # See issue 46520
    self.check_ast_roundtrip('𝕕𝕖𝕗 = 1')
    self.check_ast_roundtrip('del 𝕕𝕖𝕝')
    self.check_ast_roundtrip('f(𝕕𝕖𝕗, 𝕕𝕖𝕗 = 2, *𝕕𝕖𝕗, **𝕕𝕖𝕗)')
    self.check_ast_roundtrip('def 𝕕𝕖𝕗(𝕕𝕖𝕗, 𝕕𝕖𝕗 = 2, *𝕕𝕖𝕗, **𝕕𝕖𝕗): pass')
    self.check_ast_roundtrip('class 𝕔𝕝𝕒𝕤𝕤: pass')
    self.check_ast_roundtrip('with 𝕨𝕚𝕥𝕙 as 𝕒𝕤: pass')
    self.check_ast_roundtrip('try: pass\nexcept 𝕖𝕩𝕔𝕖𝕡𝕥 as 𝕒𝕤: pass')
    self.check_ast_roundtrip('import 𝕚𝕞𝕡𝕠𝕣𝕥 as 𝕒𝕤')
    self.check_ast_roundtrip('from 𝕗𝕣𝕠𝕞 import 𝕚𝕞𝕡𝕠𝕣𝕥 as 𝕒𝕤')
    self.check_ast_roundtrip('global 𝕘𝕝𝕠𝕓𝕒𝕝')
    self.check_ast_roundtrip('nonlocal 𝕟𝕠𝕟𝕝𝕠𝕔𝕒𝕝')
    self.check_ast_roundtrip('foo.𝕝𝕒𝕞𝕓𝕕𝕒')
    self.check_ast_roundtrip('lambda 𝕝𝕒𝕞𝕓𝕕𝕒: 1')
    self.check_ast_roundtrip('(𝕕𝕖𝕗 := 1)')
    # `match` is parsed unusually, allowing ASCII keywords in many
    # places.
    # The snippet is written with explicit newlines and indentation so
    # that the `case` clauses are unambiguously nested under `match`,
    # independent of how this method itself is indented.
    self.check_ast_roundtrip(
        'match match:\n'
        '    case [*case]: 1\n'
        '    case {**case}: 1\n'
        '    case 𝕔𝕝𝕒𝕤𝕤(case = 1): 1\n'
        '    case case as 𝕒𝕤: 1'
    )

def test_bytes(self):
self.check_ast_roundtrip("b'123'")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
``ast.unparse`` now correctly handles the result of parsing code that uses
identifiers which are not keywords themselves but normalize (NFKC) to Python
keywords, such as "𝕕𝕖𝕗".