Skip to content

Commit a993e90

Browse files
bpo-28002: Roundtrip f-strings with ast.unparse better (python#19612)
By attempting to avoid backslashes in f-string expressions. We also now proactively raise errors for some backslashes we can't avoid while unparsing FormattedValues Co-authored-by: hauntsaninja <> Co-authored-by: Shantanu <hauntsaninja@users.noreply.github.com> Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
1 parent 9fc319d commit a993e90

File tree

2 files changed

+115
-37
lines changed

2 files changed

+115
-37
lines changed

Lib/ast.py

Lines changed: 86 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -662,17 +662,23 @@ def next(self):
662662
except ValueError:
663663
return self
664664

665+
666+
_SINGLE_QUOTES = ("'", '"')
667+
_MULTI_QUOTES = ('"""', "'''")
668+
_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)
669+
665670
class _Unparser(NodeVisitor):
666671
"""Methods in this class recursively traverse an AST and
667672
output source code for the abstract syntax; original formatting
668673
is disregarded."""
669674

670-
def __init__(self):
675+
def __init__(self, *, _avoid_backslashes=False):
671676
self._source = []
672677
self._buffer = []
673678
self._precedences = {}
674679
self._type_ignores = {}
675680
self._indent = 0
681+
self._avoid_backslashes = _avoid_backslashes
676682

677683
def interleave(self, inter, f, seq):
678684
"""Call f on each item in seq, calling inter() in between."""
@@ -1067,15 +1073,85 @@ def visit_AsyncWith(self, node):
10671073
with self.block(extra=self.get_type_comment(node)):
10681074
self.traverse(node.body)
10691075

1076+
def _str_literal_helper(
1077+
self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False
1078+
):
1079+
"""Helper for writing string literals, minimizing escapes.
1080+
Returns the tuple (string literal to write, possible quote types).
1081+
"""
1082+
def escape_char(c):
1083+
# \n and \t are non-printable, but we only escape them if
1084+
# escape_special_whitespace is True
1085+
if not escape_special_whitespace and c in "\n\t":
1086+
return c
1087+
# Always escape backslashes and other non-printable characters
1088+
if c == "\\" or not c.isprintable():
1089+
return c.encode("unicode_escape").decode("ascii")
1090+
return c
1091+
1092+
escaped_string = "".join(map(escape_char, string))
1093+
possible_quotes = quote_types
1094+
if "\n" in escaped_string:
1095+
possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES]
1096+
possible_quotes = [q for q in possible_quotes if q not in escaped_string]
1097+
if not possible_quotes:
1098+
# If there aren't any possible_quotes, fallback to using repr
1099+
# on the original string. Try to use a quote from quote_types,
1100+
# e.g., so that we use triple quotes for docstrings.
1101+
string = repr(string)
1102+
quote = next((q for q in quote_types if string[0] in q), string[0])
1103+
return string[1:-1], [quote]
1104+
if escaped_string:
1105+
# Sort so that we prefer '''"''' over """\""""
1106+
possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1])
1107+
# If we're using triple quotes and we'd need to escape a final
1108+
# quote, escape it
1109+
if possible_quotes[0][0] == escaped_string[-1]:
1110+
assert len(possible_quotes[0]) == 3
1111+
escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1]
1112+
return escaped_string, possible_quotes
1113+
1114+
def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):
1115+
"""Write string literal value with a best effort attempt to avoid backslashes."""
1116+
string, quote_types = self._str_literal_helper(string, quote_types=quote_types)
1117+
quote_type = quote_types[0]
1118+
self.write(f"{quote_type}{string}{quote_type}")
1119+
10701120
def visit_JoinedStr(self, node):
10711121
self.write("f")
1072-
self._fstring_JoinedStr(node, self.buffer_writer)
1073-
self.write(repr(self.buffer))
1122+
if self._avoid_backslashes:
1123+
self._fstring_JoinedStr(node, self.buffer_writer)
1124+
self._write_str_avoiding_backslashes(self.buffer)
1125+
return
1126+
1127+
# If we don't need to avoid backslashes globally (i.e., we only need
1128+
# to avoid them inside FormattedValues), it's cosmetically preferred
1129+
# to use escaped whitespace. That is, it's preferred to use backslashes
1130+
# for cases like: f"{x}\n". To accomplish this, we keep track of what
1131+
# in our buffer corresponds to FormattedValues and what corresponds to
1132+
# Constant parts of the f-string, and allow escapes accordingly.
1133+
buffer = []
1134+
for value in node.values:
1135+
meth = getattr(self, "_fstring_" + type(value).__name__)
1136+
meth(value, self.buffer_writer)
1137+
buffer.append((self.buffer, isinstance(value, Constant)))
1138+
new_buffer = []
1139+
quote_types = _ALL_QUOTES
1140+
for value, is_constant in buffer:
1141+
# Repeatedly narrow down the list of possible quote_types
1142+
value, quote_types = self._str_literal_helper(
1143+
value, quote_types=quote_types,
1144+
escape_special_whitespace=is_constant
1145+
)
1146+
new_buffer.append(value)
1147+
value = "".join(new_buffer)
1148+
quote_type = quote_types[0]
1149+
self.write(f"{quote_type}{value}{quote_type}")
10741150

10751151
def visit_FormattedValue(self, node):
10761152
self.write("f")
10771153
self._fstring_FormattedValue(node, self.buffer_writer)
1078-
self.write(repr(self.buffer))
1154+
self._write_str_avoiding_backslashes(self.buffer)
10791155

10801156
def _fstring_JoinedStr(self, node, write):
10811157
for value in node.values:
@@ -1090,11 +1166,13 @@ def _fstring_Constant(self, node, write):
10901166

10911167
def _fstring_FormattedValue(self, node, write):
10921168
write("{")
1093-
unparser = type(self)()
1169+
unparser = type(self)(_avoid_backslashes=True)
10941170
unparser.set_precedence(_Precedence.TEST.next(), node.value)
10951171
expr = unparser.visit(node.value)
10961172
if expr.startswith("{"):
10971173
write(" ") # Separate pair of opening brackets as "{ {"
1174+
if "\\" in expr:
1175+
raise ValueError("Unable to avoid backslash in f-string expression part")
10981176
write(expr)
10991177
if node.conversion != -1:
11001178
conversion = chr(node.conversion)
@@ -1111,33 +1189,17 @@ def visit_Name(self, node):
11111189
self.write(node.id)
11121190

11131191
def _write_docstring(self, node):
1114-
def esc_char(c):
1115-
if c in ("\n", "\t"):
1116-
# In the AST form, we don't know the author's intentation
1117-
# about how this should be displayed. We'll only escape
1118-
# \n and \t, because they are more likely to be unescaped
1119-
# in the source
1120-
return c
1121-
return c.encode('unicode_escape').decode('ascii')
1122-
11231192
self.fill()
11241193
if node.kind == "u":
11251194
self.write("u")
1126-
1127-
value = node.value
1128-
if value:
1129-
# Preserve quotes in the docstring by escaping them
1130-
value = "".join(map(esc_char, value))
1131-
if value[-1] == '"':
1132-
value = value.replace('"', '\\"', -1)
1133-
value = value.replace('"""', '""\\"')
1134-
1135-
self.write(f'"""{value}"""')
1195+
self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES)
11361196

11371197
def _write_constant(self, value):
11381198
if isinstance(value, (float, complex)):
11391199
# Substitute overflowing decimal literal for AST infinities.
11401200
self.write(repr(value).replace("inf", _INFSTR))
1201+
elif self._avoid_backslashes and isinstance(value, str):
1202+
self._write_str_avoiding_backslashes(value)
11411203
else:
11421204
self.write(repr(value))
11431205

Lib/test/test_unparse.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,18 @@ def test_fstrings(self):
152152
# See issue 25180
153153
self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""")
154154
self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""")
155+
self.check_ast_roundtrip("""f''""")
156+
self.check_ast_roundtrip('''f"""'end' "quote\\""""''')
157+
158+
def test_fstrings_complicated(self):
159+
# See issue 28002
160+
self.check_ast_roundtrip("""f'''{"'"}'''""")
161+
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
162+
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'single quote\\'\'\'\'''')
163+
self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""')
164+
self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""')
165+
self.check_ast_roundtrip('''f"a\\r\\nb"''')
166+
self.check_ast_roundtrip('''f"\\u2028{'x'}"''')
155167

156168
def test_strings(self):
157169
self.check_ast_roundtrip("u'foo'")
@@ -311,6 +323,9 @@ def test_invalid_fstring_conversion(self):
311323
)
312324
)
313325

326+
def test_invalid_fstring_backslash(self):
327+
self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\")))
328+
314329
def test_invalid_set(self):
315330
self.check_invalid(ast.Set(elts=[]))
316331

@@ -330,8 +345,8 @@ def test_docstrings(self):
330345
'\r\\r\t\\t\n\\n',
331346
'""">>> content = \"\"\"blabla\"\"\" <<<"""',
332347
r'foo\n\x00',
333-
'🐍⛎𩸽üéş^\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
334-
348+
"' \\'\\'\\'\"\"\" \"\"\\'\\' \\'",
349+
'🐍⛎𩸽üéş^\\\\X\\\\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
335350
)
336351
for docstring in docstrings:
337352
# check as Module docstrings for easy testing
@@ -416,7 +431,6 @@ def test_simple_expressions_parens(self):
416431
self.check_src_roundtrip("call((yield x))")
417432
self.check_src_roundtrip("return x + (yield x)")
418433

419-
420434
def test_class_bases_and_keywords(self):
421435
self.check_src_roundtrip("class X:\n pass")
422436
self.check_src_roundtrip("class X(A):\n pass")
@@ -429,6 +443,13 @@ def test_class_bases_and_keywords(self):
429443
self.check_src_roundtrip("class X(*args):\n pass")
430444
self.check_src_roundtrip("class X(*args, **kwargs):\n pass")
431445

446+
def test_fstrings(self):
447+
self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
448+
self.check_src_roundtrip('''f"\\u2028{'x'}"''')
449+
self.check_src_roundtrip(r"f'{x}\n'")
450+
self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''')
451+
self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''')
452+
432453
def test_docstrings(self):
433454
docstrings = (
434455
'"""simple doc string"""',
@@ -443,6 +464,10 @@ def test_docstrings(self):
443464
'""""""',
444465
'"""\'\'\'"""',
445466
'"""\'\'\'\'\'\'"""',
467+
'"""🐍⛎𩸽üéş^\\\\X\\\\BB⟿"""',
468+
'"""end in single \'quote\'"""',
469+
"'''end in double \"quote\"'''",
470+
'"""almost end in double "quote"."""',
446471
)
447472

448473
for prefix in docstring_prefixes:
@@ -483,9 +508,8 @@ class DirectoryTestCase(ASTTestCase):
483508

484509
lib_dir = pathlib.Path(__file__).parent / ".."
485510
test_directories = (lib_dir, lib_dir / "test")
486-
skip_files = {"test_fstring.py"}
487511
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
488-
"test_ast.py", "test_asdl_parser.py"}
512+
"test_ast.py", "test_asdl_parser.py", "test_fstring.py"}
489513

490514
_files_to_test = None
491515

@@ -525,14 +549,6 @@ def test_files(self):
525549
if test.support.verbose:
526550
print(f"Testing {item.absolute()}")
527551

528-
# Some f-strings are not correctly round-tripped by
529-
# Tools/parser/unparse.py. See issue 28002 for details.
530-
# We need to skip files that contain such f-strings.
531-
if item.name in self.skip_files:
532-
if test.support.verbose:
533-
print(f"Skipping {item.absolute()}: see issue 28002")
534-
continue
535-
536552
with self.subTest(filename=item):
537553
source = read_pyfile(item)
538554
self.check_ast_roundtrip(source)

0 commit comments

Comments
 (0)