
Commit 323f635

[3.12] gh-115154: Fix untokenize handling of unicode named literals (GH-115171) (#115662)

gh-115154: Fix untokenize handling of unicode named literals (GH-115171)
(cherry picked from commit ecf16ee)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent 94d1a7b commit 323f635
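
In short: tokenize.untokenize() used to double every brace in FSTRING_MIDDLE tokens, which corrupts \N{...} named escapes. A minimal reproduction sketch (assumes Python 3.12 with this fix applied; only the public tokenize API is used):

    import io
    import tokenize

    source = r"f'\N{SNAKE}'"   # f-string containing the named escape \N{SNAKE}
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    print(tokenize.untokenize(tokens))
    # With this fix the source round-trips unchanged: f'\N{SNAKE}'
    # Before it, every brace in the FSTRING_MIDDLE token was doubled,
    # yielding f'\N{{SNAKE}}', which is no longer a valid \N escape.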

File tree

3 files changed: +85 −10 lines changed

Lib/test/test_tokenize.py  +37 −3

@@ -1874,6 +1874,43 @@ def test_roundtrip(self):
                              " print('Can not import' # comment2\n)"
                              "else: print('Loaded')\n")
 
+        self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
+        self.check_roundtrip(r"f'\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\N{{SNAKE}}'")
+        self.check_roundtrip(r"f'\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'")
+
+        self.check_roundtrip(r"f'\\N{1}'")
+        self.check_roundtrip(r"f'\\\\N{2}'")
+        self.check_roundtrip(r"f'\\\\\\N{3}'")
+        self.check_roundtrip(r"f'\\\\\\\\N{4}'")
+
+        self.check_roundtrip(r"f'\\N{{'")
+        self.check_roundtrip(r"f'\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\\\N{{'")
+        cases = [
+    """
+if 1:
+    "foo"
+"bar"
+""",
+    """
+if 1:
+    ("foo"
+"bar")
+""",
+    """
+if 1:
+    "foo"
+    "bar"
+""" ]
+        for case in cases:
+            self.check_roundtrip(case)
+
+
     def test_continuation(self):
         # Balancing continuation
         self.check_roundtrip("a = (3,4, \n"

@@ -1908,9 +1945,6 @@ def test_random_files(self):
         tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
 
-        # TODO: Remove this once we can untokenize PEP 701 syntax
-        testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
-
         if not support.is_resource_enabled("cpu"):
             testfiles = random.sample(testfiles, 10)
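
The tests above go through the suite's check_roundtrip helper, which asserts that tokenizing and then untokenizing a source string reproduces it. A simplified stand-in, assuming full 5-tuple tokens (the real helper in test_tokenize.py also exercises the 2-tuple compat mode; assert_roundtrip is a name made up for this sketch):

    import io
    import tokenize

    def assert_roundtrip(source):
        tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
        assert tokenize.untokenize(tokens) == source, source

    assert_roundtrip(r"f'\N{SNAKE}'")   # named escape: braces must stay single
    assert_roundtrip(r"f'\\N{1}'")      # escaped backslash: {1} is a real field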

Lib/tokenize.py  +46 −7
@@ -170,6 +170,7 @@ def __init__(self):
         self.tokens = []
         self.prev_row = 1
         self.prev_col = 0
+        self.prev_type = None
         self.encoding = None
 
     def add_whitespace(self, start):
@@ -185,6 +186,29 @@ def add_whitespace(self, start):
         if col_offset:
             self.tokens.append(" " * col_offset)
 
+    def escape_brackets(self, token):
+        characters = []
+        consume_until_next_bracket = False
+        for character in token:
+            if character == "}":
+                if consume_until_next_bracket:
+                    consume_until_next_bracket = False
+                else:
+                    characters.append(character)
+            if character == "{":
+                n_backslashes = sum(
+                    1 for char in _itertools.takewhile(
+                        "\\".__eq__,
+                        characters[-2::-1]
+                    )
+                )
+                if n_backslashes % 2 == 0:
+                    characters.append(character)
+                else:
+                    consume_until_next_bracket = True
+            characters.append(character)
+        return "".join(characters)
+
     def untokenize(self, iterable):
         it = iter(iterable)
         indents = []
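
In plain terms, escape_brackets() re-doubles the literal braces of an FSTRING_MIDDLE token while leaving \N{...} named escapes intact; it tells the two apart by counting the backslashes just before the "N" that precedes a "{" (an odd count means a genuine \N escape). The same logic as a standalone sketch, with plain itertools in place of the module's _itertools alias:

    import itertools

    def escape_brackets(token):
        characters = []
        consume_until_next_bracket = False
        for character in token:
            if character == "}":
                if consume_until_next_bracket:
                    consume_until_next_bracket = False   # closing a \N{...}
                else:
                    characters.append(character)         # double the literal "}"
            if character == "{":
                # Backslashes before the "N" preceding this "{": odd means \N{...}
                n_backslashes = sum(
                    1 for char in itertools.takewhile(
                        "\\".__eq__, characters[-2::-1]))
                if n_backslashes % 2 == 0:
                    characters.append(character)         # double the literal "{"
                else:
                    consume_until_next_bracket = True
            characters.append(character)
        return "".join(characters)

    assert escape_brackets(r"\N{SNAKE}") == r"\N{SNAKE}"   # named escape kept
    assert escape_brackets(r"\\N{") == r"\\N{{"            # escaped backslash: double
    assert escape_brackets("{x}") == "{{x}}"               # plain braces: double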
@@ -216,25 +240,29 @@ def untokenize(self, iterable):
                 startline = False
             elif tok_type == FSTRING_MIDDLE:
                 if '{' in token or '}' in token:
+                    token = self.escape_brackets(token)
+                    last_line = token.splitlines()[-1]
                     end_line, end_col = end
-                    end = (end_line, end_col + token.count('{') + token.count('}'))
-                    token = re.sub('{', '{{', token)
-                    token = re.sub('}', '}}', token)
-
+                    extra_chars = last_line.count("{{") + last_line.count("}}")
+                    end = (end_line, end_col + extra_chars)
+            elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+                self.tokens.append(" ")
 
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
             if tok_type in (NEWLINE, NL):
                 self.prev_row += 1
                 self.prev_col = 0
+            self.prev_type = tok_type
         return "".join(self.tokens)
 
     def compat(self, token, iterable):
         indents = []
         toks_append = self.tokens.append
         startline = token[0] in (NEWLINE, NL)
         prevstring = False
+        in_fstring = 0
 
         for tok in _itertools.chain([token], iterable):
             toknum, tokval = tok[:2]
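
The end-column bookkeeping matters for FSTRING_MIDDLE tokens that span lines: only braces doubled on the token's last line shift the column being tracked. A round-trip sketch of the multi-line case (assumes Python 3.12 with this fix):

    import io
    import tokenize

    src = 'f"""a{{b\nc}}d{1}"""\n'
    tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))
    assert tokenize.untokenize(tokens) == src
    # Counting braces across the whole token, as the old code did, overshot
    # the column after a multi-line token, and add_whitespace() then raised
    # ValueError because the next token appeared to start before the
    # previous end.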
@@ -253,6 +281,10 @@ def compat(self, token, iterable):
             else:
                 prevstring = False
 
+            if toknum == FSTRING_START:
+                in_fstring += 1
+            elif toknum == FSTRING_END:
+                in_fstring -= 1
             if toknum == INDENT:
                 indents.append(tokval)
                 continue
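
The in_fstring counter feeds the bracket check added in the next hunk: inside an f-string, "{{" is an escape for a literal brace, so when compat mode would emit two identical brackets back to back they have to be kept apart. A sketch (assumes Python 3.12 with this fix; slicing to 2-tuples forces the position-free compat path):

    import io
    import tokenize

    src = "f'{ {1, 2} }'"   # a set display nested in a replacement field
    two_tuples = [tok[:2]
                  for tok in tokenize.generate_tokens(io.StringIO(src).readline)]
    out = tokenize.untokenize(two_tuples)
    assert "{ {" in out and "} }" in out   # "{{" / "}}" would re-tokenize as escapes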
@@ -265,11 +297,18 @@
                 toks_append(indents[-1])
                 startline = False
             elif toknum == FSTRING_MIDDLE:
-                if '{' in tokval or '}' in tokval:
-                    tokval = re.sub('{', '{{', tokval)
-                    tokval = re.sub('}', '}}', tokval)
+                tokval = self.escape_brackets(tokval)
+
+            # Insert a space between two consecutive brackets if we are in an f-string
+            if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring:
+                tokval = ' ' + tokval
+
+            # Insert a space between two consecutive f-strings
+            if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+                self.tokens.append(" ")
 
             toks_append(tokval)
+            self.prev_type = toknum
 
 
 def untokenize(iterable):
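
compat mode now routes FSTRING_MIDDLE through escape_brackets() as well, so named escapes also survive position-free round-trips. A sketch under the same assumptions:

    import io
    import tokenize

    src = r"f'\N{GREEK SMALL LETTER ALPHA}'"
    two_tuples = [tok[:2]
                  for tok in tokenize.generate_tokens(io.StringIO(src).readline)]
    out = tokenize.untokenize(two_tuples)
    assert eval(out) == eval(src) == '\N{GREEK SMALL LETTER ALPHA}'   # 'α'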
Misc/NEWS.d entry (new file)  +2 −0

@@ -0,0 +1,2 @@
+Fix a bug that was causing the :func:`tokenize.untokenize` function to
+handle unicode named literals incorrectly. Patch by Pablo Galindo
