Skip to content

Commit 6f87093

Browse files
bpo-33189: pygettext.py now accepts only literal strings as docstrings and translatable strings, and rejects bytes literals and f-string expressions. (GH-6364)
(cherry picked from commit 6952482) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent fc8693d commit 6f87093

File tree

3 files changed

+76
-11
lines changed

3 files changed

+76
-11
lines changed

Lib/test/test_tools/test_i18n.py

+65-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import os
44
import unittest
5-
import textwrap
5+
from textwrap import dedent
66

77
from test.support.script_helper import assert_python_ok
88
from test.test_tools import skip_if_missing, toolsdir
@@ -107,25 +107,84 @@ def test_POT_Creation_Date(self):
107107
# This will raise if the date format does not exactly match.
108108
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
109109

110+
def test_funcdocstring(self):
111+
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
112+
with self.subTest(doc):
113+
msgids = self.extract_docstrings_from_str(dedent('''\
114+
def foo(bar):
115+
%s
116+
''' % doc))
117+
self.assertIn('doc', msgids)
118+
119+
def test_funcdocstring_bytes(self):
120+
msgids = self.extract_docstrings_from_str(dedent('''\
121+
def foo(bar):
122+
b"""doc"""
123+
'''))
124+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
125+
126+
def test_funcdocstring_fstring(self):
127+
msgids = self.extract_docstrings_from_str(dedent('''\
128+
def foo(bar):
129+
f"""doc"""
130+
'''))
131+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
132+
133+
def test_classdocstring(self):
134+
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
135+
with self.subTest(doc):
136+
msgids = self.extract_docstrings_from_str(dedent('''\
137+
class C:
138+
%s
139+
''' % doc))
140+
self.assertIn('doc', msgids)
141+
142+
def test_classdocstring_bytes(self):
143+
msgids = self.extract_docstrings_from_str(dedent('''\
144+
class C:
145+
b"""doc"""
146+
'''))
147+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
148+
149+
def test_classdocstring_fstring(self):
150+
msgids = self.extract_docstrings_from_str(dedent('''\
151+
class C:
152+
f"""doc"""
153+
'''))
154+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
155+
156+
def test_msgid(self):
157+
msgids = self.extract_docstrings_from_str(
158+
'''_("""doc""" r'str' u"ing")''')
159+
self.assertIn('docstring', msgids)
160+
161+
def test_msgid_bytes(self):
162+
msgids = self.extract_docstrings_from_str('_(b"""doc""")')
163+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
164+
165+
def test_msgid_fstring(self):
166+
msgids = self.extract_docstrings_from_str('_(f"""doc""")')
167+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
168+
110169
def test_funcdocstring_annotated_args(self):
111170
""" Test docstrings for functions with annotated args """
112-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
171+
msgids = self.extract_docstrings_from_str(dedent('''\
113172
def foo(bar: str):
114173
"""doc"""
115174
'''))
116175
self.assertIn('doc', msgids)
117176

118177
def test_funcdocstring_annotated_return(self):
119178
""" Test docstrings for functions with annotated return type """
120-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
179+
msgids = self.extract_docstrings_from_str(dedent('''\
121180
def foo(bar) -> str:
122181
"""doc"""
123182
'''))
124183
self.assertIn('doc', msgids)
125184

126185
def test_funcdocstring_defvalue_args(self):
127186
""" Test docstring for functions with default arg values """
128-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
187+
msgids = self.extract_docstrings_from_str(dedent('''\
129188
def foo(bar=()):
130189
"""doc"""
131190
'''))
@@ -135,7 +194,7 @@ def test_funcdocstring_multiple_funcs(self):
135194
""" Test docstring extraction for multiple functions combining
136195
annotated args, annotated return types and default arg values
137196
"""
138-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
197+
msgids = self.extract_docstrings_from_str(dedent('''\
139198
def foo1(bar: tuple=()) -> str:
140199
"""doc1"""
141200
@@ -153,7 +212,7 @@ def test_classdocstring_early_colon(self):
153212
""" Test docstring extraction for a class with colons occurring within
154213
the parentheses.
155214
"""
156-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
215+
msgids = self.extract_docstrings_from_str(dedent('''\
157216
class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
158217
"""doc"""
159218
'''))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:program:`pygettext.py` now recognizes only literal strings as docstrings
2+
and translatable strings, and rejects bytes literals and f-string expressions.

Tools/i18n/pygettext.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
232232
return ''.join(escapes[b] for b in s.encode(encoding))
233233

234234

235+
def is_literal_string(s):
    """Return True if the token text *s* spells a plain ``str`` literal.

    *s* is the source text of a string token, e.g. ``'"doc"'`` or
    ``"r'doc'"``.  A token is accepted when it starts directly with a quote
    character, or with a single ``r``/``R``/``u``/``U`` prefix immediately
    followed by a quote.  Anything else (bytes literals such as ``b"..."``
    or ``rb"..."``, and f-strings such as ``f"..."`` or ``rf"..."``) is
    rejected, as is an empty string or a lone prefix character.
    """
    quotes = ('"', "'")
    # Guard the index operations: an empty string or a bare prefix
    # character is not a literal, and must not raise IndexError.
    if not s:
        return False
    if s[0] in quotes:
        return True
    return len(s) > 1 and s[0] in ('r', 'R', 'u', 'U') and s[1] in quotes
237+
238+
235239
def safe_eval(s):
236240
# unwrap quotes, safely
237241
return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ def __init__(self, options):
317321
def __call__(self, ttype, tstring, stup, etup, line):
318322
# dispatch
319323
## import token
320-
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
321-
## 'tstring:', tstring
324+
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
325+
## file=sys.stderr)
322326
self.__state(ttype, tstring, stup[0])
323327

324328
def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ def __waiting(self, ttype, tstring, lineno):
327331
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
328332
# module docstring?
329333
if self.__freshmodule:
330-
if ttype == tokenize.STRING:
334+
if ttype == tokenize.STRING and is_literal_string(tstring):
331335
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
332336
self.__freshmodule = 0
333337
elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ def __suiteseen(self, ttype, tstring, lineno):
353357

354358
def __suitedocstring(self, ttype, tstring, lineno):
355359
# ignore any intervening noise
356-
if ttype == tokenize.STRING:
360+
if ttype == tokenize.STRING and is_literal_string(tstring):
357361
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
358362
self.__state = self.__waiting
359363
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ def __openseen(self, ttype, tstring, lineno):
378382
if self.__data:
379383
self.__addentry(EMPTYSTRING.join(self.__data))
380384
self.__state = self.__waiting
381-
elif ttype == tokenize.STRING:
385+
elif ttype == tokenize.STRING and is_literal_string(tstring):
382386
self.__data.append(safe_eval(tstring))
383387
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
384388
token.NEWLINE, tokenize.NL]:

0 commit comments

Comments
 (0)