Skip to content

Commit 92458ea

Browse files
committed
Add support for t-string gettext extraction
1 parent 406dc71 commit 92458ea

File tree

5 files changed

+200
-0
lines changed

5 files changed

+200
-0
lines changed

Lib/gettext.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
# you'll need to study the GNU gettext code to do this.
4444

4545

46+
import ast
4647
import operator
4748
import os
4849
import sys
@@ -655,3 +656,96 @@ def npgettext(context, msgid1, msgid2, n):
655656
# gettext.
656657

657658
Catalog = translation
659+
660+
661+
# utils for t-string handling in gettext translation + pygettext extraction
662+
# TBD where they should go, and whether this should be a public API or internal,
663+
# especially the part about generating names from interpolations which is IMHO
664+
# beneficial to have in stdlib so any implementation can re-use it without
665+
# risking diverging behavior for the same expression between implementations
666+
667+
class _NameTooComplexError(ValueError):
668+
"""
669+
Raised when an expression is too complex to derive a format string name
670+
from it, or the resulting name would not be valid in a format string.
671+
"""
672+
673+
674+
class _ExtractNamesVisitor(ast.NodeVisitor):
675+
def __init__(self):
676+
self._name_parts = []
677+
678+
@property
679+
def name(self) -> str:
680+
name = '__'.join(self._name_parts)
681+
if not name.isidentifier():
682+
raise _NameTooComplexError(
683+
'Only expressions which can be converted to a format string '
684+
'placeholder may be used in a gettext call; assign the '
685+
'expression to a variable and use that instead'
686+
)
687+
return name
688+
689+
def generic_visit(self, node):
690+
name = node.__class__.__name__
691+
raise _NameTooComplexError(
692+
f'Only simple expressions are supported, {name} is not allowed; '
693+
'assign the expression to a variable and use that instead'
694+
)
695+
696+
def visit_Attribute(self, node):
697+
self.visit(node.value)
698+
self._name_parts.append(node.attr)
699+
700+
def visit_Name(self, node):
701+
self._name_parts.append(node.id)
702+
703+
def visit_Subscript(self, node):
704+
self.visit(node.value)
705+
if not isinstance(node.slice, ast.Constant):
706+
raise _NameTooComplexError(
707+
'Only constant value dict keys may be used in a gettext call; '
708+
'assign the expression to a variable and use that instead'
709+
)
710+
self.visit(node.slice)
711+
712+
def visit_Constant(self, node):
713+
self._name_parts.append(str(node.value))
714+
715+
def visit_Call(self, node):
716+
self.visit(node.func)
717+
if node.args:
718+
raise _NameTooComplexError(
719+
'Function calls with arguments are not supported in gettext '
720+
'calls; assign the result to a variable and use that instead'
721+
)
722+
723+
724+
def _template_node_to_format(node: ast.TemplateStr) -> str:
    """Build the format string equivalent of a template string AST node.

    Literal segments are copied with their braces doubled.  Each
    interpolation is replaced by a ``{placeholder}`` whose name is derived
    from the interpolated expression by :class:`_ExtractNamesVisitor`; only
    the expression itself is looked at here.

    Raises :exc:`_NameTooComplexError` when an expression is not suitable
    for conversion, or when two different expressions would end up with the
    same placeholder name.
    """
    pieces = []
    # Placeholder name -> source text of the expression that produced it.
    seen_exprs = {}
    for segment in node.values:
        if isinstance(segment, ast.Constant):
            literal = segment.value
            escaped = literal.replace('{', '{{').replace('}', '}}')
            pieces.append(escaped)
        elif isinstance(segment, ast.Interpolation):
            expr_node = segment.value
            extractor = _ExtractNamesVisitor()
            extractor.visit(expr_node)
            placeholder = extractor.name
            expr = ast.unparse(expr_node)
            previous = seen_exprs.get(placeholder)
            # The same placeholder may repeat, but only for the same
            # expression; otherwise one name would stand for two values.
            if previous and previous != expr:
                raise _NameTooComplexError(
                    f'Interpolations of {previous} and {expr} cannot '
                    'be mixed in the same gettext call; assign one of '
                    'them to a variable and use that instead'
                )
            seen_exprs[placeholder] = expr
            pieces.append('{' + placeholder + '}')
    return ''.join(pieces)

Lib/test/test_tools/i18n_data/messages.pot

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,59 @@ msgid_plural "worlds"
9797
msgstr[0] ""
9898
msgstr[1] ""
9999

100+
#: messages.py:122 messages.py:123
101+
msgid "Hello World"
102+
msgstr ""
103+
104+
#: messages.py:124
105+
msgid "Hello {name}"
106+
msgstr ""
107+
108+
#: messages.py:125
109+
msgid "Hello {name__title}"
110+
msgstr ""
111+
112+
#: messages.py:126 messages.py:127 messages.py:128
113+
msgid "Hello {user__name}"
114+
msgstr ""
115+
116+
#: messages.py:129
117+
msgid "Hello {numbers__69}"
118+
msgstr ""
119+
120+
#: messages.py:132
121+
msgid "Hello {{escaped braces}}"
122+
msgstr ""
123+
124+
#: messages.py:133
125+
msgid "Hello {{{interpolated_braces}}} inside esacped braces"
126+
msgstr ""
127+
128+
#: messages.py:134
129+
msgid "}}Even{{ more {{braces}}"
130+
msgstr ""
131+
132+
#: messages.py:135
133+
msgid "}}Even{{ more {{{interpolated_braces}}}"
134+
msgstr ""
135+
136+
#: messages.py:139
137+
msgid "Weird {meow__False}"
138+
msgstr ""
139+
140+
#: messages.py:140
141+
msgid "Weird {meow__True}"
142+
msgstr ""
143+
144+
#: messages.py:141
145+
msgid "Weird {meow__69j}"
146+
msgstr ""
147+
148+
#: messages.py:142
149+
msgid "Weird {meow__Ellipsis}"
150+
msgstr ""
151+
152+
#: messages.py:143
153+
msgid "Weird {meow__None}"
154+
msgstr ""
155+

Lib/test/test_tools/i18n_data/messages.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,42 @@ def _(x="don't extract me"):
117117
# f-strings
118118
f"Hello, {_('world')}!"
119119
f"Hello, {ngettext('world', 'worlds', 3)}!"
120+
121+
# t-strings
122+
_(t'Hello World')
123+
_(t'Hello' t' World')
124+
_(t'Hello {name}')
125+
_(t'Hello {name.title()}')
126+
_(t'Hello {user.name}')
127+
_(t'Hello {user['name']}')
128+
_(t'Hello {user["name"]}')
129+
_(t'Hello {numbers[69]}')
130+
131+
# t-strings - escaped braces
132+
_(t'Hello {{escaped braces}}')
133+
_(t'Hello {{{interpolated_braces}}} inside esacped braces')
134+
_(t'}}Even{{ more {{braces}}')
135+
_(t'}}Even{{ more {{{interpolated_braces}}}')
136+
137+
# t-strings - slightly weird cases but simple enough to convert in a
138+
# straightforward manner
139+
_(t'Weird {meow[False]}')
140+
_(t'Weird {meow[True]}')
141+
_(t'Weird {meow[69j]}')
142+
_(t'Weird {meow[...]}')
143+
_(t'Weird {meow[None]}')
144+
145+
# t-strings - invalid cases
146+
_(t'Invalid {t"nesting"}') # nested tstrings are not allowed
147+
_(t'Invalid {meow[meow()]}') # non-const subscript
148+
_(t'Invalid {meow[kitty]}') # non-const subscript
149+
_(t'Invalid {meow[()]}') # non-primitive subscript
150+
_(t'Invalid {meow(42)}') # call with argument
151+
_(t'Invalid {meow["foo:r"]}') # subscript that cannot be formatstringified
152+
_(t'Invalid {meow[3.14]}') # subscript that cannot be formatstringified
153+
_(t'Invalid {meow[...]} {meow.Ellipsis}') # same name for different expressions
154+
_(t'Invalid {meow.loudly} {meow["loudly"]}') # same name for different expressions
155+
_(t'Invalid {meow.loudly} {meow.loudly()}') # same name for different expressions
156+
_(t'Invalid {3.14}') # format string is not a valid identifier
157+
_(t'Invalid {42}') # format string is not a valid identifier
158+
_(t'Invalid {69j}') # format string is not a valid identifier
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Support extracting t-strings in :program:`pygettext`.

Tools/i18n/pygettext.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@
149149
import time
150150
import tokenize
151151
from dataclasses import dataclass, field
152+
from gettext import _NameTooComplexError, _template_node_to_format
152153
from io import BytesIO
153154
from operator import itemgetter
154155

@@ -537,6 +538,12 @@ def _extract_message_with_spec(self, node, spec):
537538
msg_data = {}
538539
for arg_type, position in spec.items():
539540
arg = node.args[position]
541+
if self._is_template_str(arg):
542+
try:
543+
msg_data[arg_type] = _template_node_to_format(arg)
544+
except _NameTooComplexError as exc:
545+
return str(exc)
546+
continue
540547
if not self._is_string_const(arg):
541548
return (f'Expected a string constant for argument '
542549
f'{position + 1}, got {ast.unparse(arg)}')
@@ -626,6 +633,9 @@ def _get_func_name(self, node):
626633
def _is_string_const(self, node):
627634
return isinstance(node, ast.Constant) and isinstance(node.value, str)
628635

636+
def _is_template_str(self, node):
637+
return isinstance(node, ast.TemplateStr)
638+
629639
def write_pot_file(messages, options, fp):
630640
timestamp = time.strftime('%Y-%m-%d %H:%M%z')
631641
encoding = fp.encoding if fp.encoding else 'UTF-8'

0 commit comments

Comments
 (0)