Skip to content

Commit e57d72a

Browse files
authored
Add a script for machine-translation (python#76)
This script is handy for machine-translating highly viewed documents that have not been translated yet. Its output must still be reviewed by a human.
1 parent 4777eba commit e57d72a

File tree

2 files changed

+149
-0
lines changed

2 files changed

+149
-0
lines changed

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ poutils
22
tqdm
33
pre-commit
44
polib
5+
deep_translator

scripts/translate.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import os
2+
import re
3+
import sys
4+
import polib
5+
from deep_translator import DeeplTranslator, GoogleTranslator
6+
from typing import Dict, Tuple
7+
from argparse import ArgumentParser
8+
9+
def _parse_bool(value: str) -> bool:
    """Convert a command-line string such as ``True`` or ``false`` to a bool.

    argparse hands option values over as strings, so boolean ``choices`` can
    only ever match when the value is converted first.

    Raises:
        ValueError: if *value* is not a recognised boolean spelling; argparse
            turns this into an "invalid value" usage error.
    """
    normalized = value.casefold()
    if normalized in ("true", "yes", "1"):
        return True
    if normalized in ("false", "no", "0"):
        return False
    raise ValueError(f"not a boolean: {value!r}")


# Command-line interface of the machine-translation script.
parser = ArgumentParser()
parser.add_argument("filename", help="File to translate")
parser.add_argument(
    "-t",
    "--translator",
    choices=["google", "deepl"],
    default="deepl",
    help="Translator to use",
)
parser.add_argument(
    "-a",
    "--api-key",
    help="API key for DeepL (required if using DeepL)",
)
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
parser.add_argument("-d", "--debug", action="store_true", help="Debug mode")
parser.add_argument(
    "-s",
    "--skip-translated-entries",
    type=_parse_bool,  # without a converter "True"/"False" never match the bool choices
    choices=[True, False],
    default=True,
    help="Skip already translated entries",
)

args = parser.parse_args()

# Validate the key against the *parsed* translator instead of scanning sys.argv:
# this also covers the default translator (DeepL) and the ``--translator=deepl``
# spelling, both of which a literal "deepl" search in argv misses.
if args.translator == "deepl" and not args.api_key:
    parser.error("the following arguments are required: -a/--api-key (when using DeepL)")

# Module-level switches read by the helper functions and the main loop.
VERBOSE = args.verbose
DEBUG = args.debug
SKIP_TRANSLATED_ENTRIES = args.skip_translated_entries
27+
28+
29+
# Sphinx roles whose ``:role:`target``` markup must reach the output untouched.
# The order is significant only in that each pattern's position is used to
# number its placeholders; every role is listed exactly once.
_ROLES = (
    "c:func",
    "c:type",
    "c:macro",
    "c:member",
    "c:data",
    "py:data",
    "py:mod",
    "func",
    "mod",
    "ref",
    "class",
    "pep",
    "data",
    "exc",
    "term",
    "meth",
    "envvar",
    "file",
    "attr",
    "const",
    "issue",
    "opcode",
    "option",
    "program",
    "keyword",
    "RFC",
    "rfc",
    "doc",
    "source",
    "manpage",
    "mimetype",
    "sup",
    "kbd",
)

# Inline reST markup that is not a role. Longer delimiters come first so that,
# for example, ``**bold**`` is consumed before the single-asterisk italic
# pattern gets a chance to match part of it.
_INLINE_MARKUP = (
    r"``[^`]+``",  # inline literal
    r"`[^`]+`__",  # anonymous hyperlink reference
    r"`[^`]+`_",  # named hyperlink reference
    r"\*\*[^\*]+\*\*",  # bold text between **
    r"\*[^\*]+\*",  # italic text between *
)

# Regular-expression sources, role patterns first, then the inline markup.
_patterns = [f":{role}:`[^`]+`" for role in _ROLES] + list(_INLINE_MARKUP)

# Compiled once at import time; applied in list order.
_exps = [re.compile(e) for e in _patterns]
72+
73+
74+
def protect_sphinx_directives(s: str) -> Tuple[dict, str]:
    """Replace Sphinx/reST inline markup in *s* with opaque placeholders.

    Every distinct piece of markup matched by one of the compiled patterns in
    ``_exps`` receives its own placeholder, so two different targets of the
    same role (``:func:`a``` and ``:func:`b```) can both be restored later.
    Repeated occurrences of identical markup share a single placeholder.

    Parameters:
        s: string containing the text to translate.

    Returns:
        A ``(placeholders, protected_text)`` tuple: a dictionary mapping each
        placeholder to the markup it stands for, and the text with that
        markup replaced. Because patterns are applied in sequence, markup
        captured by a later pattern may itself contain earlier placeholders.
    """
    d: Dict[str, str] = {}
    for index, exp in enumerate(_exps):
        matches = exp.findall(s)
        if DEBUG:
            print(exp, matches)
        # dict.fromkeys drops repeated matches while keeping first-seen order,
        # so the occurrence counter only advances for distinct markup.
        for occurrence, match in enumerate(dict.fromkeys(matches)):
            # The trailing "X" keeps placeholders prefix-free: "...N1X" can never
            # be found inside "...N10X" when the text is restored with replace().
            ph = f"XASDF{index:02d}N{occurrence}X"
            s = s.replace(match, ph)
            d[ph] = match
    return d, s
97+
98+
99+
def undo_sphinx_directives_protection(placeholders: dict, translated_text: str) -> str:
    """Put the original markup back in place of its placeholders.

    Parameters:
        placeholders: mapping of placeholder -> original markup, in the order
            the placeholders were created.
        translated_text: translated text that still contains the placeholders.

    Returns:
        The translated text with every placeholder replaced by its markup.
    """
    # Restore newest-first: markup captured later may contain placeholders that
    # were inserted earlier (e.g. an italic span wrapping an already protected
    # role), so the outer value has to be expanded before the inner
    # placeholder can be found and resolved.
    for ph, value in reversed(list(placeholders.items())):
        translated_text = translated_text.replace(ph, value)
        if DEBUG:
            print(ph, value)
            print(translated_text)
    return translated_text
106+
107+
108+
if __name__ == "__main__":
    # Script entry point: machine-translate the untranslated entries of a .po
    # file from English to Turkish and mark them fuzzy for human review.
    filename = args.filename
    if not os.path.isfile(filename):
        print(f"File not found: '{filename}'")
        sys.exit(-1)

    po = polib.pofile(filename)

    if args.translator.lower() == "google":
        translator = GoogleTranslator(source="en", target="tr")
    elif args.translator.lower() == "deepl":
        translator = DeeplTranslator(api_key=args.api_key, source="en", target="tr", use_free_api=True)
    else:
        raise ValueError("Invalid translator")

    try:
        for entry in po:
            # If the entry already has a translation, skip it.
            if SKIP_TRANSLATED_ENTRIES and entry.msgstr:
                continue

            print("\nEN |", entry.msgid)
            placeholders, temp_text = protect_sphinx_directives(entry.msgid)
            if VERBOSE:
                print(temp_text)
                print(placeholders)

            # Translate the temporary text without sphinx statements
            translated_text = translator.translate(temp_text)
            if not translated_text:
                # NOTE(review): presumably the translator can return an empty
                # result for some inputs -- confirm. Leave the entry untouched
                # rather than crash while restoring placeholders.
                if VERBOSE:
                    print("No translation returned; entry left untouched")
                continue

            # Recover sphinx statements
            real_text = undo_sphinx_directives_protection(placeholders, translated_text)
            print("TR |", real_text)

            # Replace the po file translated entry
            entry.msgstr = real_text

            # Add the fuzzy flag so a human can review later; do not add it
            # twice when the entry is already marked fuzzy.
            if "fuzzy" not in entry.flags:
                entry.flags.append("fuzzy")
    finally:
        # Save even when a translation call fails or the run is interrupted,
        # so entries translated so far are not lost.
        po.save()

0 commit comments

Comments
 (0)