Skip to content

Commit dc4c343

Browse files
authored
Merge branch '3.11' into trans-library-graphlib
2 parents 670bde9 + a975c17 commit dc4c343

File tree

9 files changed

+534
-14
lines changed

9 files changed

+534
-14
lines changed

.scripts/README.md

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Scripts
2+
3+
Useful scripts for the translation.
4+
5+
## From Google Translation
6+
7+
Translate all untranslated entries of the given .po file with Google Translate.
8+
9+
10+
```sh
11+
.scripts/google_translate.sh library/csv.po
12+
```
13+
14+
## From zh_CN Translation
15+
16+
If a specific document has already been translated into Simplified Chinese (zh_CN) and you'd like to use that translation as a starting point, run the following command:
17+
18+
```sh
19+
.scripts/from_cn.sh library/csv.po
20+
```

.scripts/from_cn.sh

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
# Seed a zh_TW .po file from the zh_CN translation of the same document.
#
# Usage (from the repository root):
#     .scripts/from_cn.sh library/csv.po
#
# bash is required: this script uses `[[ ]]`, `source` and `read -p`,
# none of which are guaranteed by a POSIX /bin/sh (e.g. dash).

# the target .po path (relative to the repository root) is mandatory
if [[ -z "$1" ]]
then
    echo "Usage: $0 <path/to/file.po>" >&2
    exit 1
fi

# every path below is relative to .scripts, so stop if we cannot enter it
cd .scripts || exit 1
source utils/install_poetry.sh

# check if OpenCC is installed
if ! command -v opencc >/dev/null 2>&1
then
    echo "You do not have OpenCC installed. Please install it first."
    echo "Instruction: https://github.com/BYVoid/OpenCC/wiki/Download"
    exit 1
fi

# clone pydoc zh_CN repo and pull from remote
CN_REPO=.python-docs-zh-cn
if [[ ! -d "$CN_REPO" ]]
then
    read -r -p "You do not have a clone of zh_CN repo. Clone now? (y/N)" choice
    case "$choice" in
        y|Y ) git clone --depth 1 --no-single-branch https://github.com/python/python-docs-zh-cn "$CN_REPO" ;;
        * ) echo "Aborted"; exit 1 ;;
    esac
fi
git -C "$CN_REPO" checkout 3.10 # the current latest version of CN repo
git -C "$CN_REPO" pull


# convert zh_CN po content and merge into zh_TW po
TARGET=$1
CN_PATH=$CN_REPO/$TARGET
TW_PATH=../$TARGET

poetry lock
poetry install
# The variables are expanded by this shell before the inner bash runs.
# Steps: convert the zh_CN file with OpenCC, mark the converted entries as
# fuzzy, then merge the already-translated zh_TW entries on top and write
# the result back to the zh_TW file.
poetry run bash -c "
    opencc -i $CN_PATH -c s2twp.json -o /tmp/tmp.po
    pofilter --nonotes --excludefilter unchanged --excludefilter untranslated /tmp/tmp.po | msgattrib --set-fuzzy -o /tmp/tmp.po
    pomerge -t $CN_PATH -i /tmp/tmp.po -o /tmp/tmp.po

    pofilter --nonotes --excludefilter untranslated $TW_PATH /tmp/tmp2.po
    pomerge -t /tmp/tmp.po -i /tmp/tmp2.po -o /tmp/tmp3.po
    msgcat --lang zh_TW /tmp/tmp3.po -o $TW_PATH
"

# -f: do not complain about intermediates that were never created
rm -f /tmp/tmp.po /tmp/tmp2.po /tmp/tmp3.po

.scripts/google_translate.sh

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
# Fill the untranslated entries of a .po file with Google Translate output.
#
# Usage (from the repository root):
#     .scripts/google_translate.sh library/csv.po
#
# bash is required: `source` is not guaranteed by a POSIX /bin/sh.

# the target .po path (relative to the repository root) is mandatory
if [[ -z "$1" ]]
then
    echo "Usage: $0 <path/to/file.po>" >&2
    exit 1
fi

WORK_DIR=.scripts
# every path below is relative to WORK_DIR, so stop if we cannot enter it
cd "$WORK_DIR" || exit 1

source utils/install_poetry.sh

TEMP=tmp.po
TARGET=../$1

poetry lock
poetry install
# The variables are expanded by this shell before the inner bash runs.
# main.py prints the generated entries on stdout; they are captured in
# $TEMP and then merged back into the target file.
poetry run bash -c "
    python google_translate/main.py $TARGET > $TEMP
    pomerge -t $TARGET -i $TEMP -o $TARGET
"
# -f: do not complain if the temporary file was never created
rm -f "$TEMP"

.scripts/google_translate/main.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import argparse
2+
import logging
3+
from pathlib import Path
4+
from typing import List
5+
6+
import polib
7+
from googletrans import Translator
8+
9+
from utils import refine_translations
10+
11+
12+
def _get_po_paths(path: Path) -> List[Path]:
    """Find all .po files in the given path.

    Args:
        path: A single file, or a directory that is searched recursively.

    Returns:
        The resolved path of ``path`` itself when it is a file (whatever its
        extension), otherwise the resolved paths of every ``*.po`` file found
        below it, in sorted order. An empty list is returned, after logging
        an error, when ``path`` does not exist.
    """
    if not path.exists():
        logging.error("The path '%s' does not exist!", path.absolute())
        # nothing to search: return explicitly instead of falling through
        return []

    # return 1-element list if it's a file
    if path.is_file():
        return [path.resolve()]

    # find all .po files; sorted so the processing order is deterministic
    return sorted(p.resolve() for p in path.glob("**/*.po"))
24+
25+
26+
if __name__ == '__main__':
    # Command-line entry point: print a fuzzy PO entry on stdout for every
    # untranslated entry of the given PO file(s), using Google Translate.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path",
        help="the path of a PO file or a directory containing PO files"
    )
    args = parser.parse_args()

    translator = Translator()
    po_files = _get_po_paths(Path(args.path).resolve())
    errors = []
    for path in po_files:
        try:
            pofile = polib.pofile(path)
        except OSError:
            errors.append(f"{path} doesn't seem to be a .po file")
            continue

        for entry in pofile.untranslated_entries()[::-1]:
            translation = translator.translate(entry.msgid, src='en', dest='zh-TW')
            refined = refine_translations(translation.text)

            # polib.escape applies PO escaping (backslash, tab, CR, LF and
            # double quote). repr()[1:-1] leaves a double quote unescaped
            # whenever repr picks single quotes, which breaks the PO syntax.
            print(
                '#, fuzzy\n'
                f'msgid "{polib.escape(entry.msgid)}"\n'
                f'msgstr "{polib.escape(refined)}"\n'
            )

    # Report the files that could not be parsed. stdout carries the generated
    # PO entries, so the diagnostics go through logging instead of print().
    for message in errors:
        logging.error(message)

.scripts/google_translate/utils.py

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Terms that machine translation tends to emit (left) and the wording
# substituted for them (right). Entries are applied in insertion order, so a
# longer term must be listed before any shorter term it contains
# (e.g. '字符串' before '字符').
MAPPING_ZH_TW_COMMON_TRANSLATION_ERROR = {
    '創建': '建立',  # create
    '代碼': '程式碼',  # code
    '信息': '資訊',  # information
    '模塊': '模組',  # module
    '標誌': '旗標',  # flag
    '異常': '例外',  # exception
    '解釋器': '直譯器',  # interpreter
    '頭文件': '標頭檔',  # header
    '對象': '物件',  # object
    '支持': '支援',  # support
    '默認': '預設',  # default
    '兼容': '相容',  # compatible
    '字符串': '字串',  # string
    '宏': '巨集',  # macro
    '描述符': '描述器',  # descriptor
    '字節': '位元組',  # bytes
    '緩存': '快取',  # cache
    '調用': '呼叫',  # call
    '哈希': '雜湊',  # hash
    '類型': '型別',  # type
    '子類': '子類別',  # subclass
    '實現': '實作',  # implement
    '數據': '資料',  # data
    '返回': '回傳',  # return
    '指針': '指標',  # pointer
    '字段': '欄位',  # field
    '擴展': '擴充',  # extension
    '遞歸': '遞迴',  # recursive
    '用戶': '使用者',  # user
    '算法': '演算法',  # algorithm
    '優化': '最佳化',  # optimize
    '字符': '字元',  # character
    '設置': '設定',  # setting/configure
    '線程': '執行緒',  # thread
    '進程': '行程',  # process
    '迭代': '疊代',  # iterate
    '內存': '記憶體',  # memory
    '打印': '印出',  # print
    '異步': '非同步',  # async
    '調試': '除錯',  # debug
    '堆棧': '堆疊',  # stack
    '回調': '回呼',  # callback
    '公共': '公開',  # public
    '函數': '函式',  # function
    '變量': '變數',  # variable
    '常量': '常數',  # constant
    '添加': '新增',  # add
    '基類': '基底類別',  # base class
}


def refine_translations(s: str) -> str:
    """Replace the terms listed in the mapping with their substitutes.

    The entries of ``MAPPING_ZH_TW_COMMON_TRANSLATION_ERROR`` are applied one
    after another, in the order they are declared.

    Args:
        s: The translated text to clean up.

    Returns:
        ``s`` with every mapped term replaced. Text that already contains the
        substitute is left untouched, so calling the function on its own
        output does not change it further for those entries.
    """
    for original, target in MAPPING_ZH_TW_COMMON_TRANSLATION_ERROR.items():
        if original in target:
            # The substitute contains the term itself ('算法' -> '演算法',
            # '子類' -> '子類別'). A plain replace would turn an already
            # correct '演算法' into '演演算法', so cut the text at existing
            # substitutes and only replace inside the remaining pieces.
            s = target.join(
                piece.replace(original, target) for piece in s.split(target)
            )
        else:
            s = s.replace(original, target)
    return s

0 commit comments

Comments
 (0)