Skip to content

Commit 526a3f2

Browse files
committed
Mejorar script para encontrar diferencias de formato
Este script puede ser utilizado en archivos, directorios, o en todo el proyecto (sin argumentos). La idea es encontrar inconsistencias en los archivos traducidos relacionados al formato de rst y sphinx.
1 parent 83c5e7e commit 526a3f2

File tree

1 file changed

+102
-47
lines changed

1 file changed

+102
-47
lines changed

scripts/format_differences.py

Lines changed: 102 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,111 @@
11
import collections
2-
import os
3-
import glob
4-
2+
import re
3+
import sys
4+
from pathlib import Path
55
from pprint import pprint
6-
7-
import polib # fades
8-
9-
PO_DIR = os.path.abspath(
10-
os.path.join(
11-
os.path.dirname(__file__),
12-
'..',
13-
))
14-
15-
16-
17-
DELIMITERS = ("``", "*")
18-
19-
def has_delimiters(x):
20-
for d in DELIMITERS:
21-
if d in x:
22-
return True
23-
return False
24-
25-
def main():
26-
files_with_differences = collections.defaultdict(list)
27-
28-
for i, pofilename in enumerate(glob.glob(PO_DIR + '**/**/*.po')):
6+
from typing import List
7+
8+
import polib
9+
10+
_patterns = [
11+
":c:func:`[^`]+`",
12+
":c:type:`[^`]+`",
13+
":c:macro:`[^`]+`",
14+
":c:member:`[^`]+`",
15+
":c:data:`[^`]+`",
16+
":py:data:`[^`]+`",
17+
":py:mod:`[^`]+`",
18+
":func:`[^`]+`",
19+
":mod:`[^`]+`",
20+
":ref:`[^`]+`",
21+
":class:`[^`]+`",
22+
":pep:`[^`]+`",
23+
":data:`[^`]+`",
24+
":exc:`[^`]+`",
25+
":term:`[^`]+`",
26+
":meth:`[^`]+`",
27+
":envvar:`[^`]+`",
28+
":file:`[^`]+`",
29+
":attr:`[^`]+`",
30+
":const:`[^`]+`",
31+
":issue:`[^`]+`",
32+
":opcode:`[^`]+`",
33+
":option:`[^`]+`",
34+
":program:`[^`]+`",
35+
":keyword:`[^`]+`",
36+
":RFC:`[^`]+`",
37+
":rfc:`[^`]+`",
38+
":doc:`[^`]+`",
39+
"``[^`]+``",
40+
"`[^`]+`__",
41+
"`[^`]+`_",
42+
"\*\*[^\*]+\*\*", # bold text between **
43+
"\*[^\*]+\*", # italic text between *
44+
]
45+
46+
_exps = [re.compile(e) for e in _patterns]
47+
48+
49+
def get_sphinx_directives(s: str) -> List[str]:
    """Return every piece of reST/Sphinx inline markup found in *s*.

    The compiled expressions in ``_exps`` are tried in order. Results are
    therefore grouped by pattern first and by position in the text second.

    Parameters:
        s: text to scan (for example a msgid or a msgstr).

    Returns:
        A list with the matched markup strings; empty when nothing matches.
    """

    output: List[str] = []
    for exp in _exps:
        for match in exp.findall(s):
            output.append(match)
            # Remove the matched text so that a later, more general pattern
            # does not report the same markup a second time.
            s = s.replace(match, "")
    return output
67+
68+
def ind(level: int = 0) -> str:
    """Return the indentation prefix for *level*: four spaces per level."""
    return " " * (4 * level)
70+
71+
if __name__ == "__main__":
    # Imported here because only the script entry point needs it.
    from itertools import zip_longest

    # Repository root: the parent of the directory that holds this script.
    PO_DIR = Path(__file__).resolve().parent.parent
    VENV_DIR = PO_DIR / "venv"
    # Printed in place of a directive that exists on only one side.
    MISSING = "<missing>"

    if len(sys.argv) > 1:
        # A single argument: either one .po file or a directory of .po files.
        filename = sys.argv[1]
        files = []
        if filename:
            target = Path(filename)
            if target.is_dir():
                # Glob inside the directory that was actually checked above
                # (resolved against the current working directory), so the
                # check and the search agree and absolute paths work too.
                files = [
                    i
                    for i in target.resolve().glob("*.po")
                    if not i.is_relative_to(VENV_DIR)
                ]
            elif not target.is_file():
                print(f"File not found: '{filename}'")
                sys.exit(-1)
            else:
                files = [filename]
    else:
        # No arguments: scan the whole project, skipping the virtualenv.
        files = [i for i in PO_DIR.glob("**/**/*.po") if not i.is_relative_to(VENV_DIR)]

    for pofilename in files:
        print(f"\n> Processing {pofilename}")
        po = polib.pofile(pofilename)

        for entry in po:
            # Entries with an empty msgstr have nothing to compare against.
            if not entry.msgstr:
                continue

            directives_id = get_sphinx_directives(entry.msgid)
            directives_str = get_sphinx_directives(entry.msgstr)

            # zip_longest (instead of zip) also reports directives that are
            # missing from, or extra in, the translation; plain zip silently
            # stops at the shorter list.
            for ori, dst in zip_longest(directives_id, directives_str, fillvalue=MISSING):
                if ori == dst:
                    continue

                occs = [f"{ind(2)}{t[0]}:{t[1]}" for t in entry.occurrences]
                print(f"\n{ind(1)}{pofilename}:{entry.linenum}")
                print("\n".join(occs))
                print(f"{ind(3)}{ori}")
                print(f"{ind(3)}{dst}")

0 commit comments

Comments
 (0)