From b3bb7cbd5be86b07eb5240d359e2ca3599992eec Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 8 Oct 2020 17:04:35 +0200 Subject: [PATCH 1/3] Script to find format differences There is a lot of work to do here, but this is the initial version of the idea. --- scripts/format_differences.py | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 scripts/format_differences.py diff --git a/scripts/format_differences.py b/scripts/format_differences.py new file mode 100644 index 0000000000..1229a44c50 --- /dev/null +++ b/scripts/format_differences.py @@ -0,0 +1,47 @@ +import collections +import os +import glob + +from pprint import pprint + +import polib # fades + +PO_DIR = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + '..', + )) + + +def main(): + files_with_differences = collections.defaultdict(list) + + for i, pofilename in enumerate(glob.glob(PO_DIR + '**/**/*.po')): + po = polib.pofile(pofilename) + if po.percent_translated() < 85: + continue + + for entry in po: + words = [] + wordsid = wordsstr = list() + + if '*' in entry.msgid or '``' in entry.msgid: + wordsid = [word for word in entry.msgid.split() if '*' in word or '``' in word] + + if '*' in entry.msgstr or '``' in entry.msgstr: + wordsstr = [word for word in entry.msgstr.split() if '*' in word or '``' in word] + + if len(wordsid) != len(wordsstr): + key = pofilename.replace(PO_DIR, '') + files_with_differences[key].append({ + 'occurrences': entry.occurrences, + 'words': { + 'original': wordsid, + 'translated': wordsstr, + }, + }) + + return files_with_differences + + +pprint(main()) From 9dd3143cff159dbb71fefb3db62a63f9d91d98b2 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 8 Oct 2020 17:04:58 +0200 Subject: [PATCH 2/3] Example of what the script is able to find --- distutils/setupscript.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distutils/setupscript.po b/distutils/setupscript.po index 68320e0050..9ac5b175d6 100644
--- a/distutils/setupscript.po +++ b/distutils/setupscript.po @@ -1237,7 +1237,7 @@ msgid "" "The ``long_description`` field is used by PyPI when you publish a package, " "to build its project page." msgstr "" -"PyPI utiliza el campo ``descripción larga`` cuando publica un paquete para " +"PyPI utiliza el campo ``long_description`` cuando publica un paquete para " "construir su página de proyecto." #: ../Doc/distutils/setupscript.rst:621 From 0103a82337ef17f86a973bfbdf10c34e4c590baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cristi=C3=A1n=20Maureira-Fredes?= Date: Fri, 19 Mar 2021 19:02:30 +0100 Subject: [PATCH 3/3] Update scripts/format_differences.py --- scripts/format_differences.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/format_differences.py b/scripts/format_differences.py index 1229a44c50..06a763743f 100644 --- a/scripts/format_differences.py +++ b/scripts/format_differences.py @@ -13,6 +13,15 @@ )) + +DELIMITERS = ("``", "*") + +def has_delimiters(x): + for d in DELIMITERS: + if d in x: + return True + return False + def main(): files_with_differences = collections.defaultdict(list) @@ -25,11 +34,11 @@ def main(): words = [] wordsid = wordsstr = list() - if '*' in entry.msgid or '``' in entry.msgid: - wordsid = [word for word in entry.msgid.split() if '*' in word or '``' in word] + if has_delimiters(entry.msgid): + wordsid = [word for word in entry.msgid.split() if has_delimiters(word)] - if '*' in entry.msgstr or '``' in entry.msgstr: - wordsstr = [word for word in entry.msgstr.split() if '*' in word or '``' in word] + if has_delimiters(entry.msgstr): + wordsstr = [word for word in entry.msgstr.split() if has_delimiters(word)] if len(wordsid) != len(wordsstr): key = pofilename.replace(PO_DIR, '')