From 6db5c004c4054560c70df7299a4229aab0404adb Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Mon, 4 Aug 2025 10:41:25 +0200 Subject: [PATCH] Update `gettext.py` from 3.13.5 --- Lib/gettext.py | 25 ++- Lib/test/test_gettext.py | 445 ++++++++++++++++++++++++++++++++++----- 2 files changed, 419 insertions(+), 51 deletions(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index b72b15f82d..62cff81b7b 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -46,6 +46,7 @@ # find this format documented anywhere. +import operator import os import re import sys @@ -166,14 +167,28 @@ def _parse(tokens, priority=-1): def _as_int(n): try: - i = round(n) + round(n) except TypeError: raise TypeError('Plural value must be an integer, got %s' % (n.__class__.__name__,)) from None + return _as_int2(n) + +def _as_int2(n): + try: + return operator.index(n) + except TypeError: + pass + import warnings + frame = sys._getframe(1) + stacklevel = 2 + while frame.f_back is not None and frame.f_globals.get('__name__') == __name__: + stacklevel += 1 + frame = frame.f_back warnings.warn('Plural value must be an integer, got %s' % (n.__class__.__name__,), - DeprecationWarning, 4) + DeprecationWarning, + stacklevel) return n @@ -200,7 +215,7 @@ def c2py(plural): elif c == ')': depth -= 1 - ns = {'_as_int': _as_int} + ns = {'_as_int': _as_int, '__name__': __name__} exec('''if True: def func(n): if not isinstance(n, int): @@ -280,6 +295,7 @@ def gettext(self, message): def ngettext(self, msgid1, msgid2, n): if self._fallback: return self._fallback.ngettext(msgid1, msgid2, n) + n = _as_int2(n) if n == 1: return msgid1 else: @@ -293,6 +309,7 @@ def pgettext(self, context, message): def npgettext(self, context, msgid1, msgid2, n): if self._fallback: return self._fallback.npgettext(context, msgid1, msgid2, n) + n = _as_int2(n) if n == 1: return msgid1 else: @@ -579,6 +596,7 @@ def dngettext(domain, msgid1, msgid2, n): try: t = translation(domain, _localedirs.get(domain, None)) except OSError: + n = _as_int2(n) if n == 1: return msgid1 else: @@ -598,6 +616,7 @@ def dnpgettext(domain, context, msgid1, msgid2, n): try: t = translation(domain, _localedirs.get(domain, None)) except OSError: + n = _as_int2(n) if n == 1: return msgid1 else: diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index 8430fc234d..0653bb762a 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -2,6 +2,8 @@ import base64 import gettext import unittest +import unittest.mock +from functools import partial from test import support from test.support import os_helper @@ -37,6 +39,9 @@ bmsgd2luayAoaW4gIm15IG90aGVyIGNvbnRleHQiKQB3aW5rIHdpbmsA ''' +# .mo file with an invalid magic number +GNU_MO_DATA_BAD_MAGIC_NUMBER = base64.b64encode(b'ABCD') + # This data contains an invalid major version number (5) # An unexpected major version number should be treated as an error when # parsing a .mo file @@ -85,6 +90,49 @@ ciBUQUgKdHJnZ3JrZyB6cmZmbnRyIHBuZ255YnQgeXZvZW5lbC4AYmFjb24Ad2luayB3aW5rAA== ''' +# Corrupt .mo file +# Generated from +# +# msgid "foo" +# msgstr "bar" +# +# with msgfmt --no-hash +# +# The translation offset is changed to 0xFFFFFFFF, +# making it larger than the file size, which should +# raise an error when parsing. +GNU_MO_DATA_CORRUPT = base64.b64encode(bytes([ + 0xDE, 0x12, 0x04, 0x95, # Magic + 0x00, 0x00, 0x00, 0x00, # Version + 0x01, 0x00, 0x00, 0x00, # Message count + 0x1C, 0x00, 0x00, 0x00, # Message offset + 0x24, 0x00, 0x00, 0x00, # Translation offset + 0x00, 0x00, 0x00, 0x00, # Hash table size + 0x2C, 0x00, 0x00, 0x00, # Hash table offset + 0x03, 0x00, 0x00, 0x00, # 1st message length + 0x2C, 0x00, 0x00, 0x00, # 1st message offset + 0x03, 0x00, 0x00, 0x00, # 1st trans length + 0xFF, 0xFF, 0xFF, 0xFF, # 1st trans offset (Modified to make it invalid) + 0x66, 0x6F, 0x6F, 0x00, # Message data + 0x62, 0x61, 0x72, 0x00, # Message data +])) + + +GNU_MO_DATA_BIG_ENDIAN = base64.b64encode(bytes([ + 0x95, 0x04, 0x12, 0xDE, # Magic + 0x00, 0x00, 0x00, 0x00, # Version + 0x00, 0x00, 0x00, 0x01, # Message count + 0x00, 0x00, 0x00, 0x1C, # Message offset + 0x00, 0x00, 0x00, 0x24, # Translation offset + 0x00, 0x00, 0x00, 0x00, # Hash table size + 0x00, 0x00, 0x00, 0x2C, # Hash table offset + 0x00, 0x00, 0x00, 0x03, # 1st message length + 0x00, 0x00, 0x00, 0x2C, # 1st message offset + 0x00, 0x00, 0x00, 0x03, # 1st trans length + 0x00, 0x00, 0x00, 0x30, # 1st trans offset + 0x66, 0x6F, 0x6F, 0x00, # Message data + 0x62, 0x61, 0x72, 0x00, # Message data +])) UMO_DATA = b'''\ 3hIElQAAAAADAAAAHAAAADQAAAAAAAAAAAAAAAAAAABMAAAABAAAAE0AAAAQAAAAUgAAAA8BAABj @@ -109,30 +157,49 @@ LOCALEDIR = os.path.join('xx', 'LC_MESSAGES') MOFILE = os.path.join(LOCALEDIR, 'gettext.mo') +MOFILE_BAD_MAGIC_NUMBER = os.path.join(LOCALEDIR, 'gettext_bad_magic_number.mo') MOFILE_BAD_MAJOR_VERSION = os.path.join(LOCALEDIR, 'gettext_bad_major_version.mo') MOFILE_BAD_MINOR_VERSION = os.path.join(LOCALEDIR, 'gettext_bad_minor_version.mo') +MOFILE_CORRUPT = os.path.join(LOCALEDIR, 'gettext_corrupt.mo') +MOFILE_BIG_ENDIAN = os.path.join(LOCALEDIR, 'gettext_big_endian.mo') UMOFILE = os.path.join(LOCALEDIR, 'ugettext.mo') MMOFILE = os.path.join(LOCALEDIR, 'metadata.mo') +def reset_gettext(): + gettext._localedirs.clear() + gettext._current_domain = 'messages' + gettext._translations.clear() + + class GettextBaseTest(unittest.TestCase): - def setUp(self): - self.addCleanup(os_helper.rmtree, os.path.split(LOCALEDIR)[0]) + @classmethod + def setUpClass(cls): + cls.addClassCleanup(os_helper.rmtree, os.path.split(LOCALEDIR)[0]) if not os.path.isdir(LOCALEDIR): os.makedirs(LOCALEDIR) with open(MOFILE, 'wb') as fp: fp.write(base64.decodebytes(GNU_MO_DATA)) + with open(MOFILE_BAD_MAGIC_NUMBER, 'wb') as fp: + fp.write(base64.decodebytes(GNU_MO_DATA_BAD_MAGIC_NUMBER)) with open(MOFILE_BAD_MAJOR_VERSION, 'wb') as fp: fp.write(base64.decodebytes(GNU_MO_DATA_BAD_MAJOR_VERSION)) with open(MOFILE_BAD_MINOR_VERSION, 'wb') as fp: fp.write(base64.decodebytes(GNU_MO_DATA_BAD_MINOR_VERSION)) + with open(MOFILE_CORRUPT, 'wb') as fp: + fp.write(base64.decodebytes(GNU_MO_DATA_CORRUPT)) + with open(MOFILE_BIG_ENDIAN, 'wb') as fp: + fp.write(base64.decodebytes(GNU_MO_DATA_BIG_ENDIAN)) with open(UMOFILE, 'wb') as fp: fp.write(base64.decodebytes(UMO_DATA)) with open(MMOFILE, 'wb') as fp: fp.write(base64.decodebytes(MMO_DATA)) + + def setUp(self): self.env = self.enterContext(os_helper.EnvironmentVarGuard()) self.env['LANGUAGE'] = 'xx' - gettext._translations.clear() + reset_gettext() + self.addCleanup(reset_gettext) GNU_MO_DATA_ISSUE_17898 = b'''\ @@ -237,6 +304,16 @@ def test_bindtextdomain(self): def test_textdomain(self): self.assertEqual(gettext.textdomain(), 'gettext') + def test_bad_magic_number(self): + with open(MOFILE_BAD_MAGIC_NUMBER, 'rb') as fp: + with self.assertRaises(OSError) as cm: + gettext.GNUTranslations(fp) + + exception = cm.exception + self.assertEqual(exception.errno, 0) + self.assertEqual(exception.strerror, "Bad magic number") + self.assertEqual(exception.filename, MOFILE_BAD_MAGIC_NUMBER) + def test_bad_major_version(self): with open(MOFILE_BAD_MAJOR_VERSION, 'rb') as fp: with self.assertRaises(OSError) as cm: @@ -252,6 +329,22 @@ def test_bad_minor_version(self): # Check that no error is thrown with a bad minor version number gettext.GNUTranslations(fp) + def test_corrupt_file(self): + with open(MOFILE_CORRUPT, 'rb') as fp: + with self.assertRaises(OSError) as cm: + gettext.GNUTranslations(fp) + + exception = cm.exception + self.assertEqual(exception.errno, 0) + self.assertEqual(exception.strerror, "File is corrupt") + self.assertEqual(exception.filename, MOFILE_CORRUPT) + + def test_big_endian_file(self): + with open(MOFILE_BIG_ENDIAN, 'rb') as fp: + t = gettext.GNUTranslations(fp) + + self.assertEqual(t.gettext('foo'), 'bar') + def test_some_translations(self): eq = self.assertEqual # test some translations @@ -309,55 +402,153 @@ def test_multiline_strings(self): trggrkg zrffntr pngnybt yvoenel.''') -class PluralFormsTestCase(GettextBaseTest): +class PluralFormsTests: + + def _test_plural_forms(self, ngettext, gettext, + singular, plural, tsingular, tplural, + numbers_only=True): + x = ngettext(singular, plural, 1) + self.assertEqual(x, tsingular) + x = ngettext(singular, plural, 2) + self.assertEqual(x, tplural) + x = gettext(singular) + self.assertEqual(x, tsingular) + + lineno = self._test_plural_forms.__code__.co_firstlineno + 12 + with self.assertWarns(DeprecationWarning) as cm: + x = ngettext(singular, plural, 1.0) + self.assertEqual(cm.filename, __file__) + self.assertEqual(cm.lineno, lineno) + self.assertEqual(x, tsingular) + with self.assertWarns(DeprecationWarning) as cm: + x = ngettext(singular, plural, 1.1) + self.assertEqual(cm.filename, __file__) + self.assertEqual(cm.lineno, lineno + 5) + self.assertEqual(x, tplural) + + if numbers_only: + with self.assertRaises(TypeError): + ngettext(singular, plural, None) + else: + with self.assertWarns(DeprecationWarning) as cm: + x = ngettext(singular, plural, None) + self.assertEqual(x, tplural) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_forms(self): + self._test_plural_forms( + self.ngettext, self.gettext, + 'There is %s file', 'There are %s files', + 'Hay %s fichero', 'Hay %s ficheros') + self._test_plural_forms( + self.ngettext, self.gettext, + '%d file deleted', '%d files deleted', + '%d file deleted', '%d files deleted') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_context_forms(self): + ngettext = partial(self.npgettext, 'With context') + gettext = partial(self.pgettext, 'With context') + self._test_plural_forms( + ngettext, gettext, + 'There is %s file', 'There are %s files', + 'Hay %s fichero (context)', 'Hay %s ficheros (context)') + self._test_plural_forms( + ngettext, gettext, + '%d file deleted', '%d files deleted', + '%d file deleted', '%d files deleted') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_wrong_context_forms(self): + self._test_plural_forms( + partial(self.npgettext, 'Unknown context'), + partial(self.pgettext, 'Unknown context'), + 'There is %s file', 'There are %s files', + 'There is %s file', 'There are %s files') + + +class GNUTranslationsPluralFormsTestCase(PluralFormsTests, GettextBaseTest): def setUp(self): GettextBaseTest.setUp(self) - self.mofile = MOFILE + # Set up the bindings + gettext.bindtextdomain('gettext', os.curdir) + gettext.textdomain('gettext') - def test_plural_forms1(self): - eq = self.assertEqual - x = gettext.ngettext('There is %s file', 'There are %s files', 1) - eq(x, 'Hay %s fichero') - x = gettext.ngettext('There is %s file', 'There are %s files', 2) - eq(x, 'Hay %s ficheros') - x = gettext.gettext('There is %s file') - eq(x, 'Hay %s fichero') - - def test_plural_context_forms1(self): - eq = self.assertEqual - x = gettext.npgettext('With context', - 'There is %s file', 'There are %s files', 1) - eq(x, 'Hay %s fichero (context)') - x = gettext.npgettext('With context', - 'There is %s file', 'There are %s files', 2) - eq(x, 'Hay %s ficheros (context)') - x = gettext.pgettext('With context', 'There is %s file') - eq(x, 'Hay %s fichero (context)') - - def test_plural_forms2(self): - eq = self.assertEqual - with open(self.mofile, 'rb') as fp: - t = gettext.GNUTranslations(fp) - x = t.ngettext('There is %s file', 'There are %s files', 1) - eq(x, 'Hay %s fichero') - x = t.ngettext('There is %s file', 'There are %s files', 2) - eq(x, 'Hay %s ficheros') - x = t.gettext('There is %s file') - eq(x, 'Hay %s fichero') - - def test_plural_context_forms2(self): - eq = self.assertEqual - with open(self.mofile, 'rb') as fp: + self.gettext = gettext.gettext + self.ngettext = gettext.ngettext + self.pgettext = gettext.pgettext + self.npgettext = gettext.npgettext + + +class GNUTranslationsWithDomainPluralFormsTestCase(PluralFormsTests, GettextBaseTest): + def setUp(self): + GettextBaseTest.setUp(self) + # Set up the bindings + gettext.bindtextdomain('gettext', os.curdir) + + self.gettext = partial(gettext.dgettext, 'gettext') + self.ngettext = partial(gettext.dngettext, 'gettext') + self.pgettext = partial(gettext.dpgettext, 'gettext') + self.npgettext = partial(gettext.dnpgettext, 'gettext') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_forms_wrong_domain(self): + self._test_plural_forms( + partial(gettext.dngettext, 'unknown'), + partial(gettext.dgettext, 'unknown'), + 'There is %s file', 'There are %s files', + 'There is %s file', 'There are %s files', + numbers_only=False) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_context_forms_wrong_domain(self): + self._test_plural_forms( + partial(gettext.dnpgettext, 'unknown', 'With context'), + partial(gettext.dpgettext, 'unknown', 'With context'), + 'There is %s file', 'There are %s files', + 'There is %s file', 'There are %s files', + numbers_only=False) + + +class GNUTranslationsClassPluralFormsTestCase(PluralFormsTests, GettextBaseTest): + def setUp(self): + GettextBaseTest.setUp(self) + with open(MOFILE, 'rb') as fp: t = gettext.GNUTranslations(fp) - x = t.npgettext('With context', - 'There is %s file', 'There are %s files', 1) - eq(x, 'Hay %s fichero (context)') - x = t.npgettext('With context', - 'There is %s file', 'There are %s files', 2) - eq(x, 'Hay %s ficheros (context)') - x = gettext.pgettext('With context', 'There is %s file') - eq(x, 'Hay %s fichero (context)') + self.gettext = t.gettext + self.ngettext = t.ngettext + self.pgettext = t.pgettext + self.npgettext = t.npgettext + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_forms_null_translations(self): + t = gettext.NullTranslations() + self._test_plural_forms( + t.ngettext, t.gettext, + 'There is %s file', 'There are %s files', + 'There is %s file', 'There are %s files', + numbers_only=False) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_plural_context_forms_null_translations(self): + t = gettext.NullTranslations() + self._test_plural_forms( + partial(t.npgettext, 'With context'), + partial(t.pgettext, 'With context'), + 'There is %s file', 'There are %s files', + 'There is %s file', 'There are %s files', + numbers_only=False) + + +class PluralFormsInternalTestCase(unittest.TestCase): # Examples from http://www.gnu.org/software/gettext/manual/gettext.html def test_ja(self): @@ -472,12 +663,18 @@ def test_decimal_number(self): def test_invalid_syntax(self): invalid_expressions = [ 'x>1', '(n>1', 'n>1)', '42**42**42', '0xa', '1.0', '1e2', - 'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n', + 'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n', 'n ? 1 2' ] for expr in invalid_expressions: with self.assertRaises(ValueError): gettext.c2py(expr) + def test_negation(self): + f = gettext.c2py('!!!n') + self.assertEqual(f(0), 1) + self.assertEqual(f(1), 0) + self.assertEqual(f(2), 0) + def test_nested_condition_operator(self): self.assertEqual(gettext.c2py('n?1?2:3:4')(0), 4) self.assertEqual(gettext.c2py('n?1?2:3:4')(1), 2) @@ -640,6 +837,158 @@ def test_cache(self): self.assertEqual(t.__class__, DummyGNUTranslations) +class FallbackTranslations(gettext.NullTranslations): + def gettext(self, message): + return f'gettext: {message}' + + def ngettext(self, msgid1, msgid2, n): + return f'ngettext: {msgid1}, {msgid2}, {n}' + + def pgettext(self, context, message): + return f'pgettext: {context}, {message}' + + def npgettext(self, context, msgid1, msgid2, n): + return f'npgettext: {context}, {msgid1}, {msgid2}, {n}' + + +class FallbackTestCase(GettextBaseTest): + def test_null_translations_fallback(self): + t = gettext.NullTranslations() + t.add_fallback(FallbackTranslations()) + self.assertEqual(t.gettext('foo'), 'gettext: foo') + self.assertEqual(t.ngettext('foo', 'foos', 1), + 'ngettext: foo, foos, 1') + self.assertEqual(t.pgettext('context', 'foo'), + 'pgettext: context, foo') + self.assertEqual(t.npgettext('context', 'foo', 'foos', 1), + 'npgettext: context, foo, foos, 1') + + def test_gnu_translations_fallback(self): + with open(MOFILE, 'rb') as fp: + t = gettext.GNUTranslations(fp) + t.add_fallback(FallbackTranslations()) + self.assertEqual(t.gettext('foo'), 'gettext: foo') + self.assertEqual(t.ngettext('foo', 'foos', 1), + 'ngettext: foo, foos, 1') + self.assertEqual(t.pgettext('context', 'foo'), + 'pgettext: context, foo') + self.assertEqual(t.npgettext('context', 'foo', 'foos', 1), + 'npgettext: context, foo, foos, 1') + + def test_nested_fallbacks(self): + class NestedFallback(gettext.NullTranslations): + def gettext(self, message): + if message == 'foo': + return 'fallback' + return super().gettext(message) + + fallback1 = NestedFallback() + fallback2 = FallbackTranslations() + t = gettext.NullTranslations() + t.add_fallback(fallback1) + t.add_fallback(fallback2) + + self.assertEqual(fallback1.gettext('bar'), 'gettext: bar') + self.assertEqual(t.gettext('foo'), 'fallback') + self.assertEqual(t.gettext('bar'), 'gettext: bar') + + +class ExpandLangTestCase(unittest.TestCase): + def test_expand_lang(self): + # Test all combinations of territory, charset and + # modifier (locale extension) + locales = { + 'cs': ['cs'], + 'cs_CZ': ['cs_CZ', 'cs'], + 'cs.ISO8859-2': ['cs.ISO8859-2', 'cs'], + 'cs@euro': ['cs@euro', 'cs'], + 'cs_CZ.ISO8859-2': ['cs_CZ.ISO8859-2', 'cs_CZ', 'cs.ISO8859-2', + 'cs'], + 'cs_CZ@euro': ['cs_CZ@euro', 'cs@euro', 'cs_CZ', 'cs'], + 'cs.ISO8859-2@euro': ['cs.ISO8859-2@euro', 'cs@euro', + 'cs.ISO8859-2', 'cs'], + 'cs_CZ.ISO8859-2@euro': ['cs_CZ.ISO8859-2@euro', 'cs_CZ@euro', + 'cs.ISO8859-2@euro', 'cs@euro', + 'cs_CZ.ISO8859-2', 'cs_CZ', + 'cs.ISO8859-2', 'cs'], + } + for locale, expanded in locales.items(): + with self.subTest(locale=locale): + with unittest.mock.patch("locale.normalize", + return_value=locale): + self.assertEqual(gettext._expand_lang(locale), expanded) + + +class FindTestCase(unittest.TestCase): + + def setUp(self): + self.env = self.enterContext(os_helper.EnvironmentVarGuard()) + self.tempdir = self.enterContext(os_helper.temp_cwd()) + + for key in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): + self.env.unset(key) + + def create_mo_file(self, lang): + locale_dir = os.path.join(self.tempdir, "locale") + mofile_dir = os.path.join(locale_dir, lang, "LC_MESSAGES") + os.makedirs(mofile_dir) + mo_file = os.path.join(mofile_dir, "mofile.mo") + with open(mo_file, "wb") as f: + f.write(GNU_MO_DATA) + return mo_file + + def test_find_with_env_vars(self): + # test that find correctly finds the environment variables + # when languages are not supplied + mo_file = self.create_mo_file("ga_IE") + for var in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): + self.env.set(var, 'ga_IE') + result = gettext.find("mofile", + localedir=os.path.join(self.tempdir, "locale")) + self.assertEqual(result, mo_file) + self.env.unset(var) + + def test_find_with_languages(self): + # test that passed languages are used + self.env.set('LANGUAGE', 'pt_BR') + mo_file = self.create_mo_file("ga_IE") + + result = gettext.find("mofile", + localedir=os.path.join(self.tempdir, "locale"), + languages=['ga_IE']) + self.assertEqual(result, mo_file) + + @unittest.mock.patch('gettext._expand_lang') + def test_find_with_no_lang(self, patch_expand_lang): + # no language can be found + gettext.find('foo') + patch_expand_lang.assert_called_with('C') + + @unittest.mock.patch('gettext._expand_lang') + def test_find_with_c(self, patch_expand_lang): + # 'C' is already in languages + self.env.set('LANGUAGE', 'C') + gettext.find('foo') + patch_expand_lang.assert_called_with('C') + + def test_find_all(self): + # test that all are returned when all is set + paths = [] + for lang in ["ga_IE", "es_ES"]: + paths.append(self.create_mo_file(lang)) + result = gettext.find('mofile', + localedir=os.path.join(self.tempdir, "locale"), + languages=["ga_IE", "es_ES"], all=True) + self.assertEqual(sorted(result), sorted(paths)) + + def test_find_deduplication(self): + # test that find removes duplicate languages + mo_file = [self.create_mo_file('ga_IE')] + result = gettext.find("mofile", localedir=os.path.join(self.tempdir, "locale"), + languages=['ga_IE', 'ga_IE'], all=True) + self.assertEqual(result, mo_file) + + class MiscTestCase(unittest.TestCase): def test__all__(self): support.check__all__(self, gettext,