diff --git a/.ci/appveyor.yml b/.ci/appveyor.yml deleted file mode 100644 index 2dfbdd884..000000000 --- a/.ci/appveyor.yml +++ /dev/null @@ -1,38 +0,0 @@ -# From https://github.com/ogrisel/python-appveyor-demo/blob/master/appveyor.yml - -environment: - global: - # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script intepreter - # See: http://stackoverflow.com/a/13751649/163740 - CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\.ci\\run_with_env.cmd" - - matrix: - - PYTHON: "C:\\Python35" - PYTHON_VERSION: "3.5.x" - PYTHON_ARCH: "32" - - - PYTHON: "C:\\Python35-x64" - PYTHON_VERSION: "3.5.x" - PYTHON_ARCH: "64" - -branches: # Only build official branches, PRs are built anyway. - only: - - master - - /release.*/ - -install: - - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - # Check that we have the expected version and architecture for Python - - "python --version" - - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" - # Build data files - - "pip install --upgrade pytest==2.8.5 pytest-cov==2.2.0 codecov freezegun==0.3.9" - - "pip install --editable ." - - "python setup.py import_cldr" - -build: false # Not a C# project, build stuff at the test step instead. - -test_script: - - "%CMD_IN_ENV% python -m pytest --cov=babel" - - "codecov" diff --git a/.ci/deploy.linux.sh b/.ci/deploy.linux.sh deleted file mode 100644 index 4d59382d7..000000000 --- a/.ci/deploy.linux.sh +++ /dev/null @@ -1,4 +0,0 @@ -set -x -set -e - -bash <(curl -s https://codecov.io/bash) diff --git a/.ci/deploy.osx.sh b/.ci/deploy.osx.sh deleted file mode 100644 index c44550eff..000000000 --- a/.ci/deploy.osx.sh +++ /dev/null @@ -1,4 +0,0 @@ -set -x -set -e - -echo "Due to a bug in codecov, coverage cannot be deployed for Mac builds." 
diff --git a/.ci/deps.linux.sh b/.ci/deps.linux.sh deleted file mode 100644 index 13cc9e1ef..000000000 --- a/.ci/deps.linux.sh +++ /dev/null @@ -1,4 +0,0 @@ -set -x -set -e - -echo "No dependencies to install for linux." diff --git a/.ci/deps.osx.sh b/.ci/deps.osx.sh deleted file mode 100644 index b52a84f6d..000000000 --- a/.ci/deps.osx.sh +++ /dev/null @@ -1,11 +0,0 @@ -set -e -set -x - -# Install packages with brew -brew update >/dev/null -brew outdated pyenv || brew upgrade --quiet pyenv - -# Install required python version for this build -pyenv install -ks $PYTHON_VERSION -pyenv global $PYTHON_VERSION -python --version diff --git a/.ci/run_with_env.cmd b/.ci/run_with_env.cmd deleted file mode 100644 index 3a472bc83..000000000 --- a/.ci/run_with_env.cmd +++ /dev/null @@ -1,47 +0,0 @@ -:: To build extensions for 64 bit Python 3, we need to configure environment -:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) -:: -:: To build extensions for 64 bit Python 2, we need to configure environment -:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) -:: -:: 32 bit builds do not require specific environment configurations. 
-:: -:: Note: this script needs to be run with the /E:ON and /V:ON flags for the -:: cmd interpreter, at least for (SDK v7.0) -:: -:: More details at: -:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows -:: http://stackoverflow.com/a/13751649/163740 -:: -:: Author: Olivier Grisel -:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ -@ECHO OFF - -SET COMMAND_TO_RUN=%* -SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows - -SET MAJOR_PYTHON_VERSION="%PYTHON_VERSION:~0,1%" -IF %MAJOR_PYTHON_VERSION% == "2" ( - SET WINDOWS_SDK_VERSION="v7.0" -) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( - SET WINDOWS_SDK_VERSION="v7.1" -) ELSE ( - ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" - EXIT 1 -) - -IF "%PYTHON_ARCH%"=="64" ( - ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture - SET DISTUTILS_USE_SDK=1 - SET MSSdk=1 - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 -) ELSE ( - ECHO Using default MSVC build environment for 32 bit architecture - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 -) diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..a3d8ae65e --- /dev/null +++ b/.coveragerc @@ -0,0 +1,5 @@ +[report] +exclude_lines = + NotImplemented + pragma: no cover + warnings.warn \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..e9c411862 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,38 @@ +name: Test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04, windows-2019, macos-10.15] + python-version: [3.6, 3.7, 3.8, 3.9, pypy3] 
+ exclude: + - os: windows-2019 + python-version: pypy3 + # TODO: Remove this; see: + # https://github.com/actions/setup-python/issues/151 + # https://github.com/tox-dev/tox/issues/1704 + # https://foss.heptapod.net/pypy/pypy/-/issues/3331 + env: + BABEL_CLDR_NO_DOWNLOAD_PROGRESS: "1" + BABEL_CLDR_QUIET: "1" + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel + python -m pip install tox tox-gh-actions==2.1.0 + - name: Run test via Tox + run: tox --skip-missing-interpreters + - uses: codecov/codecov-action@v1 diff --git a/.gitignore b/.gitignore index d8f8bc164..2886dec52 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,23 @@ -*~ -*.swp -.idea -*.so -docs/_build +**/__pycache__ +*.egg +*.egg-info *.pyc *.pyo -*.egg-info -*.egg -build -dist +*.so +*.swp +*~ +.*cache .DS_Store +.idea .tox -test-env -**/__pycache__ +/venv* babel/global.dat babel/global.dat.json +build +dist +docs/_build +test-env +tests/messages/data/project/i18n/en_US tests/messages/data/project/i18n/long_messages.pot tests/messages/data/project/i18n/temp* -tests/messages/data/project/i18n/en_US -/venv* +tests/messages/data/project/i18n/fi_BUGGY/LC_MESSAGES/*.mo diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b7eff7d94..000000000 --- a/.travis.yml +++ /dev/null @@ -1,57 +0,0 @@ -language: python - -# Use travis docker infrastructure for greater speed -sudo: false - -cache: - directories: - - cldr - - "$HOME/.cache/pip" - - "$HOME/.pyenv" - -matrix: - include: - - os: linux - python: 2.6 - - os: linux - python: 2.6 - env: - - CDECIMAL=m3-cdecimal - - os: linux - python: 2.7 - - os: linux - python: 2.7 - env: - - CDECIMAL=m3-cdecimal - - os: linux - python: pypy - - os: linux - python: pypy3 - - os: linux - python: 3.3 - - os: linux - python: 3.4 - - os: linux 
- python: 3.5 - env: - - PYTHON_TEST_FLAGS=-bb - -install: - - bash .ci/deps.${TRAVIS_OS_NAME}.sh - - pip install --upgrade pip - - pip install --upgrade pytest==2.8.5 pytest-cov==2.2.0 $CDECIMAL freezegun==0.3.9 - - pip install --editable . - -script: - - make test-cov - - bash .ci/deploy.${TRAVIS_OS_NAME}.sh - -notifications: - email: false - irc: - channels: - - "chat.freenode.net#pocoo" - on_success: change - on_failure: always - use_notice: true - skip_join: true diff --git a/AUTHORS b/AUTHORS index b9208fe59..9cf8f4e7d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,25 +1,126 @@ -Babel is written and maintained by the Babel team and various contributors: - -Maintainer and Current Project Lead: -- Armin Ronacher - -Contributors: +Babel is written and maintained by the Babel team and various contributors: -- Christopher Lenz -- Alex Morega -- Felix Schwarz -- Pedro Algarvio -- Jeroen Ruigrok van der Werven -- Philip Jenvey -- Tobias Bieniek -- Jonas Borgström -- Daniel Neuhäuser -- Nick Retallack -- Thomas Waldmann -- Lennart Regebro -- Isaac Jurado -- Craig Loftus +- Aarni Koskela +- Christopher Lenz +- Armin Ronacher +- Alex Morega +- Lasse Schuirmann +- Felix Schwarz +- Pedro Algarvio +- Jeroen Ruigrok van der Werven +- Philip Jenvey +- benselme +- Isaac Jurado +- Tobias Bieniek +- Erick Wilder +- Michael Birtwell +- Jonas Borgström +- Kevin Deldycke +- Jon Dufresne +- Ville Skyttä +- Hugo +- Heungsub Lee +- Jakob Schnitzer +- Sachin Paliwal +- Alex Willmer +- Daniel Neuhäuser +- Miro Hrončok +- Cédric Krier +- Luke Plant +- Jennifer Wang +- Lukas Balaga +- sudheesh001 +- Niklas Hambüchen +- Changaco +- Xavier Fernandez +- KO. 
Mattsson +- Sébastien Diemer +- alexbodn@gmail.com +- saurabhiiit +- srisankethu +- Erik Romijn +- Lukas B +- Ryan J Ollos +- Arturas Moskvinas +- Leonardo Pistone +- Jun Omae +- Hyunjun Kim +- Alessio Bogon +- Nikiforov Konstantin +- Abdullah Javed Nesar +- Brad Martin +- Tyler Kennedy +- CyanNani123 +- sebleblanc +- He Chen +- Steve (Gadget) Barnes +- Romuald Brunet +- Mario Frasca +- BT-sschmid +- Alberto Mardegan +- mondeja +- NotAFile +- Julien Palard +- Brian Cappello +- Serban Constantin +- Bryn Truscott +- Chris +- Charly C +- PTrottier +- xmo-odoo +- StevenJ +- Jungmo Ku +- Simeon Visser +- Narendra Vardi +- Stefane Fermigier +- Narayan Acharya +- François Magimel +- Wolfgang Doll +- Roy Williams +- Marc-André Dufresne +- Abhishek Tiwari +- David Baumgold +- Alex Kuzmenko +- Georg Schölly +- ldwoolley +- Rodrigo Ramírez Norambuena +- Jakub Wilk +- Roman Rader +- Max Shenfield +- Nicolas Grilly +- Kenny Root +- Adam Chainz +- Sébastien Fievet +- Anthony Sottile +- Yuriy Shatrov +- iamshubh22 +- Sven Anderson +- Eoin Nugent +- Roman Imankulov +- David Stanek +- Roy Wellington Ⅳ +- Florian Schulze +- Todd M. Guerra +- Joseph Breihan +- Craig Loftus +- The Gitter Badger +- Régis Behmo +- Julen Ruiz Aizpuru +- astaric +- Felix Yan +- Philip_Tzou +- Jesús Espino +- Jeremy Weinstein +- James Page +- masklinn +- Sjoerd Langkemper +- Matt Iversen +- Alexander A. Dyshev +- Dirkjan Ochtman +- Nick Retallack +- Thomas Waldmann +- xen Babel was previously developed under the Copyright of Edgewall Software. The following copyright notice holds true for releases before 2013: "Copyright (c) diff --git a/CHANGES b/CHANGES index eff75e070..e3c54bfc8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,173 @@ Babel Changelog =============== +Version 2.9.1 +------------- + +Bugfixes +~~~~~~~~ + +* The internal locale-data loading functions now validate the name of the locale file to be loaded and only + allow files within Babel's data directory. 
Thank you to Chris Lyne of Tenable, Inc. for discovering the issue! + +Version 2.9.0 +------------- + +Upcoming version support changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* This version, Babel 2.9, is the last version of Babel to support Python 2.7, Python 3.4, and Python 3.5. + +Improvements +~~~~~~~~~~~~ + +* CLDR: Use CLDR 37 – Aarni Koskela (#734) +* Dates: Handle ZoneInfo objects in get_timezone_location, get_timezone_name - Alessio Bogon (#741) +* Numbers: Add group_separator feature in number formatting - Abdullah Javed Nesar (#726) + +Bugfixes +~~~~~~~~ + +* Dates: Correct default Format().timedelta format to 'long' to mute deprecation warnings – Aarni Koskela +* Import: Simplify iteration code in "import_cldr.py" – Felix Schwarz +* Import: Stop using deprecated ElementTree methods "getchildren()" and "getiterator()" – Felix Schwarz +* Messages: Fix unicode printing error on Python 2 without TTY. – Niklas Hambüchen +* Messages: Introduce invariant that _invalid_pofile() takes unicode line. – Niklas Hambüchen +* Tests: fix tests when using Python 3.9 – Felix Schwarz +* Tests: Remove deprecated 'sudo: false' from Travis configuration – Jon Dufresne +* Tests: Support Py.test 6.x – Aarni Koskela +* Utilities: LazyProxy: Handle AttributeError in specified func – Nikiforov Konstantin (#724) +* Utilities: Replace usage of parser.suite with ast.parse – Miro Hrončok + +Documentation +~~~~~~~~~~~~~ + +* Update parse_number comments – Brad Martin (#708) +* Add __iter__ to Catalog documentation – @CyanNani123 + +Version 2.8.1 +------------- + +This is solely a patch release to make running tests on Py.test 6+ possible. 
+ +Bugfixes +~~~~~~~~ + +* Support Py.test 6 - Aarni Koskela (#747, #750, #752) + +Version 2.8.0 +------------- + +Improvements +~~~~~~~~~~~~ + +* CLDR: Upgrade to CLDR 36.0 - Aarni Koskela (#679) +* Messages: Don't even open files with the "ignore" extraction method - @sebleblanc (#678) + +Bugfixes +~~~~~~~~ + +* Numbers: Fix formatting very small decimals when quantization is disabled - Lev Lybin, @miluChen (#662) +* Messages: Attempt to sort all messages – Mario Frasca (#651, #606) + +Docs +~~~~ + +* Add years to changelog - Romuald Brunet +* Note that installation requires pytz - Steve (Gadget) Barnes + +Version 2.7.0 +------------- + +Possibly incompatible changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These may be backward incompatible in some cases, as some more-or-less internal +APIs have changed. Please feel free to file issues if you bump into anything +strange and we'll try to help! + +* General: Internal uses of ``babel.util.odict`` have been replaced with + ``collections.OrderedDict`` from The Python standard library. 
+ +Improvements +~~~~~~~~~~~~ + +* CLDR: Upgrade to CLDR 35.1 - Alberto Mardegan, Aarni Koskela (#626, #643) +* General: allow anchoring path patterns to the start of a string - Brian Cappello (#600) +* General: Bumped version requirement on pytz - @chrisbrake (#592) +* Messages: `pybabel compile`: exit with code 1 if errors were encountered - Aarni Koskela (#647) +* Messages: Add omit-header to update_catalog - Cédric Krier (#633) +* Messages: Catalog update: keep user comments from destination by default - Aarni Koskela (#648) +* Messages: Skip empty message when writing mo file - Cédric Krier (#564) +* Messages: Small fixes to avoid crashes on badly formatted .po files - Bryn Truscott (#597) +* Numbers: `parse_decimal()` `strict` argument and `suggestions` - Charly C (#590) +* Numbers: don't repeat suggestions in parse_decimal strict - Serban Constantin (#599) +* Numbers: implement currency formatting with long display names - Luke Plant (#585) +* Numbers: parse_decimal(): assume spaces are equivalent to non-breaking spaces when not in strict mode - Aarni Koskela (#649) +* Performance: Cache locale_identifiers() - Aarni Koskela (#644) + +Bugfixes +~~~~~~~~ + +* CLDR: Skip alt=... 
for week data (minDays, firstDay, weekendStart, weekendEnd) - Aarni Koskela (#634) +* Dates: Fix wrong weeknumber for 31.12.2018 - BT-sschmid (#621) +* Locale: Avoid KeyError trying to get data on WindowsXP - mondeja (#604) +* Locale: get_display_name(): Don't attempt to concatenate variant information to None - Aarni Koskela (#645) +* Messages: pofile: Add comparison operators to _NormalizedString - Aarni Koskela (#646) +* Messages: pofile: don't crash when message.locations can't be sorted - Aarni Koskela (#646) + +Tooling & docs +~~~~~~~~~~~~~~ + +* Docs: Remove all references to deprecated easy_install - Jon Dufresne (#610) +* Docs: Switch print statement in docs to print function - NotAFile +* Docs: Update all pypi.python.org URLs to pypi.org - Jon Dufresne (#587) +* Docs: Use https URLs throughout project where available - Jon Dufresne (#588) +* Support: Add testing and document support for Python 3.7 - Jon Dufresne (#611) +* Support: Test on Python 3.8-dev - Aarni Koskela (#642) +* Support: Using ABCs from collections instead of collections.abc is deprecated. - Julien Palard (#609) +* Tests: Fix conftest.py compatibility with pytest 4.3 - Miro Hrončok (#635) +* Tests: Update pytest and pytest-cov - Miro Hrončok (#635) + +Version 2.6.0 +------------- + +Possibly incompatible changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These may be backward incompatible in some cases, as some more-or-less internal APIs have changed. +Please feel free to file issues if you bump into anything strange and we'll try to help! + +* Numbers: Refactor decimal handling code and allow bypass of decimal quantization. 
(@kdeldycke) (PR #538) +* Messages: allow processing files that are in locales unknown to Babel (@akx) (PR #557) +* General: Drop support for EOL Python 2.6 and 3.3 (@hugovk) (PR #546) + +Other changes +~~~~~~~~~~~~~ + +* CLDR: Use CLDR 33 (@akx) (PR #581) +* Lists: Add support for various list styles other than the default (@akx) (#552) +* Messages: Add new PoFileError exception (@Bedrock02) (PR #532) +* Times: Simplify Linux distro specific explicit timezone setting search (@scop) (PR #528) + +Bugfixes +~~~~~~~~ + +* CLDR: avoid importing alt=narrow currency symbols (@akx) (PR #558) +* CLDR: ignore non-Latin numbering systems (@akx) (PR #579) +* Docs: Fix improper example for date formatting (@PTrottier) (PR #574) +* Tooling: Fix some deprecation warnings (@akx) (PR #580) + +Tooling & docs +~~~~~~~~~~~~~~ + +* Add explicit signatures to some date autofunctions (@xmo-odoo) (PR #554) +* Include license file in the generated wheel package (@jdufresne) (PR #539) +* Python 3.6 invalid escape sequence deprecation fixes (@scop) (PR #528) +* Test and document all supported Python versions (@jdufresne) (PR #540) +* Update copyright header years and authors file (@akx) (PR #559) + + Version 2.5.3 ------------- @@ -96,7 +263,7 @@ Internal improvements Version 2.3.4 ------------- -(Bugfix release, released on April 22th) +(Bugfix release, released on April 22th 2016) Bugfixes ~~~~~~~~ @@ -107,7 +274,7 @@ Bugfixes Version 2.3.3 ------------- -(Bugfix release, released on April 12th) +(Bugfix release, released on April 12th 2016) Bugfixes ~~~~~~~~ @@ -117,7 +284,7 @@ Bugfixes Version 2.3.2 ------------- -(Bugfix release, released on April 9th) +(Bugfix release, released on April 9th 2016) Bugfixes ~~~~~~~~ @@ -127,12 +294,12 @@ Bugfixes Version 2.3.1 ------------- -(Bugfix release because of deployment problems, released on April 8th) +(Bugfix release because of deployment problems, released on April 8th 2016) Version 2.3 ----------- -(Feature release, released on April 8th) 
+(Feature release, released on April 8th 2016) Internal improvements ~~~~~~~~~~~~~~~~~~~~~ @@ -210,7 +377,7 @@ Version 2.1 - Parse and honour the locale inheritance exceptions (https://github.com/python-babel/babel/issues/97) -- Fix Locale.parse using ``global.dat`` incompatible types +- Fix Locale.parse using ``global.dat`` incompatible types (https://github.com/python-babel/babel/issues/174) - Fix display of negative offsets in ``FixedOffsetTimezone`` (https://github.com/python-babel/babel/issues/214) @@ -218,7 +385,7 @@ Version 2.1 build, should improve compilation time for large projects - Add support for "narrow" format for ``format_timedelta`` - Add universal wheel support -- Support 'Language' header field in .PO files +- Support 'Language' header field in .PO files (fixes https://github.com/python-babel/babel/issues/76) - Test suite enhancements (coverage, broken tests fixed, etc) - Documentation updated @@ -313,7 +480,7 @@ Version 1.0 string does not contain any string formattings (:trac:`150`). - Fix Serbian plural forms (:trac:`213`). - Small speed improvement in format_date() (:trac:`216`). -- Fix so frontend.CommandLineInterface.run does not accumulate logging +- Fix so frontend.CommandLineInterface.run does not accumulate logging handlers (:trac:`227`, reported with initial patch by dfraser) - Fix exception if environment contains an invalid locale setting (:trac:`200`) @@ -390,11 +557,11 @@ Version 0.9.6 string does not contain any string formattings (:trac:`150`). - Fix Serbian plural forms (:trac:`213`). - Small speed improvement in format_date() (:trac:`216`). 
-- Fix number formatting for locales where CLDR specifies alt or draft +- Fix number formatting for locales where CLDR specifies alt or draft items (:trac:`217`) - Fix bad check in format_time (:trac:`257`, reported with patch and tests by jomae) -- Fix so frontend.CommandLineInterface.run does not accumulate logging +- Fix so frontend.CommandLineInterface.run does not accumulate logging handlers (:trac:`227`, reported with initial patch by dfraser) - Fix exception if environment contains an invalid locale setting (:trac:`200`) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1b7e6959..079ef06b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,5 +51,5 @@ shall be amended so the history is not cluttered by "fixup commits". ## Writing Good Commits Please see -http://api.coala.io/en/latest/Developers/Writing_Good_Commits.html +https://api.coala.io/en/latest/Developers/Writing_Good_Commits.html for guidelines on how to write good commits and proper commit messages. diff --git a/LICENSE b/LICENSE index 1f1f55b60..693e1a187 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2013 by the Babel Team, see AUTHORS for more information. +Copyright (c) 2013-2021 by the Babel Team, see AUTHORS for more information. All rights reserved. diff --git a/babel/__init__.py b/babel/__init__.py index 5a388226b..3e20e4bd1 100644 --- a/babel/__init__.py +++ b/babel/__init__.py @@ -13,7 +13,7 @@ access to various locale display names, localized number and date formatting, etc. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" @@ -21,4 +21,4 @@ negotiate_locale, parse_locale, get_locale_identifier -__version__ = '2.5.3' +__version__ = '2.9.1' diff --git a/babel/_compat.py b/babel/_compat.py index aea338938..11b4d7a6b 100644 --- a/babel/_compat.py +++ b/babel/_compat.py @@ -10,9 +10,9 @@ text_type = str string_types = (str,) integer_types = (int, ) - unichr = chr text_to_native = lambda s, enc: s + unichr = chr iterkeys = lambda d: iter(d.keys()) itervalues = lambda d: iter(d.values()) @@ -28,6 +28,7 @@ cmp = lambda a, b: (a > b) - (a < b) array_tobytes = array.array.tobytes + from collections import abc else: text_type = unicode @@ -52,11 +53,19 @@ cmp = cmp array_tobytes = array.array.tostring - + import collections as abc number_types = integer_types + (float,) +def force_text(s, encoding='utf-8', errors='strict'): + if isinstance(s, text_type): + return s + if isinstance(s, bytes): + return s.decode(encoding, errors) + return text_type(s) + + # # Since Python 3.3, a fast decimal implementation is already included in the # standard library. Otherwise use cdecimal when available diff --git a/babel/core.py b/babel/core.py index 5140f49d7..a323a7295 100644 --- a/babel/core.py +++ b/babel/core.py @@ -5,7 +5,7 @@ Core locale representation and locale data access. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ @@ -39,7 +39,7 @@ def get_global(key): information independent of individual locales. 
>>> get_global('zone_aliases')['UTC'] - u'Etc/GMT' + u'Etc/UTC' >>> get_global('zone_territories')['Europe/Berlin'] u'DE' @@ -379,7 +379,7 @@ def get_display_name(self, locale=None): locale = self locale = Locale.parse(locale) retval = locale.languages.get(self.language) - if self.territory or self.script or self.variant: + if retval and (self.territory or self.script or self.variant): details = [] if self.script: details.append(locale.scripts.get(self.script)) @@ -857,11 +857,11 @@ def list_patterns(self): .. note:: The format of the value returned may change between Babel versions. - >>> Locale('en').list_patterns['start'] + >>> Locale('en').list_patterns['standard']['start'] u'{0}, {1}' - >>> Locale('en').list_patterns['end'] + >>> Locale('en').list_patterns['standard']['end'] u'{0}, and {1}' - >>> Locale('en_GB').list_patterns['end'] + >>> Locale('en_GB').list_patterns['standard']['end'] u'{0} and {1}' """ return self._data['list_patterns'] @@ -1118,7 +1118,7 @@ def parse_locale(identifier, sep='_'): def get_locale_identifier(tup, sep='_'): """The reverse of :func:`parse_locale`. It creates a locale identifier out of a ``(language, territory, script, variant)`` tuple. Items can be set to - ``None`` and trailing ``None``\s can also be left out of the tuple. + ``None`` and trailing ``None``\\s can also be left out of the tuple. >>> get_locale_identifier(('de', 'DE', None, '1999')) 'de_DE_1999' diff --git a/babel/dates.py b/babel/dates.py index d1fafe2a7..75e8f3501 100644 --- a/babel/dates.py +++ b/babel/dates.py @@ -12,7 +12,7 @@ * ``LC_ALL``, and * ``LANG`` - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ @@ -34,7 +34,7 @@ # be inherited, the inheritance of this value can be explicitly disabled by # use of the 'no inheritance marker' as the value, which is 3 simultaneous [sic] # empty set characters ( U+2205 )." 
-# - http://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names +# - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names NO_INHERITANCE_MARKER = u'\u2205\u2205\u2205' @@ -76,6 +76,21 @@ def _get_dt_and_tzinfo(dt_or_tzinfo): return dt, tzinfo +def _get_tz_name(dt_or_tzinfo): + """ + Get the timezone name out of a time, datetime, or tzinfo object. + + :rtype: str + """ + dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo) + if hasattr(tzinfo, 'zone'): # pytz object + return tzinfo.zone + elif hasattr(tzinfo, 'key') and tzinfo.key is not None: # ZoneInfo object + return tzinfo.key + else: + return tzinfo.tzname(dt or datetime.utcnow()) + + def _get_datetime(instant): """ Get a datetime out of an "instant" (date, time, datetime, number). @@ -500,13 +515,9 @@ def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME, return_city=False): :return: the localized timezone name using location format """ - dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo) locale = Locale.parse(locale) - if hasattr(tzinfo, 'zone'): - zone = tzinfo.zone - else: - zone = tzinfo.tzname(dt or datetime.utcnow()) + zone = _get_tz_name(dt_or_tzinfo) # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) @@ -521,7 +532,7 @@ def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME, return_city=False): territory = 'ZZ' # invalid/unknown territory_name = locale.territories[territory] if not return_city and territory and len(get_global('territory_zones').get(territory, [])) == 1: - return region_format % (territory_name) + return region_format % territory_name # Otherwise, include the city in the output fallback_format = locale.zone_formats['fallback'] @@ -592,7 +603,7 @@ def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, format. For more information see `LDML Appendix J: Time Zone Display Names - `_ + `_ .. 
versionadded:: 0.9 @@ -619,10 +630,7 @@ def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo) locale = Locale.parse(locale) - if hasattr(tzinfo, 'zone'): - zone = tzinfo.zone - else: - zone = tzinfo.tzname(dt) + zone = _get_tz_name(dt_or_tzinfo) if zone_variant is None: if dt is None: @@ -982,7 +990,7 @@ def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=L u'15.\u201317.1.2016' >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB") - '12:12 \u2013 16:16' + '12:12\u201316:16' >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US") '5:12 AM \u2013 4:16 PM' @@ -1018,7 +1026,7 @@ def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=L locale = Locale.parse(locale) # NB: The quote comments below are from the algorithm description in - # http://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats + # https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats # > Look for the intervalFormatItem element that matches the "skeleton", # > starting in the current locale and then following the locale fallback @@ -1155,7 +1163,7 @@ def parse_date(string, locale=LC_TIME): # FIXME: this currently only supports numbers, but should also support month # names, both in the requested locale, and english - numbers = re.findall('(\d+)', string) + numbers = re.findall(r'(\d+)', string) year = numbers[indexes['Y']] if len(year) == 2: year = 2000 + int(year) @@ -1198,7 +1206,7 @@ def parse_time(string, locale=LC_TIME): # and seconds should be optional, maybe minutes too # oh, and time-zones, of course - numbers = re.findall('(\d+)', string) + numbers = re.findall(r'(\d+)', string) hour = int(numbers[indexes['H']]) minute = int(numbers[indexes['M']]) second = int(numbers[indexes['S']]) @@ -1303,7 +1311,7 @@ def extract(self, char): elif char == 'H': return self.value.hour elif char == 'h': - return (self.value.hour % 12 or 12) + 
+ return self.value.hour % 12 or 12 elif char == 'm': return self.value.minute elif char == 'a': @@ -1319,9 +1327,7 @@ def format_era(self, char, num): def format_year(self, char, num): value = self.value.year if char.isupper(): - week = self.get_week_number(self.get_day_of_year()) - if week == 0: - value -= 1 + value = self.value.isocalendar()[0] year = self.format(value, num) if num == 2: year = year[-2:] @@ -1505,8 +1511,20 @@ def get_week_number(self, day_of_period, day_of_week=None): if first_day < 0: first_day += 7 week_number = (day_of_period + first_day - 1) // 7 + if 7 - first_day >= self.locale.min_week_days: week_number += 1 + + if self.locale.first_week_day == 0: + # Correct the weeknumber in case of iso-calendar usage (first_week_day=0). + # If the weeknumber exceeds the maximum number of weeks for the given year + # we must count from zero. For example the above calculation gives week 53 + # for 2018-12-31. By iso-calendar definition 2018 has a max of 52 + # weeks, thus the weeknumber must be 53-52=1. + max_weeks = date(year=self.value.year, day=28, month=12).isocalendar()[1] + if week_number > max_weeks: + week_number -= max_weeks + return week_number @@ -1527,7 +1545,7 @@ def get_week_number(self, day_of_period, day_of_week=None): } #: The pattern characters declared in the Date Field Symbol Table -#: (http://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table) +#: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table) #: in order of decreasing magnitude. PATTERN_CHAR_ORDER = "GyYuUQqMLlwWdDFgEecabBChHKkjJmsSAzZOvVXx" @@ -1675,7 +1693,7 @@ def split_interval_pattern(pattern): Split an interval-describing datetime pattern into multiple pieces. > The pattern is then designed to be broken up into two pieces by determining the first repeating field. 
- - http://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats + - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats >>> split_interval_pattern(u'E d.M. \u2013 E d.M.') [u'E d.M. \u2013 ', 'E d.M.'] diff --git a/babel/languages.py b/babel/languages.py index 40f5d9828..097436705 100644 --- a/babel/languages.py +++ b/babel/languages.py @@ -26,7 +26,7 @@ def get_official_languages(territory, regional=False, de_facto=False): """ territory = str(territory).upper() - allowed_stati = set(("official",)) + allowed_stati = {"official"} if regional: allowed_stati.add("official_regional") if de_facto: @@ -60,7 +60,7 @@ def get_territory_language_info(territory): .. note:: Note that the format of the dict returned may change between Babel versions. - See http://www.unicode.org/cldr/charts/latest/supplemental/territory_language_information.html + See https://www.unicode.org/cldr/charts/latest/supplemental/territory_language_information.html :param territory: Territory code :type territory: str diff --git a/babel/lists.py b/babel/lists.py index 82e5590c1..8368b27a6 100644 --- a/babel/lists.py +++ b/babel/lists.py @@ -11,7 +11,7 @@ * ``LC_ALL``, and * ``LANG`` - :copyright: (c) 2015 by the Babel Team. + :copyright: (c) 2015-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ @@ -20,16 +20,46 @@ DEFAULT_LOCALE = default_locale() -def format_list(lst, locale=DEFAULT_LOCALE): +def format_list(lst, style='standard', locale=DEFAULT_LOCALE): """ Format the items in `lst` as a list. 
- >>> format_list(['apples', 'oranges', 'pears'], 'en') + >>> format_list(['apples', 'oranges', 'pears'], locale='en') u'apples, oranges, and pears' - >>> format_list(['apples', 'oranges', 'pears'], 'zh') + >>> format_list(['apples', 'oranges', 'pears'], locale='zh') u'apples\u3001oranges\u548cpears' + >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi') + u'omena, peruna tai aplari' + + These styles are defined, but not all are necessarily available in all locales. + The following text is verbatim from the Unicode TR35-49 spec [1]. + + * standard: + A typical 'and' list for arbitrary placeholders. + eg. "January, February, and March" + * standard-short: + A short version of a 'and' list, suitable for use with short or abbreviated placeholder values. + eg. "Jan., Feb., and Mar." + * or: + A typical 'or' list for arbitrary placeholders. + eg. "January, February, or March" + * or-short: + A short version of an 'or' list. + eg. "Jan., Feb., or Mar." + * unit: + A list suitable for wide units. + eg. "3 feet, 7 inches" + * unit-short: + A list suitable for short units + eg. "3 ft, 7 in" + * unit-narrow: + A list suitable for narrow units, where space on the screen is very limited. + eg. "3′ 7″" + + [1]: https://www.unicode.org/reports/tr35/tr35-49/tr35-general.html#ListPatterns :param lst: a sequence of items to format in to a list + :param style: the style to format the list with. See above for description. 
:param locale: the locale """ locale = Locale.parse(locale) @@ -37,12 +67,21 @@ def format_list(lst, locale=DEFAULT_LOCALE): return '' if len(lst) == 1: return lst[0] + + if style not in locale.list_patterns: + raise ValueError('Locale %s does not support list formatting style %r (supported are %s)' % ( + locale, + style, + list(sorted(locale.list_patterns)), + )) + patterns = locale.list_patterns[style] + if len(lst) == 2: - return locale.list_patterns['2'].format(*lst) + return patterns['2'].format(*lst) - result = locale.list_patterns['start'].format(lst[0], lst[1]) + result = patterns['start'].format(lst[0], lst[1]) for elem in lst[2:-1]: - result = locale.list_patterns['middle'].format(result, elem) - result = locale.list_patterns['end'].format(result, lst[-1]) + result = patterns['middle'].format(result, elem) + result = patterns['end'].format(result, lst[-1]) return result diff --git a/babel/localedata.py b/babel/localedata.py index 0c94e49ea..438afb643 100644 --- a/babel/localedata.py +++ b/babel/localedata.py @@ -8,21 +8,23 @@ :note: The `Locale` class, which uses this module under the hood, provides a more convenient interface for accessing the locale data. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ import os +import re +import sys import threading -from collections import MutableMapping from itertools import chain -from babel._compat import pickle, string_types +from babel._compat import pickle, string_types, abc _cache = {} _cache_lock = threading.RLock() _dirname = os.path.join(os.path.dirname(__file__), 'locale-data') +_windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I) def normalize_locale(name): @@ -39,6 +41,22 @@ def normalize_locale(name): return locale_id +def resolve_locale_filename(name): + """ + Resolve a locale identifier to a `.dat` path on disk. + """ + + # Clean up any possible relative paths. 
+ name = os.path.basename(name) + + # Ensure we're not left with one of the Windows reserved names. + if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]): + raise ValueError("Name %s is invalid on Windows" % name) + + # Build the path. + return os.path.join(_dirname, '%s.dat' % name) + + def exists(name): """Check whether locale data is available for the given locale. @@ -50,7 +68,7 @@ def exists(name): return False if name in _cache: return True - file_found = os.path.exists(os.path.join(_dirname, '%s.dat' % name)) + file_found = os.path.exists(resolve_locale_filename(name)) return True if file_found else bool(normalize_locale(name)) @@ -58,13 +76,24 @@ def locale_identifiers(): """Return a list of all locale identifiers for which locale data is available. + This data is cached after the first invocation in `locale_identifiers.cache`. + + Removing the `locale_identifiers.cache` attribute or setting it to `None` + will cause this function to re-read the list from disk. + .. 
versionadded:: 0.8.1 :return: a list of locale identifiers (strings) """ - return [stem for stem, extension in [ - os.path.splitext(filename) for filename in os.listdir(_dirname) - ] if extension == '.dat' and stem != 'root'] + data = getattr(locale_identifiers, 'cache', None) + if data is None: + locale_identifiers.cache = data = [ + stem + for stem, extension in + (os.path.splitext(filename) for filename in os.listdir(_dirname)) + if extension == '.dat' and stem != 'root' + ] + return data def load(name, merge_inherited=True): @@ -92,6 +121,7 @@ def load(name, merge_inherited=True): :raise `IOError`: if no locale data file is found for the given locale identifer, or one of the locales it inherits from """ + name = os.path.basename(name) _cache_lock.acquire() try: data = _cache.get(name) @@ -109,7 +139,7 @@ def load(name, merge_inherited=True): else: parent = '_'.join(parts[:-1]) data = load(parent).copy() - filename = os.path.join(_dirname, '%s.dat' % name) + filename = resolve_locale_filename(name) with open(filename, 'rb') as fileobj: if name != 'root' and merge_inherited: merge(data, pickle.load(fileobj)) @@ -187,7 +217,7 @@ def resolve(self, data): return data -class LocaleDataDict(MutableMapping): +class LocaleDataDict(abc.MutableMapping): """Dictionary wrapper that automatically resolves aliases to the actual values. """ diff --git a/babel/localtime/__init__.py b/babel/localtime/__init__.py index 883ff1661..bd3954951 100644 --- a/babel/localtime/__init__.py +++ b/babel/localtime/__init__.py @@ -6,7 +6,7 @@ Babel specific fork of tzlocal to determine the local timezone of the system. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" diff --git a/babel/localtime/_unix.py b/babel/localtime/_unix.py index 8a8b4e967..c2194694c 100644 --- a/babel/localtime/_unix.py +++ b/babel/localtime/_unix.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import with_statement import os import re @@ -100,9 +101,7 @@ def _get_localzone(_root='/'): # OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and # Gentoo has a TIMEZONE setting in /etc/conf.d/clock # We look through these files for a timezone: - zone_re = re.compile('\s*ZONE\s*=\s*\"') - timezone_re = re.compile('\s*TIMEZONE\s*=\s*\"') - end_re = re.compile('\"') + timezone_re = re.compile(r'\s*(TIME)?ZONE\s*=\s*"(?P.+)"') for filename in ('etc/sysconfig/clock', 'etc/conf.d/clock'): tzpath = os.path.join(_root, filename) @@ -110,17 +109,10 @@ def _get_localzone(_root='/'): continue with open(tzpath, 'rt') as tzfile: for line in tzfile: - # Look for the ZONE= setting. - match = zone_re.match(line) - if match is None: - # No ZONE= setting. Look for the TIMEZONE= setting. - match = timezone_re.match(line) + match = timezone_re.match(line) if match is not None: - # Some setting existed - line = line[match.end():] - etctz = line[:end_re.search(line).start()] - # We found a timezone + etctz = match.group("etctz") return pytz.timezone(etctz.replace(' ', '_')) # No explicit setting existed. Use localtime diff --git a/babel/localtime/_win32.py b/babel/localtime/_win32.py index 3752dffac..65cc0885d 100644 --- a/babel/localtime/_win32.py +++ b/babel/localtime/_win32.py @@ -66,7 +66,7 @@ def get_localzone_name(): sub = winreg.OpenKey(tzkey, subkey) data = valuestodict(sub) sub.Close() - if data['Std'] == tzwin: + if data.get('Std', None) == tzwin: tzkeyname = subkey break diff --git a/babel/messages/__init__.py b/babel/messages/__init__.py index 1b63bae2e..7d2587f63 100644 --- a/babel/messages/__init__.py +++ b/babel/messages/__init__.py @@ -5,7 +5,7 @@ Support for ``gettext`` message catalogs. - :copyright: (c) 2013 by the Babel Team. 
+ :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index fd40058ad..a19a3e6d8 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -5,7 +5,7 @@ Data structures for message catalogs. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ @@ -13,17 +13,18 @@ import time from cgi import parse_header +from collections import OrderedDict from datetime import datetime, time as time_ from difflib import get_close_matches from email import message_from_string from copy import copy from babel import __version__ as VERSION -from babel.core import Locale +from babel.core import Locale, UnknownLocaleError from babel.dates import format_datetime from babel.messages.plurals import get_plural -from babel.util import odict, distinct, LOCALTZ, FixedOffsetTimezone -from babel._compat import string_types, number_types, PY2, cmp +from babel.util import distinct, LOCALTZ, FixedOffsetTimezone +from babel._compat import string_types, number_types, PY2, cmp, text_type, force_text __all__ = ['Message', 'Catalog', 'TranslationError'] @@ -120,8 +121,8 @@ def __cmp__(self, other): """Compare Messages, taking into account plural ids""" def values_to_compare(obj): if isinstance(obj, Message) and obj.pluralizable: - return (obj.id[0], obj.context or '') - return (obj.id, obj.context or '') + return obj.id[0], obj.context or '' + return obj.id, obj.context or '' return cmp(values_to_compare(self), values_to_compare(other)) def __gt__(self, other): @@ -267,11 +268,9 @@ def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, :param fuzzy: the fuzzy bit on the catalog header """ self.domain = domain - if locale: - locale = Locale.parse(locale) self.locale = locale self._header_comment = header_comment - self._messages = odict() + self._messages = OrderedDict() 
self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -297,10 +296,40 @@ def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, self.revision_date = revision_date self.fuzzy = fuzzy - self.obsolete = odict() # Dictionary of obsolete messages + self.obsolete = OrderedDict() # Dictionary of obsolete messages self._num_plurals = None self._plural_expr = None + def _set_locale(self, locale): + if locale is None: + self._locale_identifier = None + self._locale = None + return + + if isinstance(locale, Locale): + self._locale_identifier = text_type(locale) + self._locale = locale + return + + if isinstance(locale, string_types): + self._locale_identifier = text_type(locale) + try: + self._locale = Locale.parse(locale) + except UnknownLocaleError: + self._locale = None + return + + raise TypeError('`locale` must be a Locale, a locale identifier string, or None; got %r' % locale) + + def _get_locale(self): + return self._locale + + def _get_locale_identifier(self): + return self._locale_identifier + + locale = property(_get_locale, _set_locale) + locale_identifier = property(_get_locale_identifier) + def _get_header_comment(self): comment = self._header_comment year = datetime.now(LOCALTZ).strftime('%Y') @@ -310,9 +339,9 @@ def _get_header_comment(self): .replace('VERSION', self.version) \ .replace('YEAR', year) \ .replace('ORGANIZATION', self.copyright_holder) - if self.locale: - comment = comment.replace('Translations template', '%s translations' - % self.locale.english_name) + locale_name = (self.locale.english_name if self.locale else self.locale_identifier) + if locale_name: + comment = comment.replace('Translations template', '%s translations' % locale_name) return comment def _set_header_comment(self, string): @@ -366,12 +395,12 @@ def _get_mime_headers(self): else: headers.append(('PO-Revision-Date', self.revision_date)) headers.append(('Last-Translator', self.last_translator)) - if self.locale is not None: - 
headers.append(('Language', str(self.locale))) - if (self.locale is not None) and ('LANGUAGE' in self.language_team): + if self.locale_identifier: + headers.append(('Language', str(self.locale_identifier))) + if self.locale_identifier and ('LANGUAGE' in self.language_team): headers.append(('Language-Team', self.language_team.replace('LANGUAGE', - str(self.locale)))) + str(self.locale_identifier)))) else: headers.append(('Language-Team', self.language_team)) if self.locale is not None: @@ -385,7 +414,8 @@ def _get_mime_headers(self): def _set_mime_headers(self, headers): for name, value in headers: - name = name.lower() + name = force_text(name.lower(), encoding=self.charset) + value = force_text(value, encoding=self.charset) if name == 'project-id-version': parts = value.split(' ') self.project = u' '.join(parts[:-1]) @@ -396,7 +426,7 @@ def _set_mime_headers(self, headers): self.last_translator = value elif name == 'language': value = value.replace('-', '_') - self.locale = Locale.parse(value) + self._set_locale(value) elif name == 'language-team': self.language_team = value elif name == 'content-type': @@ -490,6 +520,8 @@ def plural_expr(self): '(n != 1)' >>> Catalog(locale='ga').plural_expr '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 
3 : 4)' + >>> Catalog(locale='ding').plural_expr # unknown locale + '(n != 1)' :type: `string_types`""" if self._plural_expr is None: @@ -531,7 +563,7 @@ def __iter__(self): buf.append('%s: %s' % (name, value)) flags = set() if self.fuzzy: - flags |= set(['fuzzy']) + flags |= {'fuzzy'} yield Message(u'', '\n'.join(buf), flags=flags) for key in self._messages: yield self._messages[key] @@ -668,7 +700,7 @@ def delete(self, id, context=None): if key in self._messages: del self._messages[key] - def update(self, template, no_fuzzy_matching=False, update_header_comment=False): + def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_user_comments=True): """Update the catalog based on the given template catalog. >>> from babel.messages import Catalog @@ -723,7 +755,7 @@ def update(self, template, no_fuzzy_matching=False, update_header_comment=False) """ messages = self._messages remaining = messages.copy() - self._messages = odict() + self._messages = OrderedDict() # Prepare for fuzzy matching fuzzy_candidates = [] @@ -748,6 +780,10 @@ def _merge(message, oldkey, newkey): else: oldmsg = remaining.pop(oldkey, None) message.string = oldmsg.string + + if keep_user_comments: + message.user_comments = list(distinct(oldmsg.user_comments)) + if isinstance(message.id, (list, tuple)): if not isinstance(message.string, (list, tuple)): fuzzy = True @@ -762,7 +798,7 @@ def _merge(message, oldkey, newkey): message.string = message.string[0] message.flags |= oldmsg.flags if fuzzy: - message.flags |= set([u'fuzzy']) + message.flags |= {u'fuzzy'} self[message.id] = message for message in template: @@ -771,7 +807,7 @@ def _merge(message, oldkey, newkey): if key in messages: _merge(message, key, key) else: - if no_fuzzy_matching is False: + if not no_fuzzy_matching: # do some fuzzy matching with difflib if isinstance(key, tuple): matchkey = key[0] # just the msgid, no context diff --git a/babel/messages/checkers.py b/babel/messages/checkers.py index 
24ecdcfed..cba911d72 100644 --- a/babel/messages/checkers.py +++ b/babel/messages/checkers.py @@ -7,7 +7,7 @@ :since: version 0.9 - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ @@ -17,9 +17,9 @@ #: list of format chars that are compatible to each other _string_format_compatibilities = [ - set(['i', 'd', 'u']), - set(['x', 'X']), - set(['f', 'F', 'g', 'G']) + {'i', 'd', 'u'}, + {'x', 'X'}, + {'f', 'F', 'g', 'G'} ] @@ -61,7 +61,7 @@ def python_format(catalog, message): def _validate_format(format, alternative): """Test format string `alternative` against `format`. `format` can be the - msgid of a message and `alternative` one of the `msgstr`\s. The two + msgid of a message and `alternative` one of the `msgstr`\\s. The two arguments are not interchangeable as `alternative` may contain less placeholders if `format` uses named placeholders. diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 351a29070..64497762c 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -13,7 +13,7 @@ The main entry points into the extraction functionality are the functions `extract_from_dir` and `extract_from_file`. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" @@ -236,9 +236,12 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, :returns: list of tuples of the form ``(lineno, message, comments, context)`` :rtype: list[tuple[int, str|tuple[str], list[str], str|None] """ + if method == 'ignore': + return [] + with open(filename, 'rb') as fileobj: - return list(extract(method, fileobj, keywords, comment_tags, options, - strip_comment_tags)) + return list(extract(method, fileobj, keywords, comment_tags, + options, strip_comment_tags)) def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index fb171e36a..c5eb1dea9 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -5,7 +5,7 @@ Frontends for the message extraction functionality. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import print_function @@ -17,18 +17,19 @@ import shutil import sys import tempfile +from collections import OrderedDict from datetime import datetime from locale import getpreferredencoding from babel import __version__ as VERSION from babel import Locale, localedata -from babel._compat import StringIO, string_types, text_type +from babel._compat import StringIO, string_types, text_type, PY2 from babel.core import UnknownLocaleError from babel.messages.catalog import Catalog from babel.messages.extract import DEFAULT_KEYWORDS, DEFAULT_MAPPING, check_and_call_extract_file, extract_from_dir from babel.messages.mofile import write_mo from babel.messages.pofile import read_po, write_po -from babel.util import LOCALTZ, odict +from babel.util import LOCALTZ from distutils import log as distutils_log from distutils.cmd import Command as _Command from distutils.errors import DistutilsOptionError, DistutilsSetupError @@ -39,6 +40,9 @@ from configparser import RawConfigParser +po_file_read_mode = ('rU' if PY2 else 'r') + + def 
listify_value(arg, split=None): """ Make a list out of an argument. @@ -178,8 +182,13 @@ def finalize_options(self): 'or the base directory') def run(self): + n_errors = 0 for domain in self.domain: - self._run_domain(domain) + for catalog, errors in self._run_domain(domain).items(): + n_errors += len(errors) + if n_errors: + self.log.error('%d errors encountered.' % n_errors) + return (1 if n_errors else 0) def _run_domain(self, domain): po_files = [] @@ -215,6 +224,8 @@ def _run_domain(self, domain): if not po_files: raise DistutilsOptionError('no message catalogs found') + catalogs_and_errors = {} + for idx, (locale, po_file) in enumerate(po_files): mo_file = mo_files[idx] with open(po_file, 'rb') as infile: @@ -237,7 +248,8 @@ def _run_domain(self, domain): self.log.info('catalog %s is marked as fuzzy, skipping', po_file) continue - for message, errors in catalog.check(): + catalogs_and_errors[catalog] = catalog_errors = list(catalog.check()) + for message, errors in catalog_errors: for error in errors: self.log.error( 'error: %s:%d: %s', po_file, message.lineno, error @@ -248,6 +260,8 @@ def _run_domain(self, domain): with open(mo_file, 'wb') as outfile: write_mo(outfile, catalog, use_fuzzy=self.use_fuzzy) + return catalogs_and_errors + class extract_messages(Command): """Message extraction command for use in ``setup.py`` scripts. 
@@ -391,7 +405,7 @@ def finalize_options(self): if self.input_paths: if isinstance(self.input_paths, string_types): - self.input_paths = re.split(',\s*', self.input_paths) + self.input_paths = re.split(r',\s*', self.input_paths) elif self.distribution is not None: self.input_paths = dict.fromkeys([ k.split('.', 1)[0] @@ -485,7 +499,7 @@ def _get_mappings(self): mappings = [] if self.mapping_file: - with open(self.mapping_file, 'U') as fileobj: + with open(self.mapping_file, po_file_read_mode) as fileobj: method_map, options_map = parse_mapping(fileobj) for path in self.input_paths: mappings.append((path, method_map, options_map)) @@ -643,6 +657,8 @@ class update_catalog(Command): ('output-file=', 'o', "name of the output file (default " "'//LC_MESSAGES/.po')"), + ('omit-header', None, + "do not include msgid "" entry in header"), ('locale=', 'l', 'locale of the catalog to compile'), ('width=', 'w', @@ -657,15 +673,19 @@ class update_catalog(Command): ('update-header-comment', None, 'update target header comment'), ('previous', None, - 'keep previous msgids of translated messages') + 'keep previous msgids of translated messages'), + ] + boolean_options = [ + 'omit-header', 'no-wrap', 'ignore-obsolete', 'no-fuzzy-matching', + 'previous', 'update-header-comment', ] - boolean_options = ['no-wrap', 'ignore-obsolete', 'no-fuzzy-matching', 'previous', 'update-header-comment'] def initialize_options(self): self.domain = 'messages' self.input_file = None self.output_dir = None self.output_file = None + self.omit_header = False self.locale = None self.width = None self.no_wrap = False @@ -736,6 +756,7 @@ def run(self): try: with open(tmpname, 'wb') as tmpfile: write_po(tmpfile, catalog, + omit_header=self.omit_header, ignore_obsolete=self.ignore_obsolete, include_previous=self.previous, width=self.width) except: @@ -962,8 +983,13 @@ def parse_mapping(fileobj, filename=None): options_map = {} parser = RawConfigParser() - parser._sections = odict(parser._sections) # We need 
ordered sections - parser.readfp(fileobj, filename) + parser._sections = OrderedDict(parser._sections) # We need ordered sections + + if PY2: + parser.readfp(fileobj, filename) + else: + parser.read_file(fileobj, filename) + for section in parser.sections(): if section == 'extractors': extractors = dict(parser.items(section)) @@ -978,7 +1004,7 @@ def parse_mapping(fileobj, filename=None): method = extractors[method] method_map[idx] = (pattern, method) - return (method_map, options_map) + return method_map, options_map def parse_keywords(strings=[]): diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 30d6e5405..c57b1213f 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -6,7 +6,7 @@ A simple JavaScript 1.5 lexer which is used for the JavaScript extractor. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ from collections import namedtuple diff --git a/babel/messages/mofile.py b/babel/messages/mofile.py index 79042e003..8d3cfc905 100644 --- a/babel/messages/mofile.py +++ b/babel/messages/mofile.py @@ -5,7 +5,7 @@ Writing of files in the ``gettext`` MO (machine object) format. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" @@ -153,8 +153,8 @@ def write_mo(fileobj, catalog, use_fuzzy=False): in the output """ messages = list(catalog) - if not use_fuzzy: - messages[1:] = [m for m in messages[1:] if not m.fuzzy] + messages[1:] = [m for m in messages[1:] + if m.string and (use_fuzzy or not m.fuzzy)] messages.sort() ids = strs = b'' @@ -178,10 +178,7 @@ def write_mo(fileobj, catalog, use_fuzzy=False): ]) else: msgid = message.id.encode(catalog.charset) - if not message.string: - msgstr = message.id.encode(catalog.charset) - else: - msgstr = message.string.encode(catalog.charset) + msgstr = message.string.encode(catalog.charset) if message.context: msgid = b'\x04'.join([message.context.encode(catalog.charset), msgid]) diff --git a/babel/messages/plurals.py b/babel/messages/plurals.py index 92cefa79a..91ba9e1b1 100644 --- a/babel/messages/plurals.py +++ b/babel/messages/plurals.py @@ -5,7 +5,7 @@ Plural form definitions. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 696ec3e97..be33b831d 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -6,7 +6,7 @@ Reading and writing of files in the ``gettext`` PO (portable object) format. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" @@ -16,7 +16,7 @@ from babel.messages.catalog import Catalog, Message from babel.util import wraptext -from babel._compat import text_type +from babel._compat import text_type, cmp def unescape(string): @@ -73,6 +73,15 @@ def denormalize(string): return unescape(string) +class PoFileError(Exception): + """Exception thrown by PoParser when an invalid po file is encountered.""" + def __init__(self, message, catalog, line, lineno): + super(PoFileError, self).__init__('{message} on {lineno}'.format(message=message, lineno=lineno)) + self.catalog = catalog + self.line = line + self.lineno = lineno + + class _NormalizedString(object): def __init__(self, *args): @@ -89,6 +98,36 @@ def denormalize(self): def __nonzero__(self): return bool(self._strs) + __bool__ = __nonzero__ + + def __repr__(self): + return os.linesep.join(self._strs) + + def __cmp__(self, other): + if not other: + return 1 + + return cmp(text_type(self), text_type(other)) + + def __gt__(self, other): + return self.__cmp__(other) > 0 + + def __lt__(self, other): + return self.__cmp__(other) < 0 + + def __ge__(self, other): + return self.__cmp__(other) >= 0 + + def __le__(self, other): + return self.__cmp__(other) <= 0 + + def __eq__(self, other): + return self.__cmp__(other) == 0 + + def __ne__(self, other): + return self.__cmp__(other) != 0 + + class PoFileParser(object): """Support class to read messages from a ``gettext`` PO (portable object) file @@ -104,11 +143,12 @@ class PoFileParser(object): 'msgid_plural', ] - def __init__(self, catalog, ignore_obsolete=False): + def __init__(self, catalog, ignore_obsolete=False, abort_invalid=False): self.catalog = catalog self.ignore_obsolete = ignore_obsolete self.counter = 0 self.offset = 0 + self.abort_invalid = abort_invalid self._reset_message_state() def _reset_message_state(self): @@ -138,7 +178,7 @@ def _add_message(self): string = ['' for _ in range(self.catalog.num_plurals)] for idx, translation in self.translations: if idx >= 
self.catalog.num_plurals: - self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog") + self._invalid_pofile(u"", self.offset, "msg has more translations than num_plurals of catalog") continue string[idx] = translation.denormalize() string = tuple(string) @@ -172,9 +212,12 @@ def _process_message_line(self, lineno, line, obsolete=False): def _process_keyword_line(self, lineno, line, obsolete=False): for keyword in self._keywords: - if line.startswith(keyword) and line[len(keyword)] in [' ', '[']: - arg = line[len(keyword):] - break + try: + if line.startswith(keyword) and line[len(keyword)] in [' ', '[']: + arg = line[len(keyword):] + break + except IndexError: + self._invalid_pofile(line, lineno, "Keyword must be followed by a string") else: self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.") return @@ -276,11 +319,17 @@ def parse(self, fileobj): self._add_message() def _invalid_pofile(self, line, lineno, msg): + assert isinstance(line, text_type) + if self.abort_invalid: + raise PoFileError(msg, self.catalog, line, lineno) print("WARNING:", msg) - print("WARNING: Problem on line {0}: {1}".format(lineno + 1, line)) + # `line` is guaranteed to be unicode so u"{}"-interpolating would always + # succeed, but on Python < 2 if not in a TTY, `sys.stdout.encoding` + # is `None`, unicode may not be printable so we `repr()` to ASCII. + print(u"WARNING: Problem on line {0}: {1}".format(lineno + 1, repr(line))) -def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None): +def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None, abort_invalid=False): """Read messages from a ``gettext`` PO (portable object) file from the given file-like object and return a `Catalog`. 
@@ -325,9 +374,10 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=No :param domain: the message domain :param ignore_obsolete: whether to ignore obsolete messages in the input :param charset: the character set of the catalog. + :param abort_invalid: abort read if po file is invalid """ catalog = Catalog(locale=locale, domain=domain, charset=charset) - parser = PoFileParser(catalog, ignore_obsolete) + parser = PoFileParser(catalog, ignore_obsolete, abort_invalid=abort_invalid) parser.parse(fileobj) return catalog @@ -535,7 +585,18 @@ def _write_message(message, prefix=''): if not no_location: locs = [] - for filename, lineno in sorted(message.locations): + + # sort locations by filename and lineno. + # if there's no as lineno, use `-1`. + # if no sorting possible, leave unsorted. + # (see issue #606) + try: + locations = sorted(message.locations, + key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1)) + except TypeError: # e.g. "TypeError: unorderable types: NoneType() < int()" + locations = message.locations + + for filename, lineno in locations: if lineno and include_lineno: locs.append(u'%s:%d' % (filename.replace(os.sep, '/'), lineno)) else: diff --git a/babel/numbers.py b/babel/numbers.py index 8728699fb..0fcc07e15 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -12,19 +12,25 @@ * ``LC_ALL``, and * ``LANG`` - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" # TODO: # Padding and rounding increments in pattern: -# - http://www.unicode.org/reports/tr35/ (Appendix G.6) +# - https://www.unicode.org/reports/tr35/ (Appendix G.6) import re from datetime import date as date_, datetime as datetime_ -from itertools import chain +import warnings from babel.core import default_locale, Locale, get_global from babel._compat import decimal, string_types -from babel.localedata import locale_identifiers + +try: + # Python 2 + long +except NameError: + # Python 3 + long = int LC_NUMERIC = default_locale('LC_NUMERIC') @@ -151,6 +157,36 @@ def get_currency_precision(currency): return precisions.get(currency, precisions['DEFAULT'])[0] +def get_currency_unit_pattern(currency, count=None, locale=LC_NUMERIC): + """ + Return the unit pattern used for long display of a currency value + for a given locale. + This is a string containing ``{0}`` where the numeric part + should be substituted and ``{1}`` where the currency long display + name should be substituted. + + >>> get_currency_unit_pattern('USD', locale='en_US', count=10) + u'{0} {1}' + + .. versionadded:: 2.7.0 + + :param currency: the currency code. + :param count: the optional count. If provided the unit + pattern for that number will be returned. + :param locale: the `Locale` object or locale identifier. + """ + loc = Locale.parse(locale) + if count is not None: + plural_form = loc.plural_form(count) + try: + return loc._data['currency_unit_patterns'][plural_form] + except LookupError: + # Fall back to 'other' + pass + + return loc._data['currency_unit_patterns']['other'] + + def get_territory_currencies(territory, start_date=None, end_date=None, tender=True, non_tender=False, include_details=False): @@ -304,15 +340,40 @@ def format_number(number, locale=LC_NUMERIC): >>> format_number(1099, locale='de_DE') u'1.099' + .. deprecated:: 2.6.0 + + Use babel.numbers.format_decimal() instead. 
:param number: the number to format :param locale: the `Locale` object or locale identifier + + """ - # Do we really need this one? + warnings.warn('Use babel.numbers.format_decimal() instead.', DeprecationWarning) return format_decimal(number, locale=locale) -def format_decimal(number, format=None, locale=LC_NUMERIC): +def get_decimal_precision(number): + """Return maximum precision of a decimal instance's fractional part. + + Precision is extracted from the fractional part only. + """ + # Copied from: https://github.com/mahmoud/boltons/pull/59 + assert isinstance(number, decimal.Decimal) + decimal_tuple = number.normalize().as_tuple() + if decimal_tuple.exponent >= 0: + return 0 + return abs(decimal_tuple.exponent) + + +def get_decimal_quantum(precision): + """Return minimal quantum of a number, as defined by precision.""" + assert isinstance(precision, (int, long, decimal.Decimal)) + return decimal.Decimal(10) ** (-precision) + + +def format_decimal( + number, format=None, locale=LC_NUMERIC, decimal_quantization=True, group_separator=True): u"""Return the given decimal number formatted for a specific locale. >>> format_decimal(1.2345, locale='en_US') @@ -332,23 +393,42 @@ def format_decimal(number, format=None, locale=LC_NUMERIC): >>> format_decimal(12345.5, locale='en_US') u'12,345.5' + By default the locale is allowed to truncate and round a high-precision + number by forcing its format pattern onto the decimal part. 
You can bypass + this behavior with the `decimal_quantization` parameter: + + >>> format_decimal(1.2346, locale='en_US') + u'1.235' + >>> format_decimal(1.2346, locale='en_US', decimal_quantization=False) + u'1.2346' + >>> format_decimal(12345.67, locale='fr_CA', group_separator=False) + u'12345,67' + >>> format_decimal(12345.67, locale='en_US', group_separator=True) + u'12,345.67' + :param number: the number to format :param format: :param locale: the `Locale` object or locale identifier + :param decimal_quantization: Truncate and round high-precision numbers to + the format pattern. Defaults to `True`. + :param group_separator: Boolean to switch group separator on/off in a locale's + number format. """ locale = Locale.parse(locale) if not format: format = locale.decimal_formats.get(format) pattern = parse_pattern(format) - return pattern.apply(number, locale) + return pattern.apply( + number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator) class UnknownCurrencyFormatError(KeyError): """Exception raised when an unknown currency format is requested.""" -def format_currency(number, currency, format=None, locale=LC_NUMERIC, - currency_digits=True, format_type='standard'): +def format_currency( + number, currency, format=None, locale=LC_NUMERIC, currency_digits=True, + format_type='standard', decimal_quantization=True, group_separator=True): u"""Return formatted currency value. >>> format_currency(1099.98, 'USD', locale='en_US') @@ -374,7 +454,7 @@ def format_currency(number, currency, format=None, locale=LC_NUMERIC, >>> format_currency(1099.98, 'JPY', locale='en_US') u'\\xa51,100' >>> format_currency(1099.98, 'COP', u'#,##0.00', locale='es_ES') - u'1.100' + u'1.099,98' However, the number of decimal digits can be overriden from the currency information, by setting the last parameter to ``False``: @@ -398,13 +478,48 @@ def format_currency(number, currency, format=None, locale=LC_NUMERIC, ... 
UnknownCurrencyFormatError: "'unknown' is not a known currency format type" + >>> format_currency(101299.98, 'USD', locale='en_US', group_separator=False) + u'$101299.98' + + >>> format_currency(101299.98, 'USD', locale='en_US', group_separator=True) + u'$101,299.98' + + You can also pass format_type='name' to use long display names. The order of + the number and currency name, along with the correct localized plural form + of the currency name, is chosen according to locale: + + >>> format_currency(1, 'USD', locale='en_US', format_type='name') + u'1.00 US dollar' + >>> format_currency(1099.98, 'USD', locale='en_US', format_type='name') + u'1,099.98 US dollars' + >>> format_currency(1099.98, 'USD', locale='ee', format_type='name') + u'us ga dollar 1,099.98' + + By default the locale is allowed to truncate and round a high-precision + number by forcing its format pattern onto the decimal part. You can bypass + this behavior with the `decimal_quantization` parameter: + + >>> format_currency(1099.9876, 'USD', locale='en_US') + u'$1,099.99' + >>> format_currency(1099.9876, 'USD', locale='en_US', decimal_quantization=False) + u'$1,099.9876' + :param number: the number to format :param currency: the currency code :param format: the format string to use :param locale: the `Locale` object or locale identifier - :param currency_digits: use the currency's number of decimal digits + :param currency_digits: use the currency's natural number of decimal digits :param format_type: the currency format type to use + :param decimal_quantization: Truncate and round high-precision numbers to + the format pattern. Defaults to `True`. + :param group_separator: Boolean to switch group separator on/off in a locale's + number format. 
+ """ + if format_type == 'name': + return _format_currency_long_name(number, currency, format=format, + locale=locale, currency_digits=currency_digits, + decimal_quantization=decimal_quantization, group_separator=group_separator) locale = Locale.parse(locale) if format: pattern = parse_pattern(format) @@ -412,17 +527,52 @@ def format_currency(number, currency, format=None, locale=LC_NUMERIC, try: pattern = locale.currency_formats[format_type] except KeyError: - raise UnknownCurrencyFormatError("%r is not a known currency format" - " type" % format_type) - if currency_digits: - precision = get_currency_precision(currency) - frac = (precision, precision) + raise UnknownCurrencyFormatError( + "%r is not a known currency format type" % format_type) + + return pattern.apply( + number, locale, currency=currency, currency_digits=currency_digits, + decimal_quantization=decimal_quantization, group_separator=group_separator) + + +def _format_currency_long_name( + number, currency, format=None, locale=LC_NUMERIC, currency_digits=True, + format_type='standard', decimal_quantization=True, group_separator=True): + # Algorithm described here: + # https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies + locale = Locale.parse(locale) + # Step 1. + # There are no examples of items with explicit count (0 or 1) in current + # locale data. So there is no point implementing that. + # Step 2. + + # Correct number to numeric type, important for looking up plural rules: + if isinstance(number, string_types): + number_n = float(number) else: - frac = None - return pattern.apply(number, locale, currency=currency, force_frac=frac) + number_n = number + + # Step 3. + unit_pattern = get_currency_unit_pattern(currency, count=number_n, locale=locale) + + # Step 4. + display_name = get_currency_name(currency, count=number_n, locale=locale) + # Step 5. 
+ if not format: + format = locale.decimal_formats.get(format) -def format_percent(number, format=None, locale=LC_NUMERIC): + pattern = parse_pattern(format) + + number_part = pattern.apply( + number, locale, currency=currency, currency_digits=currency_digits, + decimal_quantization=decimal_quantization, group_separator=group_separator) + + return unit_pattern.format(number_part, display_name) + + +def format_percent( + number, format=None, locale=LC_NUMERIC, decimal_quantization=True, group_separator=True): """Return formatted percent value for a specific locale. >>> format_percent(0.34, locale='en_US') @@ -437,18 +587,39 @@ def format_percent(number, format=None, locale=LC_NUMERIC): >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US') u'25,123\u2030' + By default the locale is allowed to truncate and round a high-precision + number by forcing its format pattern onto the decimal part. You can bypass + this behavior with the `decimal_quantization` parameter: + + >>> format_percent(23.9876, locale='en_US') + u'2,399%' + >>> format_percent(23.9876, locale='en_US', decimal_quantization=False) + u'2,398.76%' + + >>> format_percent(229291.1234, locale='pt_BR', group_separator=False) + u'22929112%' + + >>> format_percent(229291.1234, locale='pt_BR', group_separator=True) + u'22.929.112%' + :param number: the percent number to format :param format: :param locale: the `Locale` object or locale identifier + :param decimal_quantization: Truncate and round high-precision numbers to + the format pattern. Defaults to `True`. + :param group_separator: Boolean to switch group separator on/off in a locale's + number format. 
""" locale = Locale.parse(locale) if not format: format = locale.percent_formats.get(format) pattern = parse_pattern(format) - return pattern.apply(number, locale) + return pattern.apply( + number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator) -def format_scientific(number, format=None, locale=LC_NUMERIC): +def format_scientific( + number, format=None, locale=LC_NUMERIC, decimal_quantization=True): """Return value formatted in scientific notation for a specific locale. >>> format_scientific(10000, locale='en_US') @@ -456,23 +627,40 @@ def format_scientific(number, format=None, locale=LC_NUMERIC): The format pattern can also be specified explicitly: - >>> format_scientific(1234567, u'##0E00', locale='en_US') + >>> format_scientific(1234567, u'##0.##E00', locale='en_US') u'1.23E06' + By default the locale is allowed to truncate and round a high-precision + number by forcing its format pattern onto the decimal part. You can bypass + this behavior with the `decimal_quantization` parameter: + + >>> format_scientific(1234.9876, u'#.##E0', locale='en_US') + u'1.23E3' + >>> format_scientific(1234.9876, u'#.##E0', locale='en_US', decimal_quantization=False) + u'1.2349876E3' + :param number: the number to format :param format: :param locale: the `Locale` object or locale identifier + :param decimal_quantization: Truncate and round high-precision numbers to + the format pattern. Defaults to `True`. 
""" locale = Locale.parse(locale) if not format: format = locale.scientific_formats.get(format) pattern = parse_pattern(format) - return pattern.apply(number, locale) + return pattern.apply( + number, locale, decimal_quantization=decimal_quantization) class NumberFormatError(ValueError): """Exception raised when a string cannot be parsed into a number.""" + def __init__(self, message, suggestions=None): + super(NumberFormatError, self).__init__(message) + #: a list of properly formatted numbers derived from the invalid input + self.suggestions = suggestions + def parse_number(string, locale=LC_NUMERIC): """Parse localized number string into an integer. @@ -500,13 +688,15 @@ def parse_number(string, locale=LC_NUMERIC): raise NumberFormatError('%r is not a valid number' % string) -def parse_decimal(string, locale=LC_NUMERIC): +def parse_decimal(string, locale=LC_NUMERIC, strict=False): """Parse localized decimal string into a decimal. >>> parse_decimal('1,099.98', locale='en_US') Decimal('1099.98') >>> parse_decimal('1.099,98', locale='de') Decimal('1099.98') + >>> parse_decimal('12 345,123', locale='ru') + Decimal('12345.123') When the given string cannot be parsed, an exception is raised: @@ -515,24 +705,74 @@ def parse_decimal(string, locale=LC_NUMERIC): ... NumberFormatError: '2,109,998' is not a valid decimal number + If `strict` is set to `True` and the given string contains a number + formatted in an irregular way, an exception is raised: + + >>> parse_decimal('30.00', locale='de', strict=True) + Traceback (most recent call last): + ... + NumberFormatError: '30.00' is not a properly formatted decimal number. Did you mean '3.000'? Or maybe '30,00'? + + >>> parse_decimal('0.00', locale='de', strict=True) + Traceback (most recent call last): + ... + NumberFormatError: '0.00' is not a properly formatted decimal number. Did you mean '0'? 
+ :param string: the string to parse :param locale: the `Locale` object or locale identifier + :param strict: controls whether numbers formatted in a weird way are + accepted or rejected :raise NumberFormatError: if the string can not be converted to a decimal number """ locale = Locale.parse(locale) + group_symbol = get_group_symbol(locale) + decimal_symbol = get_decimal_symbol(locale) + + if not strict and ( + group_symbol == u'\xa0' and # if the grouping symbol is U+00A0 NO-BREAK SPACE, + group_symbol not in string and # and the string to be parsed does not contain it, + ' ' in string # but it does contain a space instead, + ): + # ... it's reasonable to assume it is taking the place of the grouping symbol. + string = string.replace(' ', group_symbol) + try: - return decimal.Decimal(string.replace(get_group_symbol(locale), '') - .replace(get_decimal_symbol(locale), '.')) + parsed = decimal.Decimal(string.replace(group_symbol, '') + .replace(decimal_symbol, '.')) except decimal.InvalidOperation: raise NumberFormatError('%r is not a valid decimal number' % string) + if strict and group_symbol in string: + proper = format_decimal(parsed, locale=locale, decimal_quantization=False) + if string != proper and string.rstrip('0') != (proper + decimal_symbol): + try: + parsed_alt = decimal.Decimal(string.replace(decimal_symbol, '') + .replace(group_symbol, '.')) + except decimal.InvalidOperation: + raise NumberFormatError(( + "%r is not a properly formatted decimal number. Did you mean %r?" % + (string, proper) + ), suggestions=[proper]) + else: + proper_alt = format_decimal(parsed_alt, locale=locale, decimal_quantization=False) + if proper_alt == proper: + raise NumberFormatError(( + "%r is not a properly formatted decimal number. Did you mean %r?" % + (string, proper) + ), suggestions=[proper]) + else: + raise NumberFormatError(( + "%r is not a properly formatted decimal number. Did you mean %r? Or maybe %r?" 
% + (string, proper, proper_alt) + ), suggestions=[proper, proper_alt]) + return parsed PREFIX_END = r'[^0-9@#.,]' NUMBER_TOKEN = r'[0-9@#.,E+]' PREFIX_PATTERN = r"(?P(?:'[^']*'|%s)*)" % PREFIX_END -NUMBER_PATTERN = r"(?P%s+)" % NUMBER_TOKEN +NUMBER_PATTERN = r"(?P%s*)" % NUMBER_TOKEN SUFFIX_PATTERN = r"(?P.*)" number_re = re.compile(r"%s%s%s" % (PREFIX_PATTERN, NUMBER_PATTERN, @@ -615,7 +855,6 @@ def parse_precision(p): int_prec = parse_precision(integer) frac_prec = parse_precision(fraction) if exp: - frac_prec = parse_precision(integer + fraction) exp_plus = exp.startswith('+') exp = exp.lstrip('+') exp_prec = parse_precision(exp) @@ -633,6 +872,7 @@ class NumberPattern(object): def __init__(self, pattern, prefix, suffix, grouping, int_prec, frac_prec, exp_prec, exp_plus): + # Metadata of the decomposed parsed pattern. self.pattern = pattern self.prefix = prefix self.suffix = suffix @@ -641,68 +881,154 @@ def __init__(self, pattern, prefix, suffix, grouping, self.frac_prec = frac_prec self.exp_prec = exp_prec self.exp_plus = exp_plus - if '%' in ''.join(self.prefix + self.suffix): - self.scale = 2 - elif u'‰' in ''.join(self.prefix + self.suffix): - self.scale = 3 - else: - self.scale = 0 + self.scale = self.compute_scale() def __repr__(self): return '<%s %r>' % (type(self).__name__, self.pattern) - def apply(self, value, locale, currency=None, force_frac=None): - frac_prec = force_frac or self.frac_prec + def compute_scale(self): + """Return the scaling factor to apply to the number before rendering. + + Auto-set to a factor of 2 or 3 if presence of a ``%`` or ``‰`` sign is + detected in the prefix or suffix of the pattern. Default is to not mess + with the scale at all and keep it to 0. 
+ """ + scale = 0 + if '%' in ''.join(self.prefix + self.suffix): + scale = 2 + elif u'‰' in ''.join(self.prefix + self.suffix): + scale = 3 + return scale + + def scientific_notation_elements(self, value, locale): + """ Returns normalized scientific notation components of a value. + """ + # Normalize value to only have one lead digit. + exp = value.adjusted() + value = value * get_decimal_quantum(exp) + assert value.adjusted() == 0 + + # Shift exponent and value by the minimum number of leading digits + # imposed by the rendering pattern. And always make that number + # greater or equal to 1. + lead_shift = max([1, min(self.int_prec)]) - 1 + exp = exp - lead_shift + value = value * get_decimal_quantum(-lead_shift) + + # Get exponent sign symbol. + exp_sign = '' + if exp < 0: + exp_sign = get_minus_sign_symbol(locale) + elif self.exp_plus: + exp_sign = get_plus_sign_symbol(locale) + + # Normalize exponent value now that we have the sign. + exp = abs(exp) + + return value, exp, exp_sign + + def apply( + self, + value, + locale, + currency=None, + currency_digits=True, + decimal_quantization=True, + force_frac=None, + group_separator=True, + ): + """Renders into a string a number following the defined pattern. + + Forced decimal quantization is active by default so we'll produce a + number string that is strictly following CLDR pattern definitions. + + :param value: The value to format. If this is not a Decimal object, + it will be cast to one. + :type value: decimal.Decimal|float|int + :param locale: The locale to use for formatting. + :type locale: str|babel.core.Locale + :param currency: Which currency, if any, to format as. + :type currency: str|None + :param currency_digits: Whether or not to use the currency's precision. + If false, the pattern's precision is used. 
+ :type currency_digits: bool + :param decimal_quantization: Whether decimal numbers should be forcibly + quantized to produce a formatted output + strictly matching the CLDR definition for + the locale. + :type decimal_quantization: bool + :param force_frac: DEPRECATED - a forced override for `self.frac_prec` + for a single formatting invocation. + :return: Formatted decimal string. + :rtype: str + """ if not isinstance(value, decimal.Decimal): value = decimal.Decimal(str(value)) + value = value.scaleb(self.scale) + + # Separate the absolute value from its sign. is_negative = int(value.is_signed()) - if self.exp_prec: # Scientific notation - exp = value.adjusted() - value = abs(value) - # Minimum number of integer digits - if self.int_prec[0] == self.int_prec[1]: - exp -= self.int_prec[0] - 1 - # Exponent grouping - elif self.int_prec[1]: - exp = int(exp / self.int_prec[1]) * self.int_prec[1] - if exp < 0: - value = value * 10**(-exp) - else: - value = value / 10**exp - exp_sign = '' - if exp < 0: - exp_sign = get_minus_sign_symbol(locale) - elif self.exp_plus: - exp_sign = get_plus_sign_symbol(locale) - exp = abs(exp) - number = u'%s%s%s%s' % \ - (self._format_significant(value, frac_prec[0], frac_prec[1]), - get_exponential_symbol(locale), exp_sign, - self._format_int(str(exp), self.exp_prec[0], - self.exp_prec[1], locale)) - elif '@' in self.pattern: # Is it a siginificant digits pattern? - text = self._format_significant(abs(value), + value = abs(value).normalize() + + # Prepare scientific notation metadata. + if self.exp_prec: + value, exp, exp_sign = self.scientific_notation_elements(value, locale) + + # Adjust the precision of the fractional part and force it to the + # currency's if necessary. 
+ if force_frac: + # TODO (3.x?): Remove this parameter + warnings.warn('The force_frac parameter to NumberPattern.apply() is deprecated.', DeprecationWarning) + frac_prec = force_frac + elif currency and currency_digits: + frac_prec = (get_currency_precision(currency), ) * 2 + else: + frac_prec = self.frac_prec + + # Bump decimal precision to the natural precision of the number if it + # exceeds the one we're about to use. This adaptative precision is only + # triggered if the decimal quantization is disabled or if a scientific + # notation pattern has a missing mandatory fractional part (as in the + # default '#E0' pattern). This special case has been extensively + # discussed at https://github.com/python-babel/babel/pull/494#issuecomment-307649969 . + if not decimal_quantization or (self.exp_prec and frac_prec == (0, 0)): + frac_prec = (frac_prec[0], max([frac_prec[1], get_decimal_precision(value)])) + + # Render scientific notation. + if self.exp_prec: + number = ''.join([ + self._quantize_value(value, locale, frac_prec, group_separator), + get_exponential_symbol(locale), + exp_sign, + self._format_int( + str(exp), self.exp_prec[0], self.exp_prec[1], locale)]) + + # Is it a siginificant digits pattern? + elif '@' in self.pattern: + text = self._format_significant(value, self.int_prec[0], self.int_prec[1]) a, sep, b = text.partition(".") number = self._format_int(a, 0, 1000, locale) if sep: number += get_decimal_symbol(locale) + b - else: # A normal number pattern - precision = decimal.Decimal('1.' + '1' * frac_prec[1]) - rounded = value.quantize(precision) - a, sep, b = str(abs(rounded)).partition(".") - number = (self._format_int(a, self.int_prec[0], - self.int_prec[1], locale) + - self._format_frac(b or '0', locale, force_frac)) - retval = u'%s%s%s' % (self.prefix[is_negative], number, - self.suffix[is_negative]) + + # A normal number pattern. 
+ else: + number = self._quantize_value(value, locale, frac_prec, group_separator) + + retval = ''.join([ + self.prefix[is_negative], + number, + self.suffix[is_negative]]) + if u'¤' in retval: retval = retval.replace(u'¤¤¤', get_currency_name(currency, value, locale)) retval = retval.replace(u'¤¤', currency.upper()) retval = retval.replace(u'¤', get_currency_symbol(currency, locale)) + return retval # @@ -757,6 +1083,16 @@ def _format_int(self, value, min, max, locale): gsize = self.grouping[1] return value + ret + def _quantize_value(self, value, locale, frac_prec, group_separator): + quantum = get_decimal_quantum(frac_prec[1]) + rounded = value.quantize(quantum) + a, sep, b = "{:f}".format(rounded).partition(".") + integer_part = a + if group_separator: + integer_part = self._format_int(a, self.int_prec[0], self.int_prec[1], locale) + number = integer_part + self._format_frac(b or '0', locale, frac_prec) + return number + def _format_frac(self, value, locale, force_frac=None): min, max = force_frac or self.frac_prec if len(value) < min: diff --git a/babel/plural.py b/babel/plural.py index a23f8b53f..e705e9b8d 100644 --- a/babel/plural.py +++ b/babel/plural.py @@ -5,7 +5,7 @@ CLDR Plural support. See UTS #35. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ import re @@ -33,7 +33,7 @@ def extract_operands(source): t visible fractional digits in n, without trailing zeros. ====== =============================================================== - .. _`CLDR rules`: http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands + .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands :param source: A real number :type source: int|float|decimal.Decimal @@ -91,7 +91,7 @@ class PluralRule(object): exclusive; for a given numeric value, only one rule should apply (i.e. the condition should only be true for one of the plural rule elements. - .. 
_`CLDR rules`: http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules + .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules """ __slots__ = ('abstract', '_func') @@ -241,7 +241,7 @@ def to_gettext(rule): """ rule = PluralRule.parse(rule) - used_tags = rule.tags | set([_fallback_tag]) + used_tags = rule.tags | {_fallback_tag} _compile = _GettextCompiler().compile _get_index = [tag for tag in _plural_tags if tag in used_tags].index diff --git a/babel/support.py b/babel/support.py index 24bc9aaa1..4be9ed37f 100644 --- a/babel/support.py +++ b/babel/support.py @@ -8,7 +8,7 @@ .. note: the code in this module is not used by Babel itself - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ @@ -79,7 +79,7 @@ def time(self, time=None, format='medium'): return format_time(time, format, tzinfo=self.tzinfo, locale=self.locale) def timedelta(self, delta, granularity='second', threshold=.85, - format='medium', add_direction=False): + format='long', add_direction=False): """Return a time delta according to the rules of the given locale. >>> from datetime import timedelta @@ -165,7 +165,7 @@ class LazyProxy(object): Hello, universe! Hello, world! 
""" - __slots__ = ['_func', '_args', '_kwargs', '_value', '_is_cache_enabled'] + __slots__ = ['_func', '_args', '_kwargs', '_value', '_is_cache_enabled', '_attribute_error'] def __init__(self, func, *args, **kwargs): is_cache_enabled = kwargs.pop('enable_cache', True) @@ -175,11 +175,17 @@ def __init__(self, func, *args, **kwargs): object.__setattr__(self, '_kwargs', kwargs) object.__setattr__(self, '_is_cache_enabled', is_cache_enabled) object.__setattr__(self, '_value', None) + object.__setattr__(self, '_attribute_error', None) @property def value(self): if self._value is None: - value = self._func(*self._args, **self._kwargs) + try: + value = self._func(*self._args, **self._kwargs) + except AttributeError as error: + object.__setattr__(self, '_attribute_error', error) + raise + if not self._is_cache_enabled: return value object.__setattr__(self, '_value', value) @@ -249,6 +255,8 @@ def __delattr__(self, name): delattr(self.value, name) def __getattr__(self, name): + if self._attribute_error is not None: + raise self._attribute_error return getattr(self.value, name) def __setattr__(self, name, value): @@ -343,7 +351,7 @@ def udngettext(self, domain, singular, plural, num): dungettext = udngettext # Most of the downwards code, until it get's included in stdlib, from: - # http://bugs.python.org/file10036/gettext-pgettext.patch + # https://bugs.python.org/file10036/gettext-pgettext.patch # # The encoding of a msgctxt and a msgid in a .mo file is # msgctxt + "\x04" + msgid (gettext version >= 0.15) diff --git a/babel/units.py b/babel/units.py index 1ea5b17cc..07637358c 100644 --- a/babel/units.py +++ b/babel/units.py @@ -26,7 +26,7 @@ def get_unit_name(measurement_unit, length='long', locale=LC_NUMERIC): :param measurement_unit: the code of a measurement unit. 
Known units can be found in the CLDR Unit Validity XML file: - http://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml + https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml :param length: "short", "long" or "narrow" :param locale: the `Locale` object or locale identifier @@ -44,7 +44,7 @@ def _find_unit_pattern(unit_id, locale=LC_NUMERIC): Expand an unit into a qualified form. Known units can be found in the CLDR Unit Validity XML file: - http://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml + https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml >>> _find_unit_pattern("radian", locale="en") 'angle-radian' @@ -75,25 +75,27 @@ def format_unit(value, measurement_unit, length='long', format=None, locale=LC_N u'12 metri' >>> format_unit(15.5, 'length-mile', locale='fi_FI') u'15,5 mailia' - >>> format_unit(1200, 'pressure-inch-hg', locale='nb') - u'1\\xa0200 tommer kvikks\\xf8lv' + >>> format_unit(1200, 'pressure-millimeter-ofhg', locale='nb') + u'1\\xa0200 millimeter kvikks\\xf8lv' + >>> format_unit(270, 'ton', locale='en') + u'270 tons' Number formats may be overridden with the ``format`` parameter. >>> from babel._compat import decimal >>> format_unit(decimal.Decimal("-42.774"), 'temperature-celsius', 'short', format='#.0', locale='fr') - u'-42,8 \\xb0C' + u'-42,8\\u202f\\xb0C' The locale's usual pluralization rules are respected. 
>>> format_unit(1, 'length-meter', locale='ro_RO') u'1 metru' - >>> format_unit(0, 'length-picometer', locale='cy') - u'0 picometr' - >>> format_unit(2, 'length-picometer', locale='cy') - u'2 bicometr' - >>> format_unit(3, 'length-picometer', locale='cy') - u'3 phicometr' + >>> format_unit(0, 'length-mile', locale='cy') + u'0 mi' + >>> format_unit(1, 'length-mile', locale='cy') + u'1 filltir' + >>> format_unit(3, 'length-mile', locale='cy') + u'3 milltir' >>> format_unit(15, 'length-horse', locale='fi') Traceback (most recent call last): @@ -105,7 +107,7 @@ def format_unit(value, measurement_unit, length='long', format=None, locale=LC_N :param value: the value to format. If this is a string, no number formatting will be attempted. :param measurement_unit: the code of a measurement unit. Known units can be found in the CLDR Unit Validity XML file: - http://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml + https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml :param length: "short", "long" or "narrow" :param format: An optional format, as accepted by `format_decimal`. :param locale: the `Locale` object or locale identifier @@ -201,7 +203,7 @@ def format_compound_unit( '150 kilometer per timme' >>> format_compound_unit(150, "kilowatt", denominator_unit="year", locale="fi") - '150 kilowattia vuodessa' + '150 kilowattia / vuosi' >>> format_compound_unit(32.5, "ton", 15, denominator_unit="hour", locale="en") '32.5 tons per 15 hours' @@ -219,7 +221,7 @@ def format_compound_unit( >>> format_compound_unit(format_currency(35, "JPY", locale="de"), denominator_unit="liter", locale="de") '35\\xa0\\xa5 pro Liter' - See http://www.unicode.org/reports/tr35/tr35-general.html#perUnitPatterns + See https://www.unicode.org/reports/tr35/tr35-general.html#perUnitPatterns :param numerator_value: The numerator value. This may be a string, in which case it is considered preformatted and the unit is ignored. 
@@ -271,6 +273,7 @@ def format_compound_unit( else: # Bare denominator formatted_denominator = format_decimal(denominator_value, format=format, locale=locale) - per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, "{0}/{1}") + # TODO: this doesn't support "compound_variations" (or "prefix"), and will fall back to the "x/y" representation + per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, {}).get("compound", "{0}/{1}") return per_pattern.format(formatted_numerator, formatted_denominator) diff --git a/babel/util.py b/babel/util.py index af8c762ec..a8fbac1d9 100644 --- a/babel/util.py +++ b/babel/util.py @@ -5,11 +5,12 @@ Various utility classes and functions. - :copyright: (c) 2013 by the Babel Team. + :copyright: (c) 2013-2021 by the Babel Team. :license: BSD, see LICENSE for more details. """ import codecs +import collections from datetime import timedelta, tzinfo import os import re @@ -67,8 +68,8 @@ def parse_encoding(fp): m = PYTHON_MAGIC_COMMENT_re.match(line1) if not m: try: - import parser - parser.suite(line1.decode('latin-1')) + import ast + ast.parse(line1.decode('latin-1')) except (ImportError, SyntaxError, UnicodeEncodeError): # Either it's a real syntax error, in which case the source is # not valid python source, or line2 is a continuation of line1, @@ -151,6 +152,16 @@ def pathmatch(pattern, filename): >>> pathmatch('**.py', 'templates/index.html') False + >>> pathmatch('./foo/**.py', 'foo/bar/baz.py') + True + >>> pathmatch('./foo/**.py', 'bar/baz.py') + False + + >>> pathmatch('^foo/**.py', 'foo/bar/baz.py') + True + >>> pathmatch('^foo/**.py', 'bar/baz.py') + False + >>> pathmatch('**/templates/*.html', 'templates/index.html') True >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html') @@ -167,7 +178,16 @@ def pathmatch(pattern, filename): '**/': '(?:.+/)*?', '**': '(?:.+/)*?[^/]+', } - buf = [] + + if pattern.startswith('^'): + buf = ['^'] + pattern = pattern[1:] + elif 
pattern.startswith('./'): + buf = ['^'] + pattern = pattern[2:] + else: + buf = [] + for idx, part in enumerate(re.split('([?*]+/?)', pattern)): if idx % 2: buf.append(symbols[part]) @@ -201,77 +221,8 @@ def wraptext(text, width=70, initial_indent='', subsequent_indent=''): return wrapper.wrap(text) -class odict(dict): - """Ordered dict implementation. - - :see: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747 - """ - - def __init__(self, data=None): - dict.__init__(self, data or {}) - self._keys = list(dict.keys(self)) - - def __delitem__(self, key): - dict.__delitem__(self, key) - self._keys.remove(key) - - def __setitem__(self, key, item): - new_key = key not in self - dict.__setitem__(self, key, item) - if new_key: - self._keys.append(key) - - def __iter__(self): - return iter(self._keys) - iterkeys = __iter__ - - def clear(self): - dict.clear(self) - self._keys = [] - - def copy(self): - d = odict() - d.update(self) - return d - - def items(self): - return zip(self._keys, self.values()) - - def iteritems(self): - return izip(self._keys, self.itervalues()) - - def keys(self): - return self._keys[:] - - def pop(self, key, default=missing): - try: - value = dict.pop(self, key) - self._keys.remove(key) - return value - except KeyError as e: - if default == missing: - raise e - else: - return default - - def popitem(self, key): - self._keys.remove(key) - return dict.popitem(key) - - def setdefault(self, key, failobj=None): - dict.setdefault(self, key, failobj) - if key not in self._keys: - self._keys.append(key) - - def update(self, dict): - for (key, val) in dict.items(): - self[key] = val - - def values(self): - return map(self.get, self._keys) - - def itervalues(self): - return imap(self.get, self._keys) +# TODO (Babel 3.x): Remove this re-export +odict = collections.OrderedDict class FixedOffsetTimezone(tzinfo): diff --git a/conftest.py b/conftest.py index 32bd1362a..bd9f2d32d 100644 --- a/conftest.py +++ b/conftest.py @@ -8,4 +8,7 @@ def 
pytest_collect_file(path, parent): if babel_path.common(path) == babel_path: if path.ext == ".py": + # TODO: remove check when dropping support for old Pytest + if hasattr(DoctestModule, "from_parent"): + return DoctestModule.from_parent(parent, fspath=path) return DoctestModule(path, parent) diff --git a/docs/_templates/sidebar-links.html b/docs/_templates/sidebar-links.html index a55b2dd96..71d11b850 100644 --- a/docs/_templates/sidebar-links.html +++ b/docs/_templates/sidebar-links.html @@ -9,7 +9,7 @@

Other Formats

Useful Links

diff --git a/docs/api/dates.rst b/docs/api/dates.rst index 0fa9f1f32..f3e59b63b 100644 --- a/docs/api/dates.rst +++ b/docs/api/dates.rst @@ -9,17 +9,17 @@ Python `datetime`, `date` and `time` objects and work with timezones. Date and Time Formatting ------------------------ -.. autofunction:: format_datetime +.. autofunction:: format_datetime(datetime=None, format='medium', tzinfo=None, locale=default_locale('LC_TIME')) -.. autofunction:: format_date +.. autofunction:: format_date(date=None, format='medium', locale=default_locale('LC_TIME')) -.. autofunction:: format_time +.. autofunction:: format_time(time=None, format='medium', tzinfo=None, locale=default_locale('LC_TIME')) -.. autofunction:: format_timedelta +.. autofunction:: format_timedelta(delta, granularity='second', threshold=.85, add_direction=False, format='long', locale=default_locale('LC_TIME')) -.. autofunction:: format_skeleton +.. autofunction:: format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=default_locale('LC_TIME')) -.. autofunction:: format_interval +.. autofunction:: format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=default_locale('LC_TIME')) Timezone Functionality ---------------------- diff --git a/docs/api/messages/catalog.rst b/docs/api/messages/catalog.rst index 8a905bcd9..8cb6375e3 100644 --- a/docs/api/messages/catalog.rst +++ b/docs/api/messages/catalog.rst @@ -12,6 +12,7 @@ Catalogs .. autoclass:: Catalog :members: + :special-members: __iter__ Messages -------- diff --git a/docs/api/numbers.rst b/docs/api/numbers.rst index 1b21425ee..f9b0833a2 100644 --- a/docs/api/numbers.rst +++ b/docs/api/numbers.rst @@ -30,6 +30,7 @@ Exceptions ---------- .. autoexception:: NumberFormatError + :members: Data Access ----------- @@ -38,6 +39,8 @@ Data Access .. autofunction:: get_currency_symbol +.. autofunction:: get_currency_unit_pattern + .. autofunction:: get_decimal_symbol .. 
autofunction:: get_plus_sign_symbol diff --git a/docs/conf.py b/docs/conf.py index 432387e7c..962792fbd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,16 +44,16 @@ # General information about the project. project = u'Babel' -copyright = u'2017, The Babel Team' +copyright = u'2021, The Babel Team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '2.5' +version = '2.9' # The full version, including alpha/beta/rc tags. -release = '2.5.3' +release = '2.9.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -253,7 +253,7 @@ #texinfo_show_urls = 'footnote' intersphinx_mapping = { - 'http://docs.python.org/2': None, + 'https://docs.python.org/2/': None, } extlinks = { diff --git a/docs/dates.rst b/docs/dates.rst index bb3398abb..44201877e 100644 --- a/docs/dates.rst +++ b/docs/dates.rst @@ -124,7 +124,7 @@ the `Locale Data Markup Language specification`_. The following table is just a relatively brief overview. .. 
_`Locale Data Markup Language specification`: - http://unicode.org/reports/tr35/#Date_Format_Patterns + https://unicode.org/reports/tr35/#Date_Format_Patterns Date Fields ----------- @@ -286,7 +286,7 @@ directly interface with it from within Babel: >>> from datetime import time >>> from babel.dates import get_timezone, UTC - >>> dt = datetime(2007, 04, 01, 15, 30, tzinfo=UTC) + >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=UTC) >>> eastern = get_timezone('US/Eastern') >>> format_datetime(dt, 'H:mm Z', tzinfo=eastern, locale='en_US') u'11:30 -0400' diff --git a/docs/dev.rst b/docs/dev.rst index afc8b12d6..0ff033ff8 100644 --- a/docs/dev.rst +++ b/docs/dev.rst @@ -30,9 +30,8 @@ Python Versions At the moment the following Python versions should be supported: -* Python 2.6 * Python 2.7 -* Python 3.3 and up +* Python 3.4 and up * PyPy tracking 2.7 and 3.2 and up While PyPy does not currently support 3.3, it does support traditional diff --git a/docs/installation.rst b/docs/installation.rst index 0aea3abfe..c1b7ab9fe 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -5,8 +5,7 @@ Installation Babel is distributed as a standard Python package fully set up with all the dependencies it needs. It primarily depends on the excellent `pytz`_ -library for timezone handling. To install it you can use ``easy_install`` -or ``pip``. +library for timezone handling. To install it you can use ``pip``. .. _pytz: http://pytz.sourceforge.net/ @@ -17,17 +16,12 @@ virtualenv Virtualenv is probably what you want to use during development, and if you have shell access to your production machines, you'll probably want to use -it there, too. +it there, too. Use ``pip`` to install it:: -If you are on Mac OS X or Linux, chances are that one of the following two -commands will work for you:: + $ sudo pip install virtualenv - $ sudo easy_install virtualenv - -If you are on Windows and don't have the `easy_install` command, you must -install it first. 
Check the :ref:`windows-easy-install` section for more -information about how to do that. Once you have it installed, run the same -commands as above, but without the `sudo` prefix. +If you're on Windows, run it in a command-prompt window with administrator +privileges, and leave out ``sudo``. Once you have virtualenv installed, just fire up a shell and create your own environment. I usually create a project folder and a `venv` @@ -66,8 +60,6 @@ with root privileges:: $ sudo pip install Babel -If `pip` is not available on your system you can use `easy_install`. - (On Windows systems, run it in a command-prompt window with administrator privileges, and leave out `sudo`.) @@ -80,59 +72,25 @@ use a git checkout. Get the git checkout in a new virtualenv and run in development mode:: - $ git clone http://github.com/python-babel/babel.git + $ git clone https://github.com/python-babel/babel Initialized empty Git repository in ~/dev/babel/.git/ $ cd babel $ virtualenv venv New python executable in venv/bin/python Installing distribute............done. $ . venv/bin/activate + $ pip install pytz $ python setup.py import_cldr $ pip install --editable . ... Finished processing dependencies for Babel -Make sure to not forget about the ``import_cldr`` step because otherwise -you will be missing the locale data. This custom command will download -the most appropriate CLDR release from the official website and convert it -for Babel. +Make sure to not forget about the ``pip install pytz`` and ``import_cldr`` steps +because otherwise you will be missing the locale data. +The custom setup command will download the most appropriate CLDR release from the +official website and convert it for Babel but will not work without ``pytz``. This will pull also in the dependencies and activate the git head as the current version inside the virtualenv. Then all you have to do is run ``git pull origin`` to update to the latest version. 
If the CLDR data changes you will have to re-run ``python setup.py import_cldr``. - -.. _windows-easy-install: - -`pip` and `distribute` on Windows ------------------------------------ - -On Windows, installation of `easy_install` is a little bit trickier, but -still quite easy. The easiest way to do it is to download the -`distribute_setup.py`_ file and run it. The easiest way to run the file -is to open your downloads folder and double-click on the file. - -Next, add the `easy_install` command and other Python scripts to the -command search path, by adding your Python installation's Scripts folder -to the `PATH` environment variable. To do that, right-click on the -"Computer" icon on the Desktop or in the Start menu, and choose "Properties". -Then click on "Advanced System settings" (in Windows XP, click on the -"Advanced" tab instead). Then click on the "Environment variables" button. -Finally, double-click on the "Path" variable in the "System variables" section, -and add the path of your Python interpreter's Scripts folder. Be sure to -delimit it from existing values with a semicolon. Assuming you are using -Python 2.7 on the default path, add the following value:: - - - ;C:\Python27\Scripts - -And you are done! To check that it worked, open the Command Prompt and execute -``easy_install``. If you have User Account Control enabled on Windows Vista or -Windows 7, it should prompt you for administrator privileges. - -Now that you have ``easy_install``, you can use it to install ``pip``:: - - > easy_install pip - - -.. _distribute_setup.py: http://python-distribute.org/distribute_setup.py diff --git a/docs/intro.rst b/docs/intro.rst index db36f32d3..7733f70b1 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -43,7 +43,7 @@ of locale data, such as the localized names of countries, languages, or time-zones, which are frequently needed in web-based applications. 
For these requirements, Babel includes data extracted from the `Common -Locale Data Repository (CLDR) `_, and provides a +Locale Data Repository (CLDR) `_, and provides a number of convenient methods for accessing and using this data. See :ref:`locale-data`, :ref:`date-and-time`, and :ref:`numbers` for more information on this aspect of Babel. diff --git a/docs/license.rst b/docs/license.rst index a619b5746..7c93ab426 100644 --- a/docs/license.rst +++ b/docs/license.rst @@ -19,7 +19,7 @@ Authors General License Definitions --------------------------- -The following section contains the full license texts for Flask and the +The following section contains the full license texts for Babel and the documentation. - "AUTHORS" hereby refers to all the authors listed in the diff --git a/docs/locale.rst b/docs/locale.rst index cf4f6d5c5..425fb776c 100644 --- a/docs/locale.rst +++ b/docs/locale.rst @@ -15,8 +15,8 @@ and you'd like to display the names of those countries in the language the user prefers. Instead of translating all those country names yourself in your application, you can make use of the translations provided by the locale data included with Babel, which is based on the `Common Locale Data Repository -(CLDR) `_ developed and maintained by the `Unicode -Consortium `_. +(CLDR) `_ developed and maintained by the `Unicode +Consortium `_. The ``Locale`` Class diff --git a/docs/messages.rst b/docs/messages.rst index f1f695c24..3ac035607 100644 --- a/docs/messages.rst +++ b/docs/messages.rst @@ -17,13 +17,13 @@ application as subject to localization, by wrapping them in functions such as .. code-block:: python - print _("Hello") + print(_("Hello")) instead of just: .. code-block:: python - print "Hello" + print("Hello") to make the string "Hello" localizable. @@ -32,8 +32,8 @@ used in an application. 
They are commonly stored in PO (Portable Object) and MO (Machine Object) files, the formats of which are defined by the GNU `gettext`_ tools and the GNU `translation project`_. - .. _`gettext`: http://www.gnu.org/software/gettext/ - .. _`translation project`: http://sourceforge.net/projects/translation + .. _`gettext`: https://www.gnu.org/software/gettext/ + .. _`translation project`: https://sourceforge.net/projects/translation/ The general procedure for building message catalogs looks something like this: @@ -81,7 +81,7 @@ extracted from source files can not only depend on the file extension, but needs to be controllable in a precise manner. .. _`Jinja2`: http://jinja.pocoo.org/ -.. _`Genshi`: http://genshi.edgewall.org/ +.. _`Genshi`: https://genshi.edgewall.org/ Babel accepts a configuration file to specify this mapping of files to extraction methods, which is described below. diff --git a/docs/numbers.rst b/docs/numbers.rst index 1443b7cf5..058d79e18 100644 --- a/docs/numbers.rst +++ b/docs/numbers.rst @@ -47,7 +47,7 @@ The syntax for custom number format patterns is described in detail in the the specification. The following table is just a relatively brief overview. .. _`Locale Data Markup Language specification`: - http://unicode.org/reports/tr35/#Number_Format_Patterns + https://unicode.org/reports/tr35/#Number_Format_Patterns +----------+-----------------------------------------------------------------+ | Symbol | Description | @@ -134,8 +134,8 @@ behaves as desired. .. _Decimal: https://docs.python.org/3/library/decimal.html#decimal-objects .. _Context: https://docs.python.org/3/library/decimal.html#context-objects -.. _`UTS #35 section 3.3`: http://www.unicode.org/reports/tr35/tr35-numbers.html#Formatting -.. _cdecimal: https://pypi.python.org/pypi/cdecimal +.. _`UTS #35 section 3.3`: https://www.unicode.org/reports/tr35/tr35-numbers.html#Formatting +.. _cdecimal: https://pypi.org/project/cdecimal/ Parsing Numbers @@ -160,4 +160,21 @@ Examples: ... 
NumberFormatError: '2,109,998' is not a valid decimal number -.. note:: Number parsing is not properly implemented yet +Note: as of version 2.8.0, the ``parse_number`` function has limited +functionality. It can remove group symbols of certain locales from numeric +strings, but may behave unexpectedly until its logic handles more encoding +issues and other special cases. + +Examples: + +.. code-block:: pycon + + >>> parse_number('1,099', locale='en_US') + 1099 + >>> parse_number('1.099.024', locale='de') + 1099024 + >>> parse_number('123' + u'\xa0' + '4567', locale='ru') + 1234567 + >>> parse_number('123 4567', locale='ru') + ... + NumberFormatError: '123 4567' is not a valid number diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py index 4cc6a4b9b..805772a16 100755 --- a/scripts/download_import_cldr.py +++ b/scripts/download_import_cldr.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import contextlib import os @@ -13,9 +13,9 @@ from urllib import urlretrieve -URL = 'http://unicode.org/Public/cldr/29/core.zip' -FILENAME = 'core-29.zip' -FILESUM = '44d117e6e591a8f9655602ff0abdee105df3cabe' +URL = 'http://unicode.org/Public/cldr/37/core.zip' +FILENAME = 'cldr-core-37.zip' +FILESUM = 'ba93f5ba256a61a6f8253397c6c4b1a9b9e77531f013cc7ffa7977b5f7e4da57' BLKSIZE = 131072 @@ -53,7 +53,7 @@ def is_good_file(filename): if not os.path.isfile(filename): log('Local copy \'%s\' not found', filename) return False - h = hashlib.sha1() + h = hashlib.sha256() with open(filename, 'rb') as f: while 1: blk = f.read(BLKSIZE) @@ -75,14 +75,15 @@ def main(): cldr_path = os.path.join(repo, 'cldr', os.path.splitext(FILENAME)[0]) zip_path = os.path.join(cldr_dl_path, FILENAME) changed = False + show_progress = (False if os.environ.get("BABEL_CLDR_NO_DOWNLOAD_PROGRESS") else sys.stdout.isatty()) while not is_good_file(zip_path): log('Downloading \'%s\'', FILENAME) if os.path.isfile(zip_path): os.remove(zip_path) - urlretrieve(URL, zip_path, 
reporthook) + urlretrieve(URL, zip_path, (reporthook if show_progress else None)) changed = True - print + print() common_path = os.path.join(cldr_path, 'common') if changed or not os.path.isdir(common_path): diff --git a/scripts/dump_data.py b/scripts/dump_data.py index 0bb3e9117..ac295b2d7 100755 --- a/scripts/dump_data.py +++ b/scripts/dump_data.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # diff --git a/scripts/dump_global.py b/scripts/dump_global.py index 2970bc2ba..c9e1d3008 100755 --- a/scripts/dump_global.py +++ b/scripts/dump_global.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. 
# diff --git a/scripts/generate_authors.py b/scripts/generate_authors.py new file mode 100644 index 000000000..409f24e36 --- /dev/null +++ b/scripts/generate_authors.py @@ -0,0 +1,40 @@ +from collections import Counter +from subprocess import check_output + +import os + +root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '..')) + + +def get_sorted_authors_list(): + authors = check_output(['git', 'log', '--format=%aN'], cwd=root_path).decode('UTF-8') + counts = Counter(authors.splitlines()) + return [author for (author, count) in counts.most_common()] + + +def get_authors_file_content(): + author_list = '\n'.join('- %s' % a for a in get_sorted_authors_list()) + + return ''' +Babel is written and maintained by the Babel team and various contributors: + +{author_list} + +Babel was previously developed under the Copyright of Edgewall Software. The +following copyright notice holds true for releases before 2013: "Copyright (c) +2007 - 2011 by Edgewall Software" + +In addition to the regular contributions Babel includes a fork of Lennart +Regebro's tzlocal that originally was licensed under the CC0 license. The +original copyright of that project is "Copyright 2013 by Lennart Regebro". +'''.format(author_list=author_list) + + +def write_authors_file(): + content = get_authors_file_content() + with open(os.path.join(root_path, 'AUTHORS'), 'w', encoding='UTF-8') as fp: + fp.write(content) + + +if __name__ == '__main__': + write_authors_file() diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index 7b9e7734b..7876d5208 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. 
# -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -17,6 +17,7 @@ import os import re import sys +import logging try: from xml.etree import cElementTree as ElementTree @@ -62,23 +63,16 @@ def _text(elem): 'timeFormats': 'time_formats' } - -def log(message, *args): - if args: - message = message % args - sys.stderr.write(message + '\r\n') - sys.stderr.flush() - - -def error(message, *args): - log('ERROR: %s' % message, *args) +log = logging.getLogger("import_cldr") def need_conversion(dst_filename, data_dict, source_filename): with open(source_filename, 'rb') as f: blob = f.read(4096) - version = int(re.search(b'version number="\\$Revision: (\\d+)', - blob).group(1)) + version_match = re.search(b'version number="\\$Revision: (\\d+)', blob) + if not version_match: # CLDR 36.0 was shipped without proper revision numbers + return True + version = int(version_match.group(1)) data_dict['_version'] = version if not os.path.isfile(dst_filename): @@ -180,10 +174,19 @@ def main(): '-j', '--json', dest='dump_json', action='store_true', default=False, help='also export debugging JSON dumps of locale data' ) + parser.add_option( + '-q', '--quiet', dest='quiet', action='store_true', default=bool(os.environ.get('BABEL_CLDR_QUIET')), + help='quiesce info/warning messages', + ) options, args = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') + + logging.basicConfig( + level=(logging.ERROR if options.quiet else logging.INFO), + ) + return process_data( srcdir=args[0], destdir=BABEL_PACKAGE_ROOT, @@ -381,14 +384,19 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): territory = '001' # world regions = territory_containment.get(territory, []) - log('Processing %s (Language = %s; Territory = %s)', - 
filename, language, territory) + log.info( + 'Processing %s (Language = %s; Territory = %s)', + filename, language, territory, + ) locale_id = '_'.join(filter(None, [ language, territory != '001' and territory or None ])) + data['locale_id'] = locale_id + data['unsupported_number_systems'] = set() + if locale_id in plural_rules: data['plural_form'] = plural_rules[locale_id] if locale_id in ordinal_rules: @@ -397,7 +405,7 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): data["day_period_rules"] = day_period_rules[locale_id] parse_locale_display_names(data, tree) - + parse_list_patterns(data, tree) parse_dates(data, tree, sup, regions, territory) for calendar in tree.findall('.//calendars/calendar'): @@ -421,15 +429,41 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): parse_percent_formats(data, tree) parse_currency_formats(data, tree) + parse_currency_unit_patterns(data, tree) parse_currency_names(data, tree) parse_unit_patterns(data, tree) parse_date_fields(data, tree) parse_character_order(data, tree) parse_measurement_systems(data, tree) + unsupported_number_systems_string = ', '.join(sorted(data.pop('unsupported_number_systems'))) + if unsupported_number_systems_string: + log.warning('%s: unsupported number systems were ignored: %s' % ( + locale_id, + unsupported_number_systems_string, + )) + write_datafile(data_filename, data, dump_json=dump_json) +def _should_skip_number_elem(data, elem): + """ + Figure out whether the numbering-containing element `elem` is in a currently + non-supported (i.e. currently non-Latin) numbering system. + + :param data: The root data element, for stashing the warning. 
+ :param elem: Element with `numberSystem` key + :return: Boolean + """ + number_system = elem.get('numberSystem', 'latn') + + if number_system != 'latn': + data['unsupported_number_systems'].add(number_system) + return True + + return False + + def _should_skip_elem(elem, type=None, dest=None): """ Check whether the given element should be skipped. @@ -478,30 +512,40 @@ def parse_locale_display_names(data, tree): scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): _import_type_text(scripts, elem) + + +def parse_list_patterns(data, tree): list_patterns = data.setdefault('list_patterns', {}) for listType in tree.findall('.//listPatterns/listPattern'): - if 'type' in listType.attrib: - continue + by_type = list_patterns.setdefault(listType.attrib.get('type', 'standard'), {}) for listPattern in listType.findall('listPatternPart'): - list_patterns[listPattern.attrib['type']] = _text(listPattern) + by_type[listPattern.attrib['type']] = _text(listPattern) def parse_dates(data, tree, sup, regions, territory): week_data = data.setdefault('week_data', {}) supelem = sup.find('.//weekData') for elem in supelem.findall('minDays'): + if _should_skip_elem(elem): + continue territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['min_days'] = int(elem.attrib['count']) for elem in supelem.findall('firstDay'): + if _should_skip_elem(elem): + continue territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['first_day'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendStart'): + if _should_skip_elem(elem): + continue territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['weekend_start'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendEnd'): + if _should_skip_elem(elem): + continue 
territories = elem.attrib['territories'].split() if territory in territories or any([r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] @@ -558,7 +602,7 @@ def parse_calendar_months(data, calendar): for width in ctxt.findall('monthWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) - for elem in width.getiterator(): + for elem in width: if elem.tag == 'month': _import_type_text(widths, elem, int(elem.attrib['type'])) elif elem.tag == 'alias': @@ -576,7 +620,7 @@ def parse_calendar_days(data, calendar): for width in ctxt.findall('dayWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) - for elem in width.getiterator(): + for elem in width: if elem.tag == 'day': _import_type_text(widths, elem, weekdays[elem.attrib['type']]) elif elem.tag == 'alias': @@ -594,7 +638,7 @@ def parse_calendar_quarters(data, calendar): for width in ctxt.findall('quarterWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) - for elem in width.getiterator(): + for elem in width: if elem.tag == 'quarter': _import_type_text(widths, elem, int(elem.attrib['type'])) elif elem.tag == 'alias': @@ -609,7 +653,7 @@ def parse_calendar_eras(data, calendar): for width in calendar.findall('eras/*'): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) - for elem in width.getiterator(): + for elem in width: if elem.tag == 'era': _import_type_text(widths, elem, type=int(elem.attrib.get('type'))) elif elem.tag == 'alias': @@ -636,7 +680,7 @@ def parse_calendar_periods(data, calendar): def parse_calendar_date_formats(data, calendar): date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): - for elem in format.getiterator(): + for elem in format: if elem.tag == 'dateFormatLength': type = elem.attrib.get('type') if _should_skip_elem(elem, type, date_formats): @@ -646,7 +690,7 @@ def 
parse_calendar_date_formats(data, calendar): text_type(elem.findtext('dateFormat/pattern')) ) except ValueError as e: - error(e) + log.error(e) elif elem.tag == 'alias': date_formats = Alias(_translate_alias( ['date_formats'], elem.attrib['path']) @@ -656,7 +700,7 @@ def parse_calendar_date_formats(data, calendar): def parse_calendar_time_formats(data, calendar): time_formats = data.setdefault('time_formats', {}) for format in calendar.findall('timeFormats'): - for elem in format.getiterator(): + for elem in format: if elem.tag == 'timeFormatLength': type = elem.attrib.get('type') if _should_skip_elem(elem, type, time_formats): @@ -666,7 +710,7 @@ def parse_calendar_time_formats(data, calendar): text_type(elem.findtext('timeFormat/pattern')) ) except ValueError as e: - error(e) + log.error(e) elif elem.tag == 'alias': time_formats = Alias(_translate_alias( ['time_formats'], elem.attrib['path']) @@ -677,7 +721,7 @@ def parse_calendar_datetime_skeletons(data, calendar): datetime_formats = data.setdefault('datetime_formats', {}) datetime_skeletons = data.setdefault('datetime_skeletons', {}) for format in calendar.findall('dateTimeFormats'): - for elem in format.getiterator(): + for elem in format: if elem.tag == 'dateTimeFormatLength': type = elem.attrib.get('type') if _should_skip_elem(elem, type, datetime_formats): @@ -685,7 +729,7 @@ def parse_calendar_datetime_skeletons(data, calendar): try: datetime_formats[type] = text_type(elem.findtext('dateTimeFormat/pattern')) except ValueError as e: - error(e) + log.error(e) elif elem.tag == 'alias': datetime_formats = Alias(_translate_alias( ['datetime_formats'], elem.attrib['path']) @@ -699,43 +743,73 @@ def parse_calendar_datetime_skeletons(data, calendar): def parse_number_symbols(data, tree): number_symbols = data.setdefault('number_symbols', {}) - for elem in tree.findall('.//numbers/symbols/*'): - if _should_skip_elem(elem): + for symbol_elem in tree.findall('.//numbers/symbols'): + if _should_skip_number_elem(data, 
symbol_elem): # TODO: Support other number systems continue - number_symbols[elem.tag] = text_type(elem.text) + + for elem in symbol_elem.findall('./*'): + if _should_skip_elem(elem): + continue + number_symbols[elem.tag] = text_type(elem.text) def parse_decimal_formats(data, tree): decimal_formats = data.setdefault('decimal_formats', {}) - for elem in tree.findall('.//decimalFormats/decimalFormatLength'): - type = elem.attrib.get('type') - if _should_skip_elem(elem, type, decimal_formats): - continue - if elem.findall('./alias'): - # TODO map the alias to its target + for df_elem in tree.findall('.//decimalFormats'): + if _should_skip_number_elem(data, df_elem): # TODO: Support other number systems continue - pattern = text_type(elem.findtext('./decimalFormat/pattern')) - decimal_formats[type] = numbers.parse_pattern(pattern) + for elem in df_elem.findall('./decimalFormatLength'): + length_type = elem.attrib.get('type') + if _should_skip_elem(elem, length_type, decimal_formats): + continue + if elem.findall('./alias'): + # TODO map the alias to its target + continue + for pattern_el in elem.findall('./decimalFormat/pattern'): + pattern_type = pattern_el.attrib.get('type') + pattern = numbers.parse_pattern(text_type(pattern_el.text)) + if pattern_type: + # This is a compact decimal format, see: + # https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats + + # These are mapped into a `compact_decimal_formats` dictionary + # with the format {length: {count: {multiplier: pattern}}}. + + # TODO: Add support for formatting them. + compact_decimal_formats = data.setdefault('compact_decimal_formats', {}) + length_map = compact_decimal_formats.setdefault(length_type, {}) + length_count_map = length_map.setdefault(pattern_el.attrib['count'], {}) + length_count_map[pattern_type] = pattern + else: + # Regular decimal format. 
+ decimal_formats[length_type] = pattern def parse_scientific_formats(data, tree): scientific_formats = data.setdefault('scientific_formats', {}) - for elem in tree.findall('.//scientificFormats/scientificFormatLength'): - type = elem.attrib.get('type') - if _should_skip_elem(elem, type, scientific_formats): + for sf_elem in tree.findall('.//scientificFormats'): + if _should_skip_number_elem(data, sf_elem): # TODO: Support other number systems continue - pattern = text_type(elem.findtext('scientificFormat/pattern')) - scientific_formats[type] = numbers.parse_pattern(pattern) + for elem in sf_elem.findall('./scientificFormatLength'): + type = elem.attrib.get('type') + if _should_skip_elem(elem, type, scientific_formats): + continue + pattern = text_type(elem.findtext('scientificFormat/pattern')) + scientific_formats[type] = numbers.parse_pattern(pattern) def parse_percent_formats(data, tree): percent_formats = data.setdefault('percent_formats', {}) - for elem in tree.findall('.//percentFormats/percentFormatLength'): - type = elem.attrib.get('type') - if _should_skip_elem(elem, type, percent_formats): + + for pf_elem in tree.findall('.//percentFormats'): + if _should_skip_number_elem(data, pf_elem): # TODO: Support other number systems continue - pattern = text_type(elem.findtext('percentFormat/pattern')) - percent_formats[type] = numbers.parse_pattern(pattern) + for elem in pf_elem.findall('.//percentFormatLength'): + type = elem.attrib.get('type') + if _should_skip_elem(elem, type, percent_formats): + continue + pattern = text_type(elem.findtext('percentFormat/pattern')) + percent_formats[type] = numbers.parse_pattern(pattern) def parse_currency_names(data, tree): @@ -752,9 +826,11 @@ def parse_currency_names(data, tree): name.attrib['count']] = text_type(name.text) else: currency_names[code] = text_type(name.text) - # TODO: support choice patterns for currency symbol selection - symbol = elem.find('symbol') - if symbol is not None and 'draft' not in symbol.attrib 
and 'choice' not in symbol.attrib: + for symbol in elem.findall('symbol'): + if 'draft' in symbol.attrib or 'choice' in symbol.attrib: # Skip drafts and choice-patterns + continue + if symbol.attrib.get('alt'): # Skip alternate forms + continue currency_symbols[code] = text_type(symbol.text) @@ -781,9 +857,23 @@ def parse_unit_patterns(data, tree): for unit in elem.findall('compoundUnit'): unit_type = unit.attrib['type'] - compound_patterns.setdefault(unit_type, {})[unit_length_type] = ( - _text(unit.find('compoundUnitPattern')) - ) + compound_unit_info = {} + compound_variations = {} + for child in unit: + if child.tag == "unitPrefixPattern": + compound_unit_info['prefix'] = _text(child) + elif child.tag == "compoundUnitPattern": + compound_variations[None] = _text(child) + elif child.tag == "compoundUnitPattern1": + compound_variations[child.attrib.get('count')] = _text(child) + if compound_variations: + compound_variation_values = set(compound_variations.values()) + if len(compound_variation_values) == 1: + # shortcut: if all compound variations are the same, only store one + compound_unit_info['compound'] = next(iter(compound_variation_values)) + else: + compound_unit_info['compound_variations'] = compound_variations + compound_patterns.setdefault(unit_type, {})[unit_length_type] = compound_unit_info def parse_date_fields(data, tree): @@ -799,7 +889,7 @@ def parse_date_fields(data, tree): def parse_interval_formats(data, tree): - # http://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats + # https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats interval_formats = data.setdefault("interval_formats", {}) for elem in tree.findall("dateTimeFormats/intervalFormats/*"): if 'draft' in elem.attrib: @@ -808,7 +898,7 @@ def parse_interval_formats(data, tree): interval_formats[None] = elem.text elif elem.tag == "intervalFormatItem": skel_data = interval_formats.setdefault(elem.attrib["id"], {}) - for item_sub in elem.getchildren(): + for item_sub 
in elem: if item_sub.tag == "greatestDifference": skel_data[item_sub.attrib["id"]] = split_interval_pattern(item_sub.text) else: @@ -817,25 +907,40 @@ def parse_interval_formats(data, tree): def parse_currency_formats(data, tree): currency_formats = data.setdefault('currency_formats', {}) - for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'): - curr_length_type = length_elem.attrib.get('type') - for elem in length_elem.findall('currencyFormat'): - type = elem.attrib.get('type') - if curr_length_type: - # Handle ``, etc. - # TODO(3.x): use nested dicts instead of colon-separated madness - type = '%s:%s' % (type, curr_length_type) - if _should_skip_elem(elem, type, currency_formats): - continue - for child in elem.getiterator(): - if child.tag == 'alias': - currency_formats[type] = Alias( - _translate_alias(['currency_formats', elem.attrib['type']], - child.attrib['path']) - ) - elif child.tag == 'pattern': - pattern = text_type(child.text) - currency_formats[type] = numbers.parse_pattern(pattern) + for currency_format in tree.findall('.//currencyFormats'): + if _should_skip_number_elem(data, currency_format): # TODO: Support other number systems + continue + + for length_elem in currency_format.findall('./currencyFormatLength'): + curr_length_type = length_elem.attrib.get('type') + for elem in length_elem.findall('currencyFormat'): + type = elem.attrib.get('type') + if curr_length_type: + # Handle ``, etc. 
+ # TODO(3.x): use nested dicts instead of colon-separated madness + type = '%s:%s' % (type, curr_length_type) + if _should_skip_elem(elem, type, currency_formats): + continue + for child in elem.iter(): + if child.tag == 'alias': + currency_formats[type] = Alias( + _translate_alias(['currency_formats', elem.attrib['type']], + child.attrib['path']) + ) + elif child.tag == 'pattern': + pattern = text_type(child.text) + currency_formats[type] = numbers.parse_pattern(pattern) + + +def parse_currency_unit_patterns(data, tree): + currency_unit_patterns = data.setdefault('currency_unit_patterns', {}) + for currency_formats_elem in tree.findall('.//currencyFormats'): + if _should_skip_number_elem(data, currency_formats_elem): # TODO: Support other number systems + continue + for unit_pattern_elem in currency_formats_elem.findall('./unitPattern'): + count = unit_pattern_elem.attrib['count'] + pattern = text_type(unit_pattern_elem.text) + currency_unit_patterns[count] = pattern def parse_day_period_rules(tree): diff --git a/scripts/make-release.py b/scripts/make-release.py index dc9bb31e6..245608a73 100755 --- a/scripts/make-release.py +++ b/scripts/make-release.py @@ -23,7 +23,7 @@ def parse_changelog(): with open('CHANGES') as f: lineiter = iter(f) for line in lineiter: - match = re.search('^Version\s+(.*)', line.strip()) + match = re.search(r'^Version\s+(.*)', line.strip()) if match is None: continue version = match.group(1).strip() diff --git a/setup.cfg b/setup.cfg index c2d8f87e9..12585f0d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,14 @@ [aliases] release = sdist bdist_wheel -[pytest] +[tool:pytest] norecursedirs = venv* .* _* scripts {args} doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE ALLOW_UNICODE IGNORE_EXCEPTION_DETAIL +markers = + all_locales: parameterize test with all locales [bdist_wheel] universal = 1 + +[metadata] +license_file = LICENSE diff --git a/setup.py b/setup.py index 146f38dc4..0032a3a05 100755 --- a/setup.py +++ b/setup.py @@ -44,22 
+44,25 @@ def run(self): 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Software Development :: Libraries :: Python Modules', ], + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', packages=['babel', 'babel.messages', 'babel.localtime'], include_package_data=True, install_requires=[ # This version identifier is currently necessary as # pytz otherwise does not install on pip 1.4 or # higher. - 'pytz>=0a', + 'pytz>=2015.7', ], cmdclass={'import_cldr': import_cldr}, diff --git a/tests/conftest.py b/tests/conftest.py index be93b2be7..5b14b1ca7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,9 @@ def os_environ(monkeypatch): def pytest_generate_tests(metafunc): - if hasattr(metafunc.function, "all_locales"): - from babel.localedata import locale_identifiers - metafunc.parametrize("locale", list(locale_identifiers())) + if hasattr(metafunc.function, "pytestmark"): + for mark in metafunc.function.pytestmark: + if mark.name == "all_locales": + from babel.localedata import locale_identifiers + metafunc.parametrize("locale", list(locale_identifiers())) + break diff --git a/tests/messages/data/project/i18n/fi_BUGGY/LC_MESSAGES/messages.po b/tests/messages/data/project/i18n/fi_BUGGY/LC_MESSAGES/messages.po new file mode 100644 index 000000000..0a0745b42 --- /dev/null +++ b/tests/messages/data/project/i18n/fi_BUGGY/LC_MESSAGES/messages.po @@ -0,0 +1,5 @@ +msgid "" +msgstr "" + 
+msgid "bar %(sign)s" +msgstr "tanko %(merkki)s" diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py index 2d2880c38..661999648 100644 --- a/tests/messages/test_catalog.py +++ b/tests/messages/test_catalog.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -15,8 +15,9 @@ import datetime import unittest +from babel._compat import StringIO from babel.dates import format_datetime, UTC -from babel.messages import catalog +from babel.messages import catalog, pofile from babel.util import FixedOffsetTimezone @@ -101,7 +102,7 @@ def test_update_message_changed_to_plural(self): def test_update_message_changed_to_simple(self): cat = catalog.Catalog() - cat.add((u'foo' u'foos'), (u'Voh', u'Vöhs')) + cat.add(u'foo' u'foos', (u'Voh', u'Vöhs')) tmpl = catalog.Catalog() tmpl.add(u'foo') cat.update(tmpl) @@ -475,3 +476,31 @@ def test_datetime_parsing(): assert val2.month == 6 assert val2.day == 28 assert val2.tzinfo is None + + +def test_update_catalog_comments(): + # Based on https://web.archive.org/web/20100710131029/http://babel.edgewall.org/attachment/ticket/163/cat-update-comments.py + + catalog = pofile.read_po(StringIO(''' + # A user comment + #. 
An auto comment + #: main.py:1 + #, fuzzy, python-format + msgid "foo %(name)s" + msgstr "foo %(name)s" + ''')) + + assert all(message.user_comments and message.auto_comments for message in catalog if message.id) + + # NOTE: in the POT file, there are no comments + template = pofile.read_po(StringIO(''' + #: main.py:1 + #, fuzzy, python-format + msgid "bar %(name)s" + msgstr "" + ''')) + + catalog.update(template) + + # Auto comments will be obliterated here + assert all(message.user_comments for message in catalog if message.id) diff --git a/tests/messages/test_checkers.py b/tests/messages/test_checkers.py index e9c34bcb7..49abb51b0 100644 --- a/tests/messages/test_checkers.py +++ b/tests/messages/test_checkers.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2008-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py index 873439de4..ac7f0a642 100644 --- a/tests/messages/test_extract.py +++ b/tests/messages/test_extract.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. 
# @@ -178,7 +178,7 @@ def test_triple_quoted_strings(self): messages = list(extract.extract_python(buf, extract.DEFAULT_KEYWORDS.keys(), [], {})) - self.assertEqual([(1, '_', (u'pylons'), []), + self.assertEqual([(1, '_', u'pylons', []), (2, 'ngettext', (u'elvis', u'elvises', None), []), (3, 'ngettext', (u'elvis', u'elvises', None), [])], messages) @@ -350,7 +350,7 @@ def test_different_signatures(self): self.assertEqual((None, u'hello', u'there'), messages[2][2]) self.assertEqual((None, None), messages[3][2]) self.assertEqual(None, messages[4][2]) - self.assertEqual(('foo'), messages[5][2]) + self.assertEqual('foo', messages[5][2]) def test_utf8_message(self): buf = BytesIO(u""" diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index 20904a350..70580215e 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. 
# @@ -28,21 +28,27 @@ from babel import __version__ as VERSION from babel.dates import format_datetime from babel.messages import frontend, Catalog -from babel.messages.frontend import CommandLineInterface, extract_messages, update_catalog +from babel.messages.frontend import CommandLineInterface, extract_messages, update_catalog, po_file_read_mode from babel.util import LOCALTZ from babel.messages.pofile import read_po, write_po from babel._compat import StringIO - this_dir = os.path.abspath(os.path.dirname(__file__)) +data_dir = os.path.join(this_dir, 'data') +project_dir = os.path.join(data_dir, 'project') +i18n_dir = os.path.join(project_dir, 'i18n') +pot_file = os.path.join(i18n_dir, 'temp.pot') + + +def _po_file(locale): + return os.path.join(i18n_dir, locale, 'LC_MESSAGES', 'messages.po') class CompileCatalogTestCase(unittest.TestCase): def setUp(self): self.olddir = os.getcwd() - self.datadir = os.path.join(this_dir, 'data') - os.chdir(self.datadir) + os.chdir(data_dir) _global_log.threshold = 5 # shut up distutils logging self.dist = Distribution(dict( @@ -71,8 +77,7 @@ class ExtractMessagesTestCase(unittest.TestCase): def setUp(self): self.olddir = os.getcwd() - self.datadir = os.path.join(this_dir, 'data') - os.chdir(self.datadir) + os.chdir(data_dir) _global_log.threshold = 5 # shut up distutils logging self.dist = Distribution(dict( @@ -84,20 +89,13 @@ def setUp(self): self.cmd.initialize_options() def tearDown(self): - pot_file = self._pot_file() if os.path.isfile(pot_file): os.unlink(pot_file) os.chdir(self.olddir) - def _i18n_dir(self): - return os.path.join(self.datadir, 'project', 'i18n') - - def _pot_file(self): - return os.path.join(self._i18n_dir(), 'temp.pot') - def assert_pot_file_exists(self): - assert os.path.isfile(self._pot_file()) + assert os.path.isfile(pot_file) def test_neither_default_nor_custom_keywords(self): self.cmd.output_file = 'dummy' @@ -119,27 +117,27 @@ def test_invalid_file_or_dir_input_path(self): 
self.assertRaises(DistutilsOptionError, self.cmd.finalize_options) def test_input_paths_is_treated_as_list(self): - self.cmd.input_paths = self.datadir - self.cmd.output_file = self._pot_file() + self.cmd.input_paths = data_dir + self.cmd.output_file = pot_file self.cmd.finalize_options() self.cmd.run() - with open(self._pot_file(), 'U') as f: + with open(pot_file, po_file_read_mode) as f: catalog = read_po(f) msg = catalog.get('bar') self.assertEqual(1, len(msg.locations)) self.assertTrue('file1.py' in msg.locations[0][0]) def test_input_paths_handle_spaces_after_comma(self): - self.cmd.input_paths = '%s, %s' % (this_dir, self.datadir) - self.cmd.output_file = self._pot_file() + self.cmd.input_paths = '%s, %s' % (this_dir, data_dir) + self.cmd.output_file = pot_file self.cmd.finalize_options() - self.assertEqual([this_dir, self.datadir], self.cmd.input_paths) + self.assertEqual([this_dir, data_dir], self.cmd.input_paths) def test_input_dirs_is_alias_for_input_paths(self): self.cmd.input_dirs = this_dir - self.cmd.output_file = self._pot_file() + self.cmd.output_file = pot_file self.cmd.finalize_options() # Gets listified in `finalize_options`: assert self.cmd.input_paths == [self.cmd.input_dirs] @@ -147,7 +145,7 @@ def test_input_dirs_is_alias_for_input_paths(self): def test_input_dirs_is_mutually_exclusive_with_input_paths(self): self.cmd.input_dirs = this_dir self.cmd.input_paths = this_dir - self.cmd.output_file = self._pot_file() + self.cmd.output_file = pot_file self.assertRaises(DistutilsOptionError, self.cmd.finalize_options) @freeze_time("1994-11-11") @@ -204,7 +202,7 @@ def test_extraction_with_default_mapping(self): 'year': time.strftime('%Y'), 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(self._pot_file(), 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -257,7 +255,7 @@ def 
test_extraction_with_mapping_file(self): 'year': time.strftime('%Y'), 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(self._pot_file(), 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -315,7 +313,7 @@ def test_extraction_with_mapping_dict(self): 'year': time.strftime('%Y'), 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(self._pot_file(), 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -346,7 +344,7 @@ def test_extraction_add_location_file(self): msgstr[1] "" """ - with open(self._pot_file(), 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -355,8 +353,7 @@ class InitCatalogTestCase(unittest.TestCase): def setUp(self): self.olddir = os.getcwd() - self.datadir = os.path.join(this_dir, 'data') - os.chdir(self.datadir) + os.chdir(data_dir) _global_log.threshold = 5 # shut up distutils logging self.dist = Distribution(dict( @@ -369,19 +366,12 @@ def setUp(self): def tearDown(self): for dirname in ['en_US', 'ja_JP', 'lv_LV']: - locale_dir = os.path.join(self._i18n_dir(), dirname) + locale_dir = os.path.join(i18n_dir, dirname) if os.path.isdir(locale_dir): shutil.rmtree(locale_dir) os.chdir(self.olddir) - def _i18n_dir(self): - return os.path.join(self.datadir, 'project', 'i18n') - - def _po_file(self, locale): - return os.path.join(self._i18n_dir(), locale, 'LC_MESSAGES', - 'messages.po') - def test_no_input_file(self): self.cmd.locale = 'en_US' self.cmd.output_file = 'dummy' @@ -401,7 +391,7 @@ def test_with_output_dir(self): self.cmd.finalize_options() self.cmd.run() - po_file = self._po_file('en_US') + po_file = _po_file('en_US') assert os.path.isfile(po_file) 
expected_content = r"""# English (United States) translations for TestProject. @@ -440,7 +430,7 @@ def test_with_output_dir(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -453,7 +443,7 @@ def test_keeps_catalog_non_fuzzy(self): self.cmd.finalize_options() self.cmd.run() - po_file = self._po_file('en_US') + po_file = _po_file('en_US') assert os.path.isfile(po_file) expected_content = r"""# English (United States) translations for TestProject. @@ -492,7 +482,7 @@ def test_keeps_catalog_non_fuzzy(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -505,7 +495,7 @@ def test_correct_init_more_than_2_plurals(self): self.cmd.finalize_options() self.cmd.run() - po_file = self._po_file('lv_LV') + po_file = _po_file('lv_LV') assert os.path.isfile(po_file) expected_content = r"""# Latvian (Latvia) translations for TestProject. @@ -546,7 +536,7 @@ def test_correct_init_more_than_2_plurals(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -559,7 +549,7 @@ def test_correct_init_singular_plural_forms(self): self.cmd.finalize_options() self.cmd.run() - po_file = self._po_file('ja_JP') + po_file = _po_file('ja_JP') assert os.path.isfile(po_file) expected_content = r"""# Japanese (Japan) translations for TestProject. 
@@ -597,7 +587,7 @@ def test_correct_init_singular_plural_forms(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='ja_JP')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -619,7 +609,7 @@ def test_supports_no_wrap(self): self.cmd.finalize_options() self.cmd.run() - po_file = self._po_file('en_US') + po_file = _po_file('en_US') assert os.path.isfile(po_file) expected_content = r"""# English (United States) translations for TestProject. # Copyright (C) 2007 FooBar, Inc. @@ -658,7 +648,7 @@ def test_supports_no_wrap(self): 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en_US'), 'long_message': long_message} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -679,7 +669,7 @@ def test_supports_width(self): self.cmd.finalize_options() self.cmd.run() - po_file = self._po_file('en_US') + po_file = _po_file('en_US') assert os.path.isfile(po_file) expected_content = r"""# English (United States) translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
@@ -718,7 +708,7 @@ def test_supports_width(self): 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en_US'), 'long_message': long_message} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -726,7 +716,7 @@ def test_supports_width(self): class CommandLineInterfaceTestCase(unittest.TestCase): def setUp(self): - self.datadir = os.path.join(this_dir, 'data') + data_dir = os.path.join(this_dir, 'data') self.orig_working_dir = os.getcwd() self.orig_argv = sys.argv self.orig_stdout = sys.stdout @@ -734,7 +724,7 @@ def setUp(self): sys.argv = ['pybabel'] sys.stdout = StringIO() sys.stderr = StringIO() - os.chdir(self.datadir) + os.chdir(data_dir) self._remove_log_handlers() self.cli = frontend.CommandLineInterface() @@ -745,7 +735,7 @@ def tearDown(self): sys.stdout = self.orig_stdout sys.stderr = self.orig_stderr for dirname in ['lv_LV', 'ja_JP']: - locale_dir = os.path.join(self._i18n_dir(), dirname) + locale_dir = os.path.join(i18n_dir, dirname) if os.path.isdir(locale_dir): shutil.rmtree(locale_dir) self._remove_log_handlers() @@ -772,8 +762,8 @@ def test_usage(self): """, sys.stderr.getvalue().lower()) def _run_init_catalog(self): - i18n_dir = os.path.join(self.datadir, 'project', 'i18n') - pot_path = os.path.join(self.datadir, 'project', 'i18n', 'messages.pot') + i18n_dir = os.path.join(data_dir, 'project', 'i18n') + pot_path = os.path.join(data_dir, 'project', 'i18n', 'messages.pot') init_argv = sys.argv + ['init', '--locale', 'en_US', '-d', i18n_dir, '-i', pot_path] self.cli.run(init_argv) @@ -821,15 +811,11 @@ def test_help(self): update update existing message catalogs from a pot file """, sys.stdout.getvalue().lower()) - def _pot_file(self): - return os.path.join(self._i18n_dir(), 'temp.pot') - def assert_pot_file_exists(self): - assert os.path.isfile(self._pot_file()) + assert os.path.isfile(pot_file) 
@freeze_time("1994-11-11") def test_extract_with_default_mapping(self): - pot_file = self._pot_file() self.cli.run(sys.argv + ['extract', '--copyright-holder', 'FooBar, Inc.', '--project', 'TestProject', '--version', '0.1', @@ -879,18 +865,17 @@ def test_extract_with_default_mapping(self): 'year': time.strftime('%Y'), 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(pot_file, 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @freeze_time("1994-11-11") def test_extract_with_mapping_file(self): - pot_file = self._pot_file() self.cli.run(sys.argv + ['extract', '--copyright-holder', 'FooBar, Inc.', '--project', 'TestProject', '--version', '0.1', '--msgid-bugs-address', 'bugs.address@email.tld', - '--mapping', os.path.join(self.datadir, 'mapping.cfg'), + '--mapping', os.path.join(data_dir, 'mapping.cfg'), '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', '-o', pot_file, 'project']) self.assert_pot_file_exists() @@ -930,7 +915,7 @@ def test_extract_with_mapping_file(self): 'year': time.strftime('%Y'), 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(pot_file, 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @@ -939,13 +924,12 @@ def test_extract_with_exact_file(self): """Tests that we can call extract with a particular file and only strings from that file get extracted. 
(Note the absence of strings from file1.py) """ - pot_file = self._pot_file() - file_to_extract = os.path.join(self.datadir, 'project', 'file2.py') + file_to_extract = os.path.join(data_dir, 'project', 'file2.py') self.cli.run(sys.argv + ['extract', '--copyright-holder', 'FooBar, Inc.', '--project', 'TestProject', '--version', '0.1', '--msgid-bugs-address', 'bugs.address@email.tld', - '--mapping', os.path.join(self.datadir, 'mapping.cfg'), + '--mapping', os.path.join(data_dir, 'mapping.cfg'), '-c', 'TRANSLATOR', '-c', 'TRANSLATORS:', '-o', pot_file, file_to_extract]) self.assert_pot_file_exists() @@ -979,17 +963,17 @@ def test_extract_with_exact_file(self): 'year': time.strftime('%Y'), 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(pot_file, 'U') as f: + with open(pot_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @freeze_time("1994-11-11") def test_init_with_output_dir(self): - po_file = self._po_file('en_US') + po_file = _po_file('en_US') self.cli.run(sys.argv + ['init', '--locale', 'en_US', - '-d', os.path.join(self._i18n_dir()), - '-i', os.path.join(self._i18n_dir(), 'messages.pot')]) + '-d', os.path.join(i18n_dir), + '-i', os.path.join(i18n_dir, 'messages.pot')]) assert os.path.isfile(po_file) expected_content = r"""# English (United States) translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
@@ -1027,20 +1011,17 @@ def test_init_with_output_dir(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) - def _i18n_dir(self): - return os.path.join(self.datadir, 'project', 'i18n') - @freeze_time("1994-11-11") def test_init_singular_plural_forms(self): - po_file = self._po_file('ja_JP') + po_file = _po_file('ja_JP') self.cli.run(sys.argv + ['init', '--locale', 'ja_JP', - '-d', os.path.join(self._i18n_dir()), - '-i', os.path.join(self._i18n_dir(), 'messages.pot')]) + '-d', os.path.join(i18n_dir), + '-i', os.path.join(i18n_dir, 'messages.pot')]) assert os.path.isfile(po_file) expected_content = r"""# Japanese (Japan) translations for TestProject. # Copyright (C) 2007 FooBar, Inc. @@ -1077,17 +1058,17 @@ def test_init_singular_plural_forms(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) @freeze_time("1994-11-11") def test_init_more_than_2_plural_forms(self): - po_file = self._po_file('lv_LV') + po_file = _po_file('lv_LV') self.cli.run(sys.argv + ['init', '--locale', 'lv_LV', - '-d', self._i18n_dir(), - '-i', os.path.join(self._i18n_dir(), 'messages.pot')]) + '-d', i18n_dir, + '-i', os.path.join(i18n_dir, 'messages.pot')]) assert os.path.isfile(po_file) expected_content = r"""# Latvian (Latvia) translations for TestProject. # Copyright (C) 2007 FooBar, Inc. 
@@ -1127,28 +1108,28 @@ def test_init_more_than_2_plural_forms(self): """ % {'version': VERSION, 'date': format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')} - with open(po_file, 'U') as f: + with open(po_file, po_file_read_mode) as f: actual_content = f.read() self.assertEqual(expected_content, actual_content) def test_compile_catalog(self): - po_file = self._po_file('de_DE') + po_file = _po_file('de_DE') mo_file = po_file.replace('.po', '.mo') self.cli.run(sys.argv + ['compile', '--locale', 'de_DE', - '-d', self._i18n_dir()]) + '-d', i18n_dir]) assert not os.path.isfile(mo_file), 'Expected no file at %r' % mo_file self.assertEqual("""\ catalog %s is marked as fuzzy, skipping -""" % (po_file), sys.stderr.getvalue()) +""" % po_file, sys.stderr.getvalue()) def test_compile_fuzzy_catalog(self): - po_file = self._po_file('de_DE') + po_file = _po_file('de_DE') mo_file = po_file.replace('.po', '.mo') try: self.cli.run(sys.argv + ['compile', '--locale', 'de_DE', '--use-fuzzy', - '-d', self._i18n_dir()]) + '-d', i18n_dir]) assert os.path.isfile(mo_file) self.assertEqual("""\ compiling catalog %s to %s @@ -1157,17 +1138,13 @@ def test_compile_fuzzy_catalog(self): if os.path.isfile(mo_file): os.unlink(mo_file) - def _po_file(self, locale): - return os.path.join(self._i18n_dir(), locale, 'LC_MESSAGES', - 'messages.po') - def test_compile_catalog_with_more_than_2_plural_forms(self): - po_file = self._po_file('ru_RU') + po_file = _po_file('ru_RU') mo_file = po_file.replace('.po', '.mo') try: self.cli.run(sys.argv + ['compile', '--locale', 'ru_RU', '--use-fuzzy', - '-d', self._i18n_dir()]) + '-d', i18n_dir]) assert os.path.isfile(mo_file) self.assertEqual("""\ compiling catalog %s to %s @@ -1177,16 +1154,14 @@ def test_compile_catalog_with_more_than_2_plural_forms(self): os.unlink(mo_file) def test_compile_catalog_multidomain(self): - po_foo = os.path.join(self._i18n_dir(), 'de_DE', 'LC_MESSAGES', - 'foo.po') - po_bar = 
os.path.join(self._i18n_dir(), 'de_DE', 'LC_MESSAGES', - 'bar.po') + po_foo = os.path.join(i18n_dir, 'de_DE', 'LC_MESSAGES', 'foo.po') + po_bar = os.path.join(i18n_dir, 'de_DE', 'LC_MESSAGES', 'bar.po') mo_foo = po_foo.replace('.po', '.mo') mo_bar = po_bar.replace('.po', '.mo') try: self.cli.run(sys.argv + ['compile', '--locale', 'de_DE', '--domain', 'foo bar', '--use-fuzzy', - '-d', self._i18n_dir()]) + '-d', i18n_dir]) for mo_file in [mo_foo, mo_bar]: assert os.path.isfile(mo_file) self.assertEqual("""\ @@ -1204,10 +1179,10 @@ def test_update(self): template.add("1") template.add("2") template.add("3") - tmpl_file = os.path.join(self._i18n_dir(), 'temp-template.pot') + tmpl_file = os.path.join(i18n_dir, 'temp-template.pot') with open(tmpl_file, "wb") as outfp: write_po(outfp, template) - po_file = os.path.join(self._i18n_dir(), 'temp1.po') + po_file = os.path.join(i18n_dir, 'temp1.po') self.cli.run(sys.argv + ['init', '-l', 'fi', '-o', po_file, @@ -1343,26 +1318,14 @@ def test_extract_keyword_args_384(split, arg_name): "extract -F babel-django.cfg --add-comments Translators: -o django232.pot %s ." 
% kwarg_text ) assert isinstance(cmdinst, extract_messages) - assert set(cmdinst.keywords.keys()) == set(( - '_', - 'dgettext', - 'dngettext', - 'gettext', - 'gettext_lazy', - 'gettext_noop', - 'N_', - 'ngettext', - 'ngettext_lazy', - 'npgettext', - 'npgettext_lazy', - 'pgettext', - 'pgettext_lazy', - 'ugettext', - 'ugettext_lazy', - 'ugettext_noop', - 'ungettext', - 'ungettext_lazy', - )) + assert set(cmdinst.keywords.keys()) == {'_', 'dgettext', 'dngettext', + 'gettext', 'gettext_lazy', + 'gettext_noop', 'N_', 'ngettext', + 'ngettext_lazy', 'npgettext', + 'npgettext_lazy', 'pgettext', + 'pgettext_lazy', 'ugettext', + 'ugettext_lazy', 'ugettext_noop', + 'ungettext', 'ungettext_lazy'} @pytest.mark.parametrize("kwarg,expected", [ @@ -1384,7 +1347,7 @@ def test_extract_distutils_keyword_arg_388(kwarg, expected): assert set(cmdinst.keywords.keys()) == set(expected) # Test the comma-separated comment argument while we're at it: - assert set(cmdinst.add_comments) == set(("Bar", "Foo")) + assert set(cmdinst.add_comments) == {"Bar", "Foo"} def test_update_catalog_boolean_args(): @@ -1420,3 +1383,12 @@ def test_extract_add_location(): assert isinstance(cmdinst, extract_messages) assert cmdinst.add_location == 'never' assert cmdinst.no_location + + +def test_extract_error_code(monkeypatch, capsys): + monkeypatch.chdir(project_dir) + cmdinst = configure_cli_command("compile --domain=messages --directory i18n --locale fi_BUGGY") + assert cmdinst.run() == 1 + out, err = capsys.readouterr() + # replace hack below for py2/py3 compatibility + assert "unknown named placeholder 'merkki'" in err.replace("u'", "'") diff --git a/tests/messages/test_mofile.py b/tests/messages/test_mofile.py index 5fedc600a..fb672a80c 100644 --- a/tests/messages/test_mofile.py +++ b/tests/messages/test_mofile.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. 
# -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -71,3 +71,26 @@ def test_more_plural_forms(self): catalog2.add(('Fuzz', 'Fuzzes'), ('', '', '')) buf = BytesIO() mofile.write_mo(buf, catalog2) + + def test_empty_translation_with_fallback(self): + catalog1 = Catalog(locale='fr_FR') + catalog1.add(u'', '''\ +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n''') + catalog1.add(u'Fuzz', '') + buf1 = BytesIO() + mofile.write_mo(buf1, catalog1) + buf1.seek(0) + catalog2 = Catalog(locale='fr') + catalog2.add(u'', '''\ +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n''') + catalog2.add(u'Fuzz', 'Flou') + buf2 = BytesIO() + mofile.write_mo(buf2, catalog2) + buf2.seek(0) + + translations = Translations(fp=buf1) + translations.add_fallback(Translations(fp=buf2)) + + self.assertEqual(u'Flou', translations.ugettext('Fuzz')) diff --git a/tests/messages/test_normalized_string.py b/tests/messages/test_normalized_string.py new file mode 100644 index 000000000..9c95672b4 --- /dev/null +++ b/tests/messages/test_normalized_string.py @@ -0,0 +1,17 @@ +from babel.messages.pofile import _NormalizedString + + +def test_normalized_string(): + ab1 = _NormalizedString('a', 'b ') + ab2 = _NormalizedString('a', ' b') + ac1 = _NormalizedString('a', 'c') + ac2 = _NormalizedString(' a', 'c ') + z = _NormalizedString() + assert ab1 == ab2 and ac1 == ac2 # __eq__ + assert ab1 < ac1 # __lt__ + assert ac1 > ab2 # __gt__ + assert ac1 >= ac2 # __ge__ + assert ab1 <= ab2 # __le__ + assert ab1 != ac1 # __ne__ + assert not z # __nonzero__ / __bool__ + assert sorted([ab1, ab2, ac1]) # the sort order is not stable so we can't really check it, just that we can sort diff --git a/tests/messages/test_plurals.py 
b/tests/messages/test_plurals.py index 2e7553c16..5e490f374 100644 --- a/tests/messages/test_plurals.py +++ b/tests/messages/test_plurals.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2008-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py index f6cd66dba..be1172a88 100644 --- a/tests/messages/test_pofile.py +++ b/tests/messages/test_pofile.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -20,7 +20,6 @@ from babel.util import FixedOffsetTimezone from babel._compat import StringIO, BytesIO - class ReadPoTestCase(unittest.TestCase): def test_preserve_locale(self): @@ -429,6 +428,63 @@ def test_missing_plural_in_the_middle(self): self.assertEqual("", message.string[1]) self.assertEqual("Vohs [text]", message.string[2]) + def test_abort_invalid_po_file(self): + invalid_po = ''' + msgctxt "" + "{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": " + "270005359}" + msgid "" + "Thank you very much for your time.\n" + "If you have any questions regarding this survey, please contact Fulano " + "at nadie@blah.com" + msgstr "Merci de prendre le temps de remplir le sondage. 
+ Pour toute question, veuillez communiquer avec Fulano à nadie@blah.com + " + ''' + invalid_po_2 = ''' + msgctxt "" + "{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": " + "270005359}" + msgid "" + "Thank you very much for your time.\n" + "If you have any questions regarding this survey, please contact Fulano " + "at fulano@blah.com." + msgstr "Merci de prendre le temps de remplir le sondage. + Pour toute question, veuillez communiquer avec Fulano a fulano@blah.com + " + ''' + # Catalog not created, throws Unicode Error + buf = StringIO(invalid_po) + output = pofile.read_po(buf, locale='fr', abort_invalid=False) + assert isinstance(output, Catalog) + + # Catalog not created, throws PoFileError + buf = StringIO(invalid_po_2) + output = None + with self.assertRaises(pofile.PoFileError) as e: + output = pofile.read_po(buf, locale='fr', abort_invalid=True) + assert not output + + # Catalog is created with warning, no abort + buf = StringIO(invalid_po_2) + output = pofile.read_po(buf, locale='fr', abort_invalid=False) + assert isinstance(output, Catalog) + + # Catalog not created, aborted with PoFileError + buf = StringIO(invalid_po_2) + output = None + with self.assertRaises(pofile.PoFileError) as e: + output = pofile.read_po(buf, locale='fr', abort_invalid=True) + assert not output + + def test_invalid_pofile_with_abort_flag(self): + parser = pofile.PoFileParser(None, abort_invalid=True) + lineno = 10 + line = u'Algo esta mal' + msg = 'invalid file' + with self.assertRaises(pofile.PoFileError) as e: + parser._invalid_pofile(line, lineno, msg) + class WritePoTestCase(unittest.TestCase): @@ -800,3 +856,25 @@ def test_denormalize_on_msgstr_without_empty_first_line(self): self.assertEqual(expected_denormalized, pofile.denormalize(msgstr)) self.assertEqual(expected_denormalized, pofile.denormalize('""\n' + msgstr)) + + +def test_unknown_language_roundtrip(): + buf = StringIO(r''' +msgid "" +msgstr "" +"Language: sr_SP\n"''') + catalog = 
pofile.read_po(buf) + assert catalog.locale_identifier == 'sr_SP' + assert not catalog.locale + buf = BytesIO() + pofile.write_po(buf, catalog) + assert 'sr_SP' in buf.getvalue().decode() + + +def test_unknown_language_write(): + catalog = Catalog(locale='sr_SP') + assert catalog.locale_identifier == 'sr_SP' + assert not catalog.locale + buf = BytesIO() + pofile.write_po(buf, catalog) + assert 'sr_SP' in buf.getvalue().decode() diff --git a/tests/test_core.py b/tests/test_core.py index e3d8faffa..558322e00 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -58,7 +58,8 @@ def test_ignore_invalid_locales_in_lc_ctype(os_environ): def test_get_global(): - assert core.get_global('zone_aliases')['UTC'] == 'Etc/GMT' + assert core.get_global('zone_aliases')['GMT'] == 'Etc/GMT' + assert core.get_global('zone_aliases')['UTC'] == 'Etc/UTC' assert core.get_global('zone_territories')['Europe/Berlin'] == 'DE' @@ -316,3 +317,13 @@ def find_class(self, module, name): with open(filename, 'rb') as f: return Unpickler(f).load() + + +def test_issue_601_no_language_name_but_has_variant(): + # kw_GB has a variant for Finnish but no actual language name for Finnish, + # so `get_display_name()` previously crashed with a TypeError as it attempted + # to concatenate " (Finnish)" to None. + # Instead, it's better to return None altogether, as we can't reliably format + # part of a language name. 
+ + assert Locale.parse('fi_FI').get_display_name('kw_GB') == None diff --git a/tests/test_date_intervals.py b/tests/test_date_intervals.py index e5a797a94..2e5f0d6e2 100644 --- a/tests/test_date_intervals.py +++ b/tests/test_date_intervals.py @@ -13,7 +13,7 @@ def test_format_interval_same_instant_1(): - assert dates.format_interval(TEST_DT, TEST_DT, "yMMMd", fuzzy=False, locale="fi") == "8. tammikuuta 2016" + assert dates.format_interval(TEST_DT, TEST_DT, "yMMMd", fuzzy=False, locale="fi") == "8. tammik. 2016" def test_format_interval_same_instant_2(): diff --git a/tests/test_dates.py b/tests/test_dates.py index f74cd396d..44efa7fbc 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. 
# @@ -15,6 +15,7 @@ from datetime import date, datetime, time, timedelta import unittest +import freezegun import pytest import pytz from pytz import timezone @@ -24,6 +25,23 @@ from babel.util import FixedOffsetTimezone +@pytest.fixture(params=["pytz.timezone", "zoneinfo.ZoneInfo"]) +def timezone_getter(request): + if request.param == "pytz.timezone": + return timezone + elif request.param == "zoneinfo.ZoneInfo": + try: + import zoneinfo + except ImportError: + try: + from backports import zoneinfo + except ImportError: + pytest.skip("zoneinfo not available") + return zoneinfo.ZoneInfo + else: + raise NotImplementedError + + class DateTimeFormatTestCase(unittest.TestCase): def test_quarter_format(self): @@ -79,6 +97,15 @@ def test_week_of_year_last_us_extra_week(self): fmt = dates.DateTimeFormat(d, locale='en_US') self.assertEqual('53', fmt['w']) + def test_week_of_year_de_first_us_last_with_year(self): + d = date(2018,12,31) + fmt = dates.DateTimeFormat(d, locale='de_DE') + self.assertEqual('1', fmt['w']) + self.assertEqual('2019', fmt['YYYY']) + fmt = dates.DateTimeFormat(d, locale='en_US') + self.assertEqual('53', fmt['w']) + self.assertEqual('2018',fmt['yyyy']) + def test_week_of_month_first(self): d = date(2006, 1, 8) fmt = dates.DateTimeFormat(d, locale='de_DE') @@ -134,16 +161,16 @@ def test_local_day_of_week(self): self.assertEqual('7', fmt['e']) # monday is first day of week fmt = dates.DateTimeFormat(d, locale='en_US') self.assertEqual('01', fmt['ee']) # sunday is first day of week - fmt = dates.DateTimeFormat(d, locale='bn_BD') - self.assertEqual('03', fmt['ee']) # friday is first day of week + fmt = dates.DateTimeFormat(d, locale='ar_BH') + self.assertEqual('02', fmt['ee']) # saturday is first day of week d = date(2007, 4, 2) # a monday fmt = dates.DateTimeFormat(d, locale='de_DE') self.assertEqual('1', fmt['e']) # monday is first day of week fmt = dates.DateTimeFormat(d, locale='en_US') self.assertEqual('02', fmt['ee']) # sunday is first day of week - 
fmt = dates.DateTimeFormat(d, locale='bn_BD') - self.assertEqual('04', fmt['ee']) # friday is first day of week + fmt = dates.DateTimeFormat(d, locale='ar_BH') + self.assertEqual('03', fmt['ee']) # saturday is first day of week def test_local_day_of_week_standalone(self): d = date(2007, 4, 1) # a sunday @@ -151,16 +178,16 @@ def test_local_day_of_week_standalone(self): self.assertEqual('7', fmt['c']) # monday is first day of week fmt = dates.DateTimeFormat(d, locale='en_US') self.assertEqual('1', fmt['c']) # sunday is first day of week - fmt = dates.DateTimeFormat(d, locale='bn_BD') - self.assertEqual('3', fmt['c']) # friday is first day of week + fmt = dates.DateTimeFormat(d, locale='ar_BH') + self.assertEqual('2', fmt['c']) # saturday is first day of week d = date(2007, 4, 2) # a monday fmt = dates.DateTimeFormat(d, locale='de_DE') self.assertEqual('1', fmt['c']) # monday is first day of week fmt = dates.DateTimeFormat(d, locale='en_US') self.assertEqual('2', fmt['c']) # sunday is first day of week - fmt = dates.DateTimeFormat(d, locale='bn_BD') - self.assertEqual('4', fmt['c']) # friday is first day of week + fmt = dates.DateTimeFormat(d, locale='ar_BH') + self.assertEqual('3', fmt['c']) # saturday is first day of week def test_pattern_day_of_week(self): dt = datetime(2016, 2, 6) @@ -292,7 +319,7 @@ def test_with_float(self): d = datetime(2012, 4, 1, 15, 30, 29, tzinfo=timezone('UTC')) epoch = float(calendar.timegm(d.timetuple())) formatted_string = dates.format_datetime(epoch, format='long', locale='en_US') - self.assertEqual(u'April 1, 2012 at 3:30:29 PM +0000', formatted_string) + self.assertEqual(u'April 1, 2012 at 3:30:29 PM UTC', formatted_string) def test_timezone_formats(self): dt = datetime(2016, 1, 13, 7, 8, 35) @@ -358,9 +385,9 @@ def test_timezone_formats(self): formatted_string = dates.format_datetime(dt, 'OOOO', locale='en') self.assertEqual(u'GMT+00:00', formatted_string) formatted_string = dates.format_datetime(dt, 'VV', locale='en') - 
self.assertEqual(u'Etc/GMT', formatted_string) + self.assertEqual(u'Etc/UTC', formatted_string) formatted_string = dates.format_datetime(dt, 'VVV', locale='en') - self.assertEqual(u'GMT', formatted_string) + self.assertEqual(u'UTC', formatted_string) formatted_string = dates.format_datetime(dt, 'X', locale='en') self.assertEqual(u'Z', formatted_string) formatted_string = dates.format_datetime(dt, 'XX', locale='en') @@ -430,7 +457,7 @@ def test_with_float(self): d = datetime(2012, 4, 1, 15, 30, 29, tzinfo=timezone('UTC')) epoch = float(calendar.timegm(d.timetuple())) formatted_time = dates.format_time(epoch, format='long', locale='en_US') - self.assertEqual(u'3:30:29 PM +0000', formatted_time) + self.assertEqual(u'3:30:29 PM UTC', formatted_time) def test_with_date_fields_in_pattern(self): self.assertRaises(AttributeError, dates.format_time, date(2007, 4, 1), @@ -511,7 +538,7 @@ def test_can_format_time_with_non_pytz_timezone(self): utc = self._utc() t = datetime(2007, 4, 1, 15, 30, tzinfo=utc) formatted_time = dates.format_time(t, 'long', tzinfo=utc, locale='en') - self.assertEqual('3:30:00 PM +0000', formatted_time) + self.assertEqual('3:30:00 PM UTC', formatted_time) def test_get_period_names(): @@ -574,8 +601,8 @@ def test_get_timezone_gmt(): assert dates.get_timezone_gmt(dt, 'long', locale='fr_FR') == u'UTC-07:00' -def test_get_timezone_location(): - tz = timezone('America/St_Johns') +def test_get_timezone_location(timezone_getter): + tz = timezone_getter('America/St_Johns') assert (dates.get_timezone_location(tz, locale='de_DE') == u"Kanada (St. John\u2019s) Zeit") assert (dates.get_timezone_location(tz, locale='en') == @@ -583,58 +610,90 @@ def test_get_timezone_location(): assert (dates.get_timezone_location(tz, locale='en', return_city=True) == u'St. 
John’s') - tz = timezone('America/Mexico_City') + tz = timezone_getter('America/Mexico_City') assert (dates.get_timezone_location(tz, locale='de_DE') == u'Mexiko (Mexiko-Stadt) Zeit') - tz = timezone('Europe/Berlin') + tz = timezone_getter('Europe/Berlin') assert (dates.get_timezone_location(tz, locale='de_DE') == u'Deutschland (Berlin) Zeit') -def test_get_timezone_name(): - dt = time(15, 30, tzinfo=timezone('America/Los_Angeles')) - assert (dates.get_timezone_name(dt, locale='en_US') == - u'Pacific Standard Time') - assert (dates.get_timezone_name(dt, locale='en_US', return_zone=True) == - u'America/Los_Angeles') - assert dates.get_timezone_name(dt, width='short', locale='en_US') == u'PST' - - tz = timezone('America/Los_Angeles') - assert dates.get_timezone_name(tz, locale='en_US') == u'Pacific Time' - assert dates.get_timezone_name(tz, 'short', locale='en_US') == u'PT' - - tz = timezone('Europe/Berlin') - assert (dates.get_timezone_name(tz, locale='de_DE') == - u'Mitteleurop\xe4ische Zeit') - assert (dates.get_timezone_name(tz, locale='pt_BR') == - u'Hor\xe1rio da Europa Central') - - tz = timezone('America/St_Johns') - assert dates.get_timezone_name(tz, locale='de_DE') == u'Neufundland-Zeit' - - tz = timezone('America/Los_Angeles') - assert dates.get_timezone_name(tz, locale='en', width='short', - zone_variant='generic') == u'PT' - assert dates.get_timezone_name(tz, locale='en', width='short', - zone_variant='standard') == u'PST' - assert dates.get_timezone_name(tz, locale='en', width='short', - zone_variant='daylight') == u'PDT' - assert dates.get_timezone_name(tz, locale='en', width='long', - zone_variant='generic') == u'Pacific Time' - assert dates.get_timezone_name(tz, locale='en', width='long', - zone_variant='standard') == u'Pacific Standard Time' - assert dates.get_timezone_name(tz, locale='en', width='long', - zone_variant='daylight') == u'Pacific Daylight Time' - - localnow = datetime.utcnow().replace(tzinfo=timezone('UTC')).astimezone(dates.LOCALTZ) 
+@pytest.mark.parametrize( + "tzname, params, expected", + [ + ("America/Los_Angeles", {"locale": "en_US"}, u"Pacific Time"), + ("America/Los_Angeles", {"width": "short", "locale": "en_US"}, u"PT"), + ("Europe/Berlin", {"locale": "de_DE"}, u"Mitteleurop\xe4ische Zeit"), + ("Europe/Berlin", {"locale": "pt_BR"}, u"Hor\xe1rio da Europa Central"), + ("America/St_Johns", {"locale": "de_DE"}, u"Neufundland-Zeit"), + ( + "America/Los_Angeles", + {"locale": "en", "width": "short", "zone_variant": "generic"}, + u"PT", + ), + ( + "America/Los_Angeles", + {"locale": "en", "width": "short", "zone_variant": "standard"}, + u"PST", + ), + ( + "America/Los_Angeles", + {"locale": "en", "width": "short", "zone_variant": "daylight"}, + u"PDT", + ), + ( + "America/Los_Angeles", + {"locale": "en", "width": "long", "zone_variant": "generic"}, + u"Pacific Time", + ), + ( + "America/Los_Angeles", + {"locale": "en", "width": "long", "zone_variant": "standard"}, + u"Pacific Standard Time", + ), + ( + "America/Los_Angeles", + {"locale": "en", "width": "long", "zone_variant": "daylight"}, + u"Pacific Daylight Time", + ), + ("Europe/Berlin", {"locale": "en_US"}, u"Central European Time"), + ], +) +def test_get_timezone_name_tzinfo(timezone_getter, tzname, params, expected): + tz = timezone_getter(tzname) + assert dates.get_timezone_name(tz, **params) == expected + + +@pytest.mark.parametrize("timezone_getter", ["pytz.timezone"], indirect=True) +@pytest.mark.parametrize( + "tzname, params, expected", + [ + ("America/Los_Angeles", {"locale": "en_US"}, u"Pacific Standard Time"), + ( + "America/Los_Angeles", + {"locale": "en_US", "return_zone": True}, + u"America/Los_Angeles", + ), + ("America/Los_Angeles", {"width": "short", "locale": "en_US"}, u"PST"), + ], +) +def test_get_timezone_name_time_pytz(timezone_getter, tzname, params, expected): + """pytz (by design) can't determine if the time is in DST or not, + so it will always return Standard time""" + dt = time(15, 30, 
tzinfo=timezone_getter(tzname)) + assert dates.get_timezone_name(dt, **params) == expected + + +def test_get_timezone_name_misc(timezone_getter): + localnow = datetime.utcnow().replace(tzinfo=timezone_getter('UTC')).astimezone(dates.LOCALTZ) assert (dates.get_timezone_name(None, locale='en_US') == dates.get_timezone_name(localnow, locale='en_US')) assert (dates.get_timezone_name('Europe/Berlin', locale='en_US') == "Central European Time") - assert (dates.get_timezone_name(1400000000, locale='en_US', width='short') == "Unknown Region (GMT) Time") - assert (dates.get_timezone_name(time(16, 20), locale='en_US', width='short') == "+0000") + assert (dates.get_timezone_name(1400000000, locale='en_US', width='short') == "Unknown Region (UTC) Time") + assert (dates.get_timezone_name(time(16, 20), locale='en_US', width='short') == "UTC") def test_format_date(): @@ -751,19 +810,10 @@ def test_zh_TW_format(): assert dates.format_time(datetime(2016, 4, 8, 12, 34, 56), locale='zh_TW') == u'\u4e0b\u534812:34:56' -def test_format_current_moment(monkeypatch): - import datetime as datetime_module +def test_format_current_moment(): frozen_instant = datetime.utcnow() - - class frozen_datetime(datetime): - - @classmethod - def utcnow(cls): - return frozen_instant - - # Freeze time! Well, some of it anyway. 
- monkeypatch.setattr(datetime_module, "datetime", frozen_datetime) - assert dates.format_datetime(locale="en_US") == dates.format_datetime(frozen_instant, locale="en_US") + with freezegun.freeze_time(time_to_freeze=frozen_instant): + assert dates.format_datetime(locale="en_US") == dates.format_datetime(frozen_instant, locale="en_US") @pytest.mark.all_locales @@ -783,3 +833,16 @@ def test_no_inherit_metazone_formatting(): assert dates.format_time(t, format='long', locale='en_GB') == "07:00:00 Pacific Standard Time" assert dates.get_timezone_name(t, width='short', locale='en_US') == "PST" assert dates.get_timezone_name(t, width='short', locale='en_GB') == "Pacific Standard Time" + + +def test_russian_week_numbering(): + # See https://github.com/python-babel/babel/issues/485 + v = date(2017, 1, 1) + assert dates.format_date(v, format='YYYY-ww',locale='ru_RU') == '2016-52' # This would have returned 2017-01 prior to CLDR 32 + assert dates.format_date(v, format='YYYY-ww',locale='de_DE') == '2016-52' + + +def test_en_gb_first_weekday(): + assert Locale.parse('en').first_week_day == 0 # Monday in general + assert Locale.parse('en_US').first_week_day == 6 # Sunday in the US + assert Locale.parse('en_GB').first_week_day == 0 # Monday in the UK diff --git a/tests/test_languages.py b/tests/test_languages.py index 594149fa7..32f0d67d5 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -11,4 +11,7 @@ def test_official_languages(): def test_get_language_info(): - assert set(get_territory_language_info("HU").keys()) == set(("hu", "en", "de", "ro", "hr", "sk", "sl")) + assert ( + set(get_territory_language_info("HU")) == + {"hu", "fr", "en", "de", "ro", "hr", "sk", "sl"} + ) diff --git a/tests/test_lists.py b/tests/test_lists.py index f5021ea50..e843a6358 100644 --- a/tests/test_lists.py +++ b/tests/test_lists.py @@ -1,4 +1,6 @@ # coding=utf-8 +import pytest + from babel import lists @@ -9,6 +11,11 @@ def test_format_list(): (['string1', 'string2'], 'en', 
u'string1 and string2'), (['string1', 'string2', 'string3'], 'en', u'string1, string2, and string3'), (['string1', 'string2', 'string3'], 'zh', u'string1、string2和string3'), - (['string1', 'string2', 'string3', 'string4'], 'ne', u'string1 र string2, string3 र string4'), + (['string1', 'string2', 'string3', 'string4'], 'ne', u'string1,string2, string3 र string4'), ]: assert lists.format_list(list, locale=locale) == expected + + +def test_format_list_error(): + with pytest.raises(ValueError): + lists.format_list(['a', 'b', 'c'], style='orange', locale='en') diff --git a/tests/test_localedata.py b/tests/test_localedata.py index 6954d3886..735678f80 100644 --- a/tests/test_localedata.py +++ b/tests/test_localedata.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -11,11 +11,18 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. +import os +import pickle +import sys +import tempfile import unittest import random from operator import methodcaller -from babel import localedata, numbers +import pytest + +from babel import localedata, Locale, UnknownLocaleError + class MergeResolveTestCase(unittest.TestCase): @@ -78,6 +85,7 @@ def test_locale_identification(): for l in localedata.locale_identifiers(): assert localedata.exists(l) + def test_unique_ids(): # Check all locale IDs are uniques. 
all_ids = localedata.locale_identifiers() @@ -93,15 +101,70 @@ def test_mixedcased_locale(): methodcaller(random.choice(['lower', 'upper']))(c) for c in l]) assert localedata.exists(locale_id) + def test_locale_argument_acceptance(): # Testing None input. normalized_locale = localedata.normalize_locale(None) - assert normalized_locale == None + assert normalized_locale is None locale_exist = localedata.exists(None) assert locale_exist == False # # Testing list input. normalized_locale = localedata.normalize_locale(['en_us', None]) - assert normalized_locale == None + assert normalized_locale is None locale_exist = localedata.exists(['en_us', None]) assert locale_exist == False + + +def test_locale_identifiers_cache(monkeypatch): + original_listdir = localedata.os.listdir + listdir_calls = [] + def listdir_spy(*args): + rv = original_listdir(*args) + listdir_calls.append((args, rv)) + return rv + monkeypatch.setattr(localedata.os, 'listdir', listdir_spy) + + # In case we've already run some tests... + if hasattr(localedata.locale_identifiers, 'cache'): + del localedata.locale_identifiers.cache + + assert not listdir_calls + assert localedata.locale_identifiers() + assert len(listdir_calls) == 1 + assert localedata.locale_identifiers() is localedata.locale_identifiers.cache + assert len(listdir_calls) == 1 + localedata.locale_identifiers.cache = None + assert localedata.locale_identifiers() + assert len(listdir_calls) == 2 + + +def test_locale_name_cleanup(): + """ + Test that locale identifiers are cleaned up to avoid directory traversal. 
+ """ + no_exist_name = os.path.join(tempfile.gettempdir(), "babel%d.dat" % random.randint(1, 99999)) + with open(no_exist_name, "wb") as f: + pickle.dump({}, f) + + try: + name = os.path.splitext(os.path.relpath(no_exist_name, localedata._dirname))[0] + except ValueError: + if sys.platform == "win32": + pytest.skip("unable to form relpath") + raise + + assert not localedata.exists(name) + with pytest.raises(IOError): + localedata.load(name) + with pytest.raises(UnknownLocaleError): + Locale(name) + + +@pytest.mark.skipif(sys.platform != "win32", reason="windows-only test") +def test_reserved_locale_names(): + for name in ("con", "aux", "nul", "prn", "com8", "lpt5"): + with pytest.raises(ValueError): + localedata.load(name) + with pytest.raises(ValueError): + Locale(name) diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 5bcd1717d..11e61d37d 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. 
# @@ -16,11 +16,10 @@ from datetime import date -from babel import numbers +from babel import localedata, numbers from babel.numbers import ( - list_currencies, validate_currency, UnknownCurrencyError, is_currency, normalize_currency, get_currency_precision) -from babel.core import Locale -from babel.localedata import locale_identifiers + list_currencies, validate_currency, UnknownCurrencyError, is_currency, normalize_currency, + get_currency_precision, get_decimal_precision, get_currency_unit_pattern) from babel._compat import decimal @@ -124,7 +123,7 @@ def test_scientific_notation(self): self.assertEqual(fmt, '1.2E3') # Exponent grouping fmt = numbers.format_scientific(12345, '##0.####E0', locale='en_US') - self.assertEqual(fmt, '12.345E3') + self.assertEqual(fmt, '1.2345E4') # Minimum number of int digits fmt = numbers.format_scientific(12345, '00.###E0', locale='en_US') self.assertEqual(fmt, '12.345E3') @@ -154,6 +153,36 @@ def test_formatting_of_very_small_decimals(self): fmt = numbers.format_decimal(number, format="@@@", locale='en_US') self.assertEqual('0.000000700', fmt) + def test_group_separator(self): + self.assertEqual('29567.12', numbers.format_decimal(29567.12, + locale='en_US', group_separator=False)) + self.assertEqual('29567,12', numbers.format_decimal(29567.12, + locale='fr_CA', group_separator=False)) + self.assertEqual('29567,12', numbers.format_decimal(29567.12, + locale='pt_BR', group_separator=False)) + self.assertEqual(u'$1099.98', numbers.format_currency(1099.98, 'USD', + locale='en_US', group_separator=False)) + self.assertEqual(u'101299,98\xa0€', numbers.format_currency(101299.98, 'EUR', + locale='fr_CA', group_separator=False)) + self.assertEqual('101299.98 euros', numbers.format_currency(101299.98, 'EUR', + locale='en_US', group_separator=False, format_type='name')) + self.assertEqual(u'25123412\xa0%', numbers.format_percent(251234.1234, locale='sv_SE', group_separator=False)) + + self.assertEqual(u'29,567.12', 
numbers.format_decimal(29567.12, + locale='en_US', group_separator=True)) + self.assertEqual(u'29\u202f567,12', numbers.format_decimal(29567.12, + locale='fr_CA', group_separator=True)) + self.assertEqual(u'29.567,12', numbers.format_decimal(29567.12, + locale='pt_BR', group_separator=True)) + self.assertEqual(u'$1,099.98', numbers.format_currency(1099.98, 'USD', + locale='en_US', group_separator=True)) + self.assertEqual(u'101\u202f299,98\xa0\u20ac', numbers.format_currency(101299.98, 'EUR', + locale='fr_CA', group_separator=True)) + self.assertEqual(u'101,299.98 euros', numbers.format_currency(101299.98, 'EUR', + locale='en_US', group_separator=True, + format_type='name')) + self.assertEqual(u'25\xa0123\xa0412\xa0%', numbers.format_percent(251234.1234, locale='sv_SE', group_separator=True)) + class NumberParsingTestCase(unittest.TestCase): @@ -165,6 +194,36 @@ def test_can_parse_decimals(self): self.assertRaises(numbers.NumberFormatError, lambda: numbers.parse_decimal('2,109,998', locale='de')) + def test_parse_decimal_strict_mode(self): + # Numbers with a misplaced grouping symbol should be rejected + with self.assertRaises(numbers.NumberFormatError) as info: + numbers.parse_decimal('11.11', locale='de', strict=True) + assert info.exception.suggestions == ['1.111', '11,11'] + # Numbers with two misplaced grouping symbols should be rejected + with self.assertRaises(numbers.NumberFormatError) as info: + numbers.parse_decimal('80.00.00', locale='de', strict=True) + assert info.exception.suggestions == ['800.000'] + # Partially grouped numbers should be rejected + with self.assertRaises(numbers.NumberFormatError) as info: + numbers.parse_decimal('2000,000', locale='en_US', strict=True) + assert info.exception.suggestions == ['2,000,000', '2,000'] + # Numbers with duplicate grouping symbols should be rejected + with self.assertRaises(numbers.NumberFormatError) as info: + numbers.parse_decimal('0,,000', locale='en_US', strict=True) + assert info.exception.suggestions 
== ['0'] + # Return only suggestion for 0 on strict + with self.assertRaises(numbers.NumberFormatError) as info: + numbers.parse_decimal('0.00', locale='de', strict=True) + assert info.exception.suggestions == ['0'] + # Properly formatted numbers should be accepted + assert str(numbers.parse_decimal('1.001', locale='de', strict=True)) == '1001' + # Trailing zeroes should be accepted + assert str(numbers.parse_decimal('3.00', locale='en_US', strict=True)) == '3.00' + # Numbers without any grouping symbol should be accepted + assert str(numbers.parse_decimal('2000.1', locale='en_US', strict=True)) == '2000.1' + # High precision numbers should be accepted + assert str(numbers.parse_decimal('5,000001', locale='fr', strict=True)) == '5.000001' + def test_list_currencies(): assert isinstance(list_currencies(), set) @@ -177,10 +236,9 @@ def test_list_currencies(): list_currencies('yo!') assert excinfo.value.args[0] == "expected only letters, got 'yo!'" - assert list_currencies(locale='pa_Arab') == set(['PKR', 'INR', 'EUR']) - assert list_currencies(locale='kok') == set([]) + assert list_currencies(locale='pa_Arab') == {'PKR', 'INR', 'EUR'} - assert len(list_currencies()) == 296 + assert len(list_currencies()) == 303 def test_validate_currency(): @@ -206,13 +264,13 @@ def test_is_currency(): def test_normalize_currency(): assert normalize_currency('EUR') == 'EUR' assert normalize_currency('eUr') == 'EUR' - assert normalize_currency('FUU') == None - assert normalize_currency('') == None - assert normalize_currency(None) == None - assert normalize_currency(' EUR ') == None - assert normalize_currency(' ') == None - assert normalize_currency([]) == None - assert normalize_currency(set()) == None + assert normalize_currency('FUU') is None + assert normalize_currency('') is None + assert normalize_currency(None) is None + assert normalize_currency(' EUR ') is None + assert normalize_currency(' ') is None + assert normalize_currency([]) is None + assert normalize_currency(set()) 
is None def test_get_currency_name(): @@ -229,6 +287,17 @@ def test_get_currency_precision(): assert get_currency_precision('JPY') == 0 +def test_get_currency_unit_pattern(): + assert get_currency_unit_pattern('USD', locale='en_US') == '{0} {1}' + assert get_currency_unit_pattern('USD', locale='es_GT') == '{1} {0}' + + # 'ro' locale various pattern according to count + assert get_currency_unit_pattern('USD', locale='ro', count=1) == '{0} {1}' + assert get_currency_unit_pattern('USD', locale='ro', count=2) == '{0} {1}' + assert get_currency_unit_pattern('USD', locale='ro', count=100) == '{0} de {1}' + assert get_currency_unit_pattern('USD', locale='ro') == '{0} de {1}' + + def test_get_territory_currencies(): assert numbers.get_territory_currencies('AT', date(1995, 1, 1)) == ['ATS'] assert numbers.get_territory_currencies('AT', date(2011, 1, 1)) == ['EUR'] @@ -271,6 +340,12 @@ def test_get_group_symbol(): assert numbers.get_group_symbol('en_US') == u',' +def test_decimal_precision(): + assert get_decimal_precision(decimal.Decimal('0.110')) == 2 + assert get_decimal_precision(decimal.Decimal('1.0')) == 0 + assert get_decimal_precision(decimal.Decimal('10000')) == 0 + + def test_format_number(): assert numbers.format_number(1099, locale='en_US') == u'1,099' assert numbers.format_number(1099, locale='de_DE') == u'1.099' @@ -283,11 +358,52 @@ def test_format_decimal(): assert numbers.format_decimal(1.2345, locale='sv_SE') == u'1,234' assert numbers.format_decimal(1.2345, locale='de') == u'1,234' assert numbers.format_decimal(12345.5, locale='en_US') == u'12,345.5' + assert numbers.format_decimal(0001.2345000, locale='en_US') == u'1.234' + assert numbers.format_decimal(-0001.2346000, locale='en_US') == u'-1.235' + assert numbers.format_decimal(0000000.5, locale='en_US') == u'0.5' + assert numbers.format_decimal(000, locale='en_US') == u'0' + + +@pytest.mark.parametrize('input_value, expected_value', [ + ('10000', '10,000'), + ('1', '1'), + ('1.0', '1'), + ('1.1', '1.1'), 
+ ('1.11', '1.11'), + ('1.110', '1.11'), + ('1.001', '1.001'), + ('1.00100', '1.001'), + ('01.00100', '1.001'), + ('101.00100', '101.001'), + ('00000', '0'), + ('0', '0'), + ('0.0', '0'), + ('0.1', '0.1'), + ('0.11', '0.11'), + ('0.110', '0.11'), + ('0.001', '0.001'), + ('0.00100', '0.001'), + ('00.00100', '0.001'), + ('000.00100', '0.001'), +]) +def test_format_decimal_precision(input_value, expected_value): + # Test precision conservation. + assert numbers.format_decimal( + decimal.Decimal(input_value), locale='en_US', decimal_quantization=False) == expected_value + + +def test_format_decimal_quantization(): + # Test all locales. + for locale_code in localedata.locale_identifiers(): + assert numbers.format_decimal( + '0.9999999999', locale=locale_code, decimal_quantization=False).endswith('9999999999') is True def test_format_currency(): assert (numbers.format_currency(1099.98, 'USD', locale='en_US') == u'$1,099.98') + assert (numbers.format_currency(0, 'USD', locale='en_US') + == u'$0.00') assert (numbers.format_currency(1099.98, 'USD', locale='es_CO') == u'US$\xa01.099,98') assert (numbers.format_currency(1099.98, 'EUR', locale='de_DE') @@ -306,10 +422,16 @@ def test_format_currency_format_type(): assert (numbers.format_currency(1099.98, 'USD', locale='en_US', format_type="standard") == u'$1,099.98') + assert (numbers.format_currency(0, 'USD', locale='en_US', + format_type="standard") + == u'$0.00') assert (numbers.format_currency(1099.98, 'USD', locale='en_US', format_type="accounting") == u'$1,099.98') + assert (numbers.format_currency(0, 'USD', locale='en_US', + format_type="accounting") + == u'$0.00') with pytest.raises(numbers.UnknownCurrencyFormatError) as excinfo: numbers.format_currency(1099.98, 'USD', locale='en_US', @@ -319,7 +441,7 @@ def test_format_currency_format_type(): assert (numbers.format_currency(1099.98, 'JPY', locale='en_US') == u'\xa51,100') assert (numbers.format_currency(1099.98, 'COP', u'#,##0.00', locale='es_ES') - == u'1.100') + == 
u'1.099,98') assert (numbers.format_currency(1099.98, 'JPY', locale='en_US', currency_digits=False) == u'\xa51,099.98') @@ -328,8 +450,90 @@ def test_format_currency_format_type(): == u'1.099,98') +@pytest.mark.parametrize('input_value, expected_value', [ + ('10000', '$10,000.00'), + ('1', '$1.00'), + ('1.0', '$1.00'), + ('1.1', '$1.10'), + ('1.11', '$1.11'), + ('1.110', '$1.11'), + ('1.001', '$1.001'), + ('1.00100', '$1.001'), + ('01.00100', '$1.001'), + ('101.00100', '$101.001'), + ('00000', '$0.00'), + ('0', '$0.00'), + ('0.0', '$0.00'), + ('0.1', '$0.10'), + ('0.11', '$0.11'), + ('0.110', '$0.11'), + ('0.001', '$0.001'), + ('0.00100', '$0.001'), + ('00.00100', '$0.001'), + ('000.00100', '$0.001'), +]) +def test_format_currency_precision(input_value, expected_value): + # Test precision conservation. + assert numbers.format_currency( + decimal.Decimal(input_value), 'USD', locale='en_US', decimal_quantization=False) == expected_value + + +def test_format_currency_quantization(): + # Test all locales. 
+ for locale_code in localedata.locale_identifiers(): + assert numbers.format_currency( + '0.9999999999', 'USD', locale=locale_code, decimal_quantization=False).find('9999999999') > -1 + + +def test_format_currency_long_display_name(): + assert (numbers.format_currency(1099.98, 'USD', locale='en_US', format_type='name') + == u'1,099.98 US dollars') + assert (numbers.format_currency(1.00, 'USD', locale='en_US', format_type='name') + == u'1.00 US dollar') + assert (numbers.format_currency(1.00, 'EUR', locale='en_US', format_type='name') + == u'1.00 euro') + assert (numbers.format_currency(2, 'EUR', locale='en_US', format_type='name') + == u'2.00 euros') + # This tests that '{1} {0}' unitPatterns are found: + assert (numbers.format_currency(1, 'USD', locale='sw', format_type='name') + == u'dola ya Marekani 1.00') + # This tests unicode chars: + assert (numbers.format_currency(1099.98, 'USD', locale='es_GT', format_type='name') + == u'dólares estadounidenses 1,099.98') + # Test for completely unknown currency, should fallback to currency code + assert (numbers.format_currency(1099.98, 'XAB', locale='en_US', format_type='name') + == u'1,099.98 XAB') + + # Test for finding different unit patterns depending on count + assert (numbers.format_currency(1, 'USD', locale='ro', format_type='name') + == u'1,00 dolar american') + assert (numbers.format_currency(2, 'USD', locale='ro', format_type='name') + == u'2,00 dolari americani') + assert (numbers.format_currency(100, 'USD', locale='ro', format_type='name') + == u'100,00 de dolari americani') + + +def test_format_currency_long_display_name_all(): + for locale_code in localedata.locale_identifiers(): + assert numbers.format_currency( + 1, 'USD', locale=locale_code, format_type='name').find('1') > -1 + assert numbers.format_currency( + '1', 'USD', locale=locale_code, format_type='name').find('1') > -1 + + +def test_format_currency_long_display_name_custom_format(): + assert (numbers.format_currency(1099.98, 'USD', 
locale='en_US', + format_type='name', format='##0') + == '1099.98 US dollars') + assert (numbers.format_currency(1099.98, 'USD', locale='en_US', + format_type='name', format='##0', + currency_digits=False) + == '1100 US dollars') + + def test_format_percent(): assert numbers.format_percent(0.34, locale='en_US') == u'34%' + assert numbers.format_percent(0, locale='en_US') == u'0%' assert numbers.format_percent(0.34, u'##0%', locale='en_US') == u'34%' assert numbers.format_percent(34, u'##0', locale='en_US') == u'34' assert numbers.format_percent(25.1234, locale='en_US') == u'2,512%' @@ -339,14 +543,95 @@ def test_format_percent(): == u'25,123\u2030') -def test_scientific_exponent_displayed_as_integer(): - assert numbers.format_scientific(100000, locale='en_US') == u'1E5' +@pytest.mark.parametrize('input_value, expected_value', [ + ('100', '10,000%'), + ('0.01', '1%'), + ('0.010', '1%'), + ('0.011', '1.1%'), + ('0.0111', '1.11%'), + ('0.01110', '1.11%'), + ('0.01001', '1.001%'), + ('0.0100100', '1.001%'), + ('0.010100100', '1.01001%'), + ('0.000000', '0%'), + ('0', '0%'), + ('0.00', '0%'), + ('0.01', '1%'), + ('0.011', '1.1%'), + ('0.0110', '1.1%'), + ('0.0001', '0.01%'), + ('0.000100', '0.01%'), + ('0.0000100', '0.001%'), + ('0.00000100', '0.0001%'), +]) +def test_format_percent_precision(input_value, expected_value): + # Test precision conservation. + assert numbers.format_percent( + decimal.Decimal(input_value), locale='en_US', decimal_quantization=False) == expected_value + + +def test_format_percent_quantization(): + # Test all locales. 
+ for locale_code in localedata.locale_identifiers(): + assert numbers.format_percent( + '0.9999999999', locale=locale_code, decimal_quantization=False).find('99999999') > -1 def test_format_scientific(): assert numbers.format_scientific(10000, locale='en_US') == u'1E4' - assert (numbers.format_scientific(1234567, u'##0E00', locale='en_US') - == u'1.23E06') + assert numbers.format_scientific(4234567, u'#.#E0', locale='en_US') == u'4.2E6' + assert numbers.format_scientific(4234567, u'0E0000', locale='en_US') == u'4.234567E0006' + assert numbers.format_scientific(4234567, u'##0E00', locale='en_US') == u'4.234567E06' + assert numbers.format_scientific(4234567, u'##00E00', locale='en_US') == u'42.34567E05' + assert numbers.format_scientific(4234567, u'0,000E00', locale='en_US') == u'4,234.567E03' + assert numbers.format_scientific(4234567, u'##0.#####E00', locale='en_US') == u'4.23457E06' + assert numbers.format_scientific(4234567, u'##0.##E00', locale='en_US') == u'4.23E06' + assert numbers.format_scientific(42, u'00000.000000E0000', locale='en_US') == u'42000.000000E-0003' + + +def test_default_scientific_format(): + """ Check the scientific format method auto-correct the rendering pattern + in case of a missing fractional part. 
+ """ + assert numbers.format_scientific(12345, locale='en_US') == u'1.2345E4' + assert numbers.format_scientific(12345.678, locale='en_US') == u'1.2345678E4' + assert numbers.format_scientific(12345, u'#E0', locale='en_US') == u'1.2345E4' + assert numbers.format_scientific(12345.678, u'#E0', locale='en_US') == u'1.2345678E4' + + +@pytest.mark.parametrize('input_value, expected_value', [ + ('10000', '1E4'), + ('1', '1E0'), + ('1.0', '1E0'), + ('1.1', '1.1E0'), + ('1.11', '1.11E0'), + ('1.110', '1.11E0'), + ('1.001', '1.001E0'), + ('1.00100', '1.001E0'), + ('01.00100', '1.001E0'), + ('101.00100', '1.01001E2'), + ('00000', '0E0'), + ('0', '0E0'), + ('0.0', '0E0'), + ('0.1', '1E-1'), + ('0.11', '1.1E-1'), + ('0.110', '1.1E-1'), + ('0.001', '1E-3'), + ('0.00100', '1E-3'), + ('00.00100', '1E-3'), + ('000.00100', '1E-3'), +]) +def test_format_scientific_precision(input_value, expected_value): + # Test precision conservation. + assert numbers.format_scientific( + decimal.Decimal(input_value), locale='en_US', decimal_quantization=False) == expected_value + + +def test_format_scientific_quantization(): + # Test all locales. + for locale_code in localedata.locale_identifiers(): + assert numbers.format_scientific( + '0.9999999999', locale=locale_code, decimal_quantization=False).find('999999999') > -1 def test_parse_number(): @@ -419,3 +704,23 @@ def test_numberpattern_repr(): format = u'¤#,##0.00;(¤#,##0.00)' np = numbers.parse_pattern(format) assert repr(format) in repr(np) + + +def test_parse_static_pattern(): + assert numbers.parse_pattern('Kun') # in the So locale in CLDR 30 + # TODO: static patterns might not be correctly `apply()`ed at present + + +def test_parse_decimal_nbsp_heuristics(): + # Re https://github.com/python-babel/babel/issues/637 – + # for locales (of which there are many) that use U+00A0 as the group + # separator in numbers, it's reasonable to assume that input strings + # with plain spaces actually should have U+00A0s instead. 
+ # This heuristic is only applied when strict=False. + n = decimal.Decimal("12345.123") + assert numbers.parse_decimal("12 345.123", locale="fi") == n + assert numbers.parse_decimal(numbers.format_decimal(n, locale="fi"), locale="fi") == n + + +def test_very_small_decimal_no_quantization(): + assert numbers.format_decimal(decimal.Decimal('1E-7'), locale='en', decimal_quantization=False) == '0.0000001' diff --git a/tests/test_plural.py b/tests/test_plural.py index be7414994..bea8115ce 100644 --- a/tests/test_plural.py +++ b/tests/test_plural.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2008-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 3993f608c..23d82aa13 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -34,4 +34,4 @@ def test_smoke_numbers(locale): 10, # Plain old integer 0, # Zero ): - assert numbers.format_number(number, locale=locale) + assert numbers.format_decimal(number, locale=locale) diff --git a/tests/test_support.py b/tests/test_support.py index f2a99b7ba..a683591dc 100644 --- a/tests/test_support.py +++ b/tests/test_support.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. 
# @@ -17,6 +17,7 @@ import tempfile import unittest import pytest +import sys from datetime import date, datetime, timedelta from babel import support @@ -24,6 +25,9 @@ from babel.messages.mofile import write_mo from babel._compat import BytesIO, PY2 +get_arg_spec = (inspect.getargspec if PY2 else inspect.getfullargspec) + +SKIP_LGETTEXT = sys.version_info >= (3, 8) @pytest.mark.usefixtures("os_environ") class TranslationsTestCase(unittest.TestCase): @@ -74,6 +78,7 @@ def test_upgettext(self): self.assertEqualTypeToo(u'VohCTX', self.translations.upgettext('foo', 'foo')) + @pytest.mark.skipif(SKIP_LGETTEXT, reason="lgettext is deprecated") def test_lpgettext(self): self.assertEqualTypeToo(b'Voh', self.translations.lgettext('foo')) self.assertEqualTypeToo(b'VohCTX', self.translations.lpgettext('foo', @@ -103,6 +108,7 @@ def test_unpgettext(self): self.translations.unpgettext('foo', 'foo1', 'foos1', 2)) + @pytest.mark.skipif(SKIP_LGETTEXT, reason="lgettext is deprecated") def test_lnpgettext(self): self.assertEqualTypeToo(b'Voh1', self.translations.lngettext('foo1', 'foos1', 1)) @@ -127,6 +133,7 @@ def test_dupgettext(self): self.assertEqualTypeToo( u'VohCTXD', self.translations.dupgettext('messages1', 'foo', 'foo')) + @pytest.mark.skipif(SKIP_LGETTEXT, reason="lgettext is deprecated") def test_ldpgettext(self): self.assertEqualTypeToo( b'VohD', self.translations.ldgettext('messages1', 'foo')) @@ -157,6 +164,7 @@ def test_dunpgettext(self): u'VohsCTXD1', self.translations.dunpgettext('messages1', 'foo', 'foo1', 'foos1', 2)) + @pytest.mark.skipif(SKIP_LGETTEXT, reason="lgettext is deprecated") def test_ldnpgettext(self): self.assertEqualTypeToo( b'VohD1', self.translations.ldngettext('messages1', 'foo1', 'foos1', 1)) @@ -195,7 +203,11 @@ def setUp(self): self.null_translations = support.NullTranslations(fp=fp) def method_names(self): - return [name for name in dir(self.translations) if 'gettext' in name] + names = [name for name in dir(self.translations) if 'gettext' 
in name] + if SKIP_LGETTEXT: + # Remove deprecated l*gettext functions + names = [name for name in names if not name.startswith('l')] + return names def test_same_methods(self): for name in self.method_names(): @@ -206,9 +218,10 @@ def test_method_signature_compatibility(self): for name in self.method_names(): translations_method = getattr(self.translations, name) null_method = getattr(self.null_translations, name) - signature = inspect.getargspec - self.assertEqual(signature(translations_method), - signature(null_method)) + self.assertEqual( + get_arg_spec(translations_method), + get_arg_spec(null_method), + ) def test_same_return_values(self): data = { @@ -219,8 +232,8 @@ def test_same_return_values(self): for name in self.method_names(): method = getattr(self.translations, name) null_method = getattr(self.null_translations, name) - signature = inspect.getargspec(method) - parameter_names = [name for name in signature[0] if name != 'self'] + signature = get_arg_spec(method) + parameter_names = [name for name in signature.args if name != 'self'] values = [data[name] for name in parameter_names] self.assertEqual(method(*values), null_method(*values)) @@ -276,6 +289,17 @@ def first(xs): self.assertEqual(2, proxy.value) self.assertEqual(1, proxy_deepcopy.value) + def test_handle_attribute_error(self): + + def raise_attribute_error(): + raise AttributeError('message') + + proxy = support.LazyProxy(raise_attribute_error) + with pytest.raises(AttributeError) as exception: + proxy.value + + self.assertEqual('message', str(exception.value)) + def test_format_date(): fmt = support.Format('en_US') @@ -358,4 +382,4 @@ def test_catalog_merge_files(): t2._catalog["bar"] = "quux" t1.merge(t2) assert t1.files == ["pro.mo"] - assert set(t1._catalog.keys()) == set(('', 'foo', 'bar')) + assert set(t1._catalog.keys()) == {'', 'foo', 'bar'} diff --git a/tests/test_util.py b/tests/test_util.py index 4c769bea3..b29278e00 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,9 
+1,9 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007-2011 Edgewall Software +# Copyright (C) 2007-2011 Edgewall Software, 2013-2021 the Babel team # All rights reserved. # -# This software is licensed as described in the file COPYING, which +# This software is licensed as described in the file LICENSE, which # you should have received as part of this distribution. The terms # are also available at http://babel.edgewall.org/wiki/License. # @@ -11,6 +11,7 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. +import __future__ import unittest import pytest @@ -20,6 +21,12 @@ from babel.util import parse_future_flags +class _FF: + division = __future__.division.compiler_flag + print_function = __future__.print_function.compiler_flag + with_statement = __future__.with_statement.compiler_flag + unicode_literals = __future__.unicode_literals.compiler_flag + def test_distinct(): assert list(util.distinct([1, 2, 1, 3, 4, 4])) == [1, 2, 3, 4] assert list(util.distinct('foobar')) == ['f', 'o', 'b', 'a', 'r'] @@ -31,20 +38,11 @@ def test_pathmatch(): assert not util.pathmatch('**.py', 'templates/index.html') assert util.pathmatch('**/templates/*.html', 'templates/index.html') assert not util.pathmatch('**/templates/*.html', 'templates/foo/bar.html') - - -def test_odict_pop(): - odict = util.odict() - odict[0] = 1 - value = odict.pop(0) - assert 1 == value - assert [] == list(odict.items()) - assert odict.pop(2, None) is None - try: - odict.pop(2) - assert False - except KeyError: - assert True + assert util.pathmatch('^foo/**.py', 'foo/bar/baz/blah.py') + assert not util.pathmatch('^foo/**.py', 'blah/foo/bar/baz.py') + assert util.pathmatch('./foo/**.py', 'foo/bar/baz/blah.py') + assert util.pathmatch('./blah.py', 'blah.py') + assert not util.pathmatch('./foo/**.py', 'blah/foo/bar/baz.py') class FixedOffsetTimezoneTestCase(unittest.TestCase): @@ -79,25 +77,25 @@ def 
test_parse_encoding_non_ascii(): from __future__ import print_function, division, with_statement, unicode_literals -''', 0x10000 | 0x2000 | 0x8000 | 0x20000), +''', _FF.print_function | _FF.division | _FF.with_statement | _FF.unicode_literals), (''' from __future__ import print_function, division print('hello') -''', 0x10000 | 0x2000), +''', _FF.print_function | _FF.division), (''' from __future__ import print_function, division, unknown,,,,, print 'hello' -''', 0x10000 | 0x2000), +''', _FF.print_function | _FF.division), (''' from __future__ import ( print_function, division) -''', 0x10000 | 0x2000), +''', _FF.print_function | _FF.division), (''' from __future__ import \\ print_function, \\ division -''', 0x10000 | 0x2000), +''', _FF.print_function | _FF.division), ]) def test_parse_future(source, result): fp = BytesIO(source.encode('latin-1')) diff --git a/tox.ini b/tox.ini index 812561da7..14b450ff8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,15 +1,25 @@ [tox] -envlist = py26, py27, pypy, py33, py34, py26-cdecimal, py27-cdecimal +envlist = + py{36,37,38,39} + pypy3 [testenv] deps = pytest - cdecimal: m3-cdecimal + pytest-cov + freezegun==0.3.12 + backports.zoneinfo;python_version<"3.9" + tzdata;sys_platform == 'win32' whitelist_externals = make -commands = make clean-cldr test +commands = make clean-cldr test-cov +passenv = + BABEL_* + PYTHON_* -[pep8] -ignore = E501,E731,W503 - -[flake8] -ignore = E501,E731,W503 +[gh-actions] +python = + pypy3: pypy3 + 3.6: py36 + 3.7: py37 + 3.8: py38 + 3.9: py39