|
| 1 | +from _locale import (setlocale, LC_ALL, LC_CTYPE, LC_NUMERIC, LC_TIME, localeconv, Error) |
| 2 | +try: |
| 3 | + from _locale import (RADIXCHAR, THOUSEP, nl_langinfo) |
| 4 | +except ImportError: |
| 5 | + nl_langinfo = None |
| 6 | + |
| 7 | +import locale |
| 8 | +import sys |
| 9 | +import unittest |
| 10 | +from platform import uname |
| 11 | + |
| 12 | +from test import support |
| 13 | + |
# Skip the whole module on very old Darwin kernels (release < 8.0.0), where,
# per the skip message below, locale support is broken.
if uname().system == "Darwin":
    # Build the version without binding ``maj``/``min``/``mic`` at module
    # level: the old unpacking shadowed the builtin ``min`` and required the
    # release string to have exactly three dot-separated components.
    _darwin_release = [int(part) for part in uname().release.split(".")]
    # Pad short release strings with zeros so "8.0" compares equal to "8.0.0".
    _darwin_release += [0] * (3 - len(_darwin_release))
    if tuple(_darwin_release[:3]) < (8, 0, 0):
        raise unittest.SkipTest("locale support broken for OS X < 10.4")

# Locale names the tests try to activate; names that cannot be set on the
# current machine are passed over by the individual tests.
candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT',
    'et_EE', 'es_PY', 'no_NO', 'nl_NL', 'lv_LV', 'el_GR', 'be_BY', 'fr_BE',
    'ro_RO', 'ru_UA', 'ru_RU', 'es_VE', 'ca_ES', 'se_NO', 'es_EC', 'id_ID',
    'ka_GE', 'es_CL', 'wa_BE', 'hu_HU', 'lt_LT', 'sl_SI', 'hr_HR', 'es_AR',
    'es_ES', 'oc_FR', 'gl_ES', 'bg_BG', 'is_IS', 'mk_MK', 'de_AT', 'pt_BR',
    'da_DK', 'nn_NO', 'cs_CZ', 'de_LU', 'es_BO', 'sq_AL', 'sk_SK', 'fr_CH',
    'de_DE', 'sr_YU', 'br_FR', 'nl_BE', 'sv_FI', 'pl_PL', 'fr_CA', 'fo_FO',
    'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA',
    'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US',
    'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro',
    'ru_RU.KOI8-R', 'ko_KR.eucKR',
    'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8',
    'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620',
]
| 33 | + |
def setUpModule():
    """Prune ``candidate_locales`` before any test in the module runs.

    On Solaris, locales that cannot be set are dropped silently and locales
    for which ``localeconv()`` raises are dropped with a printed warning.
    On MSVC6 builds, locales with a long encoding suffix are dropped.
    The module-level ``candidate_locales`` list is rebound to the result.
    """
    global candidate_locales

    # Issue #13441: Skip some locales (e.g. cs_CZ and hu_HU) on Solaris to
    # workaround a mbstowcs() bug. For example, on Solaris, the hu_HU locale
    # uses the locale encoding ISO-8859-2, the thousands separator is b'\xA0'
    # and it is decoded as U+30000020 (an invalid character) by mbstowcs().
    if sys.platform == 'sunos5':
        saved_locale = locale.setlocale(locale.LC_ALL)
        try:
            usable = []
            for name in candidate_locales:
                try:
                    locale.setlocale(locale.LC_ALL, name)
                except Error:
                    # Locale not installed here: drop it without a warning.
                    continue
                encoding = locale.getencoding()
                try:
                    localeconv()
                except Exception as err:
                    print("WARNING: Skip locale %s (encoding %s): [%s] %s"
                          % (name, encoding, type(err), err))
                    continue
                usable.append(name)
            candidate_locales = usable
        finally:
            # Always restore the locale that was active on entry.
            locale.setlocale(locale.LC_ALL, saved_locale)

    # Workaround for MSVC6(debug) crash bug
    if "MSC v.1200" in sys.version:
        def rejected(name):
            # Reject "<lang>.<encoding>" names whose encoding part has nine
            # or more characters.
            pieces = name.split(".")
            return len(pieces) == 2 and len(pieces[-1]) >= 9

        candidate_locales = [name for name in candidate_locales
                             if not rejected(name)]
| 67 | + |
# List known locale values to test against when available.
# Dict formatted as ``<locale> : (<decimal_point>, <thousands_sep>)``.  If a
# value is not known, use '' .
known_numerics = {
    'en_US': ('.', ','),
    'de_DE' : (',', '.'),
    # The French thousands separator may be a breaking or non-breaking space
    # depending on the platform, so do not test it
    'fr_FR' : (',', ''),
    'ps_AF': ('\u066b', '\u066c'),
}

# Dict formatted as ``<locale> : (<count>, {<index>: <digit string>})``.
# ``<count>`` is the expected number of ALT_DIGITS entries (0 means none) and
# the inner dict holds sample entries checked by position.  Keys are locale
# names without the encoding suffix.
known_alt_digits = {
    'C': (0, {}),
    'en_US': (0, {}),
    'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}),
    'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}),
    'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}),
    'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}),
    'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}),
    'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}),
    'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}),
    'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
}

# Dict formatted as ``<locale> : (<min_count>, <sample>)``.  ``<min_count>`` is
# the minimum number of ';'-separated ERA segments expected (0 means ERA must
# be empty) and ``<sample>`` is a segment that must occur in the ERA string.
# Keys are locale names without the encoding suffix.
known_era = {
    'C': (0, ''),
    'en_US': (0, ''),
    'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
    'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
    # Keyed as th_TH so it matches the 'th_TH.TIS620' candidate locale; the
    # former 'th_TW' key could never be looked up.
    'th_TH': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),
}

if sys.platform == 'win32':
    # ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
    del known_numerics['ps_AF']

if sys.platform == 'sunos5':
    # On Solaris, Japanese ERAs start with the year 1927,
    # and thus there's less of them.
    known_era['ja_JP'] = (5, '+:1:2019/05/01:2019/12/31:令和:%EC元年')
| 109 | + |
class _LocaleTests(unittest.TestCase):
    """Tests for the C-level ``_locale`` functions against known locale data.

    Every test walks ``candidate_locales``, passes over locales that cannot
    be set on this machine, and skips itself when no locale yielded a check.
    """

    def setUp(self):
        # Remember the current LC_ALL setting so tearDown() can restore it.
        self.oldlocale = setlocale(LC_ALL)

    def tearDown(self):
        setlocale(LC_ALL, self.oldlocale)

    # Want to know what value was calculated, what it was compared against,
    # what function was used for the calculation, what type of data was used,
    # the locale that was supposedly set, and the actual locale that is set.
    lc_numeric_err_msg = "%s != %s (%s for %s; set to %s, using %s)"

    def numeric_tester(self, calc_type, calc_value, data_type, used_locale):
        """Compare calculation against known value, if available.

        calc_type names the function that produced calc_value, data_type is
        "decimal_point" or "thousands_sep", and used_locale is the locale
        name that was requested.  Returns True when a comparison was made;
        otherwise (no known value, or an empty calc_value) returns None.
        """
        try:
            set_locale = setlocale(LC_NUMERIC)
        except Error:
            set_locale = "<not able to determine>"
        # The bool index picks element 0 (decimal point) or 1 (thousands
        # separator) of the known pair.
        known_value = known_numerics.get(used_locale,
                                         ('', ''))[data_type == 'thousands_sep']
        if known_value and calc_value:
            self.assertEqual(calc_value, known_value,
                             self.lc_numeric_err_msg % (
                                 calc_value, known_value,
                                 calc_type, data_type, set_locale,
                                 used_locale))
            return True

    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
    @unittest.skipIf(
        support.is_emscripten or support.is_wasi,
        "musl libc issue on Emscripten, bpo-46390"
    )
    def test_lc_numeric_nl_langinfo(self):
        # Test nl_langinfo against known values
        tested = False
        for loc in candidate_locales:
            try:
                setlocale(LC_NUMERIC, loc)
                setlocale(LC_CTYPE, loc)
            except Error:
                continue
            for li, lc in ((RADIXCHAR, "decimal_point"),
                           (THOUSEP, "thousands_sep")):
                if self.numeric_tester('nl_langinfo', nl_langinfo(li), lc, loc):
                    tested = True
        if not tested:
            self.skipTest('no suitable locales')

    @unittest.skipIf(
        support.is_emscripten or support.is_wasi,
        "musl libc issue on Emscripten, bpo-46390"
    )
    def test_lc_numeric_localeconv(self):
        # Test localeconv against known values
        tested = False
        for loc in candidate_locales:
            try:
                setlocale(LC_NUMERIC, loc)
                setlocale(LC_CTYPE, loc)
            except Error:
                continue
            formatting = localeconv()
            for lc in ("decimal_point",
                       "thousands_sep"):
                if self.numeric_tester('localeconv', formatting[lc], lc, loc):
                    tested = True
        if not tested:
            self.skipTest('no suitable locales')

    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
    def test_lc_numeric_basic(self):
        # Test nl_langinfo against localeconv
        tested = False
        for loc in candidate_locales:
            try:
                setlocale(LC_NUMERIC, loc)
                setlocale(LC_CTYPE, loc)
            except Error:
                continue
            for li, lc in ((RADIXCHAR, "decimal_point"),
                           (THOUSEP, "thousands_sep")):
                nl_radixchar = nl_langinfo(li)
                li_radixchar = localeconv()[lc]
                try:
                    set_locale = setlocale(LC_NUMERIC)
                except Error:
                    set_locale = "<not able to determine>"
                self.assertEqual(nl_radixchar, li_radixchar,
                                 "%s (nl_langinfo) != %s (localeconv) "
                                 "(set to %s, using %s)" % (
                                     nl_radixchar, li_radixchar,
                                     loc, set_locale))
                tested = True
        if not tested:
            self.skipTest('no suitable locales')

    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
    @unittest.skipUnless(hasattr(locale, 'ALT_DIGITS'), "requires locale.ALT_DIGITS")
    @unittest.skipIf(
        support.is_emscripten or support.is_wasi,
        "musl libc issue on Emscripten, bpo-46390"
    )
    def test_alt_digits_nl_langinfo(self):
        # Test nl_langinfo(ALT_DIGITS)
        tested = False
        for loc in candidate_locales:
            # One subTest per locale: setting the locale and checking its
            # data share a single scope, so a locale that cannot be set is
            # never followed by checks run under a stale locale.
            with self.subTest(locale=loc):
                try:
                    setlocale(LC_TIME, loc)
                    setlocale(LC_CTYPE, loc)
                except Error:
                    # skipTest() raises; the subTest records the skip and
                    # the loop proceeds with the next locale.
                    self.skipTest(f'no locale {loc!r}')

                alt_digits = nl_langinfo(locale.ALT_DIGITS)
                self.assertIsInstance(alt_digits, str)
                alt_digits = alt_digits.split(';') if alt_digits else []
                if alt_digits:
                    self.assertGreaterEqual(len(alt_digits), 10, alt_digits)
                # Known values are keyed by locale name without encoding.
                loc1 = loc.split('.', 1)[0]
                if loc1 in known_alt_digits:
                    count, samples = known_alt_digits[loc1]
                    if count and not alt_digits:
                        self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform')
                    self.assertEqual(len(alt_digits), count, alt_digits)
                    for i in samples:
                        self.assertEqual(alt_digits[i], samples[i])
                tested = True
        if not tested:
            self.skipTest('no suitable locales')

    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
    @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
    @unittest.skipIf(
        support.is_emscripten or support.is_wasi,
        "musl libc issue on Emscripten, bpo-46390"
    )
    def test_era_nl_langinfo(self):
        # Test nl_langinfo(ERA)
        tested = False
        for loc in candidate_locales:
            # One subTest per locale: setting the locale and checking its
            # data share a single scope, so a locale that cannot be set is
            # never followed by checks run under a stale locale.
            with self.subTest(locale=loc):
                try:
                    setlocale(LC_TIME, loc)
                    setlocale(LC_CTYPE, loc)
                except Error:
                    # skipTest() raises; the subTest records the skip and
                    # the loop proceeds with the next locale.
                    self.skipTest(f'no locale {loc!r}')

                era = nl_langinfo(locale.ERA)
                self.assertIsInstance(era, str)
                if era:
                    # Each ';'-separated segment must contain five ':'.
                    self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)

                # Known values are keyed by locale name without encoding.
                loc1 = loc.split('.', 1)[0]
                if loc1 in known_era:
                    count, sample = known_era[loc1]
                    if count:
                        if not era:
                            self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
                        self.assertGreaterEqual(era.count(';') + 1, count)
                        self.assertIn(sample, era)
                    else:
                        self.assertEqual(era, '')
                tested = True
        if not tested:
            self.skipTest('no suitable locales')

    def test_float_parsing(self):
        # Bug #1391872: Test whether float parsing is okay on European
        # locales.
        tested = False
        for loc in candidate_locales:
            try:
                setlocale(LC_NUMERIC, loc)
                setlocale(LC_CTYPE, loc)
            except Error:
                continue

            # Ignore buggy locale databases. (Mac OS 10.4 and some other BSDs)
            if loc == 'eu_ES' and localeconv()['decimal_point'] == "' ":
                continue

            # eval() of a constant literal is deliberate here: it checks
            # that source-level float parsing ignores the active locale.
            self.assertEqual(int(eval('3.14') * 100), 314,
                             "using eval('3.14') failed for %s" % loc)
            self.assertEqual(int(float('3.14') * 100), 314,
                             "using float('3.14') failed for %s" % loc)
            if localeconv()['decimal_point'] != '.':
                # float() must reject the locale's own decimal point.
                self.assertRaises(ValueError, float,
                                  localeconv()['decimal_point'].join(['1', '23']))
            tested = True
        if not tested:
            self.skipTest('no suitable locales')
| 307 | + |
| 308 | + |
# Run the module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments