Skip to content

Commit 853cfbd

Browse files
gh-87281: Fix support for locales with modifiers
1 parent 9d3b53c commit 853cfbd

File tree

3 files changed

+162
-33
lines changed

3 files changed

+162
-33
lines changed

Lib/locale.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -375,12 +375,12 @@ def _replace_encoding(code, encoding):
375375
def _append_modifier(code, modifier):
376376
if modifier == 'euro':
377377
if '.' not in code:
378-
return code + '.ISO8859-15'
378+
return code + '.ISO8859-15@euro'
379379
_, _, encoding = code.partition('.')
380-
if encoding in ('ISO8859-15', 'UTF-8'):
380+
if encoding == 'UTF-8':
381381
return code
382382
if encoding == 'ISO8859-1':
383-
return _replace_encoding(code, 'ISO8859-15')
383+
code = _replace_encoding(code, 'ISO8859-15')
384384
return code + '@' + modifier
385385

386386
def normalize(localename):
@@ -487,11 +487,16 @@ def _parse_localename(localename):
487487
if modifier == 'euro' and '.' not in code:
488488
# Assume Latin-9 for @euro locales. This is bogus,
489489
# since some systems may use other encodings for these
490-
# locales. Also, we ignore other modifiers.
491-
return code, 'iso-8859-15'
490+
# locales.
491+
return code + '@euro', 'ISO8859-15'
492+
else:
493+
modifier = ''
492494

493495
if '.' in code:
494-
return tuple(code.split('.')[:2])
496+
code, encoding = code.split('.')[:2]
497+
if modifier:
498+
code += '@' + modifier
499+
return code, encoding
495500
elif code == 'C':
496501
return None, None
497502
elif code == 'UTF-8':
@@ -516,7 +521,14 @@ def _build_localename(localetuple):
516521
if encoding is None:
517522
return language
518523
else:
519-
return language + '.' + encoding
524+
if '@' in language:
525+
language, modifier = language.split('@', 1)
526+
else:
527+
modifier = ''
528+
localename = language + '.' + encoding
529+
if modifier:
530+
localename += '@' + modifier
531+
return localename
520532
except (TypeError, ValueError):
521533
raise TypeError('Locale must be None, a string, or an iterable of '
522534
'two strings -- language code, encoding.') from None
@@ -888,6 +900,12 @@ def getpreferredencoding(do_setlocale=True):
888900
# SS 2025-06-10:
889901
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
890902
# on all platforms.
903+
#
904+
# SS 2025-07-30:
905+
# Remove conflicts with GNU libc.
906+
#
907+
# removed 'el_gr@euro'
908+
# removed 'uz_uz@cyrillic'
891909

892910
locale_alias = {
893911
'a3': 'az_AZ.KOI8-C',
@@ -1021,7 +1039,6 @@ def getpreferredencoding(do_setlocale=True):
10211039
'el': 'el_GR.ISO8859-7',
10221040
'el_cy': 'el_CY.ISO8859-7',
10231041
'el_gr': 'el_GR.ISO8859-7',
1024-
'el_gr@euro': 'el_GR.ISO8859-15',
10251042
'en': 'en_US.ISO8859-1',
10261043
'en_ag': 'en_AG.UTF-8',
10271044
'en_au': 'en_AU.ISO8859-1',
@@ -1456,7 +1473,6 @@ def getpreferredencoding(do_setlocale=True):
14561473
'ur_pk': 'ur_PK.CP1256',
14571474
'uz': 'uz_UZ.UTF-8',
14581475
'uz_uz': 'uz_UZ.UTF-8',
1459-
'uz_uz@cyrillic': 'uz_UZ.UTF-8',
14601476
've': 've_ZA.UTF-8',
14611477
've_za': 've_ZA.UTF-8',
14621478
'vi': 'vi_VN.TCVN',

Lib/test/test_locale.py

Lines changed: 129 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from decimal import Decimal
2+
from test import support
23
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
34
from test.support.warnings_helper import check_warnings
45
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
@@ -424,8 +425,8 @@ def test_hyphenated_encoding(self):
424425
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
425426

426427
def test_euro_modifier(self):
427-
self.check('de_DE@euro', 'de_DE.ISO8859-15')
428-
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
428+
self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
429+
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
429430
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')
430431

431432
def test_latin_modifier(self):
@@ -486,6 +487,132 @@ def test_japanese(self):
486487
self.check('jp_jp', 'ja_JP.eucJP')
487488

488489

490+
class TestRealLocales(unittest.TestCase):
491+
locale_type = locale.LC_CTYPE
492+
493+
def setUp(self):
494+
oldlocale = locale.setlocale(locale.LC_CTYPE)
495+
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
496+
497+
def test_getsetlocale_issue1813(self):
498+
# Issue #1813: setting and getting the locale under a Turkish locale
499+
try:
500+
locale.setlocale(locale.LC_CTYPE, 'tr_TR')
501+
except locale.Error:
502+
# Unsupported locale on this system
503+
self.skipTest('test needs Turkish locale')
504+
loc = locale.getlocale(locale.LC_CTYPE)
505+
if verbose:
506+
print('testing with %a' % (loc,), end=' ', flush=True)
507+
try:
508+
locale.setlocale(locale.LC_CTYPE, loc)
509+
except locale.Error as exc:
510+
# bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE)
511+
# and the tr_TR locale on Windows. getlocale() builds a locale
512+
# which is not recognized by setlocale().
513+
self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}")
514+
self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE))
515+
516+
@support.subTests('localename,localetuple', [
517+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
518+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
519+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
520+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
521+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
522+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
523+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
524+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
525+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
526+
('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
527+
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
528+
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
529+
('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
530+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
531+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
532+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
533+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
534+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
535+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
536+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
537+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
538+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
539+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
540+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
541+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
542+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
543+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
544+
('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
545+
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
546+
('be_BY.UTF-8@latin', ('be_BY@latin', None)),
547+
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
548+
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
549+
('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
550+
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
551+
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
552+
('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
553+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
554+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
555+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
556+
])
557+
def test_setlocale_with_modifier(self, localename, localetuple):
558+
try:
559+
locale.setlocale(locale.LC_CTYPE, localename)
560+
except locale.Error as exc:
561+
self.skipTest(str(exc))
562+
loc = locale.setlocale(locale.LC_CTYPE, localetuple)
563+
self.assertEqual(loc, localename)
564+
565+
loctuple = locale.getlocale(locale.LC_CTYPE)
566+
loc = locale.setlocale(locale.LC_CTYPE, loctuple)
567+
self.assertEqual(loc, localename)
568+
569+
@support.subTests('localename,localetuple', [
570+
('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
571+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
572+
('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
573+
('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
574+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
575+
('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
576+
('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
577+
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
578+
('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
579+
('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
580+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
581+
('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
582+
('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
583+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
584+
('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
585+
('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
586+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
587+
('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
588+
('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
589+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
590+
('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
591+
('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
592+
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
593+
('be_BY@latin', ('be_BY@latin', 'UTF-8')),
594+
('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
595+
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
596+
('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
597+
('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
598+
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
599+
('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
600+
('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
601+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
602+
('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
603+
])
604+
def test_getlocale_with_modifier(self, localename, localetuple):
605+
try:
606+
locale.setlocale(locale.LC_CTYPE, localename)
607+
except locale.Error as exc:
608+
self.skipTest(str(exc))
609+
loctuple = locale.getlocale(locale.LC_CTYPE)
610+
self.assertEqual(loctuple, localetuple)
611+
612+
locale.setlocale(locale.LC_CTYPE, loctuple)
613+
self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)
614+
615+
489616
class TestMiscellaneous(unittest.TestCase):
490617
def test_defaults_UTF8(self):
491618
# Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is
@@ -552,27 +679,6 @@ def test_setlocale_category(self):
552679
# crasher from bug #7419
553680
self.assertRaises(locale.Error, locale.setlocale, 12345)
554681

555-
def test_getsetlocale_issue1813(self):
556-
# Issue #1813: setting and getting the locale under a Turkish locale
557-
oldlocale = locale.setlocale(locale.LC_CTYPE)
558-
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
559-
try:
560-
locale.setlocale(locale.LC_CTYPE, 'tr_TR')
561-
except locale.Error:
562-
# Unsupported locale on this system
563-
self.skipTest('test needs Turkish locale')
564-
loc = locale.getlocale(locale.LC_CTYPE)
565-
if verbose:
566-
print('testing with %a' % (loc,), end=' ', flush=True)
567-
try:
568-
locale.setlocale(locale.LC_CTYPE, loc)
569-
except locale.Error as exc:
570-
# bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE)
571-
# and the tr_TR locale on Windows. getlocale() builds a locale
572-
# which is not recognize by setlocale().
573-
self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}")
574-
self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE))
575-
576682
def test_invalid_locale_format_in_localetuple(self):
577683
with self.assertRaises(TypeError):
578684
locale.setlocale(locale.LC_ALL, b'fi_FI')

Tools/i18n/makelocalealias.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,20 @@ def parse(filename):
4444
# Ignore one letter locale mappings (except for 'c')
4545
if len(locale) == 1 and locale != 'c':
4646
continue
47+
if '@' in locale and '@' not in alias:
48+
if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
49+
alias += '@euro'
4750
# Normalize encoding, if given
4851
if '.' in locale:
4952
lang, encoding = locale.split('.')[:2]
5053
encoding = encoding.replace('-', '')
5154
encoding = encoding.replace('_', '')
5255
locale = lang + '.' + encoding
5356
data[locale] = alias
57+
# Conflict with GNU libc
58+
data.pop('el_gr@euro', None)
59+
data.pop('uz_uz@cyrillic', None)
60+
data.pop('uz_uz.utf8@cyrillic', None)
5461
return data
5562

5663
def parse_glibc_supported(filename):
@@ -81,7 +88,7 @@ def parse_glibc_supported(filename):
8188
# Add an encoding to alias
8289
alias, _, modifier = alias.partition('@')
8390
alias = _locale._replace_encoding(alias, alias_encoding)
84-
if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
91+
if modifier:
8592
alias += '@' + modifier
8693
data[locale] = alias
8794
return data

0 commit comments

Comments
 (0)