From 74b8a7ff693702850df2ffad786af7895f4fedd9 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Tue, 10 Jun 2025 16:38:32 +0300
Subject: [PATCH] gh-133967: Do not normalize locale name 'C.UTF-8' to 'en_US.UTF-8' (GH-135347)

(cherry picked from commit 0f866cbfefd797b4dae25962457c5579bb90dde5)

Co-authored-by: Serhiy Storchaka
---
 Lib/locale.py | 5 ++++-
 Lib/test/test_locale.py | 4 ++++
 .../Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst | 1 +
 Tools/i18n/makelocalealias.py | 3 +++
 4 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst

diff --git a/Lib/locale.py b/Lib/locale.py
index 2feb10e59c96a3..dfedc6386cb891 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -883,6 +883,10 @@ def getpreferredencoding(do_setlocale=True):
 # updated 'sr@latn' -> 'sr_CS.UTF-8@latin' to 'sr_RS.UTF-8@latin'
 # removed 'univ'
 # removed 'universal'
+#
+# SS 2025-06-10:
+# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
+# on all platforms.
 
 locale_alias = {
     'a3': 'az_AZ.KOI8-C',
@@ -962,7 +966,6 @@ def getpreferredencoding(do_setlocale=True):
     'c.ascii': 'C',
     'c.en': 'C',
     'c.iso88591': 'en_US.ISO8859-1',
-    'c.utf8': 'en_US.UTF-8',
     'c_c': 'C',
     'c_c.c': 'C',
     'ca': 'ca_ES.ISO8859-1',
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py
index 455d2af37efdc8..55b502e52ca454 100644
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -387,6 +387,10 @@ def test_c(self):
         self.check('c', 'C')
         self.check('posix', 'C')
 
+    def test_c_utf8(self):
+        self.check('c.utf8', 'C.UTF-8')
+        self.check('C.UTF-8', 'C.UTF-8')
+
     def test_english(self):
         self.check('en', 'en_US.ISO8859-1')
         self.check('EN', 'en_US.ISO8859-1')
diff --git a/Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst b/Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst
new file mode 100644
index 00000000000000..1976981727e235
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-06-10-16-11-00.gh-issue-133967.P0c24q.rst
@@ -0,0 +1 @@
+Do not normalize :mod:`locale` name 'C.UTF-8' to 'en_US.UTF-8'.
diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py
index b407a8a643be7c..02af1caff7d499 100755
--- a/Tools/i18n/makelocalealias.py
+++ b/Tools/i18n/makelocalealias.py
@@ -140,6 +140,9 @@ def check(data):
     data = locale.locale_alias.copy()
     data.update(parse_glibc_supported(args.glibc_supported))
     data.update(parse(args.locale_alias))
+    # Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist
+    # on all platforms.
+    data['c.utf8'] = 'C.UTF-8'
     while True:
         # Repeat optimization while the size is decreased.
         n = len(data)