From 35dbbe25c528ecbaa20009cb4d5699718f433f13 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 21:54:12 +0300 Subject: [PATCH 1/2] gh-124969: Make locale.nl_langinfo(locale.ALT_DIGITS) return a string again (GH-125774) This is a follow-up to GH-124974. Only glibc needed a fix. Now the returned value is a string consisting of semicolon-separated symbols on all POSIX platforms. (cherry picked from commit dcc4fb2c9068f60353f0c0978948b7681f7745e6) Co-authored-by: Serhiy Storchaka --- Doc/library/locale.rst | 7 ++-- Lib/test/test__locale.py | 30 ++++++++++----- ...-10-21-12-06-55.gh-issue-124969.xiY8UP.rst | 2 + Modules/_localemodule.c | 38 +++++++++++-------- 4 files changed, 50 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index f0553d51fedf14..a81879a2fe48dc 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -158,8 +158,7 @@ The :mod:`locale` module defines the following exception and functions: .. function:: nl_langinfo(option) - Return some locale-specific information as a string (or a tuple for - ``ALT_DIGITS``). This function is not + Return some locale-specific information as a string. This function is not available on all systems, and the set of possible options might also vary across platforms. The possible argument values are numbers, for which symbolic constants are available in the locale module. @@ -312,7 +311,9 @@ The :mod:`locale` module defines the following exception and functions: .. data:: ALT_DIGITS - Get a tuple of up to 100 strings used to represent the values 0 to 99. + Get a string consisting of up to 100 semicolon-separated symbols used + to represent the values 0 to 99 in a locale-specific way. + In most locales this is an empty string. .. 
function:: getdefaultlocale([envvars]) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 5041def7216197..a680e6edb63c0e 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -26,7 +26,10 @@ 'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA', 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US', 'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', - 'ru_RU.KOI8-R', 'ko_KR.eucKR'] + 'ru_RU.KOI8-R', 'ko_KR.eucKR', + 'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8', + 'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620', +] def setUpModule(): global candidate_locales @@ -78,11 +81,13 @@ def accept(loc): 'C': (0, {}), 'en_US': (0, {}), 'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}), - 'ja_JP': (100, {0: '\u3007', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), + 'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), 'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}), 'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}), 'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}), 'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}), + 'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}), + 'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}), } if sys.platform == 'win32': @@ -196,7 +201,7 @@ def test_lc_numeric_basic(self): def test_alt_digits_nl_langinfo(self): # Test nl_langinfo(ALT_DIGITS) tested = False - for loc, (count, samples) in known_alt_digits.items(): + for loc in candidate_locales: with self.subTest(locale=loc): try: setlocale(LC_TIME, loc) @@ -204,14 +209,21 @@ def test_alt_digits_nl_langinfo(self): except Error: self.skipTest(f'no locale {loc!r}') continue + with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) - self.assertIsInstance(alt_digits, tuple) - if count and not 
alt_digits and support.is_apple: - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') - self.assertEqual(len(alt_digits), count) - for i in samples: - self.assertEqual(alt_digits[i], samples[i]) + self.assertIsInstance(alt_digits, str) + alt_digits = alt_digits.split(';') if alt_digits else [] + if alt_digits: + self.assertGreaterEqual(len(alt_digits), 10, alt_digits) + loc1 = loc.split('.', 1)[0] + if loc1 in known_alt_digits: + count, samples = known_alt_digits[loc1] + if count and not alt_digits: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform') + self.assertEqual(len(alt_digits), count, alt_digits) + for i in samples: + self.assertEqual(alt_digits[i], samples[i]) tested = True if not tested: self.skipTest('no suitable locales') diff --git a/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst new file mode 100644 index 00000000000000..c44550184e0000 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst @@ -0,0 +1,2 @@ +``locale.nl_langinfo(locale.ALT_DIGITS)`` now returns a string again. The +returned value consists of up to 100 semicolon-separated symbols. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 3dea764fdaad27..53ebb57d23ae07 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -619,28 +619,36 @@ _locale_nl_langinfo_impl(PyObject *module, int item) const char *result = nl_langinfo(item); result = result != NULL ? result : ""; PyObject *pyresult; +#ifdef __GLIBC__ #ifdef ALT_DIGITS - if (item == ALT_DIGITS) { - /* The result is a sequence of up to 100 NUL-separated strings. */ - const char *s = result; + if (item == ALT_DIGITS && *result) { + /* According to the POSIX specification the result must be + * a sequence of up to 100 semicolon-separated strings. + * But in Glibc they are NUL-separated. 
*/ + Py_ssize_t i = 0; int count = 0; - for (; count < 100 && *s; count++) { - s += strlen(s) + 1; + for (; count < 100 && result[i]; count++) { + i += strlen(result + i) + 1; } - pyresult = PyTuple_New(count); - if (pyresult != NULL) { - for (int i = 0; i < count; i++) { - PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); - if (unicode == NULL) { - Py_CLEAR(pyresult); - break; - } - PyTuple_SET_ITEM(pyresult, i, unicode); - result += strlen(result) + 1; + char *buf = PyMem_Malloc(i); + if (buf == NULL) { + PyErr_NoMemory(); + pyresult = NULL; + } + else { + memcpy(buf, result, i); + /* Replace all NULs with semicolons. */ + i = 0; + while (--count) { + i += strlen(buf + i); + buf[i++] = ';'; } + pyresult = PyUnicode_DecodeLocale(buf, NULL); + PyMem_Free(buf); } } else +#endif #endif { pyresult = PyUnicode_DecodeLocale(result, NULL); From 4c72e45b3af56ae7d3f0a912476db4b9ed1fd151 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 22:06:53 +0300 Subject: [PATCH 2/2] Merge NEWS. --- .../Library/2024-10-08-12-09-09.gh-issue-124969._VBQLq.rst | 7 ++++--- .../Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-08-12-09-09.gh-issue-124969._VBQLq.rst b/Misc/NEWS.d/next/Library/2024-10-08-12-09-09.gh-issue-124969._VBQLq.rst index b5082b90721d42..7959ce2d1e9907 100644 --- a/Misc/NEWS.d/next/Library/2024-10-08-12-09-09.gh-issue-124969._VBQLq.rst +++ b/Misc/NEWS.d/next/Library/2024-10-08-12-09-09.gh-issue-124969._VBQLq.rst @@ -1,3 +1,4 @@ -Fix ``locale.nl_langinfo(locale.ALT_DIGITS)``. Now it returns a tuple of up -to 100 strings (an empty tuple on most locales). Previously it returned the -first item of that tuple or an empty string. +Fix ``locale.nl_langinfo(locale.ALT_DIGITS)`` on platforms with glibc. 
+Now it returns a string consisting of up to 100 semicolon-separated symbols +(an empty string in most locales) on all Posix platforms. +Previously it only returned the first symbol or an empty string. diff --git a/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst deleted file mode 100644 index c44550184e0000..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst +++ /dev/null @@ -1,2 +0,0 @@ -``locale.nl_langinfo(locale.ALT_DIGITS)`` now returns a string again. The -returned value consists of up to 100 semicolon-separated symbols.