From c6dbeb89cdad5777f8aeaba1702c1c0b8fc76a9e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 1 Aug 2025 15:08:36 +0300 Subject: [PATCH 1/3] gh-137273: Fix debug assertion failure in locale.setlocale() on Windows It happened when there were at least 16 characters after dot in the locale name. --- Lib/test/test_locale.py | 59 ++++++++++++------- ...-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst | 1 + Modules/_localemodule.c | 42 +++++++++++++ 3 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 55b502e52ca454..a111e0da4ddfdf 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -486,6 +486,44 @@ def test_japanese(self): self.check('jp_jp', 'ja_JP.eucJP') +class TestRealLocales(unittest.TestCase): + def setUp(self): + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + + def test_getsetlocale_issue1813(self): + # Issue #1813: setting and getting the locale under a Turkish locale + try: + locale.setlocale(locale.LC_CTYPE, 'tr_TR') + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs Turkish locale') + loc = locale.getlocale(locale.LC_CTYPE) + if verbose: + print('testing with %a' % (loc,), end=' ', flush=True) + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error as exc: + # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE) + # and the tr_TR locale on Windows. getlocale() builds a locale + # which is not recognized by setlocale(). + self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") + self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) + + def test_setlocale_long_encoding(self): + # gh-137273: Debug assertion failure on Windows for long encoding.
+ oldlocale = locale.setlocale(locale.LC_ALL) + self.addCleanup(locale.setlocale, locale.LC_ALL, oldlocale) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_CTYPE, 'en_US.' + 'x'*16) + locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') + loc = locale.setlocale(locale.LC_ALL) + self.assertIn('.UTF-8', loc) + loc2 = loc.replace('UTF-8', 'x'*16, 1) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_ALL, loc2) + + class TestMiscellaneous(unittest.TestCase): def test_defaults_UTF8(self): # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is @@ -552,27 +590,6 @@ def test_setlocale_category(self): # crasher from bug #7419 self.assertRaises(locale.Error, locale.setlocale, 12345) - def test_getsetlocale_issue1813(self): - # Issue #1813: setting and getting the locale under a Turkish locale - oldlocale = locale.setlocale(locale.LC_CTYPE) - self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) - try: - locale.setlocale(locale.LC_CTYPE, 'tr_TR') - except locale.Error: - # Unsupported locale on this system - self.skipTest('test needs Turkish locale') - loc = locale.getlocale(locale.LC_CTYPE) - if verbose: - print('testing with %a' % (loc,), end=' ', flush=True) - try: - locale.setlocale(locale.LC_CTYPE, loc) - except locale.Error as exc: - # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE) - # and the tr_TR locale on Windows. getlocale() builds a locale - # which is not recognize by setlocale(). 
- self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") - self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) - def test_invalid_locale_format_in_localetuple(self): with self.assertRaises(TypeError): locale.setlocale(locale.LC_ALL, b'fi_FI') diff --git a/Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst b/Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst new file mode 100644 index 00000000000000..f344877955fea0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst @@ -0,0 +1 @@ +Fix debug assertion failure in :func:`locale.setlocale` on Windows. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 41e6d48b1dbd9b..963c80957dbacd 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -87,6 +87,36 @@ copy_grouping(const char* s) return result; } +#if defined(MS_WINDOWS) +static int +check_locale_name(const char *locale, const char *end) +{ + size_t len = end ? (size_t)(end - locale) : strlen(locale); + const char *dot = memchr(locale, '.', len); + if (dot && locale + len - dot > 16) { + return -1; + } + return 0; +} + +static int +check_locale_name_all(const char *locale) +{ + const char *start = locale; + while (1) { + const char *end = strchr(start, ';'); + if (check_locale_name(start, end) < 0) { + return -1; + } + if (end == NULL) { + break; + } + start = end + 1; + } + return 0; +} +#endif + /*[clinic input] _locale.setlocale @@ -111,6 +141,18 @@ _locale_setlocale_impl(PyObject *module, int category, const char *locale) "invalid locale category"); return NULL; } + if (locale) { + if ((category == LC_ALL + ? check_locale_name_all(locale) + : check_locale_name(locale, NULL)) < 0) + { + /* Debug assertion failure on Windows. + * _Py_BEGIN_SUPPRESS_IPH/_Py_END_SUPPRESS_IPH do not help. 
*/ + PyErr_SetString(get_locale_state(module)->Error, + "unsupported locale setting"); + return NULL; + } + } #endif if (locale) { From 303194898049bc311ee2dac46a3d9837d3220b95 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 1 Aug 2025 15:48:22 +0300 Subject: [PATCH 2/3] Add more tests and comments. --- Lib/test/test_locale.py | 17 ++++++++++++++--- Modules/_localemodule.c | 7 ++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index a111e0da4ddfdf..698e137e3e8abd 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -5,6 +5,7 @@ from unittest import mock import unittest import locale +import os import sys import codecs @@ -510,16 +511,26 @@ def test_getsetlocale_issue1813(self): self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) + @unittest.skipUnless(os.name == 'nt', 'requires Windows') def test_setlocale_long_encoding(self): + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_CTYPE, 'English.%016d' % 1252) + locale.setlocale(locale.LC_CTYPE, 'English.%015d' % 1252) + loc = locale.setlocale(locale.LC_ALL) + self.assertIn('.1252', loc) + loc2 = loc.replace('.1252', '.%016d' % 1252, 1) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_ALL, loc2) + loc2 = loc.replace('.1252', '.%015d' % 1252, 1) + locale.setlocale(locale.LC_ALL, loc2) + # gh-137273: Debug assertion failure on Windows for long encoding. - oldlocale = locale.setlocale(locale.LC_ALL) - self.addCleanup(locale.setlocale, locale.LC_ALL, oldlocale) with self.assertRaises(locale.Error): locale.setlocale(locale.LC_CTYPE, 'en_US.' + 'x'*16) locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') loc = locale.setlocale(locale.LC_ALL) self.assertIn('.UTF-8', loc) - loc2 = loc.replace('UTF-8', 'x'*16, 1) + loc2 = loc.replace('.UTF-8', '.' 
+ 'x'*16, 1) with self.assertRaises(locale.Error): locale.setlocale(locale.LC_ALL, loc2) diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 963c80957dbacd..a9294ffe901fb7 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -88,12 +88,17 @@ copy_grouping(const char* s) } #if defined(MS_WINDOWS) + +// The number of elements in the szCodePage field +// of the __crt_locale_strings structure. +#define MAX_ENCODING_SIZE 16 + static int check_locale_name(const char *locale, const char *end) { size_t len = end ? (size_t)(end - locale) : strlen(locale); const char *dot = memchr(locale, '.', len); - if (dot && locale + len - dot > 16) { + if (dot && locale + len - dot > MAX_ENCODING_SIZE) { return -1; } return 0; From eae0bc4e7309902ae7d170fdfffd647cf35f0f70 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 1 Aug 2025 16:33:06 +0300 Subject: [PATCH 3/3] MAX_ENCODING_SIZE -> MAX_CP_LEN --- Modules/_localemodule.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index a9294ffe901fb7..17b5220fd6f9e1 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -89,16 +89,16 @@ copy_grouping(const char* s) #if defined(MS_WINDOWS) -// The number of elements in the szCodePage field +// 16 is the number of elements in the szCodePage field // of the __crt_locale_strings structure. -#define MAX_ENCODING_SIZE 16 +#define MAX_CP_LEN 15 static int check_locale_name(const char *locale, const char *end) { size_t len = end ? (size_t)(end - locale) : strlen(locale); const char *dot = memchr(locale, '.', len); - if (dot && locale + len - dot > MAX_ENCODING_SIZE) { + if (dot && locale + len - dot - 1 > MAX_CP_LEN) { return -1; } return 0;