From 150fd4f72e17896f360aa302c96887311589f1dc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 1 Aug 2025 17:43:53 +0300 Subject: [PATCH] gh-137273: Fix debug assertion failure in locale.setlocale() on Windows (GH-137300) It happened when there were at least 16 characters after dot in the locale name. (cherry picked from commit 718e0c89ba0610bba048245028ac133bbf2d44c2) Co-authored-by: Serhiy Storchaka --- Lib/test/test_locale.py | 70 +++++++++++++------ ...-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst | 1 + Modules/_localemodule.c | 47 +++++++++++++ 3 files changed, 97 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 55b502e52ca454..698e137e3e8abd 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -5,6 +5,7 @@ from unittest import mock import unittest import locale +import os import sys import codecs @@ -486,6 +487,54 @@ def test_japanese(self): self.check('jp_jp', 'ja_JP.eucJP') +class TestRealLocales(unittest.TestCase): + def setUp(self): + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + + def test_getsetlocale_issue1813(self): + # Issue #1813: setting and getting the locale under a Turkish locale + try: + locale.setlocale(locale.LC_CTYPE, 'tr_TR') + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs Turkish locale') + loc = locale.getlocale(locale.LC_CTYPE) + if verbose: + print('testing with %a' % (loc,), end=' ', flush=True) + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error as exc: + # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE) + # and the tr_TR locale on Windows. getlocale() builds a locale + # which is not recognized by setlocale(). 
+ self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") + self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_setlocale_long_encoding(self): + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_CTYPE, 'English.%016d' % 1252) + locale.setlocale(locale.LC_CTYPE, 'English.%015d' % 1252) + loc = locale.setlocale(locale.LC_ALL) + self.assertIn('.1252', loc) + loc2 = loc.replace('.1252', '.%016d' % 1252, 1) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_ALL, loc2) + loc2 = loc.replace('.1252', '.%015d' % 1252, 1) + locale.setlocale(locale.LC_ALL, loc2) + + # gh-137273: Debug assertion failure on Windows for long encoding. + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_CTYPE, 'en_US.' + 'x'*16) + locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') + loc = locale.setlocale(locale.LC_ALL) + self.assertIn('.UTF-8', loc) + loc2 = loc.replace('.UTF-8', '.' + 'x'*16, 1) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_ALL, loc2) + + class TestMiscellaneous(unittest.TestCase): def test_defaults_UTF8(self): # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is @@ -552,27 +601,6 @@ def test_setlocale_category(self): # crasher from bug #7419 self.assertRaises(locale.Error, locale.setlocale, 12345) - def test_getsetlocale_issue1813(self): - # Issue #1813: setting and getting the locale under a Turkish locale - oldlocale = locale.setlocale(locale.LC_CTYPE) - self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) - try: - locale.setlocale(locale.LC_CTYPE, 'tr_TR') - except locale.Error: - # Unsupported locale on this system - self.skipTest('test needs Turkish locale') - loc = locale.getlocale(locale.LC_CTYPE) - if verbose: - print('testing with %a' % (loc,), end=' ', flush=True) - try: - locale.setlocale(locale.LC_CTYPE, loc) - except locale.Error as exc: - # bpo-37945: setlocale(LC_CTYPE) fails with 
getlocale(LC_CTYPE) - # and the tr_TR locale on Windows. getlocale() builds a locale - # which is not recognize by setlocale(). - self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") - self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) - def test_invalid_locale_format_in_localetuple(self): with self.assertRaises(TypeError): locale.setlocale(locale.LC_ALL, b'fi_FI') diff --git a/Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst b/Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst new file mode 100644 index 00000000000000..f344877955fea0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-01-15-07-59.gh-issue-137273.4V8Xmv.rst @@ -0,0 +1 @@ +Fix debug assertion failure in :func:`locale.setlocale` on Windows. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 41e6d48b1dbd9b..17b5220fd6f9e1 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -87,6 +87,41 @@ copy_grouping(const char* s) return result; } +#if defined(MS_WINDOWS) + +// 16 is the number of elements in the szCodePage field +// of the __crt_locale_strings structure. +#define MAX_CP_LEN 15 + +static int +check_locale_name(const char *locale, const char *end) +{ + size_t len = end ? (size_t)(end - locale) : strlen(locale); + const char *dot = memchr(locale, '.', len); + if (dot && locale + len - dot - 1 > MAX_CP_LEN) { + return -1; + } + return 0; +} + +static int +check_locale_name_all(const char *locale) +{ + const char *start = locale; + while (1) { + const char *end = strchr(start, ';'); + if (check_locale_name(start, end) < 0) { + return -1; + } + if (end == NULL) { + break; + } + start = end + 1; + } + return 0; +} +#endif + /*[clinic input] _locale.setlocale @@ -111,6 +146,18 @@ _locale_setlocale_impl(PyObject *module, int category, const char *locale) "invalid locale category"); return NULL; } + if (locale) { + if ((category == LC_ALL + ? 
check_locale_name_all(locale) + : check_locale_name(locale, NULL)) < 0) + { + /* Debug assertion failure on Windows. + * _Py_BEGIN_SUPPRESS_IPH/_Py_END_SUPPRESS_IPH do not help. */ + PyErr_SetString(get_locale_state(module)->Error, + "unsupported locale setting"); + return NULL; + } + } #endif if (locale) {