From 62a450db326f61f63e116d0d105927ce29b814e3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 1 May 2024 20:44:07 +0300 Subject: [PATCH 1/9] gh-118164: Optimize int to str conversion --- Lib/_pylong.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 936346e187ff69..d632a2c788297f 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -82,8 +82,23 @@ def inner(n, w): def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" - return str(int_to_decimal(n)) + w = n.bit_length() + if w > 1_800_000: + return str(int_to_decimal(n)) + def inner(n, w): + if w <= 1000: + return str(n) + w2 = w >> 1 + d = 10**w2 + hi, lo = divmod(n, d) + return inner(hi, w - w2) + inner(lo, w2).zfill(w2) + + w = int(w * 0.3010299956639812 + 1) # log10(2) + if n < 0: + return '-' + inner(-n, w) + else: + return inner(n, w) def _str_to_int_inner(s): """Asymptotically fast conversion of a 'str' to an 'int'.""" From 4e978aadb44258bb5eb53bb65d7e05eb7dbae8f1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 May 2024 13:52:58 +0300 Subject: [PATCH 2/9] Use the cache for pow10. Use more correct upper limit. 
--- Lib/_pylong.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index d632a2c788297f..99c5c28a44def6 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -83,18 +83,21 @@ def inner(n, w): def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" w = n.bit_length() - if w > 1_800_000: + if w > 900_000: return str(int_to_decimal(n)) def inner(n, w): if w <= 1000: return str(n) w2 = w >> 1 - d = 10**w2 + d = pow10_cache.get(w2) + if d is None: + d = pow10_cache[w2] = 10**w2 hi, lo = divmod(n, d) return inner(hi, w - w2) + inner(lo, w2).zfill(w2) w = int(w * 0.3010299956639812 + 1) # log10(2) + pow10_cache = {} if n < 0: return '-' + inner(-n, w) else: From cd108bf289398d7475991b32e8c1d69cfe889a4e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 May 2024 16:25:28 +0300 Subject: [PATCH 3/9] Break a loop between _pydecimal and _pylong. --- Lib/_pylong.py | 6 +++- Lib/test/test_int.py | 31 +++++++++++++------ ...-05-02-15-57-07.gh-issue-118164.AF6kwI.rst | 4 +++ 3 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 99c5c28a44def6..999462aa35c49c 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -16,6 +16,9 @@ import decimal +_is_pydecimal = hasattr(decimal.Decimal, '_power_exact') + + def int_to_decimal(n): """Asymptotically fast conversion of an 'int' to Decimal.""" @@ -83,7 +86,7 @@ def inner(n, w): def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" w = n.bit_length() - if w > 900_000: + if w > 900_000 and not _is_pydecimal: return str(int_to_decimal(n)) def inner(n, w): @@ -103,6 +106,7 @@ def inner(n, w): else: return inner(n, w) + def _str_to_int_inner(s): """Asymptotically fast conversion of a 'str' to an 'int'.""" diff --git a/Lib/test/test_int.py 
b/Lib/test/test_int.py index 47fc50a0e20349..c8626398b35b89 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -829,17 +829,28 @@ def tearDown(self): sys.set_int_max_str_digits(self._previous_limit) super().tearDown() - def test_pylong_int_to_decimal(self): - n = (1 << 100_000) - 1 - suffix = '9883109375' + def _test_pylong_int_to_decimal(self, n, suffix): s = str(n) - assert s[-10:] == suffix - s = str(-n) - assert s[-10:] == suffix - s = '%d' % n - assert s[-10:] == suffix - s = b'%d' % n - assert s[-10:] == suffix.encode('ascii') + self.assertEqual(s[-10:], suffix) + s2 = str(-n) + self.assertEqual(s2, '-' + s) + s3 = '%d' % n + self.assertEqual(s3, s) + s4 = b'%d' % n + self.assertEqual(s4, s.encode('ascii')) + + def test_pylong_int_to_decimal(self): + self._test_pylong_int_to_decimal((1 << 100_000), '9883109376') + self._test_pylong_int_to_decimal((1 << 100_000) - 1, '9883109375') + self._test_pylong_int_to_decimal(10**30_000, '0000000000') + self._test_pylong_int_to_decimal(10**30_000 - 1, '9999999999') + self._test_pylong_int_to_decimal(3**60_000, '9313200001') + + @support.requires_resource('cpu') + def test_pylong_int_to_decimal_2(self): + self._test_pylong_int_to_decimal(2**1_000_000, '2747109376') + self._test_pylong_int_to_decimal(10**300_000, '0000000000') + self._test_pylong_int_to_decimal(3**600_000, '3132000001') def test_pylong_int_divmod(self): n = (1 << 100_000) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst new file mode 100644 index 00000000000000..c587f8ac5c04de --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst @@ -0,0 +1,4 @@ +Break a loop between the Python implementation of the :mod:`decimal` module +and the Python code for integer to string conversion. 
Optimize integer to +string conversion for values in the range from 10_000 to 270_000 decimal +digits. From ff23ed651b99d4245e2f30619fba18837d88fbb2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 3 May 2024 00:13:48 +0300 Subject: [PATCH 4/9] More straightforward test for _decimal. --- Lib/_pylong.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 999462aa35c49c..9cec4194e8d8ad 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -14,9 +14,10 @@ import re import decimal - - -_is_pydecimal = hasattr(decimal.Decimal, '_power_exact') +try: + import _decimal +except ImportError: + _decimal = None def int_to_decimal(n): @@ -86,7 +87,7 @@ def inner(n, w): def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" w = n.bit_length() - if w > 900_000 and not _is_pydecimal: + if w > 900_000 and _decimal is not None: return str(int_to_decimal(n)) def inner(n, w): From 2108ba77a44e3b5591cb60c5808e966e85fda068 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 3 May 2024 22:39:57 +0300 Subject: [PATCH 5/9] Lower the cutoff. 
--- Lib/_pylong.py | 2 +- .../2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 9cec4194e8d8ad..05256ab8dde68b 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -87,7 +87,7 @@ def inner(n, w): def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" w = n.bit_length() - if w > 900_000 and _decimal is not None: + if w > 450_000 and _decimal is not None: return str(int_to_decimal(n)) def inner(n, w): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst index c587f8ac5c04de..5eb3b6f5009bc4 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst @@ -1,4 +1,4 @@ Break a loop between the Python implementation of the :mod:`decimal` module -and the Python code for integer to string conversion. Optimize integer to -string conversion for values in the range from 10_000 to 270_000 decimal +and the Python code for integer to string conversion. Also optimize integer +to string conversion for values in the range from 9_000 to 135_000 decimal digits. From dd263a52447962f0cd472cf8fa011e24d28d0b62 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 May 2024 10:08:16 +0300 Subject: [PATCH 6/9] Add some comments. --- Lib/_pylong.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 05256ab8dde68b..171ed626cd29b3 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -88,8 +88,15 @@ def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" w = n.bit_length() if w > 450_000 and _decimal is not None: + # It is only usable with the C decimal implementation. 
+ # _pydecimal.py calls str() on very large integers, which in its + # turn calls int_to_decimal_string(), causing very deep recursion. return str(int_to_decimal(n)) + # Fallback algorithm for the case when the C decimal module isn't + # available. This algorithm is asymptotically worse than the algorithm + # using the decimal module, but better than the quadratic time + # implementation in longobject.c. def inner(n, w): if w <= 1000: return str(n) From a3b8e6b09f6e63e8a1a69c717dc0677eb2457708 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 May 2024 11:26:28 +0300 Subject: [PATCH 7/9] Handle errors of inexact guess of the number of decimal digits. --- Lib/_pylong.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 171ed626cd29b3..ef2f7f0a3bed67 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -99,7 +99,16 @@ def int_to_decimal_string(n): # implementation in longobject.c. def inner(n, w): if w <= 1000: - return str(n) + try: + # In normal cases n is small enough here, so the built-in + # longobject.c algorithm is the fastest. + return str(n) + except ValueError: + # In very rare cases, when our guess of w is too small and + # n is too large, we can hit the limit for int to str + # conversion in str(). Call int_to_decimal_string() which + # has no such limitation directly. + return int_to_decimal_string(n) w2 = w >> 1 d = pow10_cache.get(w2) if d is None: @@ -107,12 +116,26 @@ def inner(n, w): hi, lo = divmod(n, d) return inner(hi, w - w2) + inner(lo, w2).zfill(w2) + # The estimation of the number of decimal digits. + # There is no harm in small error. If we guess too large, there may + # be leading 0's that need to be stripped. If we guess too small, we + # may need to call str() recursively for the remaining highest digits, + # which can still potentially be a large integer. 
This is manifested + # only if the number has way more than 10**15 digits, that exceeds + # the 52-bit physical address limit in both Intel64 and AMD64. w = int(w * 0.3010299956639812 + 1) # log10(2) pow10_cache = {} if n < 0: - return '-' + inner(-n, w) + n = -n + sign = '-' else: - return inner(n, w) + sign = '' + s = inner(n, w) + if s[0] == '0' and n: + # If our guess of w is too large, there may be leading 0's that + # need to be stripped. + s = s.lstrip('0') + return sign + s def _str_to_int_inner(s): From dbac36835540d62f8dc95d2a0b630171340f9d50 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 May 2024 22:31:42 +0300 Subject: [PATCH 8/9] Update Lib/_pylong.py Co-authored-by: Tim Peters --- Lib/_pylong.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index ef2f7f0a3bed67..9d7fde6f274518 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -112,7 +112,7 @@ def inner(n, w): w2 = w >> 1 d = pow10_cache.get(w2) if d is None: - d = pow10_cache[w2] = 10**w2 + d = pow10_cache[w2] = 5**w2 << w2 # 10**i = (5*2)**i = 5**i * 2**i hi, lo = divmod(n, d) return inner(hi, w - w2) + inner(lo, w2).zfill(w2) From 9db896683c306e01b1b1467b87952ac08482162c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 May 2024 22:33:03 +0300 Subject: [PATCH 9/9] Remove handling of the case of too small guess for w. --- Lib/_pylong.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 9d7fde6f274518..30bee6fc9ef54c 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -99,16 +99,7 @@ def int_to_decimal_string(n): # implementation in longobject.c. def inner(n, w): if w <= 1000: - try: - # In normal cases n is small enough here, so the built-in - # longobject.c algorithm is the fastest. 
- return str(n) - except ValueError: - # In very rare cases, when our guess of w is too small and - # n is too large, we can hit the limit for int to str - # conversion in str(). Call int_to_decimal_string() which - # has no such limitation directly. - return int_to_decimal_string(n) + return str(n) w2 = w >> 1 d = pow10_cache.get(w2) if d is None: