Skip to content

[3.12] gh-118164: Break a loop between _pydecimal and _pylong and optimize int to str conversion (GH-118483) #118590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion Lib/_pylong.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

import re
import decimal
try:
import _decimal
except ImportError:
_decimal = None


def int_to_decimal(n):
Expand Down Expand Up @@ -82,7 +86,47 @@ def inner(n, w):

def int_to_decimal_string(n):
"""Asymptotically fast conversion of an 'int' to a decimal string."""
return str(int_to_decimal(n))
w = n.bit_length()
if w > 450_000 and _decimal is not None:
# It is only usable with the C decimal implementation.
# _pydecimal.py calls str() on very large integers, which in its
# turn calls int_to_decimal_string(), causing very deep recursion.
return str(int_to_decimal(n))

# Fallback algorithm for the case when the C decimal module isn't
# available. This algorithm is asymptotically worse than the algorithm
# using the decimal module, but better than the quadratic time
# implementation in longobject.c.
def inner(n, w):
if w <= 1000:
return str(n)
w2 = w >> 1
d = pow10_cache.get(w2)
if d is None:
d = pow10_cache[w2] = 5**w2 << w2 # 10**i = (5*2)**i = 5**i * 2**i
hi, lo = divmod(n, d)
return inner(hi, w - w2) + inner(lo, w2).zfill(w2)

# The estimation of the number of decimal digits.
# There is no harm in small error. If we guess too large, there may
# be leading 0's that need to be stripped. If we guess too small, we
# may need to call str() recursively for the remaining highest digits,
# which can still potentially be a large integer. This is manifested
# only if the number has way more than 10**15 digits, that exceeds
# the 52-bit physical address limit in both Intel64 and AMD64.
w = int(w * 0.3010299956639812 + 1) # log10(2)
pow10_cache = {}
if n < 0:
n = -n
sign = '-'
else:
sign = ''
s = inner(n, w)
if s[0] == '0' and n:
# If our guess of w is too large, there may be leading 0's that
# need to be stripped.
s = s.lstrip('0')
return sign + s


def _str_to_int_inner(s):
Expand Down
31 changes: 21 additions & 10 deletions Lib/test/test_int.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,17 +834,28 @@ def tearDown(self):
sys.set_int_max_str_digits(self._previous_limit)
super().tearDown()

def test_pylong_int_to_decimal(self):
n = (1 << 100_000) - 1
suffix = '9883109375'
def _test_pylong_int_to_decimal(self, n, suffix):
s = str(n)
assert s[-10:] == suffix
s = str(-n)
assert s[-10:] == suffix
s = '%d' % n
assert s[-10:] == suffix
s = b'%d' % n
assert s[-10:] == suffix.encode('ascii')
self.assertEqual(s[-10:], suffix)
s2 = str(-n)
self.assertEqual(s2, '-' + s)
s3 = '%d' % n
self.assertEqual(s3, s)
s4 = b'%d' % n
self.assertEqual(s4, s.encode('ascii'))

def test_pylong_int_to_decimal(self):
self._test_pylong_int_to_decimal((1 << 100_000), '9883109376')
self._test_pylong_int_to_decimal((1 << 100_000) - 1, '9883109375')
self._test_pylong_int_to_decimal(10**30_000, '0000000000')
self._test_pylong_int_to_decimal(10**30_000 - 1, '9999999999')
self._test_pylong_int_to_decimal(3**60_000, '9313200001')

@support.requires_resource('cpu')
def test_pylong_int_to_decimal_2(self):
self._test_pylong_int_to_decimal(2**1_000_000, '2747109376')
self._test_pylong_int_to_decimal(10**300_000, '0000000000')
self._test_pylong_int_to_decimal(3**600_000, '3132000001')

def test_pylong_int_divmod(self):
n = (1 << 100_000)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Break a loop between the Python implementation of the :mod:`decimal` module
and the Python code for integer to string conversion. Also optimize integer
to string conversion for values in the range from 9_000 to 135_000 decimal
digits.