From 576d7729839a01eceef22e973fef67da99f7d01d Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Fri, 19 Aug 2022 01:27:10 -0700 Subject: [PATCH 01/14] Backport to 3.9 of psrt/CVE-2020-10735-3.10backport. --- Doc/library/functions.rst | 8 + Doc/library/json.rst | 11 ++ Doc/library/stdtypes.rst | 162 ++++++++++++++++++ Doc/library/sys.rst | 59 +++++-- Doc/library/test.rst | 10 ++ Doc/using/cmdline.rst | 13 ++ Doc/whatsnew/3.9.rst | 12 ++ Include/internal/pycore_initconfig.h | 2 + Include/internal/pycore_interp.h | 2 + Include/internal/pycore_long.h | 49 ++++++ Lib/test/support/__init__.py | 11 ++ Lib/test/test_ast.py | 8 + Lib/test/test_cmd_line.py | 33 ++++ Lib/test/test_compile.py | 13 ++ Lib/test/test_decimal.py | 18 ++ Lib/test/test_int.py | 114 ++++++++++++ Lib/test/test_json/test_decode.py | 9 + Lib/test/test_sys.py | 11 +- Lib/test/test_xmlrpc.py | 10 ++ ...22-08-07-16-53.gh-issue-95778.ch010gps.rst | 11 ++ Objects/longobject.c | 47 ++++- Parser/pegen/pegen.c | 18 ++ Python/clinic/sysmodule.c.h | 60 ++++++- Python/initconfig.c | 60 +++++++ Python/sysmodule.c | 46 ++++- 25 files changed, 779 insertions(+), 18 deletions(-) create mode 100644 Include/internal/pycore_long.h create mode 100644 Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 13d7d6e5b0aa35..80b56fd7c2cd4a 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -844,6 +844,14 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 Falls back to :meth:`__index__` if :meth:`__int__` is not defined. + .. versionchanged:: 3.9.14 + :class:`int` string inputs and string representations can be limited to + help avoid denial of service attacks. A :exc:`ValueError` is raised when + the limit is exceeded while converting a string *x* to an :class:`int` or + when converting an :class:`int` into a string would exceed the limit. 
+ See the :ref:`integer string conversion length limitation + <int_max_str_digits>` documentation. + .. function:: isinstance(object, classinfo) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 608e70df5b14c9..20d3c9840f4bfc 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,6 +18,11 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. warning:: + Be cautious when parsing JSON data from untrusted sources. A malicious + JSON string may cause the decoder to consume considerable CPU and memory + resources. Limiting the size of data to be parsed is recommended. + :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. @@ -248,6 +253,12 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). + .. versionchanged:: 3.9.14 + The default *parse_int* of :func:`int` now limits the maximum length of + the integer string via the interpreter's :ref:`integer string + conversion length limitation <int_max_str_digits>` to help avoid denial + of service attacks. + *parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to raise an exception if invalid JSON numbers diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 2892486757e142..6d62886962599c 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -563,6 +563,13 @@ class`. float also has the following additional methods. :exc:`OverflowError` on infinities and a :exc:`ValueError` on NaNs. + .. note:: + + The values returned by ``as_integer_ratio()`` can be huge. Attempts + to render such integers into decimal strings may bump into the + :ref:`integer string conversion length limitation + <int_max_str_digits>`. + .. 
method:: float.is_integer() Return ``True`` if the float instance is finite with integral @@ -5244,6 +5251,161 @@ types, where they are relevant. Some of these are not reported by the [] +.. _int_max_str_digits: + +Integer string conversion length limitation +=========================================== + +CPython has a global limit for converting between :class:`int` and :class:`str` +to mitigate denial of service attacks. This limit *only* applies to decimal or +other non-power-of-two number bases. Hexadecimal, octal, and binary conversions +are unlimited. The limit can be configured. + +The :class:`int` type in CPython is an arbitrary length number stored in binary +form (commonly known as a "bignum"). There exists no algorithm that can convert +a string to a binary integer or a binary integer to a string in linear time, +*unless* the base is a power of 2. Even the best known algorithms for base 10 +have sub-quadratic complexity. Converting a large value such as ``int('1' * +500_000)`` can take over a second on a fast CPU. + +Limiting conversion size offers a practical way to avoid `CVE-2020-10735 +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + +The limit is applied to the number of digit characters in the input or output +string when a non-linear conversion algorithm would be involved. Underscores +and the sign are not counted towards the limit. + +When an operation would exceed the limit, a :exc:`ValueError` is raised:: + + >>> import sys + >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. + >>> _ = int('2' * 5432) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits. + >>> i = int('2' * 4300) + >>> len(str(i)) + 4300 + >>> i_squared = i*i + >>> len(str(i_squared)) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. 
+ >>> len(hex(i_squared)) + 7144 + >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. + +The default limit is 4300 digits as provided in +:data:`sys.int_info.default_max_str_digits <sys.int_info>`. +The lowest limit that can be configured is 640 digits as provided in +:data:`sys.int_info.str_digits_check_threshold <sys.int_info>`. + +Verification:: + + >>> import sys + >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info + >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info + >>> msg = int('578966293710682886880994035146873798396722250538762761564' + ... '9252925514383915483333812743580549779436104706260696366600' + ... '571186405732').to_bytes(53, 'big') + ... + +.. versionadded:: 3.9.14 + +Affected APIs +------------- + +The limitation only applies to potentially slow conversions between :class:`int` +and :class:`str` or :class:`bytes`: + +* ``int(string)`` with default base 10. +* ``int(string, base)`` for all bases that are not a power of 2. +* ``str(integer)``. +* ``repr(integer)``. +* any other string conversion to base 10, for example ``f"{integer}"``, + ``"{}".format(integer)``, or ``b"%d" % integer``. + +The limitations do not apply to functions with a linear algorithm: + +* ``int(string, base)`` with base 2, 4, 8, 16, or 32. +* :func:`int.from_bytes` and :func:`int.to_bytes`. +* :func:`hex`, :func:`oct`, :func:`bin`. +* :ref:`formatspec` for hex, octal, and binary numbers. +* :class:`str` to :class:`float`. +* :class:`str` to :class:`decimal.Decimal`. + +Configuring the limit +--------------------- + +Before Python starts up you can use an environment variable or an interpreter +command line flag to configure the limit: + +* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. + ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to 640 or + ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. +* :option:`-X int_max_str_digits <-X>`, e.g. 
+ ``python3 -X int_max_str_digits=640`` +* :data:`sys.flags.int_max_str_digits` contains the value of + :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`. + If both the env var and the ``-X`` option are set, the ``-X`` option takes + precedence. A value of *-1* indicates that both were unset, thus a value of + :data:`sys.int_info.default_max_str_digits` was used during initialization. + +From code, you can inspect the current limit and set a new one using these +:mod:`sys` APIs: + +* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are + a getter and setter for the interpreter-wide limit. Subinterpreters have + their own limit. + +Information about the default and minimum can be found in :attr:`sys.int_info`: + +* :data:`sys.int_info.default_max_str_digits <sys.int_info>` is the compiled-in + default limit. +* :data:`sys.int_info.str_digits_check_threshold <sys.int_info>` is the lowest + accepted value for the limit (other than 0 which disables it). + +.. versionadded:: 3.9.14 + +.. caution:: + + Setting a low limit *can* lead to problems. While rare, code exists that + contains integer constants in decimal in their source that exceed the + minimum threshold. A consequence of setting the limit is that Python source + code containing decimal integer literals longer than the limit will + encounter an error during parsing, usually at startup time or import time or + even at installation time - anytime an up to date ``.pyc`` does not already + exist for the code. A workaround for source that contains such large + constants is to convert them to ``0x`` hexadecimal form as it has no limit. + + Test your application thoroughly if you use a low limit. Ensure your tests + run with the limit set early via the environment or flag so that it applies + during startup and even during any installation step that may invoke Python + to precompile ``.py`` sources to ``.pyc`` files. 
+ +Recommended configuration +------------------------- + +The default :data:`sys.int_info.default_max_str_digits` is expected to be +reasonable for most applications. If your application requires a different +limit, set it from your main entry point using Python version agnostic code as +these APIs were added in security patch releases in versions before 3.11. + +Example:: + + >>> import sys + >>> if hasattr(sys, "set_int_max_str_digits"): + ... upper_bound = 68000 + ... lower_bound = 4004 + ... current_limit = sys.get_int_max_str_digits() + ... if current_limit == 0 or current_limit > upper_bound: + ... sys.set_int_max_str_digits(upper_bound) + ... elif current_limit < lower_bound: + ... sys.set_int_max_str_digits(lower_bound) + +If you need to disable it entirely, set it to ``0``. + + .. rubric:: Footnotes .. [1] Additional information on these special methods may be found in the Python diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 03986db16a78c7..014cd576e1e10d 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -443,9 +443,9 @@ always available. The :term:`named tuple` *flags* exposes the status of command line flags. The attributes are read only. - ============================= ================================================================ + ============================= ============================================================================================================== attribute flag - ============================= ================================================================ + ============================= ============================================================================================================== :const:`debug` :option:`-d` :const:`inspect` :option:`-i` :const:`interactive` :option:`-i` @@ -461,7 +461,8 @@ always available. 
:const:`hash_randomization` :option:`-R` :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) :const:`utf8_mode` :option:`-X utf8 <-X>` - ============================= ================================================================ + :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) + ============================= ============================================================================================================== .. versionchanged:: 3.2 Added ``quiet`` attribute for the new :option:`-q` flag. @@ -480,6 +481,9 @@ always available. Mode ` and the ``utf8_mode`` attribute for the new :option:`-X` ``utf8`` flag. + .. versionchanged:: 3.10.7 + Added the ``int_max_str_digits`` attribute. + .. data:: float_info @@ -658,6 +662,15 @@ always available. .. versionadded:: 3.6 + +.. function:: get_int_max_str_digits() + + Returns the current value for the :ref:`integer string conversion length + limitation `. See also :func:`set_int_max_str_digits`. + + .. versionadded:: 3.9.14 + + .. function:: getrefcount(object) Return the reference count of the *object*. The count returned is generally one @@ -931,19 +944,31 @@ always available. .. tabularcolumns:: |l|L| - +-------------------------+----------------------------------------------+ - | Attribute | Explanation | - +=========================+==============================================+ - | :const:`bits_per_digit` | number of bits held in each digit. 
Python | - | | integers are stored internally in base | - | | ``2**int_info.bits_per_digit`` | - +-------------------------+----------------------------------------------+ - | :const:`sizeof_digit` | size in bytes of the C type used to | - | | represent a digit | - +-------------------------+----------------------------------------------+ + +----------------------------------------+-----------------------------------------------+ + | Attribute | Explanation | + +========================================+===============================================+ + | :const:`bits_per_digit` | number of bits held in each digit. Python | + | | integers are stored internally in base | + | | ``2**int_info.bits_per_digit`` | + +----------------------------------------+-----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +----------------------------------------+-----------------------------------------------+ + | :const:`default_max_str_digits` | default value for | + | | :func:`sys.get_int_max_str_digits` when it | + | | is not otherwise explicitly configured. | + +----------------------------------------+-----------------------------------------------+ + | :const:`str_digits_check_threshold` | minimum non-zero value for | + | | :func:`sys.set_int_max_str_digits`, | + | | :envvar:`PYTHONINTMAXSTRDIGITS`, or | + | | :option:`-X int_max_str_digits <-X>`. | + +----------------------------------------+-----------------------------------------------+ .. versionadded:: 3.1 + .. versionchanged:: 3.10.7 + Added ``default_max_str_digits`` and ``str_digits_check_threshold``. + .. data:: __interactivehook__ @@ -1221,6 +1246,14 @@ always available. .. availability:: Unix. +.. function:: set_int_max_str_digits(n) + + Set the :ref:`integer string conversion length limitation + ` used by this interpreter. See also + :func:`get_int_max_str_digits`. + + .. versionadded:: 3.10.7 + .. 
function:: setprofile(profilefunc) .. index:: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 16f908c8e87080..563197f8e1bc1b 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -1302,6 +1302,16 @@ The :mod:`test.support` module defines the following functions: .. versionadded:: 3.6 +.. function:: adjust_int_max_str_digits(max_digits) + + This function returns a context manager that will change the global + :func:`sys.set_int_max_str_digits` setting for the duration of the + context to allow execution of test code that needs a different limit + on the number of digits when converting between an integer and string. + + .. versionadded:: 3.9.14 + + The :mod:`test.support` module defines the following classes: .. class:: TransientResource(exc, **kwargs) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 5739388ecc5f7d..fdbf02026ea41c 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -436,6 +436,9 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. + * ``-X int_max_str_digits`` configures the :ref:`integer string conversion + length limitation `. See also + :envvar:`PYTHONINTMAXSTRDIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded @@ -480,6 +483,9 @@ Miscellaneous options The ``-X showalloccount`` option has been removed. + .. versionadded:: 3.9.14 + The ``-X int_max_str_digits`` option. + .. deprecated-removed:: 3.9 3.10 The ``-X oldparser`` option. @@ -659,6 +665,13 @@ conflict. .. versionadded:: 3.2.3 +.. envvar:: PYTHONINTMAXSTRDIGITS + + If this variable is set to an integer, it is used to configure the + interpreter's global :ref:`integer string conversion length limitation + `. + + .. 
versionadded:: 3.9.14 .. envvar:: PYTHONIOENCODING diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 6aed8d2b5f984d..3c2564445bf7ad 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -271,6 +271,18 @@ Other Language Changes ``if`` clause in comprehensions and generator expressions. See :issue:`41848` and :issue:`43755` for details. +* New security feature in 3.9.14: + Converting between :class:`int` and :class:`str` in bases other than 2 + (binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) + now raises a :exc:`ValueError` if the number of digits in string form is + above a limit to avoid potential denial of service attacks due to the + algorithmic complexity. This is a mitigation for `CVE-2020-10735 + <https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + This limit can be configured or disabled by environment variable, command + line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion + length limitation <int_max_str_digits>` documentation. The default limit + is 4300 digits in string form. + New Modules =========== diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 457a005860b202..ad1b7e55e0140f 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -156,6 +156,8 @@ extern PyStatus _PyConfig_SetPyArgv( PyConfig *config, const _PyArgv *args); +extern int _Py_global_config_int_max_str_digits; + /* --- Function used for testing ---------------------------------- */ diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 551ad833bb6927..304d704a2c1c2b 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -154,6 +154,8 @@ struct _is { */ PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; #endif + + int int_max_str_digits; }; /* Used by _PyImport_Cleanup() */ diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h new file mode 100644 index 00000000000000..f509fe2ee90386 --- /dev/null +++ 
b/Include/internal/pycore_long.h @@ -0,0 +1,49 @@ +#ifndef Py_INTERNAL_LONG_H +#define Py_INTERNAL_LONG_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* + * Default int base conversion size limitation: Denial of Service prevention. + * + * Chosen such that this isn't wildly slow on modern hardware and so that + * everyone's existing deployed numpy test suite passes before + * https://github.com/numpy/numpy/issues/22098 is widely available. + * + * $ python -m timeit -s 's = "1"*4300' 'int(s)' + * 2000 loops, best of 5: 125 usec per loop + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' + * 1000 loops, best of 5: 311 usec per loop + * (zen2 cloud VM) + * + * 4300 decimal digits fits a ~14284 bit number. + */ +#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300 +/* + * Threshold for max digits check. For performance reasons int() and + * int.__str__() don't check values that are smaller than this + * threshold. Acts as a guaranteed minimum size limit for bignums that + * applications can expect from CPython. + * + * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))' + * 20000 loops, best of 5: 12 usec per loop + * + * "640 digits should be enough for anyone." - gps + * fits a ~2126 bit decimal number. + */ +#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640 + +#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ + (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) +# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold." 
+#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_LONG_H */ diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 86ac8f096695fb..6dc08135e0ea14 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -3294,3 +3294,14 @@ def clear_ignored_deprecations(*tokens: object) -> None: if warnings.filters != new_filters: warnings.filters[:] = new_filters warnings._filters_mutated() + + +@contextlib.contextmanager +def adjust_int_max_str_digits(max_digits): + """Temporarily change the integer string conversion length limit.""" + current = sys.get_int_max_str_digits() + try: + sys.set_int_max_str_digits(max_digits) + yield + finally: + sys.set_int_max_str_digits(current) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index c3e3be6335340b..b834921041fc46 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -978,6 +978,14 @@ def test_literal_eval(self): self.assertRaises(ValueError, ast.literal_eval, '+True') self.assertRaises(ValueError, ast.literal_eval, '2+3') + def test_literal_eval_str_int_limit(self): + with support.adjust_int_max_str_digits(4000): + ast.literal_eval('3'*4000) # no error + with self.assertRaises(SyntaxError) as err_ctx: + ast.literal_eval('3'*4001) + self.assertIn('Exceeds the limit ', str(err_ctx.exception)) + self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) + def test_literal_eval_complex(self): # Issue #4907 self.assertEqual(ast.literal_eval('6j'), 6j) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 4b3e33c4fd3544..e38fc698c52d54 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -815,6 +815,39 @@ def test_parsing_error(self): self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) self.assertNotEqual(proc.returncode, 0) + def test_int_max_str_digits(self): + code = "import sys; print(sys.flags.int_max_str_digits, sys.get_int_max_str_digits())" + + assert_python_failure('-X', 'int_max_str_digits', 
'-c', code) + assert_python_failure('-X', 'int_max_str_digits=foo', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=100', '-c', code) + + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo') + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100') + + def res2int(res): + out = res.out.strip().decode("utf-8") + return tuple(int(i) for i in out.split()) + + res = assert_python_ok('-c', code) + self.assertEqual(res2int(res), (-1, sys.get_int_max_str_digits())) + res = assert_python_ok('-X', 'int_max_str_digits=0', '-c', code) + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-X', 'int_max_str_digits=4000', '-c', code) + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok('-X', 'int_max_str_digits=100000', '-c', code) + self.assertEqual(res2int(res), (100000, 100000)) + + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0') + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000') + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok( + '-X', 'int_max_str_digits=6000', '-c', code, + PYTHONINTMAXSTRDIGITS='4000' + ) + self.assertEqual(res2int(res), (6000, 6000)) + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 55716fd4b96c3c..159e3fb80f4a0e 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -189,6 +189,19 @@ def test_literals_with_leading_zeroes(self): self.assertEqual(eval("0o777"), 511) self.assertEqual(eval("-0o0000010"), -8) + def test_int_literals_too_long(self): + n = 3000 + source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4" + with support.adjust_int_max_str_digits(n): + compile(source, "", "exec") # no errors. 
+ with support.adjust_int_max_str_digits(n-1): + with self.assertRaises(SyntaxError) as err_ctx: + compile(source, "", "exec") + exc = err_ctx.exception + self.assertEqual(exc.lineno, 3) + self.assertIn('Exceeds the limit ', str(exc)) + self.assertIn(' Consider hexidecimal ', str(exc)) + def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 if sys.maxsize == 2147483647: diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 58f4df3060169c..7b1488f5aaaec9 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -2452,6 +2452,15 @@ class CUsabilityTest(UsabilityTest): class PyUsabilityTest(UsabilityTest): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + super().tearDown() + class PythonAPItests(unittest.TestCase): def test_abc(self): @@ -4509,6 +4518,15 @@ class CCoverage(Coverage): class PyCoverage(Coverage): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + super().tearDown() + class PyFunctionality(unittest.TestCase): """Extra functionality in decimal.py""" diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 6fdf52ef23f65f..54a96ad66459fb 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -571,5 +571,119 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) +class IntStrDigitLimitsTests(unittest.TestCase): + + int_class = int # Override this in subclasses to reuse the suite. 
+ + def setUp(self): + super().setUp() + self._previous_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(2048) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_limit) + super().tearDown() + + def test_disabled_limit(self): + self.assertGreater(sys.get_int_max_str_digits(), 0) + self.assertLess(sys.get_int_max_str_digits(), 20_000) + with support.adjust_int_max_str_digits(0): + self.assertEqual(sys.get_int_max_str_digits(), 0) + i = self.int_class('1' * 20_000) + str(i) + self.assertGreater(sys.get_int_max_str_digits(), 0) + + def test_max_str_digits_edge_cases(self): + """Ignore the +/- sign and space padding.""" + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + + int_class('1' * maxdigits) + int_class(' ' + '1' * maxdigits) + int_class('1' * maxdigits + ' ') + int_class('+' + '1' * maxdigits) + int_class('-' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + + def check(self, i, base=None): + with self.assertRaises(ValueError): + if base is None: + self.int_class(i) + else: + self.int_class(i, base) + + def test_max_str_digits(self): + maxdigits = sys.get_int_max_str_digits() + + self.check('1' * (maxdigits + 1)) + self.check(' ' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1) + ' ') + self.check('+' + '1' * (maxdigits + 1)) + self.check('-' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1)) + + i = 10 ** maxdigits + with self.assertRaises(ValueError): + str(i) + + def test_power_of_two_bases_unlimited(self): + """The limit does not apply to power of 2 bases.""" + maxdigits = sys.get_int_max_str_digits() + + for base in (2, 4, 8, 16, 32): + with self.subTest(base=base): + self.int_class('1' * (maxdigits + 1), base) + assert maxdigits < 100_000 + self.int_class('1' * 100_000, base) + + def test_underscores_ignored(self): + maxdigits = sys.get_int_max_str_digits() + + triples = maxdigits // 3 + s = '111' * triples + s_ = '1_11' * triples + 
self.int_class(s) # succeeds + self.int_class(s_) # succeeds + self.check(f'{s}111') + self.check(f'{s_}_111') + + def test_sign_not_counted(self): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '5' * max_digits + i = int_class(s) + pos_i = int_class(f'+{s}') + assert i == pos_i + neg_i = int_class(f'-{s}') + assert -pos_i == neg_i + str(pos_i) + str(neg_i) + + def _other_base_helper(self, base): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '2' * max_digits + i = int_class(s, base) + if base > 10: + with self.assertRaises(ValueError): + str(i) + elif base < 10: + str(i) + with self.assertRaises(ValueError) as err: + int_class(f'{s}1', base) + + def test_int_from_other_bases(self): + base = 3 + with self.subTest(base=base): + self._other_base_helper(base) + base = 36 + with self.subTest(base=base): + self._other_base_helper(base) + + +class IntSubclassStrDigitLimitsTests(IntStrDigitLimitsTests): + int_class = IntSubclass + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index fdb9e62124ece1..124045b13184b3 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -2,6 +2,7 @@ from io import StringIO from collections import OrderedDict from test.test_json import PyTest, CTest +from test import support class TestDecode: @@ -95,5 +96,13 @@ def test_negative_index(self): d = self.json.JSONDecoder() self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) + def test_limit_int(self): + maxdigits = 5000 + with support.adjust_int_max_str_digits(maxdigits): + self.loads('1' * maxdigits) + with self.assertRaises(ValueError): + self.loads('1' * (maxdigits + 1)) + + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index ed85d185412ea8..ef32424eabaefa 100644 --- a/Lib/test/test_sys.py +++ 
b/Lib/test/test_sys.py @@ -409,11 +409,17 @@ def test_attributes(self): self.assertIsInstance(sys.executable, str) self.assertEqual(len(sys.float_info), 11) self.assertEqual(sys.float_info.radix, 2) - self.assertEqual(len(sys.int_info), 2) + self.assertEqual(len(sys.int_info), 4) self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) self.assertTrue(sys.int_info.sizeof_digit >= 1) + self.assertGreaterEqual(sys.int_info.default_max_str_digits, 500) + self.assertGreaterEqual(sys.int_info.str_digits_check_threshold, 100) + self.assertGreater(sys.int_info.default_max_str_digits, + sys.int_info.str_digits_check_threshold) self.assertEqual(type(sys.int_info.bits_per_digit), int) self.assertEqual(type(sys.int_info.sizeof_digit), int) + self.assertIsInstance(sys.int_info.default_max_str_digits, int) + self.assertIsInstance(sys.int_info.str_digits_check_threshold, int) self.assertIsInstance(sys.hexversion, int) self.assertEqual(len(sys.hash_info), 9) @@ -517,7 +523,8 @@ def test_sys_flags(self): "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", - "hash_randomization", "isolated", "dev_mode", "utf8_mode") + "hash_randomization", "isolated", "dev_mode", "utf8_mode", + "int_max_str_digits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index f714b773eeecc3..d12da2f0cc8479 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -285,6 +285,16 @@ def test_load_extension_types(self): check('9876543210.0123456789', decimal.Decimal('9876543210.0123456789')) + def test_limit_int(self): + check = self.check_loads + maxdigits = 5000 + with support.adjust_int_max_str_digits(maxdigits): + s = '1' * (maxdigits + 1) + with self.assertRaises(ValueError): + check(f'{s}', None) + with self.assertRaises(ValueError): + check(f'{s}', None) + def 
test_get_host_info(self): # see bug #3613, this raised a TypeError transp = xmlrpc.client.Transport() diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst new file mode 100644 index 00000000000000..a69e879df7574b --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -0,0 +1,11 @@ +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) now +raises a :exc:`ValueError` if the number of digits in string form is above a +limit to avoid potential denial of service attacks due to the algorithmic +complexity. This is a mitigation for `CVE-2020-10735 +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + +This new limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length +limitation <int_max_str_digits>` documentation. The default limit is 4300 +digits in string form.
diff --git a/Objects/longobject.c b/Objects/longobject.c index cf13b2c4301778..a53eabee4cc4ed 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3,7 +3,9 @@ /* XXX The functional organization of this file is terrible */ #include "Python.h" +#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits #include "pycore_interp.h" // _PY_NSMALLPOSINTS +#include "pycore_long.h" #include "pycore_pystate.h" // _Py_IsMainInterpreter() #include "longintrepr.h" @@ -36,6 +38,8 @@ PyObject *_PyLong_One = NULL; #define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS) +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" + static PyObject * get_small_int(sdigit ival) { @@ -1777,6 +1781,17 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } + if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { + Py_DECREF(scratch); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, strlen_nosign); + return -1; + } + } if (writer) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { Py_DECREF(scratch); @@ -2290,6 +2305,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { + /* binary bases are not limited by int_max_str_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2441,6 +2457,17 @@ digit beyond the first. goto onError; } + /* Limit the size to avoid excessive computation attacks. 
*/ + if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && (digits > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, digits); + return NULL; + } + } + /* Create an int object that can contain the largest possible * integer with this base and length. Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -5071,6 +5098,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) } return PyLong_FromLong(0L); } + /* default base and limit, forward to standard implementation */ if (obase == NULL) return PyNumber_Long(x); @@ -5723,6 +5751,8 @@ internal representation of integers. The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, {NULL, NULL} }; @@ -5730,7 +5760,7 @@ static PyStructSequence_Desc int_info_desc = { "sys.int_info", /* name */ int_info__doc__, /* doc */ int_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -5745,6 +5775,17 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + /* + * The following two fields were added after investigating uses of + * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was + * numba using sys.int_info.bits_per_digit as attribute access rather than + * sequence unpacking. Cython and sympy also refer to sys.int_info but only + * as info for debugging. No concern about adding these in a backport. 
+ */ + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -5790,6 +5831,10 @@ _PyLong_Init(PyThreadState *tstate) } } } + tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits; + if (tstate->interp->int_max_str_digits == -1) { + tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; + } return 1; } diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index cdfbc12d16a340..7ac7e1c2efb80f 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -967,6 +967,24 @@ _PyPegen_number_token(Parser *p) if (c == NULL) { p->error_indicator = 1; + PyObject *exc_type, *exc_value, *exc_tb; + PyErr_Fetch(&exc_type, &exc_value, &exc_tb); + // The only way a ValueError should happen in _this_ code is via + // PyLong_FromString hitting a length limit. + if (exc_type == PyExc_ValueError && exc_value != NULL) { + // The Fetch acted as PyErr_Clear(), we're replacing the exception. 
+ Py_XDECREF(exc_tb); + Py_DECREF(exc_type); + RAISE_ERROR_KNOWN_LOCATION( + p, PyExc_SyntaxError, + t->lineno, 0 /* col_offset */, + "%S - Consider hexidecimal for huge integer literals " + "to avoid decimal conversion limits.", + exc_value); + Py_DECREF(exc_value); + } else { + PyErr_Restore(exc_type, exc_value, exc_tb); + } return NULL; } diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 4615ebaab5de2b..41444080b5bf6b 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -667,6 +667,64 @@ sys_mdebug(PyObject *module, PyObject *arg) #endif /* defined(USE_MALLOPT) */ +PyDoc_STRVAR(sys_get_int_max_str_digits__doc__, +"get_int_max_str_digits($module, /)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \ + {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__}, + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module); + +static PyObject * +sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys_get_int_max_str_digits_impl(module); +} + +PyDoc_STRVAR(sys_set_int_max_str_digits__doc__, +"set_int_max_str_digits($module, /, maxdigits)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \ + {"set_int_max_str_digits", (PyCFunction)(void(*)(void))sys_set_int_max_str_digits, METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__}, + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits); + +static PyObject * +sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"maxdigits", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_str_digits", 0}; + 
PyObject *argsbuf[1]; + int maxdigits; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_Check(args[0])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + maxdigits = _PyLong_AsInt(args[0]); + if (maxdigits == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = sys_set_int_max_str_digits_impl(module, maxdigits); + +exit: + return return_value; +} + PyDoc_STRVAR(sys_getrefcount__doc__, "getrefcount($module, object, /)\n" "--\n" @@ -970,4 +1028,4 @@ sys_getandroidapilevel(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=39eb34a01fb9a919 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=401254a595859ac6 input=a9049054013a1b77]*/ diff --git a/Python/initconfig.c b/Python/initconfig.c index 116ee33fee1130..a2c435f34474ff 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3,6 +3,7 @@ #include "pycore_getopt.h" // _PyOS_GetOpt() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _PyInterpreterState.runtime +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_pathconfig.h" // _Py_path_config #include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pylifecycle.h" // _Py_PreInitializeFromConfig() @@ -99,6 +100,9 @@ static const char usage_3[] = "\ otherwise activate automatically)\n\ -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ given directory instead of to the code tree\n\ + -X int_max_str_digits=number: limit the size of int<->str conversions.\n\ + This helps avoid denial of service attacks when parsing untrusted data.\n\ + The default is sys.int_info.default_max_str_digits. 
0 disables.\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -125,6 +129,10 @@ static const char usage_6[] = " to seed the hashes of str and bytes objects. It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" +"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n" +" when converting from a string and when converting an int back to a str.\n" +" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n" +" 16, and 32 are never limited.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" @@ -646,6 +654,10 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->_use_peg_parser = 1; } +/* Excluded from public struct PyConfig for backporting reasons. */ +/* default to unconfigured, _PyLong_Init() does the rest */ +int _Py_global_config_int_max_str_digits = -1; + static void config_init_defaults(PyConfig *config) @@ -1410,6 +1422,48 @@ config_init_tracemalloc(PyConfig *config) return _PyStatus_OK(); } +static PyStatus +config_init_int_max_str_digits(PyConfig *config) +{ + int maxdigits; + int valid = 0; + + const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS"); + if (env) { + if (!_Py_str_to_int(env, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + if (!valid) { +#define STRINGIFY(VAL) _STRINGIFY(VAL) +#define _STRINGIFY(VAL) #VAL + return _PyStatus_ERR( + "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); + } + _Py_global_config_int_max_str_digits = maxdigits; + } + + const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); + if (xoption) { + const wchar_t *sep = wcschr(xoption, L'='); + if (sep) { + if (!config_wstr_to_int(sep + 1, 
&maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + } + if (!valid) { + return _PyStatus_ERR( + "-X int_max_str_digits: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); +#undef _STRINGIFY +#undef STRINGIFY + } + _Py_global_config_int_max_str_digits = maxdigits; + } + return _PyStatus_OK(); +} static PyStatus config_init_pycache_prefix(PyConfig *config) @@ -1466,6 +1520,12 @@ config_read_complex_options(PyConfig *config) return status; } } + if (_Py_global_config_int_max_str_digits < 0) { + status = config_init_int_max_str_digits(config); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } if (config->pycache_prefix == NULL) { status = config_init_pycache_prefix(config); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index a52b299474c5da..8efa850dce6fc3 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -19,6 +19,7 @@ Data members: #include "frameobject.h" // PyFrame_GetBack() #include "pycore_ceval.h" #include "pycore_initconfig.h" +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_object.h" #include "pycore_pathconfig.h" #include "pycore_pyerrors.h" @@ -1636,6 +1637,45 @@ sys_mdebug_impl(PyObject *module, int flag) } #endif /* USE_MALLOPT */ + +/*[clinic input] +sys.get_int_max_str_digits + +Set the maximum string digits limit for non-binary int<->str conversions. +[clinic start generated code]*/ + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module) +/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return PyLong_FromSsize_t(interp->int_max_str_digits); +} + +/*[clinic input] +sys.set_int_max_str_digits + + maxdigits: int + +Set the maximum string digits limit for non-binary int<->str conversions. 
+[clinic start generated code]*/ + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits) +/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/ +{ + PyThreadState *tstate = _PyThreadState_GET(); + if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { + tstate->interp->int_max_str_digits = maxdigits; + Py_RETURN_NONE; + } else { + PyErr_Format( + PyExc_ValueError, "maxdigits must be 0 or larger than %d", + _PY_LONG_MAX_STR_DIGITS_THRESHOLD); + return NULL; + } +} + size_t _PySys_GetSizeOf(PyObject *o) { @@ -1980,6 +2020,8 @@ static PyMethodDef sys_methods[] = { SYS_GET_ASYNCGEN_HOOKS_METHODDEF SYS_GETANDROIDAPILEVEL_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF + SYS_GET_INT_MAX_STR_DIGITS_METHODDEF + SYS_SET_INT_MAX_STR_DIGITS_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2440,6 +2482,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, + {"int_max_str_digits", "-X int_max_str_digits"}, {0} }; @@ -2447,7 +2490,7 @@ static PyStructSequence_Desc flags_desc = { "sys.flags", /* name */ flags__doc__, /* doc */ flags_fields, /* fields */ - 15 + 16 }; static PyObject* @@ -2483,6 +2526,7 @@ make_flags(PyThreadState *tstate) SetFlag(config->isolated); PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode)); SetFlag(preconfig->utf8_mode); + SetFlag(_Py_global_config_int_max_str_digits); #undef SetFlag if (_PyErr_Occurred(tstate)) { From e5cd3fca43600919fe5bec67b9a275c8b4400287 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sun, 21 Aug 2022 15:31:02 -0700 Subject: [PATCH 02/14] Fix versionadded/versionchanged. 
--- Doc/library/json.rst | 2 +- Doc/library/stdtypes.rst | 4 ++-- Doc/library/sys.rst | 6 +++--- Doc/using/cmdline.rst | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 20d3c9840f4bfc..4dc085fda8d385 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -253,7 +253,7 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - .. versionchanged:: 3.10.7 + .. versionchanged:: 3.9.14 The default *parse_int* of :func:`int` now limits the maximum length of the integer string via the interpreter's :ref:`integer string conversion length limitation ` to help avoid denial diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 6d62886962599c..bb1663f52a57bd 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5310,7 +5310,7 @@ Verification:: ... '571186405732').to_bytes(53, 'big') ... -.. versionadded:: 3.10.7 +.. versionadded:: 3.9.14 Affected APIs ------------- @@ -5365,7 +5365,7 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: * :data:`sys.int_info.str_digits_check_threshold ` is the lowest accepted value for the limit (other than 0 which disables it). -.. versionadded:: 3.10.7 +.. versionadded:: 3.9.14 .. caution:: diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 014cd576e1e10d..97c450e7ee09c5 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -481,7 +481,7 @@ always available. Mode ` and the ``utf8_mode`` attribute for the new :option:`-X` ``utf8`` flag. - .. versionchanged:: 3.10.7 + .. versionchanged:: 3.9.14 Added the ``int_max_str_digits`` attribute. @@ -966,7 +966,7 @@ always available. .. versionadded:: 3.1 - .. versionchanged:: 3.10.7 + .. versionchanged:: 3.9.14 Added ``default_max_str_digits`` and ``str_digits_check_threshold``. @@ -1252,7 +1252,7 @@ always available. ` used by this interpreter. See also :func:`get_int_max_str_digits`. - .. 
versionadded:: 3.10.7 + .. versionadded:: 3.9.14 .. function:: setprofile(profilefunc) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index fdbf02026ea41c..66d8d57aadf384 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -671,7 +671,7 @@ conflict. interpreter's global :ref:`integer string conversion length limitation `. - .. versionadded:: 3.10.7 + .. versionadded:: 3.9.14 .. envvar:: PYTHONIOENCODING From 9a3cd0557e51be1170382ac600f9e76acf7f9414 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sun, 21 Aug 2022 16:10:21 -0700 Subject: [PATCH 03/14] headers += Include/internal/pycore_long.h --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index 42b1ec622accc2..c0272bfcdd9d4f 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1159,6 +1159,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_import.h \ $(srcdir)/Include/internal/pycore_initconfig.h \ $(srcdir)/Include/internal/pycore_interp.h \ + $(srcdir)/Include/internal/pycore_long.h \ $(srcdir)/Include/internal/pycore_object.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyerrors.h \ From c190dc9c34810918b15fd078ac93edfd978cb33d Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 25 Aug 2022 16:50:19 -0700 Subject: [PATCH 04/14] Update attribution in Misc/NEWS.d Co-authored-by: Christian Heimes --- .../next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index a69e879df7574b..a205fb31ad7b95 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -9,3 +9,6 @@ This new limit can be configured or disabled by environment variable, command line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length limitation ` documentation. The default limit is 4300 digits in string form. + +Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback from +Victor Stinner, Thomas Wouters, and Steve Dower. From a7213dda20e70ebea70f664a4d06841beaa416ce Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 1 Sep 2022 15:19:46 +0200 Subject: [PATCH 05/14] Manually add new field to abi file --- Doc/data/python3.9.abi | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Doc/data/python3.9.abi b/Doc/data/python3.9.abi index e2037436bda00c..cca97796cb8284 100644 --- a/Doc/data/python3.9.abi +++ b/Doc/data/python3.9.abi @@ -5653,7 +5653,7 @@ - + @@ -5774,6 +5774,9 @@ + + + From 2384ef41bca2cb920aadfc0d8442efd73283a3d1 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 15:34:01 -0700 Subject: [PATCH 06/14] Move the whatsnew text per review. Ned pointed this out on the 3.7 review, it matches other patch changes and stands out better. 
--- Doc/whatsnew/3.9.rst | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 3c2564445bf7ad..392e2ef879f26d 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -271,18 +271,6 @@ Other Language Changes ``if`` clause in comprehensions and generator expressions. See :issue:`41848` and :issue:`43755` for details. -* New security feature in 3.9.14: - Converting between :class:`int` and :class:`str` in bases other than 2 - (binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) - now raises a :exc:`ValueError` if the number of digits in string form is - above a limit to avoid potential denial of service attacks due to the - algorithmic complexity. This is a mitigation for `CVE-2020-10735 - `_. - This limit can be configured or disabled by environment variable, command - line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion - length limitation ` documentation. The default limit - is 4300 digits in string form. - New Modules =========== @@ -1599,3 +1587,17 @@ URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal characters are controlled by a new module level variable ``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) +Notable security feature in 3.9.14 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. 
+ From 1378bdeb8a41dafe35c227fa84fbc1a5616faf7c Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 16:57:08 -0700 Subject: [PATCH 07/14] Make the doctest actually run & fix it. --- Doc/library/stdtypes.rst | 4 +++- .../Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index bb1663f52a57bd..bb5c4ac89fd8b1 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5277,6 +5277,8 @@ and the sign are not counted towards the limit. When an operation would exceed the limit, a :exc:`ValueError` is raised:: +.. doctest:: + >>> import sys >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. >>> _ = int('2' * 5432) @@ -5293,7 +5295,7 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised:: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. >>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i # Hexidecimal is unlimited. + >>> assert int(hex(i_squared), base=16) == i*i # Hexidecimal is unlimited. The default limit is 4300 digits as provided in :data:`sys.int_info.default_max_str_digits `. diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index a205fb31ad7b95..f9386b2ac140da 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -10,5 +10,5 @@ line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length limitation ` documentation. The default limit is 4300 digits in string form. -Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback from -Victor Stinner, Thomas Wouters, and Steve Dower. +Patch by Gregory P. 
Smith [Google] and Christian Heimes [Red Hat] with feedback +from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. From e51a8e2b09c05324919f26c84f18c5f5ed8b3d34 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 17:22:01 -0700 Subject: [PATCH 08/14] Fix the docs build. --- Doc/library/stdtypes.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index bb5c4ac89fd8b1..267679bb6f9184 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5275,7 +5275,7 @@ The limit is applied to the number of digit characters in the input or output string when a non-linear conversion algorithm would be involved. Underscores and the sign are not counted towards the limit. -When an operation would exceed the limit, a :exc:`ValueError` is raised:: +When an operation would exceed the limit, a :exc:`ValueError` is raised: .. doctest:: @@ -5302,7 +5302,9 @@ The default limit is 4300 digits as provided in The lowest limit that can be configured is 640 digits as provided in :data:`sys.int_info.str_digits_check_threshold `. -Verification:: +Verification: + +.. doctest:: >>> import sys >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info From 3e7eb9c47600181186805b973e4dfee8406093a2 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 21:47:15 -0700 Subject: [PATCH 09/14] Rename the news file to appease the Bedevere bot. 
--- ...010gps.rst => 2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Security/{2022-08-07-16-53.gh-issue-95778.ch010gps.rst => 2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst} (100%) diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst similarity index 100% rename from Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst rename to Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst From 5c64ec67586c507266668d88d3836936eef60f68 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Fri, 2 Sep 2022 16:08:30 +0000 Subject: [PATCH 10/14] hexadecimal spelling =) --- Doc/library/stdtypes.rst | 6 +++--- Doc/whatsnew/3.9.rst | 2 +- Lib/test/test_ast.py | 2 +- Lib/test/test_compile.py | 2 +- .../2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst | 2 +- Parser/pegen/pegen.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 267679bb6f9184..60f889c663cdef 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5258,7 +5258,7 @@ Integer string conversion length limitation CPython has a global limit for converting between :class:`int` and :class:`str` to mitigate denial of service attacks. This limit *only* applies to decimal or -other non-power-of-two number bases. Hexidecimal, octal, and binary conversions +other non-power-of-two number bases. Hexadecimal, octal, and binary conversions are unlimited. The limit can be configured. The :class:`int` type in CPython is an abitrary length number stored in binary @@ -5295,7 +5295,7 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. 
>>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i*i # Hexidecimal is unlimited. + >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. The default limit is 4300 digits as provided in :data:`sys.int_info.default_max_str_digits `. @@ -5380,7 +5380,7 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: encounter an error during parsing, usually at startup time or import time or even at installation time - anytime an up to date ``.pyc`` does not already exist for the code. A workaround for source that contains such large - constants is to convert them to ``0x`` hexidecimal form as it has no limit. + constants is to convert them to ``0x`` hexadecimal form as it has no limit. Test your application thoroughly if you use a low limit. Ensure your tests run with the limit set early via the environment or flag so that it applies diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 392e2ef879f26d..dab4746a8f6e2c 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -1591,7 +1591,7 @@ Notable security feature in 3.9.14 ================================== Converting between :class:`int` and :class:`str` in bases other than 2 -(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now raises a :exc:`ValueError` if the number of digits in string form is above a limit to avoid potential denial of service attacks due to the algorithmic complexity. 
This is a mitigation for `CVE-2020-10735 diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index b834921041fc46..a048d389906677 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -984,7 +984,7 @@ def test_literal_eval_str_int_limit(self): with self.assertRaises(SyntaxError) as err_ctx: ast.literal_eval('3'*4001) self.assertIn('Exceeds the limit ', str(err_ctx.exception)) - self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) + self.assertIn(' Consider hexadecimal ', str(err_ctx.exception)) def test_literal_eval_complex(self): # Issue #4907 diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 159e3fb80f4a0e..ec776b9e31b87b 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -200,7 +200,7 @@ def test_int_literals_too_long(self): exc = err_ctx.exception self.assertEqual(exc.lineno, 3) self.assertIn('Exceeds the limit ', str(exc)) - self.assertIn(' Consider hexidecimal ', str(exc)) + self.assertIn(' Consider hexadecimal ', str(exc)) def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index f9386b2ac140da..ea3b85d632e083 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -1,5 +1,5 @@ Converting between :class:`int` and :class:`str` in bases other than 2 -(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) now +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now raises a :exc:`ValueError` if the number of digits in string form is above a limit to avoid potential denial of service attacks due to the algorithmic complexity. 
This is a mitigation for `CVE-2020-10735 diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index 7ac7e1c2efb80f..15b06ce65a6fcc 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -978,7 +978,7 @@ _PyPegen_number_token(Parser *p) RAISE_ERROR_KNOWN_LOCATION( p, PyExc_SyntaxError, t->lineno, 0 /* col_offset */, - "%S - Consider hexidecimal for huge integer literals " + "%S - Consider hexadecimal for huge integer literals " "to avoid decimal conversion limits.", exc_value); Py_DECREF(exc_value); From a37041f147ab5b7824ba8d8507113c89e31b3304 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sat, 3 Sep 2022 23:17:22 -0700 Subject: [PATCH 11/14] doc typo: limitation https://github.com/python/cpython/pull/96542 --- Doc/library/stdtypes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 60f889c663cdef..5cd7cf776c15c5 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5319,7 +5319,7 @@ Verification: Affected APIs ------------- -The limition only applies to potentially slow conversions between :class:`int` +The limitation only applies to potentially slow conversions between :class:`int` and :class:`str` or :class:`bytes`: * ``int(string)`` with default base 10. From b4957d82681ac9ae37cadd8a42bc78f4cc065b70 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sat, 3 Sep 2022 23:00:58 -0700 Subject: [PATCH 12/14] Misc: Fix a typo in the header comment. --- Include/internal/pycore_long.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index f509fe2ee90386..ae04332a7a84ce 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -15,9 +15,9 @@ extern "C" { * everyone's existing deployed numpy test suite passes before * https://github.com/numpy/numpy/issues/22098 is widely available. 
* - * $ python -m timeit -s 's = * "1"*4300' 'int(s)' + * $ python -m timeit -s 's = "1"*4300' 'int(s)' * 2000 loops, best of 5: 125 usec per loop - * $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)' + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' * 1000 loops, best of 5: 311 usec per loop * (zen2 cloud VM) * From 0515141547a746bfe77ccdf3f728d0d6f2fa4bec Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sat, 3 Sep 2022 23:35:01 -0700 Subject: [PATCH 13/14] remove unneeded doc note on float.as_integer_ratio Per mdickinson@'s comment on the main branch PR. --- Doc/library/stdtypes.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 5cd7cf776c15c5..6eef56455edbcb 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -563,13 +563,6 @@ class`. float also has the following additional methods. :exc:`OverflowError` on infinities and a :exc:`ValueError` on NaNs. - .. note:: - - The values returned by ``as_integer_ratio()`` can be huge. Attempts - to render such integers into decimal strings may bump into the - :ref:`integer string conversion length limitation - <int_max_str_digits>`. - .. method:: float.is_integer() Return ``True`` if the float instance is finite with integral From a1247fd6aa4610041917fdae07fcced5bc399ed7 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 4 Sep 2022 17:21:18 +0100 Subject: [PATCH 14/14] gh-95778: Correctly pre-check for int-to-str conversion (#96537) Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front.
Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ From this it follows, using $x < \lfloor x \rfloor + 1$ on the left-hand side and $\lfloor y \rfloor \le y$ on the right-hand side, that $$\frac{M}{3L} < \frac{s - 11}{10} + 1 = \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. * Issue: gh-95778 Co-authored-by: Gregory P. Smith [Google LLC] --- Lib/test/test_int.py | 82 +++++++++++++++++++ ...08-07-16-53-38.gh-issue-95778.ch010gps.rst | 2 +- Objects/longobject.c | 26 +++++- 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 54a96ad66459fb..cbbddf50637ca0 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,5 @@ import sys +import time import unittest from test import support @@ -626,6 +627,87 @@ def test_max_str_digits(self): with self.assertRaises(ValueError): str(i) + def test_denial_of_service_prevented_int_to_str(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 50_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits.
+ digits = 78_268 + with support.adjust_int_max_str_digits(digits): + start = get_time() + huge_decimal = str(huge_int) + seconds_to_convert = get_time() - start + self.assertEqual(len(huge_decimal), digits) + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + # We test with the limit almost at the size needed to check performance. + # The performant limit check is slightly fuzzy, give it some room. + with support.adjust_int_max_str_digits(int(.995 * digits)): + with self.assertRaises(ValueError) as err: + start = get_time() + str(huge_int) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits. + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + str(extra_huge_int) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + + def test_denial_of_service_prevented_str_to_int(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 100_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + digits = 133700 + huge = '8'*digits + with support.adjust_int_max_str_digits(digits): + start = get_time() + int(huge) + seconds_to_convert = get_time() - start + # Ensuring that we chose a slow enough conversion to measure.
+ # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + with support.adjust_int_max_str_digits(digits - 1): + with self.assertRaises(ValueError) as err: + start = get_time() + int(huge) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge = '7'*1_200_000 + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + int(extra_huge) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + def test_power_of_two_bases_unlimited(self): """The limit does not apply to power of 2 bases.""" maxdigits = sys.get_int_max_str_digits() diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index ea3b85d632e083..8eb8a34884dced 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -11,4 +11,4 @@ limitation <int_max_str_digits>` documentation. The default limit is 4300 digits in string form. Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback -from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. +from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson.
diff --git a/Objects/longobject.c b/Objects/longobject.c index a53eabee4cc4ed..ec18ec32b8a895 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -38,7 +38,8 @@ PyObject *_PyLong_One = NULL; #define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS) -#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" static PyObject * get_small_int(sdigit ival) @@ -1722,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa, size_a = Py_ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; + /* quick and dirty pre-check for overflowing the decimal digit limit, + based on the inequality 10/3 >= log2(10) + + explanation in https://github.com/python/cpython/pull/96537 + */ + if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD + / (3 * PyLong_SHIFT) + 2) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && + (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } + /* quick and dirty upper bound for the number of digits required to express a in base _PyLong_DECIMAL_BASE: @@ -1787,8 +1805,8 @@ long_to_decimal_string_internal(PyObject *aa, Py_ssize_t strlen_nosign = strlen - negative; if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - max_str_digits, strlen_nosign); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); return -1; } } @@ -2462,7 +2480,7 @@ digit beyond the first. 
PyInterpreterState *interp = _PyInterpreterState_GET(); int max_str_digits = interp->int_max_str_digits; if ((max_str_digits > 0) && (digits > max_str_digits)) { - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, max_str_digits, digits); return NULL; }