diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 21e3f3aefe5d8d..0d7545368d7a70 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -34,7 +34,7 @@ jobs: run_tests: ${{ steps.check.outputs.run_tests }} run_ssl_tests: ${{ steps.check.outputs.run_ssl_tests }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check for source changes id: check run: | @@ -66,8 +66,8 @@ jobs: needs: check_source if: needs.check_source.outputs.run_tests == 'true' steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 - name: Install Dependencies run: | sudo ./.github/workflows/posix-deps-apt.sh @@ -96,8 +96,8 @@ jobs: needs: check_source if: needs.check_source.outputs.run_tests == 'true' steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 - name: Install Dependencies run: sudo ./.github/workflows/posix-deps-apt.sh - name: Add ccache to PATH @@ -110,8 +110,12 @@ jobs: grep "aclocal 1.16.3" aclocal.m4 grep -q "runstatedir" configure grep -q "PKG_PROG_PKG_CONFIG" aclocal.m4 + - name: Configure CPython + run: | + # Build Python with the libpython dynamic library + ./configure --config-cache --with-pydebug --enable-shared - name: Regenerate autoconf files - run: docker run --rm -v $(pwd):/src quay.io/tiran/cpython_autoconf:269 + run: make regen-configure - name: Build CPython run: | # Build Python with the libpython dynamic library @@ -142,7 +146,7 @@ jobs: env: IncludeUwp: 'true' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build CPython run: .\PCbuild\build.bat -e -p Win32 - name: Display build info @@ -158,7 +162,7 @@ jobs: env: IncludeUwp: 'true' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Register MSVC problem matcher run: echo "::add-matcher::.github/problem-matchers/msvc.json" - name: 
Build CPython @@ -179,7 +183,7 @@ jobs: HOMEBREW_NO_INSTALL_CLEANUP: 1 PYTHONSTRICTEXTENSIONBUILD: 1 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Homebrew dependencies run: brew install pkg-config openssl@3.0 xz gdbm tcl-tk - name: Configure CPython @@ -203,10 +207,10 @@ jobs: needs: check_source if: needs.check_source.outputs.run_tests == 'true' env: - OPENSSL_VER: 1.1.1v + OPENSSL_VER: 3.0.11 PYTHONSTRICTEXTENSIONBUILD: 1 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install Dependencies @@ -247,14 +251,14 @@ jobs: strategy: fail-fast: false matrix: - openssl_ver: [1.1.1v, 3.0.10, 3.1.2] + openssl_ver: [1.1.1w, 3.0.11, 3.1.3] env: OPENSSL_VER: ${{ matrix.openssl_ver }} MULTISSL_DIR: ${{ github.workspace }}/multissl OPENSSL_DIR: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }} LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install Dependencies diff --git a/.github/workflows/build_msi.yml b/.github/workflows/build_msi.yml index 4c757c6934bb10..c9993b75df1bac 100644 --- a/.github/workflows/build_msi.yml +++ b/.github/workflows/build_msi.yml @@ -34,7 +34,7 @@ jobs: name: 'Windows (x86) Installer' runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build CPython installer run: .\Tools\msi\build.bat --doc -x86 @@ -42,6 +42,6 @@ jobs: name: 'Windows (x64) Installer' runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build CPython installer run: .\Tools\msi\build.bat --doc -x64 diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index 0de062fdc0fae2..ee02848417c2e7 100644 
--- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -32,7 +32,7 @@ jobs: name: 'Docs' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Register Sphinx problem matcher run: echo "::add-matcher::.github/problem-matchers/sphinx.json" - name: 'Set up Python' @@ -56,7 +56,7 @@ jobs: name: 'Doctest' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Register Sphinx problem matcher run: echo "::add-matcher::.github/problem-matchers/sphinx.json" - uses: actions/cache@v3 diff --git a/.github/workflows/verify-ensurepip-wheels.yml b/.github/workflows/verify-ensurepip-wheels.yml index 9f4754f912b09f..458e44413e5e6b 100644 --- a/.github/workflows/verify-ensurepip-wheels.yml +++ b/.github/workflows/verify-ensurepip-wheels.yml @@ -24,7 +24,7 @@ jobs: verify: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: '3' diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000000000..6a9db718698269 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,12 @@ +# This is a dummy config file so that readthedocs.org doesn't fail on security branches. +# Note that this won't result in docs actually getting built; +# clicking on the docs preview link on a PR will result in a 404. +version: 2 +formats: [] +build: + os: "ubuntu-22.04" + tools: + python: "3.11" + jobs: + post_checkout: + - exit 183 diff --git a/Doc/constraints.txt b/Doc/constraints.txt new file mode 100644 index 00000000000000..16b735ea07a72a --- /dev/null +++ b/Doc/constraints.txt @@ -0,0 +1,24 @@ +# We have upper bounds on our transitive dependencies here +# To avoid new releases unexpectedly breaking our build. +# This file can be updated on an ad-hoc basis, +# though it will probably have to be updated +# whenever Doc/requirements.txt is updated. 
+ +# Direct dependencies of Sphinx +babel<3 +colorama<0.5 +imagesize<1.5 +Jinja2<3.2 +packaging<24 +Pygments>=2.16.1,<3 +requests<3 +snowballstemmer<3 +sphinxcontrib-applehelp<1.0.5 +sphinxcontrib-devhelp<1.0.6 +sphinxcontrib-htmlhelp<2.0.5 +sphinxcontrib-jsmath<1.1 +sphinxcontrib-qthelp<1.0.7 +sphinxcontrib-serializinghtml<1.1.10 + +# Direct dependencies of Jinja2 (Jinja is a dependency of Sphinx, see above) +MarkupSafe<2.2 diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 034e579315de00..83815b75e92f3d 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -196,6 +196,42 @@ XMLParser Objects :exc:`ExpatError` to be raised with the :attr:`code` attribute set to ``errors.codes[errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING]``. +.. method:: xmlparser.SetReparseDeferralEnabled(enabled) + + .. warning:: + + Calling ``SetReparseDeferralEnabled(False)`` has security implications, + as detailed below; please make sure to understand these consequences + prior to using the ``SetReparseDeferralEnabled`` method. + + Expat 2.6.0 introduced a security mechanism called "reparse deferral" + where instead of causing denial of service through quadratic runtime + from reparsing large tokens, reparsing of unfinished tokens is now delayed + by default until a sufficient amount of input is reached. + Due to this delay, registered handlers may — depending on the sizing of + input chunks pushed to Expat — no longer be called right after pushing new + input to the parser. Where immediate feedback and taking over responsibility + of protecting against denial of service from large tokens are both wanted, + calling ``SetReparseDeferralEnabled(False)`` disables reparse deferral + for the current Expat parser instance, temporarily or altogether. + Calling ``SetReparseDeferralEnabled(True)`` allows re-enabling reparse + deferral. + + Note that :meth:`SetReparseDeferralEnabled` has been backported to some + prior releases of CPython as a security fix.
Check for availability of + :meth:`SetReparseDeferralEnabled` using :func:`hasattr` if used in code + running across a variety of Python versions. + + .. versionadded:: 3.10.14 + +.. method:: xmlparser.GetReparseDeferralEnabled() + + Returns whether reparse deferral is currently enabled for the given + Expat parser instance. + + .. versionadded:: 3.10.14 + + :class:`xmlparser` objects have the following attributes: diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 2fe0d2e082fb3a..dbea33b8a0a64e 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -165,6 +165,11 @@ data but would still like to have incremental parsing capabilities, take a look at :func:`iterparse`. It can be useful when you're reading a large XML document and don't want to hold it wholly in memory. +Where *immediate* feedback through events is wanted, calling method +:meth:`XMLPullParser.flush` can help reduce delay; +please make sure to study the related security notes. + + Finding interesting elements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1370,6 +1375,24 @@ XMLParser Objects Feeds data to the parser. *data* is encoded data. + + .. method:: flush() + + Triggers parsing of any previously fed unparsed data, which can be + used to ensure more immediate feedback, in particular with Expat >=2.6.0. + The implementation of :meth:`flush` temporarily disables reparse deferral + with Expat (if currently enabled) and triggers a reparse. + Disabling reparse deferral has security consequences; please see + :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + + Note that :meth:`flush` has been backported to some prior releases of + CPython as a security fix. Check for availability of :meth:`flush` + using :func:`hasattr` if used in code running across a variety of Python + versions. + + .. 
versionadded:: 3.10.14 + + :meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method for each opening tag, its ``end(tag)`` method for each closing tag, and data is processed by method ``data(data)``. For further supported callback @@ -1431,6 +1454,22 @@ XMLPullParser Objects Feed the given bytes data to the parser. + .. method:: flush() + + Triggers parsing of any previously fed unparsed data, which can be + used to ensure more immediate feedback, in particular with Expat >=2.6.0. + The implementation of :meth:`flush` temporarily disables reparse deferral + with Expat (if currently enabled) and triggers a reparse. + Disabling reparse deferral has security consequences; please see + :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + + Note that :meth:`flush` has been backported to some prior releases of + CPython as a security fix. Check for availability of :meth:`flush` + using :func:`hasattr` if used in code running across a variety of Python + versions. + + .. versionadded:: 3.10.14 + .. method:: close() Signal the parser that the data stream is terminated. Unlike diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index 20b0905bb1093a..cf821e73df748c 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -68,6 +68,7 @@ quadratic blowup **Vulnerable** (1) **Vulnerable** (1) **Vulnerable* external entity expansion Safe (5) Safe (2) Safe (3) Safe (5) Safe (4) `DTD`_ retrieval Safe (5) Safe Safe Safe (5) Safe decompression bomb Safe Safe Safe Safe **Vulnerable** +large tokens **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) ========================= ================== ================== ================== ================== ================== 1. Expat 2.4.1 and newer is not vulnerable to the "billion laughs" and @@ -81,6 +82,11 @@ decompression bomb Safe Safe Safe 4. :mod:`xmlrpclib` doesn't expand external entities and omits them. 5. 
Since Python 3.7.1, external general entities are no longer processed by default. +6. Expat 2.6.0 and newer is not vulnerable to denial of service + through quadratic runtime caused by parsing large tokens. + Items still listed as vulnerable due to + potential reliance on system-provided libraries. Check + :const:`!pyexpat.EXPAT_VERSION`. billion laughs / exponential entity expansion @@ -114,6 +120,13 @@ decompression bomb files. For an attacker it can reduce the amount of transmitted data by three magnitudes or more. +large tokens + Expat needs to re-parse unfinished tokens; without the protection + introduced in Expat 2.6.0, this can lead to quadratic runtime that can + be used to cause denial of service in the application parsing XML. + The issue is known as + `CVE-2023-52425 <https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-52425>`_. + The documentation for `defusedxml`_ on PyPI has further information about all known attack vectors with examples and references. diff --git a/Doc/license.rst b/Doc/license.rst index 37a3d4fa9e5f2c..54634771349d9d 100644 --- a/Doc/license.rst +++ b/Doc/license.rst @@ -989,26 +989,29 @@ https://www.w3.org/TR/xml-c14n2-testcases/ and is distributed under the Audioop ------- -The audioop module uses the code base in g771.c file of the SoX project:: - - Programming the AdLib/Sound Blaster - FM Music Chips - Version 2.0 (24 Feb 1992) - Copyright (c) 1991, 1992 by Jeffrey S. Lee - jlee@smylex.uucp - Warranty and Copyright Policy - This document is provided on an "as-is" basis, and its author makes - no warranty or representation, express or implied, with respect to - its quality performance or fitness for a particular purpose. In no - event will the author of this document be liable for direct, indirect, - special, incidental, or consequential damages arising out of the use - or inability to use the information contained within. Use of this - document is at your own risk.
- This file may be used and copied freely so long as the applicable - copyright notices are retained, and no modifications are made to the - text of the document. No money shall be charged for its distribution - beyond reasonable shipping, handling and duplication costs, nor shall - proprietary changes be made to this document so that it cannot be - distributed freely. This document may not be included in published - material or commercial packages without the written consent of its - author. +The audioop module uses the code base in g771.c file of the SoX project. +https://sourceforge.net/projects/sox/files/sox/12.17.7/sox-12.17.7.tar.gz + + This source code is a product of Sun Microsystems, Inc. and is provided + for unrestricted use. Users may copy or modify this source code without + charge. + + SUN SOURCE CODE IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING + THE WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR + PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE. + + Sun source code is provided with no support and without any obligation on + the part of Sun Microsystems, Inc. to assist in its use, correction, + modification or enhancement. + + SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE + INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY THIS SOFTWARE + OR ANY PART THEREOF. + + In no event will Sun Microsystems, Inc. be liable for any lost revenue + or profits or other special, indirect and consequential damages, even if + Sun has been advised of the possibility of such damages. + + Sun Microsystems, Inc. + 2550 Garcia Avenue + Mountain View, California 94043 diff --git a/Doc/requirements.txt b/Doc/requirements.txt index f43ce2c6e4efd8..da5c38ae1a0298 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -16,3 +16,5 @@ blurb # The theme used by the documentation is stored separately, so we need # to install that as well. 
python-docs-theme>=2022.1 + +-c constraints.txt diff --git a/Include/patchlevel.h b/Include/patchlevel.h index a4c3636ab13b4d..61bf1c087db91c 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -18,12 +18,12 @@ /*--start constants--*/ #define PY_MAJOR_VERSION 3 #define PY_MINOR_VERSION 10 -#define PY_MICRO_VERSION 13 +#define PY_MICRO_VERSION 14 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL #define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.10.13" +#define PY_VERSION "3.10.14" /*--end constants--*/ /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 07020b5dc964cb..9824d099c3df7d 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -48,8 +48,10 @@ struct PyExpat_CAPI enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding); int (*DefaultUnknownEncodingHandler)( void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); - /* might be none for expat < 2.1.0 */ + /* might be NULL for expat < 2.1.0 */ int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); + /* might be NULL for expat < 2.6.0 */ + XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); /* always add new stuff to the end! 
*/ }; diff --git a/Lib/idlelib/idle_test/test_squeezer.py b/Lib/idlelib/idle_test/test_squeezer.py index 86c5d41b629719..86c21f00bb8d00 100644 --- a/Lib/idlelib/idle_test/test_squeezer.py +++ b/Lib/idlelib/idle_test/test_squeezer.py @@ -170,6 +170,7 @@ def test_write_not_stdout(self): def test_write_stdout(self): """Test Squeezer's overriding of the EditorWindow's write() method.""" + requires('gui') editwin = self.make_mock_editor_window() for text in ['', 'TEXT']: diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index b32b3200c0e027..f09f28b2c43d2e 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Autogenerated by Sphinx on Thu Aug 24 13:45:52 2023 +# Autogenerated by Sphinx on Tue Mar 19 22:44:19 2024 topics = {'assert': 'The "assert" statement\n' '**********************\n' '\n' diff --git a/Lib/site.py b/Lib/site.py index 939893eb5ee93b..5302037e0bf2c1 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -74,6 +74,7 @@ import builtins import _sitebuiltins import io +import stat # Prefixes for site-packages; add additional prefixes like /usr/local here PREFIXES = [sys.prefix, sys.exec_prefix] @@ -168,6 +169,14 @@ def addpackage(sitedir, name, known_paths): else: reset = False fullname = os.path.join(sitedir, name) + try: + st = os.lstat(fullname) + except OSError: + return + if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or + (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): + _trace(f"Skipping hidden .pth file: {fullname!r}") + return _trace(f"Processing .pth file: {fullname!r}") try: # locale encoding is not ideal especially on Windows. 
But we have used @@ -221,7 +230,8 @@ def addsitedir(sitedir, known_paths=None): names = os.listdir(sitedir) except OSError: return - names = [name for name in names if name.endswith(".pth")] + names = [name for name in names + if name.endswith(".pth") and not name.startswith(".")] for name in sorted(names): addpackage(sitedir, name, known_paths) if reset: diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 96da93053ac712..fd78998df9fd85 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -269,6 +269,22 @@ def _mkstemp_inner(dir, pre, suf, flags, output_type): raise FileExistsError(_errno.EEXIST, "No usable temporary file name found") +def _dont_follow_symlinks(func, path, *args): + # Pass follow_symlinks=False, unless not supported on this platform. + if func in _os.supports_follow_symlinks: + func(path, *args, follow_symlinks=False) + elif _os.name == 'nt' or not _os.path.islink(path): + func(path, *args) + +def _resetperms(path): + try: + chflags = _os.chflags + except AttributeError: + pass + else: + _dont_follow_symlinks(chflags, path, 0) + _dont_follow_symlinks(_os.chmod, path, 0o700) + # User visible interfaces. 
@@ -827,17 +843,10 @@ def __init__(self, suffix=None, prefix=None, dir=None, def _rmtree(cls, name, ignore_errors=False): def onerror(func, path, exc_info): if issubclass(exc_info[0], PermissionError): - def resetperms(path): - try: - _os.chflags(path, 0) - except AttributeError: - pass - _os.chmod(path, 0o700) - try: if path != name: - resetperms(_os.path.dirname(path)) - resetperms(path) + _resetperms(_os.path.dirname(path)) + _resetperms(path) try: _os.unlink(path) diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py index 00ea1c39dd6fb6..027dbecc6134df 100644 --- a/Lib/test/test_codecencodings_iso2022.py +++ b/Lib/test/test_codecencodings_iso2022.py @@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase): (b'ab\x1BNdef', 'replace', 'abdef'), ) +class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_3' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'), + ('ab\u4FF1def', 'replace', b'ab?def'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(O\x29\x28\x1B(Bℜ\x1B$(O\x29\x32\x1B(B = ⟨ሴ⟩' + ) + +class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_2004' + tstring = 
multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'), + ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(Q\x29\x28\x1B(Bℜ\x1B$(Q\x29\x32\x1B(B = ⟨ሴ⟩' + ) + class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase): encoding = 'iso2022_kr' tstring = multibytecodec_support.load_teststring('iso2022_kr') diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 66ab064241a08a..10a50fe97ec131 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -420,10 +420,10 @@ def test_undecodable_filename(self): def test_undecodable_parameter(self): # sanity check using a valid parameter response = self.request(self.base_url + '/?x=123').read() - self.assertRegex(response, f'listing for {self.base_url}/\?x=123'.encode('latin1')) + self.assertRegex(response, rf'listing for {self.base_url}/\?x=123'.encode('latin1')) # now the bogus encoding response = self.request(self.base_url + '/?x=%bb').read() - self.assertRegex(response, f'listing for {self.base_url}/\?x=\xef\xbf\xbd'.encode('latin1')) + self.assertRegex(response, rf'listing for {self.base_url}/\?x=\xef\xbf\xbd'.encode('latin1')) def test_get_dir_redirect_location_domain_injection_bug(self): """Ensure 
//evil.co/..%2f../../X does not put //evil.co/ in Location. diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 8f34c182f82eaf..307e2b93559ff2 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -241,10 +241,15 @@ def test_access_parameter(self): # Try writing with PROT_EXEC and without PROT_WRITE prot = mmap.PROT_READ | getattr(mmap, 'PROT_EXEC', 0) with open(TESTFN, "r+b") as f: - m = mmap.mmap(f.fileno(), mapsize, prot=prot) - self.assertRaises(TypeError, m.write, b"abcdef") - self.assertRaises(TypeError, m.write_byte, 0) - m.close() + try: + m = mmap.mmap(f.fileno(), mapsize, prot=prot) + except PermissionError: + # on macOS 14, PROT_READ | PROT_EXEC is not allowed + pass + else: + self.assertRaises(TypeError, m.write, b"abcdef") + self.assertRaises(TypeError, m.write_byte, 0) + m.close() def test_bad_file_desc(self): # Try opening a bad file descriptor... diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index b2b4dea060532d..5212c7a704725e 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -730,5 +730,59 @@ def resolve_entity(context, base, system_id, public_id): self.assertEqual(handler_call_args, [("bar", "baz")]) +class ReparseDeferralTest(unittest.TestCase): + def test_getter_setter_round_trip(self): + parser = expat.ParserCreate() + enabled = (expat.version_info >= (2, 6, 0)) + + self.assertIs(parser.GetReparseDeferralEnabled(), enabled) + parser.SetReparseDeferralEnabled(False) + self.assertIs(parser.GetReparseDeferralEnabled(), False) + parser.SetReparseDeferralEnabled(True) + self.assertIs(parser.GetReparseDeferralEnabled(), enabled) + + def test_reparse_deferral_enabled(self): + if expat.version_info < (2, 6, 0): + self.skipTest(f'Expat {expat.version_info} does not ' + 'support reparse deferral') + + started = [] + + def start_element(name, _): + started.append(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = start_element +
self.assertTrue(parser.GetReparseDeferralEnabled()) + + for chunk in (b''): + parser.Parse(chunk, False) + + # The key test: Have handlers already fired? Expecting: no. + self.assertEqual(started, []) + + parser.Parse(b'', True) + + self.assertEqual(started, ['doc']) + + def test_reparse_deferral_disabled(self): + started = [] + + def start_element(name, _): + started.append(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = start_element + if expat.version_info >= (2, 6, 0): + parser.SetReparseDeferralEnabled(False) + self.assertFalse(parser.GetReparseDeferralEnabled()) + + for chunk in (b''): + parser.Parse(chunk, False) + + # The key test: Have handlers already fired? Expecting: yes. + self.assertEqual(started, ['doc']) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index eda4e6a46df437..97e96668f85c8a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -19,6 +19,7 @@ from io import BytesIO, StringIO import codecs import os.path +import pyexpat import shutil import sys from urllib.error import URLError @@ -1214,6 +1215,56 @@ def test_expat_incremental_reset(self): self.assertEqual(result.getvalue(), start + b"text") + def test_flush_reparse_deferral_enabled(self): + if pyexpat.version_info < (2, 6, 0): + self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') + + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) + + self.assertEqual(result.getvalue(), start) # i.e. 
no elements started + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + + def test_flush_reparse_deferral_disabled(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + parser._parser.SetReparseDeferralEnabled(False) + + self.assertEqual(result.getvalue(), start) # i.e. no elements started + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + # ===== Locator support def test_expat_locator_noinfo(self): diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 93349ed8bb16a3..c70e1fa9ae1041 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -18,6 +18,7 @@ import os import re import shutil +import stat import subprocess import sys import sysconfig @@ -194,6 +195,44 @@ def test_addsitedir(self): finally: pth_file.cleanup() + def test_addsitedir_dotfile(self): + pth_file = PthFile('.dotfile') + pth_file.cleanup(prep=True) + try: + pth_file.create() + site.addsitedir(pth_file.base_dir, set()) + self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path) + self.assertIn(pth_file.base_dir, sys.path) + finally: + pth_file.cleanup() + + @unittest.skipUnless(hasattr(os, 'chflags'), 'test needs os.chflags()') + def test_addsitedir_hidden_flags(self): + pth_file = PthFile() + pth_file.cleanup(prep=True) + try: + pth_file.create() + st = os.stat(pth_file.file_path) + os.chflags(pth_file.file_path, st.st_flags | stat.UF_HIDDEN) + 
site.addsitedir(pth_file.base_dir, set()) + self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path) + self.assertIn(pth_file.base_dir, sys.path) + finally: + pth_file.cleanup() + + @unittest.skipUnless(sys.platform == 'win32', 'test needs Windows') + def test_addsitedir_hidden_file_attribute(self): + pth_file = PthFile() + pth_file.cleanup(prep=True) + try: + pth_file.create() + subprocess.check_call(['attrib', '+H', pth_file.file_path]) + site.addsitedir(pth_file.base_dir, set()) + self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path) + self.assertIn(pth_file.base_dir, sys.path) + finally: + pth_file.cleanup() + # This tests _getuserbase, hence the double underline # to distinguish from a test for getuserbase def test__getuserbase(self): diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 211fd8c02da0a4..4f1fc3fd92db21 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -1070,7 +1070,20 @@ def testInterfaceNameIndex(self): 'socket.if_indextoname() not available.') def testInvalidInterfaceIndexToName(self): self.assertRaises(OSError, socket.if_indextoname, 0) + self.assertRaises(OverflowError, socket.if_indextoname, -1) + self.assertRaises(OverflowError, socket.if_indextoname, 2**1000) self.assertRaises(TypeError, socket.if_indextoname, '_DEADBEEF') + if hasattr(socket, 'if_nameindex'): + indices = dict(socket.if_nameindex()) + for index in indices: + index2 = index + 2**32 + if index2 not in indices: + with self.assertRaises((OverflowError, OSError)): + socket.if_indextoname(index2) + for index in 2**32-1, 2**64-1: + if index not in indices: + with self.assertRaises((OverflowError, OSError)): + socket.if_indextoname(index) @unittest.skipUnless(hasattr(socket, 'if_nametoindex'), 'socket.if_nametoindex() not available.') diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index 1946b043d04d79..30d57baf977f52 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -1499,6 
+1499,103 @@ def test_cleanup_with_symlink_to_a_directory(self): "were deleted") d2.cleanup() + @os_helper.skip_unless_symlink + def test_cleanup_with_symlink_modes(self): + # cleanup() should not follow symlinks when fixing mode bits (#91133) + with self.do_create(recurse=0) as d2: + file1 = os.path.join(d2, 'file1') + open(file1, 'wb').close() + dir1 = os.path.join(d2, 'dir1') + os.mkdir(dir1) + for mode in range(8): + mode <<= 6 + with self.subTest(mode=format(mode, '03o')): + def test(target, target_is_directory): + d1 = self.do_create(recurse=0) + symlink = os.path.join(d1.name, 'symlink') + os.symlink(target, symlink, + target_is_directory=target_is_directory) + try: + os.chmod(symlink, mode, follow_symlinks=False) + except NotImplementedError: + pass + try: + os.chmod(symlink, mode) + except FileNotFoundError: + pass + os.chmod(d1.name, mode) + d1.cleanup() + self.assertFalse(os.path.exists(d1.name)) + + with self.subTest('nonexisting file'): + test('nonexisting', target_is_directory=False) + with self.subTest('nonexisting dir'): + test('nonexisting', target_is_directory=True) + + with self.subTest('existing file'): + os.chmod(file1, mode) + old_mode = os.stat(file1).st_mode + test(file1, target_is_directory=False) + new_mode = os.stat(file1).st_mode + self.assertEqual(new_mode, old_mode, + '%03o != %03o' % (new_mode, old_mode)) + + with self.subTest('existing dir'): + os.chmod(dir1, mode) + old_mode = os.stat(dir1).st_mode + test(dir1, target_is_directory=True) + new_mode = os.stat(dir1).st_mode + self.assertEqual(new_mode, old_mode, + '%03o != %03o' % (new_mode, old_mode)) + + @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags') + @os_helper.skip_unless_symlink + def test_cleanup_with_symlink_flags(self): + # cleanup() should not follow symlinks when fixing flags (#91133) + flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK + self.check_flags(flags) + + with self.do_create(recurse=0) as d2: + file1 = os.path.join(d2, 'file1') + open(file1, 
'wb').close() + dir1 = os.path.join(d2, 'dir1') + os.mkdir(dir1) + def test(target, target_is_directory): + d1 = self.do_create(recurse=0) + symlink = os.path.join(d1.name, 'symlink') + os.symlink(target, symlink, + target_is_directory=target_is_directory) + try: + os.chflags(symlink, flags, follow_symlinks=False) + except NotImplementedError: + pass + try: + os.chflags(symlink, flags) + except FileNotFoundError: + pass + os.chflags(d1.name, flags) + d1.cleanup() + self.assertFalse(os.path.exists(d1.name)) + + with self.subTest('nonexisting file'): + test('nonexisting', target_is_directory=False) + with self.subTest('nonexisting dir'): + test('nonexisting', target_is_directory=True) + + with self.subTest('existing file'): + os.chflags(file1, flags) + old_flags = os.stat(file1).st_flags + test(file1, target_is_directory=False) + new_flags = os.stat(file1).st_flags + self.assertEqual(new_flags, old_flags) + + with self.subTest('existing dir'): + os.chflags(dir1, flags) + old_flags = os.stat(dir1).st_flags + test(dir1, target_is_directory=True) + new_flags = os.stat(dir1).st_flags + self.assertEqual(new_flags, old_flags) + @support.cpython_only def test_del_on_collection(self): # A TemporaryDirectory is deleted when garbage collected @@ -1671,9 +1768,27 @@ def test_modes(self): d.cleanup() self.assertFalse(os.path.exists(d.name)) - @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.lchflags') + def check_flags(self, flags): + # skip the test if these flags are not supported (ex: FreeBSD 13) + filename = os_helper.TESTFN + try: + open(filename, "w").close() + try: + os.chflags(filename, flags) + except OSError as exc: + # "OSError: [Errno 45] Operation not supported" + self.skipTest(f"chflags() doesn't support flags " + f"{flags:#b}: {exc}") + else: + os.chflags(filename, 0) + finally: + os_helper.unlink(filename) + + @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags') def test_flags(self): flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK + 
self.check_flags(flags) + d = self.do_create(recurse=3, dirs=2, files=2) with d: # Change files and directories flags recursively. diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 55f45db7288b09..fe17aac6c1fec4 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -14,10 +14,11 @@ import subprocess import urllib.request -# The proxy bypass method imported below has logic specific to the OSX -# proxy config data structure but is testable on all platforms. +# The proxy bypass method imported below has logic specific to the +# corresponding system but is testable on all platforms. from urllib.request import (Request, OpenerDirector, HTTPBasicAuthHandler, HTTPPasswordMgrWithPriorAuth, _parse_proxy, + _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) from urllib.parse import urlparse @@ -1443,6 +1444,30 @@ def test_proxy_https_proxy_authorization(self): self.assertEqual(req.host, "proxy.example.com:3128") self.assertEqual(req.get_header("Proxy-authorization"), "FooBar") + @unittest.skipUnless(os.name == "nt", "only relevant for Windows") + def test_winreg_proxy_bypass(self): + proxy_override = "www.example.com;*.example.net; 192.168.0.1" + proxy_bypass = _proxy_bypass_winreg_override + for host in ("www.example.com", "www.example.net", "192.168.0.1"): + self.assertTrue(proxy_bypass(host, proxy_override), + "expected bypass of %s to be true" % host) + + for host in ("example.com", "www.example.org", "example.net", + "192.168.0.2"): + self.assertFalse(proxy_bypass(host, proxy_override), + "expected bypass of %s to be False" % host) + + # check intranet address bypass + proxy_override = "example.com; " + self.assertTrue(proxy_bypass("example.com", proxy_override), + "expected bypass of %s to be true" % host) + self.assertFalse(proxy_bypass("example.net", proxy_override), + "expected bypass of %s to be False" % host) + for host in ("test", "localhost"): + self.assertTrue(proxy_bypass(host, 
proxy_override), + "expect to bypass intranet address '%s'" + % host) + @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX") def test_osx_proxy_bypass(self): bypass = { diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 940e02630e90da..68717c09be928c 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -13,6 +13,7 @@ import operator import os import pickle +import pyexpat import sys import textwrap import types @@ -1373,12 +1374,14 @@ def test_attlist_default(self): class XMLPullParserTest(unittest.TestCase): - def _feed(self, parser, data, chunk_size=None): + def _feed(self, parser, data, chunk_size=None, flush=False): if chunk_size is None: parser.feed(data) else: for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) + if flush: + parser.flush() def assert_events(self, parser, expected, max_events=None): self.assertEqual( @@ -1396,28 +1399,35 @@ def assert_event_tags(self, parser, expected, max_events=None): self.assertEqual([(action, elem.tag) for action, elem in events], expected) - def test_simple_xml(self): - for chunk_size in (None, 1, 5): - with self.subTest(chunk_size=chunk_size): - parser = ET.XMLPullParser() - self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, []) - self._feed(parser, - "\n text\n", chunk_size) - self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [ - ('end', 'element'), - ('end', 'empty-element'), - ]) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [('end', 'root')]) - self.assertIsNone(parser.close()) + def test_simple_xml(self, chunk_size=None, flush=False): + parser = ET.XMLPullParser() + self.assert_event_tags(parser, []) + self._feed(parser, "\n", chunk_size, flush) + self.assert_event_tags(parser, []) + self._feed(parser, + "\n text\n", 
chunk_size, flush) + self.assert_event_tags(parser, [('end', 'element')]) + self._feed(parser, "texttail\n", chunk_size, flush) + self._feed(parser, "\n", chunk_size, flush) + self.assert_event_tags(parser, [ + ('end', 'element'), + ('end', 'empty-element'), + ]) + self._feed(parser, "\n", chunk_size, flush) + self.assert_event_tags(parser, [('end', 'root')]) + self.assertIsNone(parser.close()) + + def test_simple_xml_chunk_1(self): + self.test_simple_xml(chunk_size=1, flush=True) + + def test_simple_xml_chunk_5(self): + self.test_simple_xml(chunk_size=5, flush=True) + + def test_simple_xml_chunk_22(self): + self.test_simple_xml(chunk_size=22) def test_feed_while_iterating(self): parser = ET.XMLPullParser() @@ -1613,6 +1623,57 @@ def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) + def test_flush_reparse_deferral_enabled(self): + if pyexpat.version_info < (2, 6, 0): + self.skipTest(f'Expat {pyexpat.version_info} does not ' + 'support reparse deferral') + + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + self.assert_event_tags(parser, []) # i.e. no elements started + if ET is pyET: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + if ET is pyET: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) + + def test_flush_reparse_deferral_disabled(self): + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + if not ET is pyET: + self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' + 'methods not available in C') + parser._parser._parser.SetReparseDeferralEnabled(False) + + self.assert_event_tags(parser, []) # i.e. 
no elements started + if ET is pyET: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + if ET is pyET: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) # # xinclude tests (samples from appendix C of the xinclude specification) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 3495fc6548b425..32c01704d9d1d6 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2059,6 +2059,66 @@ def test_decompress_without_3rd_party_library(self): with zipfile.ZipFile(zip_file) as zf: self.assertRaises(RuntimeError, zf.extract, 'a.txt') + @requires_zlib() + def test_full_overlap(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' + b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' + b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' + b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' + b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' + b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' + b'\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + self.assertEqual(len(zipf.read('a')), 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): + zipf.read('b') + 
+ @requires_zlib() + def test_quoted_overlap(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' + b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' + b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l' + b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' + b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\' + b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0' + b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l' + b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00' + b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00' + b'\x00S\x00\x00\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 52) + self.assertEqual(zi.file_size, 1064) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 36) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): + zipf.read('a') + self.assertEqual(len(zipf.read('b')), 1033) + def tearDown(self): unlink(TESTFN) unlink(TESTFN2) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 76fdd719cdaa22..6edde1f73189b1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2571,6 +2571,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): } """ from fnmatch import fnmatch + from ipaddress import AddressValueError, IPv4Address hostonly, port = _splitport(host) @@ -2587,20 +2588,17 @@ def ip2num(ipAddr): return True hostIP = None + try: + hostIP = int(IPv4Address(hostonly)) + except AddressValueError: + pass for value in proxy_settings.get('exceptions', ()): # Items in the list are strings like 
these: *.local, 169.254/16 if not value: continue m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None: - if hostIP is None: - try: - hostIP = socket.gethostbyname(hostonly) - hostIP = ip2num(hostIP) - except OSError: - continue - + if m is not None and hostIP is not None: base = ip2num(m.group(1)) mask = m.group(2) if mask is None: @@ -2623,6 +2621,31 @@ def ip2num(ipAddr): return False +# Same as _proxy_bypass_macosx_sysconf, testable on all platforms +def _proxy_bypass_winreg_override(host, override): + """Return True if the host should bypass the proxy server. + + The proxy override list is obtained from the Windows + Internet settings proxy override registry value. + + An example of a proxy override value is: + "www.example.com;*.example.net; 192.168.0.1" + """ + from fnmatch import fnmatch + + host, _ = _splitport(host) + proxy_override = override.split(';') + for test in proxy_override: + test = test.strip() + # "" should bypass the proxy server for all intranet addresses + if test == '': + if '.' not in host: + return True + elif fnmatch(host, test): + return True + return False + + if sys.platform == 'darwin': from _scproxy import _get_proxy_settings, _get_proxies @@ -2721,7 +2744,7 @@ def proxy_bypass_registry(host): import winreg except ImportError: # Std modules, so should be around - but you never know! - return 0 + return False try: internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') @@ -2731,40 +2754,10 @@ def proxy_bypass_registry(host): 'ProxyOverride')[0]) # ^^^^ Returned as Unicode but problems if not converted to ASCII except OSError: - return 0 + return False if not proxyEnable or not proxyOverride: - return 0 - # try to make a host list from name and IP address. 
- rawHost, port = _splitport(host) - host = [rawHost] - try: - addr = socket.gethostbyname(rawHost) - if addr != rawHost: - host.append(addr) - except OSError: - pass - try: - fqdn = socket.getfqdn(rawHost) - if fqdn != rawHost: - host.append(fqdn) - except OSError: - pass - # make a check value list from the registry entry: replace the - # '' string by the localhost entry and the corresponding - # canonical entry. - proxyOverride = proxyOverride.split(';') - # now check if we match one of the registry values. - for test in proxyOverride: - if test == '': - if '.' not in rawHost: - return 1 - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char - for val in host: - if re.match(test, val, re.I): - return 1 - return 0 + return False + return _proxy_bypass_winreg_override(host, proxyOverride) def proxy_bypass(host): """Return True, if host should be bypassed. diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 2503d9ee76ab68..516656b69e3dfe 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1325,6 +1325,11 @@ def read_events(self): else: yield event + def flush(self): + if self._parser is None: + raise ValueError("flush() called after end of stream") + self._parser.flush() + def XML(text, parser=None): """Parse XML document from string constant. 
@@ -1731,6 +1736,15 @@ def close(self): del self.parser, self._parser del self.target, self._target + def flush(self): + was_enabled = self.parser.GetReparseDeferralEnabled() + try: + self.parser.SetReparseDeferralEnabled(False) + self.parser.Parse(b"", False) + except self._error as v: + self._raiseerror(v) + finally: + self.parser.SetReparseDeferralEnabled(was_enabled) # -------------------------------------------------------------------- # C14N 2.0 diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index e334ac9fea0d36..2f7c87b126c993 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -220,6 +220,20 @@ def feed(self, data, isFinal=False): # FIXME: when to invoke error()? self._err_handler.fatalError(exc) + def flush(self): + if self._parser is None: + return + + was_enabled = self._parser.GetReparseDeferralEnabled() + try: + self._parser.SetReparseDeferralEnabled(False) + self._parser.Parse(b"", False) + except expat.error as e: + exc = SAXParseException(expat.ErrorString(e.code), e, self) + self._err_handler.fatalError(exc) + finally: + self._parser.SetReparseDeferralEnabled(was_enabled) + def _close_source(self): source = self._source try: diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 42e11d7e255caa..7d18bc2479fcda 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -341,6 +341,7 @@ class ZipInfo (object): 'compress_size', 'file_size', '_raw_time', + '_end_offset', ) def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): @@ -382,6 +383,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): self.external_attr = 0 # External file attributes self.compress_size = 0 # Size of the compressed file self.file_size = 0 # Size of the uncompressed file + self._end_offset = None # Start of the next local header or central directory # Other attributes are set by class ZipFile: # header_offset Byte offset to the file header # CRC CRC-32 of the uncompressed file @@ -1404,6 +1406,12 @@ def 
_RealGetContents(self): if self.debug > 2: print("total", total) + end_offset = self.start_dir + for zinfo in sorted(self.filelist, + key=lambda zinfo: zinfo.header_offset, + reverse=True): + zinfo._end_offset = end_offset + end_offset = zinfo.header_offset def namelist(self): """Return a list of file names in the archive.""" @@ -1559,6 +1567,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): 'File name in directory %r and header %r differ.' % (zinfo.orig_filename, fname)) + if (zinfo._end_offset is not None and + zef_file.tell() + zinfo.compress_size > zinfo._end_offset): + raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") + # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & 0x1 if is_encrypted: diff --git a/Makefile.pre.in b/Makefile.pre.in index 51c31b94aea876..fa99dd86c416ed 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1868,6 +1868,18 @@ autoconf: # Regenerate pyconfig.h.in from configure.ac using autoheader (cd $(srcdir); autoheader -Wall) +# See https://github.com/tiran/cpython_autoconf container +.PHONY: regen-configure +regen-configure: + @if command -v podman >/dev/null; then RUNTIME="podman"; else RUNTIME="docker"; fi; \ + if ! command -v $$RUNTIME; then echo "$@ needs either Podman or Docker container runtime." >&2; exit 1; fi; \ + if command -v selinuxenabled >/dev/null && selinuxenabled; then OPT=":Z"; fi; \ + # Manifest corresponds with tag '269' \ + CPYTHON_AUTOCONF_MANIFEST="sha256:f370fee95eefa3d57b00488bce4911635411fa83e2d293ced8cf8a3674ead939" \ + CMD="$$RUNTIME run --rm --pull=missing -v $(abs_srcdir):/src$$OPT quay.io/tiran/cpython_autoconf@$$CPYTHON_AUTOCONF_MANIFEST"; \ + echo $$CMD; \ + $$CMD || exit $? 
+ # Create a tags file for vi tags:: ctags -w $(srcdir)/Include/*.h $(srcdir)/Include/cpython/*.h $(srcdir)/Include/internal/*.h diff --git a/Misc/NEWS.d/3.10.14.rst b/Misc/NEWS.d/3.10.14.rst new file mode 100644 index 00000000000000..916bd6aa252bb1 --- /dev/null +++ b/Misc/NEWS.d/3.10.14.rst @@ -0,0 +1,147 @@ +.. date: 2024-02-18-03-14-40 +.. gh-issue: 115398 +.. nonce: tzvxH8 +.. release date: 2024-03-19 +.. section: Security + +Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding +five new methods: + +* :meth:`xml.etree.ElementTree.XMLParser.flush` +* :meth:`xml.etree.ElementTree.XMLPullParser.flush` +* :meth:`xml.parsers.expat.xmlparser.GetReparseDeferralEnabled` +* :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` +* :meth:`xml.sax.expatreader.ExpatParser.flush` + +.. + +.. date: 2024-02-13-15-14-39 +.. gh-issue: 115399 +.. nonce: xT-scP +.. section: Security + +Update bundled libexpat to 2.6.0 + +.. + +.. date: 2024-01-26-22-14-09 +.. gh-issue: 114572 +.. nonce: t1QMQD +.. section: Security + +:meth:`ssl.SSLContext.cert_store_stats` and +:meth:`ssl.SSLContext.get_ca_certs` now correctly lock access to the +certificate store, when the :class:`ssl.SSLContext` is shared across +multiple threads. + +.. + +.. date: 2024-01-02-19-52-23 +.. gh-issue: 113659 +.. nonce: DkmnQc +.. section: Security + +Skip ``.pth`` files with names starting with a dot or hidden file attribute. + +.. + +.. date: 2023-10-27-19-38-33 +.. gh-issue: 102388 +.. nonce: vd5YUZ +.. section: Core and Builtins + +Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of +bounds + +.. + +.. date: 2024-02-09-19-41-48 +.. gh-issue: 115197 +.. nonce: 20wkWH +.. section: Library + +``urllib.request`` no longer resolves the hostname before checking it +against the system's proxy bypass list on macOS and Windows. + +.. + +.. date: 2024-02-08-14-21-28 +.. gh-issue: 115133 +.. nonce: ycl4ko +.. 
section: Library + +Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat +2.6.0. + +.. + +.. date: 2023-12-01-16-09-59 +.. gh-issue: 81194 +.. nonce: FFad1c +.. section: Library + +Fix a crash in :func:`socket.if_indextoname` with specific value (UINT_MAX). +Fix an integer overflow in :func:`socket.if_indextoname` on 64-bit +non-Windows platforms. + +.. + +.. date: 2023-09-28-13-15-51 +.. gh-issue: 109858 +.. nonce: 43e2dg +.. section: Library + +Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises +BadZipFile when try to read an entry that overlaps with other entry or +central directory. + +.. + +.. date: 2022-12-01-16-57-44 +.. gh-issue: 91133 +.. nonce: LKMVCV +.. section: Library + +Fix a bug in :class:`tempfile.TemporaryDirectory` cleanup, which now no +longer dereferences symlinks when working around file system permission +errors. + +.. + +.. date: 2024-02-14-20-17-04 +.. gh-issue: 115399 +.. nonce: fb9a0R +.. section: Documentation + +Document CVE-2023-52425 of Expat <2.6.0 under "XML vulnerabilities". + +.. + +.. date: 2024-02-01-14-35-05 +.. gh-issue: 111239 +.. nonce: SO7SUF +.. section: Windows + +Update Windows builds to use zlib v1.3.1. + +.. + +.. date: 2023-09-29-10-35-29 +.. gh-issue: 109991 +.. nonce: GmuzGZ +.. section: Windows + +Windows builds now use OpenSSL 1.1.1w. Note that OpenSSL 1.1 has reached its +end of life and no future fixes will be made, and this version of Python is +no longer receiving maintenance fixes and will not be updated to OpenSSL +3.0. + +.. + +.. date: 2023-09-27-23-31-54 +.. gh-issue: 109991 +.. nonce: sUUYY8 +.. section: Tools/Demos + +Update GitHub CI workflows to use OpenSSL 3.0.11 and multissltests to use +1.1.1w, 3.0.11, and 3.1.3. diff --git a/Misc/NEWS.d/3.5.3.rst b/Misc/NEWS.d/3.5.3.rst index c3fcb67a4563f9..25db389ba5734f 100644 --- a/Misc/NEWS.d/3.5.3.rst +++ b/Misc/NEWS.d/3.5.3.rst @@ -3,5 +3,6 @@ .. no changes: True .. nonce: zYPqUK .. release date: 2017-01-17 +.. 
section: Library There were no code changes between 3.5.3rc1 and 3.5.3 final. diff --git a/Misc/NEWS.d/3.6.0.rst b/Misc/NEWS.d/3.6.0.rst index f9805cab28615e..d5c41f38838d93 100644 --- a/Misc/NEWS.d/3.6.0.rst +++ b/Misc/NEWS.d/3.6.0.rst @@ -3,5 +3,6 @@ .. no changes: True .. nonce: F9ENBV .. release date: 2016-12-23 +.. section: Library No changes since release candidate 2 diff --git a/Misc/NEWS.d/3.6.2.rst b/Misc/NEWS.d/3.6.2.rst index dba43d146df954..ee50670bd9f442 100644 --- a/Misc/NEWS.d/3.6.2.rst +++ b/Misc/NEWS.d/3.6.2.rst @@ -3,5 +3,6 @@ .. no changes: True .. nonce: F9ENBV .. release date: 2017-07-17 +.. section: Library No changes since release candidate 2 diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 8637bae0ae143b..090d6fd58e10ee 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3883,6 +3883,38 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self) } } +/*[clinic input] +_elementtree.XMLParser.flush + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self) +/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/ +{ + if (!_check_xmlparser(self)) { + return NULL; + } + + if (EXPAT(SetReparseDeferralEnabled) == NULL) { + Py_RETURN_NONE; + } + + // NOTE: The Expat parser in the C implementation of ElementTree is not + // exposed to the outside; as a result we known that reparse deferral + // is currently enabled, or we would not even have access to function + // XML_SetReparseDeferralEnabled in the first place (which we checked + // for, a few lines up). 
+ + EXPAT(SetReparseDeferralEnabled)(self->parser, XML_FALSE); + + PyObject *res = expat_parse(self, "", 0, XML_FALSE); + + EXPAT(SetReparseDeferralEnabled)(self->parser, XML_TRUE); + + return res; +} + /*[clinic input] _elementtree.XMLParser.feed @@ -4310,6 +4342,7 @@ static PyTypeObject TreeBuilder_Type = { static PyMethodDef xmlparser_methods[] = { _ELEMENTTREE_XMLPARSER_FEED_METHODDEF _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF + _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF {NULL, NULL} diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 7a28f2d37f6c55..e637830c7afe9f 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -4519,6 +4519,50 @@ set_sni_callback(PySSLContext *self, PyObject *arg, void *c) return 0; } +#if OPENSSL_VERSION_NUMBER < 0x30300000L +static X509_OBJECT *x509_object_dup(const X509_OBJECT *obj) +{ + int ok; + X509_OBJECT *ret = X509_OBJECT_new(); + if (ret == NULL) { + return NULL; + } + switch (X509_OBJECT_get_type(obj)) { + case X509_LU_X509: + ok = X509_OBJECT_set1_X509(ret, X509_OBJECT_get0_X509(obj)); + break; + case X509_LU_CRL: + /* X509_OBJECT_get0_X509_CRL was not const-correct prior to 3.0.*/ + ok = X509_OBJECT_set1_X509_CRL( + ret, X509_OBJECT_get0_X509_CRL((X509_OBJECT *)obj)); + break; + default: + /* We cannot duplicate unrecognized types in a polyfill, but it is + * safe to leave an empty object. The caller will ignore it. 
*/ + ok = 1; + break; + } + if (!ok) { + X509_OBJECT_free(ret); + return NULL; + } + return ret; +} + +static STACK_OF(X509_OBJECT) * +X509_STORE_get1_objects(X509_STORE *store) +{ + STACK_OF(X509_OBJECT) *ret; + if (!X509_STORE_lock(store)) { + return NULL; + } + ret = sk_X509_OBJECT_deep_copy(X509_STORE_get0_objects(store), + x509_object_dup, X509_OBJECT_free); + X509_STORE_unlock(store); + return ret; +} +#endif + PyDoc_STRVAR(PySSLContext_sni_callback_doc, "Set a callback that will be called when a server name is provided by the SSL/TLS client in the SNI extension.\n\ \n\ @@ -4548,7 +4592,12 @@ _ssl__SSLContext_cert_store_stats_impl(PySSLContext *self) int x509 = 0, crl = 0, ca = 0, i; store = SSL_CTX_get_cert_store(self->ctx); - objs = X509_STORE_get0_objects(store); + objs = X509_STORE_get1_objects(store); + if (objs == NULL) { + PyErr_SetString(PyExc_MemoryError, "failed to query cert store"); + return NULL; + } + for (i = 0; i < sk_X509_OBJECT_num(objs); i++) { obj = sk_X509_OBJECT_value(objs, i); switch (X509_OBJECT_get_type(obj)) { @@ -4562,12 +4611,11 @@ _ssl__SSLContext_cert_store_stats_impl(PySSLContext *self) crl++; break; default: - /* Ignore X509_LU_FAIL, X509_LU_RETRY, X509_LU_PKEY. - * As far as I can tell they are internal states and never - * stored in a cert store */ + /* Ignore unrecognized types. 
*/ break; } } + sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free); return Py_BuildValue("{sisisi}", "x509", x509, "crl", crl, "x509_ca", ca); } @@ -4599,7 +4647,12 @@ _ssl__SSLContext_get_ca_certs_impl(PySSLContext *self, int binary_form) } store = SSL_CTX_get_cert_store(self->ctx); - objs = X509_STORE_get0_objects(store); + objs = X509_STORE_get1_objects(store); + if (objs == NULL) { + PyErr_SetString(PyExc_MemoryError, "failed to query cert store"); + goto error; + } + for (i = 0; i < sk_X509_OBJECT_num(objs); i++) { X509_OBJECT *obj; X509 *cert; @@ -4627,9 +4680,11 @@ _ssl__SSLContext_get_ca_certs_impl(PySSLContext *self, int binary_form) } Py_CLEAR(ci); } + sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free); return rlist; error: + sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free); Py_XDECREF(ci); Py_XDECREF(rlist); return NULL; diff --git a/Modules/audioop.c b/Modules/audioop.c index 798e3c46c4729d..ab02a20bbc3cb5 100644 --- a/Modules/audioop.c +++ b/Modules/audioop.c @@ -1,33 +1,31 @@ /* The audioop module uses the code base in g777.c file of the Sox project. - * Source: https://web.archive.org/web/19970716121258/http://www.spies.com/Sox/Archive/soxgamma.tar.gz - * Programming the AdLib/Sound Blaster - * FM Music Chips - * Version 2.0 (24 Feb 1992) - * - * Copyright (c) 1991, 1992 by Jeffrey S. Lee - * - * jlee@smylex.uucp + Source: https://sourceforge.net/projects/sox/files/sox/12.17.7/sox-12.17.7.tar.gz + + Copyright of g771.c: + + * This source code is a product of Sun Microsystems, Inc. and is provided + * for unrestricted use. Users may copy or modify this source code without + * charge. * + * SUN SOURCE CODE IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING + * THE WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR + * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE. * + * Sun source code is provided with no support and without any obligation on + * the part of Sun Microsystems, Inc. 
to assist in its use, correction, + * modification or enhancement. * - * Warranty and Copyright Policy + * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE + * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY THIS SOFTWARE + * OR ANY PART THEREOF. * - * This document is provided on an "as-is" basis, and its author makes - * no warranty or representation, express or implied, with respect to - * its quality performance or fitness for a particular purpose. In no - * event will the author of this document be liable for direct, indirect, - * special, incidental, or consequential damages arising out of the use - * or inability to use the information contained within. Use of this - * document is at your own risk. + * In no event will Sun Microsystems, Inc. be liable for any lost revenue + * or profits or other special, indirect and consequential damages, even if + * Sun has been advised of the possibility of such damages. * - * This file may be used and copied freely so long as the applicable - * copyright notices are retained, and no modifications are made to the - * text of the document. No money shall be charged for its distribution - * beyond reasonable shipping, handling and duplication costs, nor shall - * proprietary changes be made to this document so that it cannot be - * distributed freely. This document may not be included in published - * material or commercial packages without the written consent of its - * author. */ + * Sun Microsystems, Inc. 
+ * 2550 Garcia Avenue + * Mountain View, California 94043 */ /* audioopmodule - Module to detect peak values in arrays */ diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 7394cf67e0e7dd..6d906ecdd396c2 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -181,8 +181,9 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_UCS4 buf[2] = {c, 0}; Py_ssize_t length = 1; - encoded = dsg->encoder(&c, &length); + encoded = dsg->encoder(buf, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ @@ -191,9 +192,11 @@ ENCODER(iso2022) return MBERR_TOOFEW; length = -1; } - else + else { + buf[1] = INCHAR2; length = 2; - encoded = dsg->encoder(&c, &length); + } + encoded = dsg->encoder(buf, &length); if (encoded != MAP_UNMAPPABLE) { insize = length; break; diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index 5ed5a6cadd9d59..4dfbda15ef552f 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -865,6 +865,23 @@ _elementtree_XMLParser_close(XMLParserObject *self, PyObject *Py_UNUSED(ignored) return _elementtree_XMLParser_close_impl(self); } +PyDoc_STRVAR(_elementtree_XMLParser_flush__doc__, +"flush($self, /)\n" +"--\n" +"\n"); + +#define _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF \ + {"flush", (PyCFunction)_elementtree_XMLParser_flush, METH_NOARGS, _elementtree_XMLParser_flush__doc__}, + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self); + +static PyObject * +_elementtree_XMLParser_flush(XMLParserObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _elementtree_XMLParser_flush_impl(self); +} + PyDoc_STRVAR(_elementtree_XMLParser_feed__doc__, "feed($self, data, /)\n" "--\n" @@ -915,4 +932,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *const *args, exit: return return_value; 
} -/*[clinic end generated code: output=992733cfc7390590 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=734466e245f19c0c input=a9049054013a1b77]*/ diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index bee2ee66950a5a..638bc0f8120be5 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -2,6 +2,53 @@ preserve [clinic start generated code]*/ +PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__, +"SetReparseDeferralEnabled($self, enabled, /)\n" +"--\n" +"\n" +"Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0."); + +#define PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF \ + {"SetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_SetReparseDeferralEnabled, METH_O, pyexpat_xmlparser_SetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled); + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled(xmlparseobject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + int enabled; + + enabled = PyObject_IsTrue(arg); + if (enabled < 0) { + goto exit; + } + return_value = pyexpat_xmlparser_SetReparseDeferralEnabled_impl(self, enabled); + +exit: + return return_value; +} + +PyDoc_STRVAR(pyexpat_xmlparser_GetReparseDeferralEnabled__doc__, +"GetReparseDeferralEnabled($self, /)\n" +"--\n" +"\n" +"Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0."); + +#define PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF \ + {"GetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_GetReparseDeferralEnabled, METH_NOARGS, pyexpat_xmlparser_GetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self); + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled(xmlparseobject *self, PyObject *Py_UNUSED(ignored)) +{ + return pyexpat_xmlparser_GetReparseDeferralEnabled_impl(self); +} + 
PyDoc_STRVAR(pyexpat_xmlparser_Parse__doc__, "Parse($self, data, isfinal=False, /)\n" "--\n" @@ -425,4 +472,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=5d60049d385d5d56 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=396e05b3d56ce477 input=a9049054013a1b77]*/ diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 1c83563cbf68e7..95464b0dd17735 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -11,11 +11,13 @@ Copyright (c) 2000-2005 Fred L. Drake, Jr. Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Cristian Rodríguez Copyright (c) 2016 Thomas Beutlich Copyright (c) 2017 Rhodri James Copyright (c) 2022 Thijs Schreijer + Copyright (c) 2023 Hanno Böck + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -269,7 +271,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, const XML_Char *namespaceSeparator); -/* Prepare a parser object to be re-used. This is particularly +/* Prepare a parser object to be reused. This is particularly valuable when memory allocation overhead is disproportionately high, such as when a large number of small documnents need to be parsed. 
All handlers are cleared from the parser, except for the @@ -951,7 +953,7 @@ XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentByteCount(XML_Parser parser); -/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets +/* If XML_CONTEXT_BYTES is >=1, returns the input buffer, sets the integer pointed to by offset to the offset within this buffer of the current parse position, and sets the integer pointed to by size to the size of this buffer (the number of input bytes). Otherwise @@ -1025,7 +1027,9 @@ enum XML_FeatureEnum { XML_FEATURE_ATTR_INFO, /* Added in Expat 2.4.0. */ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, - XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + /* Added in Expat 2.6.0. */ + XML_FEATURE_GE /* Additional features must be added to the end of this enum. */ }; @@ -1038,23 +1042,29 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -#ifdef XML_DTD -/* Added in Expat 2.4.0. */ +#if XML_GE == 1 +/* Added in Expat 2.4.0 for XML_DTD defined and + * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionMaximumAmplification( XML_Parser parser, float maximumAmplificationFactor); -/* Added in Expat 2.4.0. */ +/* Added in Expat 2.4.0 for XML_DTD defined and + * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes); #endif +/* Added in Expat 2.6.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); + /* Expat follows the semantic versioning convention. - See http://semver.org. 
+ See https://semver.org */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 5 +#define XML_MINOR_VERSION 6 #define XML_MICRO_VERSION 0 #ifdef __cplusplus diff --git a/Modules/expat/expat_config.h b/Modules/expat/expat_config.h index afbedd011f660f..8f1831de36faec 100644 --- a/Modules/expat/expat_config.h +++ b/Modules/expat/expat_config.h @@ -16,6 +16,7 @@ #define XML_NS 1 #define XML_DTD 1 +#define XML_GE 1 #define XML_CONTEXT_BYTES 1024 #endif /* EXPAT_CONFIG_H */ diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h index e09f533b23c9df..cce71e4c5164b5 100644 --- a/Modules/expat/internal.h +++ b/Modules/expat/internal.h @@ -28,9 +28,10 @@ Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2003 Greg Stein - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2023 Sebastian Pipping Copyright (c) 2018 Yury Gribov Copyright (c) 2019 David Loffredo + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -154,12 +155,15 @@ extern "C" { void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef); -#if defined(XML_DTD) +#if XML_GE == 1 unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); const char *unsignedCharToPrintable(unsigned char c); #endif +extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c +extern unsigned int g_parseAttempts; // used for testing only + #ifdef __cplusplus } #endif diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index d45d9b6c457159..8ee03ef0792815 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -108,6 +108,7 @@ #define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler #define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing #define 
XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler +#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled #define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet #define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler #define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h index 303283ad2de98d..a1ed99e687bd6e 100644 --- a/Modules/expat/siphash.h +++ b/Modules/expat/siphash.h @@ -106,7 +106,7 @@ * if this code is included and compiled as C++; related GCC warning is: * warning: use of C++11 long long integer constant [-Wlong-long] */ -#define _SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low)) +#define SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low)) #define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) @@ -190,10 +190,10 @@ sip_round(struct siphash *H, const int rounds) { static struct siphash * sip24_init(struct siphash *H, const struct sipkey *key) { - H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; - H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; - H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; - H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; + H->v0 = SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; + H->v1 = SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; + H->v2 = SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; + H->v3 = SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; H->p = H->buf; H->c = 0; diff --git a/Modules/expat/winconfig.h b/Modules/expat/winconfig.h index 2ecd61b5b94820..05805514ec7fa2 100644 --- a/Modules/expat/winconfig.h +++ b/Modules/expat/winconfig.h @@ -9,7 +9,8 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Greg Stein Copyright (c) 2005 Karl Waclawek - Copyright (c) 2017-2021 Sebastian Pipping + Copyright (c) 2017-2023 Sebastian Pipping + Copyright (c) 2023 Orgad Shaneh Licensed under the MIT license: Permission is hereby 
granted, free of charge, to any person obtaining @@ -35,7 +36,9 @@ #ifndef WINCONFIG_H #define WINCONFIG_H -#define WIN32_LEAN_AND_MEAN +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif #include #undef WIN32_LEAN_AND_MEAN diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index b6c2eca97567ba..aaf0fa9c8f96d1 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+) +/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -13,7 +13,7 @@ Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie Copyright (c) 2016 Eric Rahm - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Gaurav Copyright (c) 2016 Thomas Beutlich Copyright (c) 2016 Gustavo Grieco @@ -32,10 +32,13 @@ Copyright (c) 2019 David Loffredo Copyright (c) 2019-2020 Ben Wagner Copyright (c) 2019 Vadim Zeitlin - Copyright (c) 2021 Dong-hee Na + Copyright (c) 2021 Donghee Na Copyright (c) 2022 Samanta Navarro Copyright (c) 2022 Jeffrey Walton Copyright (c) 2022 Jann Horn + Copyright (c) 2022 Sean McBride + Copyright (c) 2023 Owain Davies + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -60,10 +63,25 @@ #define XML_BUILDING_EXPAT 1 -#include +#include "expat_config.h" -#if ! defined(_GNU_SOURCE) -# define _GNU_SOURCE 1 /* syscall prototype */ +#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) +# error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) +#endif + +#if defined(XML_DTD) && XML_GE == 0 +# error Either undefine XML_DTD or define XML_GE to 1. +#endif + +#if ! 
defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ + || (XML_CONTEXT_BYTES + 0 < 0) +# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) +#endif + +#if defined(HAVE_SYSCALL_GETRANDOM) +# if ! defined(_GNU_SOURCE) +# define _GNU_SOURCE 1 /* syscall prototype */ +# endif #endif #ifdef _WIN32 @@ -73,6 +91,7 @@ # endif #endif +#include #include #include /* memset(), memcpy() */ #include @@ -131,8 +150,8 @@ Your options include: \ * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ - * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ - * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ + * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ @@ -196,6 +215,8 @@ typedef char ICHAR; /* Do safe (NULL-aware) pointer arithmetic */ #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) +#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) + #include "internal.h" #include "xmltok.h" #include "xmlrole.h" @@ -279,7 +300,7 @@ typedef struct { XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to contain the 'raw' name as well. - A parser re-uses these structures, maintaining a list of allocated + A parser reuses these structures, maintaining a list of allocated TAG objects in a free list. */ typedef struct tag { @@ -408,12 +429,12 @@ enum XML_Account { XML_ACCOUNT_NONE /* i.e. 
do not account, was accounted already */ }; -#ifdef XML_DTD +#if XML_GE == 1 typedef unsigned long long XmlBigCount; typedef struct accounting { XmlBigCount countBytesDirect; XmlBigCount countBytesIndirect; - int debugLevel; + unsigned long debugLevel; float maximumAmplificationFactor; // >=1.0 unsigned long long activationThresholdBytes; } ACCOUNTING; @@ -422,9 +443,9 @@ typedef struct entity_stats { unsigned int countEverOpened; unsigned int currentDepth; unsigned int maximumDepthSeen; - int debugLevel; + unsigned long debugLevel; } ENTITY_STATS; -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end, const char **endPtr); @@ -464,41 +485,47 @@ static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, XML_Bool haveMore, enum XML_Account account); -static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, +static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore, enum XML_Account account); #ifdef XML_DTD -static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, +static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ static void freeBindings(XML_Parser parser, BINDING *bindings); -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, - const char *s, TAG_NAME *tagNamePtr, +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, + const char *attStr, TAG_NAME *tagNamePtr, BINDING **bindingsPtr, enum XML_Account account); static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); -static int defineAttribute(ELEMENT_TYPE *type, 
ATTRIBUTE_ID *, XML_Bool isCdata, - XML_Bool isId, const XML_Char *dfltValue, - XML_Parser parser); -static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, - XML_Bool isCdata, const char *, - const char *, STRING_POOL *, +static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, + XML_Bool isCdata, XML_Bool isId, + const XML_Char *value, XML_Parser parser); +static enum XML_Error storeAttributeValue(XML_Parser parser, + const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, + STRING_POOL *pool, enum XML_Account account); -static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, - XML_Bool isCdata, const char *, - const char *, STRING_POOL *, +static enum XML_Error appendAttributeValue(XML_Parser parser, + const ENCODING *enc, + XML_Bool isCdata, const char *ptr, + const char *end, STRING_POOL *pool, enum XML_Account account); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); +static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); +#if XML_GE == 1 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end, enum XML_Account account); +#else +static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); +#endif static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, @@ -518,21 +545,22 @@ static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); -static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, - const HASH_TABLE *); +static int copyEntityTable(XML_Parser oldParser, HASH_TABLE 
*newTable, + STRING_POOL *newPool, const HASH_TABLE *oldTable); static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); -static void FASTCALL hashTableInit(HASH_TABLE *, +static void FASTCALL hashTableInit(HASH_TABLE *table, const XML_Memory_Handling_Suite *ms); -static void FASTCALL hashTableClear(HASH_TABLE *); -static void FASTCALL hashTableDestroy(HASH_TABLE *); -static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); +static void FASTCALL hashTableClear(HASH_TABLE *table); +static void FASTCALL hashTableDestroy(HASH_TABLE *table); +static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, + const HASH_TABLE *table); +static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); -static void FASTCALL poolInit(STRING_POOL *, +static void FASTCALL poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms); -static void FASTCALL poolClear(STRING_POOL *); -static void FASTCALL poolDestroy(STRING_POOL *); +static void FASTCALL poolClear(STRING_POOL *pool); +static void FASTCALL poolDestroy(STRING_POOL *pool); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, @@ -562,7 +590,7 @@ static XML_Parser parserCreate(const XML_Char *encodingName, static void parserInit(XML_Parser parser, const XML_Char *encodingName); -#ifdef XML_DTD +#if XML_GE == 1 static float accountingGetCurrentAmplification(XML_Parser rootParser); static void accountingReportStats(XML_Parser originParser, const char *epilog); static void accountingOnAbort(XML_Parser originParser); @@ -585,13 +613,12 @@ static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ static unsigned long getDebugLevel(const char 
*variableName, unsigned long defaultDebugLevel); #define poolStart(pool) ((pool)->start) -#define poolEnd(pool) ((pool)->ptr) #define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolChop(pool) ((void)--(pool->ptr)) #define poolLastChar(pool) (((pool)->ptr)[-1]) @@ -602,21 +629,35 @@ static unsigned long getDebugLevel(const char *variableName, ? 0 \ : ((*((pool)->ptr)++ = c), 1)) +XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c +unsigned int g_parseAttempts = 0; // used for testing only + struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData macro works. */ void *m_userData; void *m_handlerArg; - char *m_buffer; + + // How the four parse buffer pointers below relate in time and space: + // + // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim + // | | | | + // <--parsed-->| | | + // <---parsing--->| | + // <--unoccupied-->| + // <---------total-malloced/realloced-------->| + + char *m_buffer; // malloc/realloc base pointer of parse buffer const XML_Memory_Handling_Suite m_mem; - /* first character to be parsed */ - const char *m_bufferPtr; - /* past last character to be parsed */ - char *m_bufferEnd; - /* allocated end of m_buffer */ - const char *m_bufferLim; + const char *m_bufferPtr; // first character to be parsed + char *m_bufferEnd; // past last character to be parsed + const char *m_bufferLim; // allocated end of m_buffer + XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; + size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ + XML_Bool m_reparseDeferralEnabled; + int m_lastBufferRequestSize; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; XML_StartElementHandler m_startElementHandler; @@ -703,7 +744,7 @@ struct XML_ParserStruct { enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; -#ifdef XML_DTD +#if XML_GE == 1 ACCOUNTING m_accounting; ENTITY_STATS m_entity_stats; #endif @@ -948,6 +989,47 @@ 
get_hash_secret_salt(XML_Parser parser) { return parser->m_hash_secret_salt; } +static enum XML_Error +callProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); + + if (parser->m_reparseDeferralEnabled + && ! parser->m_parsingStatus.finalBuffer) { + // Heuristic: don't try to parse a partial token again until the amount of + // available data has increased significantly. + const size_t had_before = parser->m_partialTokenBytesBefore; + // ...but *do* try anyway if we're close to causing a reallocation. + size_t available_buffer + = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); +#if XML_CONTEXT_BYTES > 0 + available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); +#endif + available_buffer + += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); + // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok + const bool enough + = (have_now >= 2 * had_before) + || ((size_t)parser->m_lastBufferRequestSize > available_buffer); + + if (! enough) { + *endPtr = start; // callers may expect this to be set + return XML_ERROR_NONE; + } + } + g_parseAttempts += 1; + const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); + if (ret == XML_ERROR_NONE) { + // if we consumed nothing, remember what we had on this parse attempt. 
+ if (*endPtr == start) { + parser->m_partialTokenBytesBefore = have_now; + } else { + parser->m_partialTokenBytesBefore = 0; + } + } + return ret; +} + static XML_Bool /* only valid for root parser */ startParsing(XML_Parser parser) { /* hash functions must be initialized before setContext() is called */ @@ -1129,6 +1211,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_bufferEnd = parser->m_buffer; parser->m_parseEndByteIndex = 0; parser->m_parseEndPtr = NULL; + parser->m_partialTokenBytesBefore = 0; + parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; + parser->m_lastBufferRequestSize = 0; parser->m_declElementType = NULL; parser->m_declAttributeId = NULL; parser->m_declEntity = NULL; @@ -1163,7 +1248,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { #endif parser->m_hash_secret_salt = 0; -#ifdef XML_DTD +#if XML_GE == 1 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); parser->m_accounting.maximumAmplificationFactor @@ -1298,6 +1383,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, to worry which hash secrets each table has. */ unsigned long oldhash_secret_salt; + XML_Bool oldReparseDeferralEnabled; /* Validate the oldParser parameter before we pull everything out of it */ if (oldParser == NULL) @@ -1342,6 +1428,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, to worry which hash secrets each table has. */ oldhash_secret_salt = parser->m_hash_secret_salt; + oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; #ifdef XML_DTD if (! 
context) @@ -1394,6 +1481,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; parser->m_ns_triplets = oldns_triplets; parser->m_hash_secret_salt = oldhash_secret_salt; + parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; parser->m_parentParser = oldParser; #ifdef XML_DTD parser->m_paramEntityParsing = oldParamEntityParsing; @@ -1848,55 +1936,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_parsingStatus.parsing = XML_PARSING; } - if (len == 0) { - parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - if (! isFinal) - return XML_STATUS_OK; - parser->m_positionPtr = parser->m_bufferPtr; - parser->m_parseEndPtr = parser->m_bufferEnd; - - /* If data are left over from last buffer, and we now know that these - data are the final chunk of input, then we have to check them again - to detect errors based on that fact. - */ - parser->m_errorCode - = parser->m_processor(parser, parser->m_bufferPtr, - parser->m_parseEndPtr, &parser->m_bufferPtr); - - if (parser->m_errorCode == XML_ERROR_NONE) { - switch (parser->m_parsingStatus.parsing) { - case XML_SUSPENDED: - /* It is hard to be certain, but it seems that this case - * cannot occur. This code is cleaning up a previous parse - * with no new data (since len == 0). Changing the parsing - * state requires getting to execute a handler function, and - * there doesn't seem to be an opportunity for that while in - * this circumstance. - * - * Given the uncertainty, we retain the code but exclude it - * from coverage tests. 
- * - * LCOV_EXCL_START - */ - XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, - parser->m_bufferPtr, &parser->m_position); - parser->m_positionPtr = parser->m_bufferPtr; - return XML_STATUS_SUSPENDED; - /* LCOV_EXCL_STOP */ - case XML_INITIALIZED: - case XML_PARSING: - parser->m_parsingStatus.parsing = XML_FINISHED; - /* fall through */ - default: - return XML_STATUS_OK; - } - } - parser->m_eventEndPtr = parser->m_eventPtr; - parser->m_processor = errorProcessor; - return XML_STATUS_ERROR; - } -#ifndef XML_CONTEXT_BYTES - else if (parser->m_bufferPtr == parser->m_bufferEnd) { +#if XML_CONTEXT_BYTES == 0 + if (parser->m_bufferPtr == parser->m_bufferEnd) { const char *end; int nLeftOver; enum XML_Status result; @@ -1907,12 +1948,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } + // though this isn't a buffer request, we assume that `len` is the app's + // preferred buffer fill size, and therefore save it here. 
+ parser->m_lastBufferRequestSize = len; parser->m_parseEndByteIndex += len; parser->m_positionPtr = s; parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; parser->m_errorCode - = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); + = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; @@ -1939,23 +1983,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { &parser->m_position); nLeftOver = s + len - end; if (nLeftOver) { - if (parser->m_buffer == NULL - || nLeftOver > parser->m_bufferLim - parser->m_buffer) { - /* avoid _signed_ integer overflow */ - char *temp = NULL; - const int bytesToAllocate = (int)((unsigned)len * 2U); - if (bytesToAllocate > 0) { - temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); - } - if (temp == NULL) { - parser->m_errorCode = XML_ERROR_NO_MEMORY; - parser->m_eventPtr = parser->m_eventEndPtr = NULL; - parser->m_processor = errorProcessor; - return XML_STATUS_ERROR; - } - parser->m_buffer = temp; - parser->m_bufferLim = parser->m_buffer + bytesToAllocate; + // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED + // (and XML_ERROR_FINISHED) from XML_GetBuffer. + const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; + parser->m_parsingStatus.parsing = XML_PARSING; + void *const temp = XML_GetBuffer(parser, nLeftOver); + parser->m_parsingStatus.parsing = originalStatus; + // GetBuffer may have overwritten this, but we want to remember what the + // app requested, not how many bytes were left over after parsing. + parser->m_lastBufferRequestSize = len; + if (temp == NULL) { + // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
+ parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; } + // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we + // don't have any data to preserve, and can copy straight into the start + // of the buffer rather than the GetBuffer return pointer (which may be + // pointing further into the allocated buffer). memcpy(parser->m_buffer, end, nLeftOver); } parser->m_bufferPtr = parser->m_buffer; @@ -1966,16 +2012,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_eventEndPtr = parser->m_bufferPtr; return result; } -#endif /* not defined XML_CONTEXT_BYTES */ - else { - void *buff = XML_GetBuffer(parser, len); - if (buff == NULL) - return XML_STATUS_ERROR; - else { - memcpy(buff, s, len); - return XML_ParseBuffer(parser, len, isFinal); - } +#endif /* XML_CONTEXT_BYTES == 0 */ + void *buff = XML_GetBuffer(parser, len); + if (buff == NULL) + return XML_STATUS_ERROR; + if (len > 0) { + assert(s != NULL); // make sure s==NULL && len!=0 was rejected above + memcpy(buff, s, len); } + return XML_ParseBuffer(parser, len, isFinal); } enum XML_Status XMLCALL @@ -2015,8 +2060,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { parser->m_parseEndByteIndex += len; parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - parser->m_errorCode = parser->m_processor( - parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); + parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, + &parser->m_bufferPtr); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; @@ -2061,10 +2106,14 @@ XML_GetBuffer(XML_Parser parser, int len) { default:; } - if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { -#ifdef XML_CONTEXT_BYTES + // whether or not the request succeeds, `len` seems to be the app's preferred + // buffer fill size; remember it. 
+ parser->m_lastBufferRequestSize = len; + if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) + || parser->m_buffer == NULL) { +#if XML_CONTEXT_BYTES > 0 int keep; -#endif /* defined XML_CONTEXT_BYTES */ +#endif /* XML_CONTEXT_BYTES > 0 */ /* Do not invoke signed arithmetic overflow: */ int neededSize = (int)((unsigned)len + (unsigned)EXPAT_SAFE_PTR_DIFF( @@ -2073,7 +2122,7 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; @@ -2083,10 +2132,11 @@ XML_GetBuffer(XML_Parser parser, int len) { return NULL; } neededSize += keep; -#endif /* defined XML_CONTEXT_BYTES */ - if (neededSize - <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { -#ifdef XML_CONTEXT_BYTES +#endif /* XML_CONTEXT_BYTES > 0 */ + if (parser->m_buffer && parser->m_bufferPtr + && neededSize + <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { +#if XML_CONTEXT_BYTES > 0 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) @@ -2099,19 +2149,17 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_bufferPtr -= offset; } #else - if (parser->m_buffer && parser->m_bufferPtr) { - memmove(parser->m_buffer, parser->m_bufferPtr, - EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); - parser->m_bufferEnd - = parser->m_buffer - + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); - parser->m_bufferPtr = parser->m_buffer; - } -#endif /* not defined XML_CONTEXT_BYTES */ + memmove(parser->m_buffer, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + parser->m_bufferPtr = 
parser->m_buffer; +#endif /* XML_CONTEXT_BYTES > 0 */ } else { char *newBuf; int bufferSize - = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { @@ -2128,7 +2176,7 @@ XML_GetBuffer(XML_Parser parser, int len) { return NULL; } parser->m_bufferLim = newBuf + bufferSize; -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 if (parser->m_bufferPtr) { memcpy(newBuf, &parser->m_bufferPtr[-keep], EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) @@ -2158,7 +2206,7 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_bufferEnd = newBuf; } parser->m_bufferPtr = parser->m_buffer = newBuf; -#endif /* not defined XML_CONTEXT_BYTES */ +#endif /* XML_CONTEXT_BYTES > 0 */ } parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; @@ -2208,7 +2256,7 @@ XML_ResumeParser(XML_Parser parser) { } parser->m_parsingStatus.parsing = XML_PARSING; - parser->m_errorCode = parser->m_processor( + parser->m_errorCode = callProcessor( parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); if (parser->m_errorCode != XML_ERROR_NONE) { @@ -2272,7 +2320,7 @@ XML_GetCurrentByteCount(XML_Parser parser) { const char *XMLCALL XML_GetInputContext(XML_Parser parser, int *offset, int *size) { -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 if (parser == NULL) return NULL; if (parser->m_eventPtr && parser->m_buffer) { @@ -2286,7 +2334,7 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) { (void)parser; (void)offset; (void)size; -#endif /* defined XML_CONTEXT_BYTES */ +#endif /* XML_CONTEXT_BYTES > 0 */ return (const char *)0; } @@ -2506,7 +2554,7 @@ XML_GetFeatureList(void) { #ifdef XML_DTD {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), XML_CONTEXT_BYTES}, #endif @@ -2522,8 
+2570,9 @@ XML_GetFeatureList(void) { #ifdef XML_ATTR_INFO {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, #endif -#ifdef XML_DTD - /* Added in Expat 2.4.0. */ +#if XML_GE == 1 + /* Added in Expat 2.4.0 for XML_DTD defined and + * added in Expat 2.6.0 for XML_GE == 1. */ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_L("XML_BLAP_MAX_AMP"), (long int) @@ -2531,13 +2580,15 @@ XML_GetFeatureList(void) { {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, XML_L("XML_BLAP_ACT_THRES"), EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, + /* Added in Expat 2.6.0. */ + {XML_FEATURE_GE, XML_L("XML_GE"), 0}, #endif {XML_FEATURE_END, NULL, 0}}; return features; } -#ifdef XML_DTD +#if XML_GE == 1 XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionMaximumAmplification( XML_Parser parser, float maximumAmplificationFactor) { @@ -2559,7 +2610,16 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( parser->m_accounting.activationThresholdBytes = activationThresholdBytes; return XML_TRUE; } -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ + +XML_Bool XMLCALL +XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { + if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { + parser->m_reparseDeferralEnabled = enabled; + return XML_TRUE; + } + return XML_FALSE; +} /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was @@ -2581,7 +2641,7 @@ storeRawNames(XML_Parser parser) { */ if (tag->rawName == rawNameBuf) break; - /* For re-use purposes we need to ensure that the + /* For reuse purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). 
*/ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); @@ -2645,13 +2705,13 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start, int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: -#ifdef XML_DTD +#if XML_GE == 1 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to @@ -2765,7 +2825,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 const char *accountAfter = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) ? (haveMore ? s /* i.e. 0 bytes */ : end) @@ -2831,14 +2891,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { -#ifdef XML_DTD +#if XML_GE == 1 /* NOTE: We are replacing 4-6 characters original input for 1 character * so there is no amplification and hence recording without * protection. 
*/ accountingDiffTolerated(parser, tok, (char *)&ch, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ if (parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); else if (parser->m_defaultHandler) @@ -3039,13 +3099,13 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, if (parser->m_ns && localPart) { /* localPart and prefix may have been overwritten in tag->name.str, since this points to the binding->uri - buffer which gets re-used; so we have to add them again + buffer which gets reused; so we have to add them again */ uri = (XML_Char *)tag->name.str + tag->name.uriLen; /* don't need to check for space - already done in storeAtts() */ while (*localPart) *uri++ = *localPart++; - prefix = (XML_Char *)tag->name.prefix; + prefix = tag->name.prefix; if (parser->m_ns_triplets && prefix) { *uri++ = parser->m_namespaceSeparator; while (*prefix) @@ -3112,7 +3172,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, However, now we have a start/endCdataSectionHandler, so it seems easier to let the user deal with this. 
*/ - else if (0 && parser->m_characterDataHandler) + else if ((0) && parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ @@ -3141,8 +3201,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); } else parser->m_characterDataHandler( - parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)end - (XML_Char *)s)); + parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)end - (const XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for @@ -3175,8 +3235,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, *eventPP = s; } } else - charDataHandler(parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); + charDataHandler(parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)next - (const XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -4040,7 +4100,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, for (;;) { const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; @@ -4055,7 +4115,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, parser->m_endCdataSectionHandler(parser->m_handlerArg); /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ - else if (0 && parser->m_characterDataHandler) + else if ((0) && parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ @@ -4091,8 +4151,8 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, *eventPP = s; } } else - charDataHandler(parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); + charDataHandler(parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)next - (const XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -4192,7 +4252,7 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); -# ifdef XML_DTD +# if XML_GE == 1 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -4284,7 +4344,7 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const XML_Char *storedversion = NULL; int standalone = -1; -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -4482,16 +4542,16 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, parser->m_processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); } - /* If we are at the end of the buffer, this would cause XmlPrologTok to - return XML_TOK_NONE on the next call, which would then cause the - function to exit with *nextPtr set to s - that is what we want for other - tokens, but not for the BOM - we would rather like to skip it; - then, when this routine is entered the next time, XmlPrologTok will - return XML_TOK_INVALID, since the BOM is still in the buffer + /* XmlPrologTok has now set the encoding based on the BOM it found, and we + must move s and nextPtr forward to consume the BOM. + + If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we + would leave the BOM in the buffer and return. On the next call to this + function, our XmlPrologTok call would return XML_TOK_INVALID, since it + is not valid to have multiple BOMs. */ - else if (tok == XML_TOK_BOM && next == end - && ! parser->m_parsingStatus.finalBuffer) { -# ifdef XML_DTD + else if (tok == XML_TOK_BOM) { +# if XML_GE == 1 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -4500,7 +4560,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, # endif *nextPtr = next; - return XML_ERROR_NONE; + s = next; } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. 
it doesn't begin with @@ -4707,11 +4767,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } } role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); -#ifdef XML_DTD +#if XML_GE == 1 switch (role) { case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl - case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl +# ifdef XML_DTD + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl +# endif break; default: if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { @@ -5029,6 +5091,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { +#if XML_GE == 1 + // This will store the given replacement text in + // parser->m_declEntity->textPtr. enum XML_Error result = storeEntityValue(parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, XML_ACCOUNT_NONE); @@ -5049,6 +5114,25 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, poolDiscard(&dtd->entityValuePool); if (result != XML_ERROR_NONE) return result; +#else + // This will store "&amp;entity123;" in parser->m_declEntity->textPtr + // to end up as "&entity123;" in the handler.
+ if (parser->m_declEntity != NULL) { + const enum XML_Error result + = storeSelfEntityValue(parser, parser->m_declEntity); + if (result != XML_ERROR_NONE) + return result; + + if (parser->m_entityDeclHandler) { + *eventEndPP = s; + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); + handleDefault = XML_FALSE; + } + } +#endif } break; case XML_ROLE_DOCTYPE_SYSTEM_ID: @@ -5107,6 +5191,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } break; case XML_ROLE_ENTITY_COMPLETE: +#if XML_GE == 0 + // This will store "&amp;entity123;" in entity->textPtr + // to end up as "&entity123;" in the handler. + if (parser->m_declEntity != NULL) { + const enum XML_Error result + = storeSelfEntityValue(parser, parser->m_declEntity); + if (result != XML_ERROR_NONE) + return result; + } +#endif if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) { *eventEndPP = s; @@ -5648,7 +5742,7 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, for (;;) { const char *next = NULL; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 if (!
accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -5728,7 +5822,7 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnOpen(parser, entity, __LINE__); #endif entity->processed = 0; @@ -5761,10 +5855,10 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - textStart); parser->m_processor = internalEntityProcessor; - } else { -#ifdef XML_DTD + } else if (parser->m_openInternalEntities->entity == entity) { +#if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ @@ -5813,7 +5907,7 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, return result; } -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); #endif entity->open = XML_FALSE; @@ -5892,7 +5986,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *next = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; @@ -5957,14 +6051,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { -#ifdef XML_DTD +#if XML_GE == 1 /* NOTE: We are replacing 4-6 characters original input for 1 character * so there is no amplification and hence recording without * protection. */ accountingDiffTolerated(parser, tok, (char *)&ch, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; break; @@ -6042,14 +6136,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = XML_TRUE; -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnOpen(parser, entity, __LINE__); #endif result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata, (const char *)entity->textPtr, (const char *)textEnd, pool, XML_ACCOUNT_ENTITY_EXPANSION); -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); #endif entity->open = XML_FALSE; @@ -6079,6 +6173,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, /* not reached */ } +#if XML_GE == 1 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd, @@ -6086,12 +6181,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; -#ifdef XML_DTD +# ifdef XML_DTD int oldInEntityValue = parser->m_prologState.inEntityValue; 
parser->m_prologState.inEntityValue = 1; -#else +# else UNUSED_P(account); -#endif /* XML_DTD */ +# endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we have to make sure that entityValuePool.start is not null */ @@ -6105,18 +6200,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); -#ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, account)) { accountingOnAbort(parser); result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; goto endEntityValue; } -#endif switch (tok) { case XML_TOK_PARAM_ENTITY_REF: -#ifdef XML_DTD +# ifdef XML_DTD if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; ENTITY *entity; @@ -6178,7 +6271,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, } break; } -#endif /* XML_DTD */ +# endif /* XML_DTD */ /* In the internal subset, PE references are not legal within markup declarations, e.g entity values in this case. */ parser->m_eventPtr = entityTextPtr; @@ -6259,12 +6352,38 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, entityTextPtr = next; } endEntityValue: -#ifdef XML_DTD +# ifdef XML_DTD parser->m_prologState.inEntityValue = oldInEntityValue; -#endif /* XML_DTD */ +# endif /* XML_DTD */ return result; } +#else /* XML_GE == 0 */ + +static enum XML_Error +storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { + // This will store "&amp;entity123;" in entity->textPtr + // to end up as "&entity123;" in the handler. + const char *const entity_start = "&amp;"; + const char *const entity_end = ";"; + + STRING_POOL *const pool = &(parser->m_dtd->entityValuePool); + if (! poolAppendString(pool, entity_start) + || ! poolAppendString(pool, entity->name) + || !
poolAppendString(pool, entity_end)) { + poolDiscard(pool); + return XML_ERROR_NO_MEMORY; + } + + entity->textPtr = poolStart(pool); + entity->textLen = (int)(poolLength(pool)); + poolFinish(pool); + + return XML_ERROR_NONE; +} + +#endif /* XML_GE == 0 */ + static void FASTCALL normalizeLines(XML_Char *s) { XML_Char *p; @@ -6375,8 +6494,9 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); } else - parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)end - (XML_Char *)s)); + parser->m_defaultHandler( + parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)end - (const XML_Char *)s)); } static int @@ -6480,7 +6600,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, name = poolStoreString(&dtd->pool, enc, start, end); if (! name) return NULL; - /* skip quotation mark - its storage will be re-used (like in name[-1]) */ + /* skip quotation mark - its storage will be reused (like in name[-1]) */ ++name; id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); @@ -6630,6 +6750,10 @@ getContext(XML_Parser parser) { static XML_Bool setContext(XML_Parser parser, const XML_Char *context) { + if (context == NULL) { + return XML_FALSE; + } + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *s = context; @@ -7220,7 +7344,7 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, return NULL; for (;;) { const enum XML_Convert_Result convert_res = XmlConvert( - enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; @@ -7651,7 +7775,7 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { return result; } -#ifdef XML_DTD +#if 
XML_GE == 1 static float accountingGetCurrentAmplification(XML_Parser rootParser) { @@ -7672,7 +7796,7 @@ accountingReportStats(XML_Parser originParser, const char *epilog) { const XML_Parser rootParser = getRootParserOf(originParser, NULL); assert(! rootParser->m_parentParser); - if (rootParser->m_accounting.debugLevel < 1) { + if (rootParser->m_accounting.debugLevel == 0u) { return; } @@ -7709,7 +7833,7 @@ accountingReportDiff(XML_Parser rootParser, /* Note: Performance is of no concern here */ const char *walker = before; - if ((rootParser->m_accounting.debugLevel >= 3) + if ((rootParser->m_accounting.debugLevel >= 3u) || (after - before) <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { for (; walker < after; walker++) { @@ -7774,7 +7898,7 @@ accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, || (amplificationFactor <= rootParser->m_accounting.maximumAmplificationFactor); - if (rootParser->m_accounting.debugLevel >= 2) { + if (rootParser->m_accounting.debugLevel >= 2u) { accountingReportStats(rootParser, ""); accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, bytesMore, source_line, account); @@ -7801,7 +7925,7 @@ static void entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, const char *action, int sourceLine) { assert(! 
rootParser->m_parentParser); - if (rootParser->m_entity_stats.debugLevel < 1) + if (rootParser->m_entity_stats.debugLevel == 0u) return; # if defined(XML_UNICODE) @@ -8382,7 +8506,7 @@ unsignedCharToPrintable(unsigned char c) { assert(0); /* never gets here */ } -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { @@ -8393,9 +8517,9 @@ getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { const char *const value = valueOrNull; errno = 0; - char *afterValue = (char *)value; + char *afterValue = NULL; unsigned long debugLevel = strtoul(value, &afterValue, 10); - if ((errno != 0) || (afterValue[0] != '\0')) { + if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { errno = 0; return defaultDebugLevel; } diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c index 3f0f5c150c6278..2c48bf40867953 100644 --- a/Modules/expat/xmlrole.c +++ b/Modules/expat/xmlrole.c @@ -12,10 +12,10 @@ Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016-2023 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo - Copyright (c) 2021 Dong-hee Na + Copyright (c) 2021 Donghee Na Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -38,7 +38,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include <expat_config.h> +#include "expat_config.h" #include <stddef.h> diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h index d6e1fa150a108a..a7904274c91d4e 100644 --- a/Modules/expat/xmlrole.h +++ b/Modules/expat/xmlrole.h @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Karl Waclawek Copyright (c) 2002 Fred L. Drake, Jr.
- Copyright (c) 2017 Sebastian Pipping + Copyright (c) 2017-2024 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -127,9 +127,9 @@ typedef struct prolog_state { #endif /* XML_DTD */ } PROLOG_STATE; -void XmlPrologStateInit(PROLOG_STATE *); +void XmlPrologStateInit(PROLOG_STATE *state); #ifdef XML_DTD -void XmlPrologStateInitExternalEntity(PROLOG_STATE *); +void XmlPrologStateInitExternalEntity(PROLOG_STATE *state); #endif /* XML_DTD */ #define XmlTokenRole(state, tok, ptr, end, enc) \ diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index 2b7012a58be419..29a66d72ceea5e 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -12,7 +12,7 @@ Copyright (c) 2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Pascal Cuoq Copyright (c) 2016 Don Lewis Copyright (c) 2017 Rhodri James @@ -20,8 +20,10 @@ Copyright (c) 2017 Benbuck Nason Copyright (c) 2017 José Gutiérrez de la Concha Copyright (c) 2019 David Loffredo - Copyright (c) 2021 Dong-hee Na + Copyright (c) 2021 Donghee Na Copyright (c) 2022 Martin Ettl + Copyright (c) 2022 Sean McBride + Copyright (c) 2023 Hanno Böck Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -44,7 +46,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include <expat_config.h> +#include "expat_config.h" #include <stddef.h> #include <string.h> /* memcpy */ @@ -76,7 +78,7 @@ #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) #define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F))) + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) /* A 2 byte UTF-8 representation splits the characters 11 bits between the bottom 5 and 6 bits of the bytes.
We need 8 bits to index into @@ -100,7 +102,7 @@ & (1u << (((byte)[2]) & 0x1F))) /* Detection of invalid UTF-8 sequences is based on Table 3.1B - of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ + of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/ with the additional restriction of not allowing the Unicode code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). Implementation details: @@ -225,7 +227,7 @@ struct normal_encoding { /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \ /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL -static int FASTCALL checkCharRefNumber(int); +static int FASTCALL checkCharRefNumber(int result); #include "xmltok_impl.h" #include "ascii.h" @@ -243,7 +245,7 @@ static int FASTCALL checkCharRefNumber(int); #endif #define SB_BYTE_TYPE(enc, p) \ - (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) + (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE static int PTRFASTCALL @@ -407,7 +409,7 @@ utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short *to = *toP; const char *from = *fromP; while (from < fromLim && to < toLim) { - switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { + switch (SB_BYTE_TYPE(enc, from)) { case BT_LEAD2: if (fromLim - from < 2) { res = XML_CONVERT_INPUT_INCOMPLETE; @@ -715,31 +717,26 @@ unicode_byte_type(char hi, char lo) { return res; \ } -#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) #define GET_LO(ptr) ((unsigned char)(ptr)[0]) #define GET_HI(ptr) ((unsigned char)(ptr)[1]) DEFINE_UTF16_TO_UTF8(little2_) DEFINE_UTF16_TO_UTF16(little2_) -#undef SET2 #undef GET_LO #undef GET_HI -#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) #define GET_LO(ptr) ((unsigned char)(ptr)[1]) #define GET_HI(ptr) ((unsigned char)(ptr)[0]) DEFINE_UTF16_TO_UTF8(big2_) DEFINE_UTF16_TO_UTF16(big2_) -#undef SET2 #undef 
GET_LO #undef GET_HI #define LITTLE2_BYTE_TYPE(enc, p) \ - ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ - : unicode_byte_type((p)[1], (p)[0])) + ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0])) #define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) #define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c)) #define LITTLE2_IS_NAME_CHAR_MINBPC(p) \ @@ -872,9 +869,7 @@ static const struct normal_encoding internal_little2_encoding #endif #define BIG2_BYTE_TYPE(enc, p) \ - ((p)[0] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ - : unicode_byte_type((p)[0], (p)[1])) + ((p)[0] == 0 ? SB_BYTE_TYPE(enc, p + 1) : unicode_byte_type((p)[0], (p)[1])) #define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) #define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c)) #define BIG2_IS_NAME_CHAR_MINBPC(p) \ diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h index 6f630c2f9ba96d..c51fce1ec1518b 100644 --- a/Modules/expat/xmltok.h +++ b/Modules/expat/xmltok.h @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Fred L. Drake, Jr. 
Copyright (c) 2002-2005 Karl Waclawek - Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2017 Rhodri James Licensed under the MIT license: @@ -289,7 +289,8 @@ int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); -int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); +int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name); const ENCODING *XmlGetUtf8InternalEncoding(void); const ENCODING *XmlGetUtf16InternalEncoding(void); int FASTCALL XmlUtf8Encode(int charNumber, char *buf); @@ -307,7 +308,8 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); -int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name); const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void); ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c index 1971d74bf8c91f..239a2d06c4512c 100644 --- a/Modules/expat/xmltok_impl.c +++ b/Modules/expat/xmltok_impl.c @@ -126,7 +126,7 @@ # endif # define HAS_CHARS(enc, ptr, end, count) \ - ((end) - (ptr) >= ((count)*MINBPC(enc))) + ((end) - (ptr) >= ((count) * MINBPC(enc))) # define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 4d7196a8348faa..26006f5946db47 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1,6 +1,7 @@ #include "Python.h" #include <ctype.h> +#include <stdbool.h> #include "structmember.h" // PyMemberDef #include "frameobject.h" #include "expat.h" @@ -76,6 +77,12 @@ typedef struct { /* NULL if not enabled */ int buffer_size; /* Size of buffer, in XML_Char
units */ int buffer_used; /* Buffer units in use */ + bool reparse_deferral_enabled; /* Whether to defer reparsing of + unfinished XML tokens; a de-facto cache of + what Expat has the authority on, for lack + of a getter API function + "XML_GetReparseDeferralEnabled" in Expat + 2.6.0 */ PyObject *intern; /* Dictionary to intern strings */ PyObject **handlers; } xmlparseobject; @@ -705,6 +712,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv) #define MAX_CHUNK_SIZE (1 << 20) +/*[clinic input] +pyexpat.xmlparser.SetReparseDeferralEnabled + + enabled: bool + / + +Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled) +/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/ +{ +#if XML_COMBINED_VERSION >= 20600 + XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE); + self->reparse_deferral_enabled = (bool)enabled; +#endif + Py_RETURN_NONE; +} + +/*[clinic input] +pyexpat.xmlparser.GetReparseDeferralEnabled + +Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0. 
+[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self) +/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/ +{ + return PyBool_FromLong(self->reparse_deferral_enabled); +} + /*[clinic input] pyexpat.xmlparser.Parse @@ -1066,6 +1107,8 @@ static struct PyMethodDef xmlparse_methods[] = { #if XML_COMBINED_VERSION >= 19505 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif + PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF + PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -1149,6 +1192,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, self->handlers = NULL; self->intern = intern; Py_XINCREF(self->intern); +#if XML_COMBINED_VERSION >= 20600 + self->reparse_deferral_enabled = true; +#else + self->reparse_deferral_enabled = false; +#endif /* namespace_separator is either NULL or contains one char + \0 */ self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, @@ -1956,6 +2004,11 @@ pyexpat_exec(PyObject *mod) #else capi.SetHashSalt = NULL; #endif +#if XML_COMBINED_VERSION >= 20600 + capi.SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled; +#else + capi.SetReparseDeferralEnabled = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL); diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 0762a8df8663d2..be628a03da17ac 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -6827,17 +6827,23 @@ Returns the interface index corresponding to the interface name if_name."); static PyObject * socket_if_indextoname(PyObject *self, PyObject *arg) { + unsigned long index_long = PyLong_AsUnsignedLong(arg); + if (index_long == (unsigned long) -1 && PyErr_Occurred()) { + return NULL; + } + #ifdef MS_WINDOWS - NET_IFINDEX index; + NET_IFINDEX index = (NET_IFINDEX)index_long; #else - unsigned long index; + unsigned int index = 
(unsigned int)index_long; #endif - char name[IF_NAMESIZE + 1]; - index = PyLong_AsUnsignedLong(arg); - if (index == (unsigned long) -1) + if ((unsigned long)index != index_long) { + PyErr_SetString(PyExc_OverflowError, "index is too large"); return NULL; + } + char name[IF_NAMESIZE + 1]; if (if_indextoname(index, name) == NULL) { PyErr_SetFromErrno(PyExc_OSError); return NULL; diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index 34d6e2cd7919ea..4e51f8521d6d64 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -53,13 +53,13 @@ echo.Fetching external libraries... set libraries= set libraries=%libraries% bzip2-1.0.8 if NOT "%IncludeLibffiSrc%"=="false" set libraries=%libraries% libffi-3.3.0 -if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-1.1.1u +if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-1.1.1w set libraries=%libraries% sqlite-3.40.1.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-core-8.6.12.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-8.6.12.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tix-8.4.3.6 set libraries=%libraries% xz-5.2.5 -set libraries=%libraries% zlib-1.2.13 +set libraries=%libraries% zlib-1.3.1 for %%e in (%libraries%) do ( if exist "%EXTERNALS_DIR%\%%e" ( @@ -77,7 +77,7 @@ echo.Fetching external binaries... 
set binaries= if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.3.0 -if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-1.1.1u +if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-1.1.1w if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.12.0 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 diff --git a/PCbuild/python.props b/PCbuild/python.props index 33d5f8fb7a5964..057ed5ac224162 100644 --- a/PCbuild/python.props +++ b/PCbuild/python.props @@ -70,11 +70,11 @@ $(ExternalsDir)libffi-3.3.0\ $(libffiDir)$(ArchName)\ $(libffiOutDir)include - $(ExternalsDir)openssl-1.1.1u\ - $(ExternalsDir)openssl-bin-1.1.1u\$(ArchName)\ + $(ExternalsDir)openssl-1.1.1w\ + $(ExternalsDir)openssl-bin-1.1.1w\$(ArchName)\ $(opensslOutDir)include $(ExternalsDir)\nasm-2.11.06\ - $(ExternalsDir)\zlib-1.2.13\ + $(ExternalsDir)\zlib-1.3.1\ diff --git a/README.rst b/README.rst index db87a71698f18b..cf64d7dbc1aed9 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -This is Python version 3.10.13 +This is Python version 3.10.14 ============================== .. image:: https://travis-ci.com/python/cpython.svg?branch=master diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index ef06a9fcb69033..8d12d876dc6a3d 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -96,6 +96,7 @@ def clean_lines(text): Modules/_dbmmodule.c Modules/cjkcodecs/_codecs_*.c +Modules/expat/internal.h Modules/expat/xmlrole.c Modules/expat/xmlparse.c Python/initconfig.c diff --git a/Tools/scripts/verify_ensurepip_wheels.py b/Tools/scripts/verify_ensurepip_wheels.py index 044d1fd6b3cf2d..434a0b4c5387d4 100755 --- a/Tools/scripts/verify_ensurepip_wheels.py +++ b/Tools/scripts/verify_ensurepip_wheels.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """ Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop. 
@@ -35,11 +35,17 @@ def print_error(file_path: str, message: str) -> None: def verify_wheel(package_name: str) -> bool: # Find the package on disk - package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None) - if not package_path: - print_error("", f"Could not find a {package_name} wheel on disk.") + package_paths = list(WHEEL_DIR.glob(f"{package_name}*.whl")) + if len(package_paths) != 1: + if package_paths: + for p in package_paths: + print_error(p, f"Found more than one wheel for package {package_name}.") + else: + print_error("", f"Could not find a {package_name} wheel on disk.") return False + package_path = package_paths[0] + print(f"Verifying checksum for {package_path}.") # Find the version of the package used by ensurepip diff --git a/Tools/ssl/multissltests.py b/Tools/ssl/multissltests.py index cdaa75488fa862..61b87a2d22fd25 100755 --- a/Tools/ssl/multissltests.py +++ b/Tools/ssl/multissltests.py @@ -47,9 +47,9 @@ ] OPENSSL_RECENT_VERSIONS = [ - "1.1.1v", - "3.0.10", - "3.1.2", + "1.1.1w", + "3.0.11", + "3.1.3", ] LIBRESSL_OLD_VERSIONS = [ diff --git a/netlify.toml b/netlify.toml deleted file mode 100644 index 387c8f954ada3c..00000000000000 --- a/netlify.toml +++ /dev/null @@ -1,4 +0,0 @@ -[build] - base = "Doc/" - command = "make html" - publish = "Doc/build/html" \ No newline at end of file