diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 217ee2e78..57aa02d51 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -6,6 +6,7 @@ updates: schedule: # Check for updates to GitHub Actions every week interval: "weekly" + rebase-strategy: auto groups: github-actions: patterns: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8cadf23b4..ec98107e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,13 @@ concurrency: permissions: contents: read # to fetch code (actions/checkout) +env: + ZLIB_VERSION: "1.3.1" + LIBICONV_VERSION: "1.18" + LIBXML2_VERSION: "2.14.5" + LIBXSLT_VERSION: "1.1.43" + + jobs: ci: strategy: @@ -173,9 +180,7 @@ jobs: env: OS_NAME: ${{ matrix.os }} PYTHON_VERSION: ${{ matrix.python-version }} - MACOSX_DEPLOYMENT_TARGET: 11.0 - LIBXML2_VERSION: 2.14.4 - LIBXSLT_VERSION: 1.1.43 + MACOSX_DEPLOYMENT_TARGET: "11.0" COVERAGE: false GCC_VERSION: 9 USE_CCACHE: 1 @@ -185,7 +190,7 @@ jobs: steps: - name: Checkout repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 1 @@ -206,17 +211,17 @@ jobs: with: max-size: 100M create-symlink: true - key: ${{ runner.os }}-ccache${{ matrix.extra_hash }}-${{ matrix.python-version }}-${{ matrix.env.STATIC_DEPS }}-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }} + key: ${{ runner.os }}-${{ runner.arch }}-ccache${{ matrix.extra_hash }}-${{ matrix.python-version }}-${{ matrix.env.STATIC_DEPS }}-${{ matrix.env.LIBXML2_VERSION || env.LIBXML2_VERSION }}-${{ matrix.env.LIBXSLT_VERSION || env.LIBXSLT_VERSION }} - name: Cache [libs] - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 if: matrix.env.STATIC_DEPS with: path: | libs/*.xz libs/*.gz libs/*.zip - key: libs-${{ runner.os }}-${{ matrix.env.LIBXML2_VERSION }}-${{ 
matrix.env.LIBXSLT_VERSION }} + key: libs-${{ runner.os }}-${{ matrix.env.LIBXML2_VERSION || env.LIBXML2_VERSION }}-${{ matrix.env.LIBXSLT_VERSION || env.LIBXSLT_VERSION }} - name: Run CI continue-on-error: ${{ matrix.allowed_failure || false }} @@ -251,12 +256,10 @@ jobs: CCACHE_COMPRESS: 1 CCACHE_COMPRESSLEVEL: 5 STATIC_DEPS: true - LIBXML2_VERSION: 2.14.4 - LIBXSLT_VERSION: 1.1.43 steps: - name: Checkout repo - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 fetch-tags: true diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6f111651f..220919449 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -34,7 +34,45 @@ on: permissions: {} +env: + ZLIB_VERSION: "1.3.1" + LIBICONV_VERSION: "1.18" + LIBXML2_VERSION: "2.14.5" + LIBXSLT_VERSION: "1.1.43" + + jobs: + + cache_libs: + strategy: + fail-fast: false + matrix: + os: + - "ubuntu-22.04" + - "ubuntu-22.04-arm" + - "macos-latest" + - "windows-2022" + - "windows-11-arm" + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Cache [libs] + uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 + with: + path: | + libs/*.xz + libs/*.gz + libs/*.zip + key: libs-${{ runner.os }}-${{ runner.arch }}-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }} + + - name: Download latest libraries + env: + GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: python3 buildlibxml.py --download-only + sdist: runs-on: ubuntu-24.04 @@ -42,7 +80,7 @@ jobs: contents: write steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Set up Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 @@ -80,10 +118,10 @@ jobs: outputs: include: ${{ 
steps.set-matrix.outputs.include }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install cibuildwheel # Nb. keep cibuildwheel version pin consistent with job below - run: pipx install cibuildwheel==2.22.0 + run: pipx install cibuildwheel==3.1.3 - id: set-matrix run: | MATRIX=$( @@ -104,7 +142,7 @@ jobs: build_wheels: name: Build for ${{ matrix.only }} - needs: generate-wheels-matrix + needs: [ cache_libs, generate-wheels-matrix ] runs-on: ${{ matrix.os }} strategy: @@ -112,22 +150,18 @@ jobs: matrix: include: ${{ fromJson(needs.generate-wheels-matrix.outputs.include) }} - env: - LIBXML2_VERSION: 2.14.4 - LIBXSLT_VERSION: 1.1.43 - steps: - name: Check out the repo - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Cache [libs] - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 with: path: | libs/*.xz libs/*.gz libs/*.zip - key: libs-${{ runner.os }}-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }} + key: libs-${{ runner.os }}-${{ runner.arch }}-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }} - name: Set up QEMU if: runner.os == 'Linux' @@ -136,23 +170,26 @@ jobs: platforms: all - name: Build wheels - uses: pypa/cibuildwheel@v3.0.0 + uses: pypa/cibuildwheel@v3.1.3 + env: + GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: only: ${{ matrix.only }} - name: Build old Linux wheels - if: contains(matrix.only, '-manylinux_') && startsWith(matrix.only, 'cp36-') && (contains(matrix.only, 'i686') || contains(matrix.only, 'x86_64')) - uses: pypa/cibuildwheel@v3.0.0 + if: contains(matrix.only, '-manylinux_') && (contains(matrix.only, 'i686') || contains(matrix.only, 'x86_64') || contains(matrix.only, 'aarch64')) + uses: pypa/cibuildwheel@v3.1.3 env: - CIBW_MANYLINUX_i686_IMAGE: manylinux1 - CIBW_MANYLINUX_X86_64_IMAGE: manylinux1 + CIBW_MANYLINUX_i686_IMAGE: manylinux2014 + CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + 
CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014 with: only: ${{ matrix.only }} - name: Build faster Linux wheels # also build wheels with the most recent manylinux images and gcc if: runner.os == 'Linux' && !contains(matrix.only, 'i686') - uses: pypa/cibuildwheel@v3.0.0 + uses: pypa/cibuildwheel@v3.1.3 env: CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 @@ -183,7 +220,7 @@ jobs: steps: - name: Download artifacts - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: path: ./release_upload merge-multiple: true diff --git a/CHANGES.txt b/CHANGES.txt index ab0f253ed..bc0c1c40c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,31 @@ lxml changelog ============== +6.0.1 (2025-08-22) +================== + +Bugs fixed +---------- + +* LP#2116333: ``lxml.sax._getNsTag()`` could fail with an exception on malformed input. + +* GH#467: Some test adaptations were made for libxml2 2.15. + Patch by Nick Wellnhofer. + +* LP#2119510, GH#473: A Python compatibility test was fixed for Python 3.14+. + Patch by Lumír Balhar. + +* GH#471: Wheels for "riscv64" on recent Python versions were added. + Patch by ffgan. + +* GH#469: The wheel build no longer requires the ``wheel`` package unconditionally. + Patch by Miro Hrončok. + +* Binary wheels use the library version libxml2 2.14.5. + +* Windows binary wheels continue to use a security patched library version libxml2 2.11.9. 
+ + 6.0.0 (2025-06-26) ================== diff --git a/Makefile b/Makefile index 9fc590ef5..711eee5c7 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ PYTHON_WITH_CYTHON?=$(shell $(PYTHON) -c 'import Cython.Build.Dependencies' >/d CYTHON_WITH_COVERAGE?=$(shell $(PYTHON) -c 'import Cython.Coverage; import sys; assert not hasattr(sys, "pypy_version_info")' >/dev/null 2>/dev/null && echo " --coverage" || true) PYTHON_BUILD_VERSION ?= * -MANYLINUX_LIBXML2_VERSION=2.14.4 +MANYLINUX_LIBXML2_VERSION=2.14.5 MANYLINUX_LIBXSLT_VERSION=1.1.43 MANYLINUX_CFLAGS=-O3 -g1 -pipe -fPIC -flto MANYLINUX_LDFLAGS=-flto @@ -121,6 +121,7 @@ apidoc: apidocclean inplace && (echo "Generating API docs ..." && \ PYTHONPATH=src:$(PYTHONPATH) sphinx-apidoc -e -P -T -o doc/api src/lxml \ "*includes" "*tests" "*pyclasslookup.py" "*usedoctest.py" "*html/_html5builder.py" \ + "*html/_diff*" "*html/_setmixin*" \ "*.so" "*.pyd") \ || (echo "not generating Sphinx autodoc API rst files") diff --git a/TODO.txt b/TODO.txt index d51ef6964..d9ca17f15 100644 --- a/TODO.txt +++ b/TODO.txt @@ -8,6 +8,13 @@ lxml In general ---------- +* libxml2 2.14: + + disableSAX -> xmlCtxtIsStopped + replaceEntities -> XML_PARSE_NOENT + progressive (unused) + node -> no longer deprecated, but its use is discouraged + * more testing on multi-threading * better exception messages for XPath and schemas based on error log, diff --git a/buildlibxml.py b/buildlibxml.py index cc61d65b2..e2cdcd751 100644 --- a/buildlibxml.py +++ b/buildlibxml.py @@ -1,9 +1,8 @@ import json -import os, re, sys, subprocess, platform +import os, re, sys, platform import tarfile import time -from distutils import log -from contextlib import closing, contextmanager +from contextlib import closing from ftplib import FTP import urllib.error @@ -28,6 +27,22 @@ # use pre-built libraries on Windows +def read_file_digest(file): + buffer = bytearray(2**18) + view = memoryview(buffer) + + from hashlib import sha256 + filehash = sha256() + with open(file, 
'rb') as f: + while True: + size = f.readinto(buffer) + if not size: + break + filehash.update(view[:size]) + + return 'sha256:' + filehash.hexdigest() + + def download_and_extract_windows_binaries(destdir): url = "https://api.github.com/repos/lxml/libxml2-win-binaries/releases?per_page=5" releases, _ = read_url( @@ -43,7 +58,10 @@ def download_and_extract_windows_binaries(destdir): max_release = release url = "https://github.com/lxml/libxml2-win-binaries/releases/download/%s/" % max_release['tag_name'] - filenames = [asset['name'] for asset in max_release.get('assets', ())] + asset_files = { + asset['name']: (asset['size'], asset['digest']) + for asset in max_release.get('assets', ()) + } # Check for native ARM64 build or the environment variable that is set by # Visual Studio for cross-compilation (same variable as setuptools uses) @@ -55,13 +73,17 @@ def download_and_extract_windows_binaries(destdir): arch = "win32" arch_part = '.' + arch + '.' - filenames = [filename for filename in filenames if arch_part in filename] + asset_files = { + filename: details + for filename, details in asset_files.items() + if arch_part in filename + } libs = {} for libname in ['libxml2', 'libxslt', 'zlib', 'iconv']: libs[libname] = "%s-%s.%s.zip" % ( libname, - find_max_version(libname, filenames), + find_max_version(libname, list(asset_files)), arch, ) @@ -71,11 +93,17 @@ def download_and_extract_windows_binaries(destdir): for libname, libfn in libs.items(): srcfile = urljoin(url, libfn) destfile = os.path.join(destdir, libfn) - if os.path.exists(destfile + ".keep"): - print('Using local copy of "{}"'.format(srcfile)) - else: - print('Retrieving "%s" to "%s"' % (srcfile, destfile)) - urlretrieve(srcfile, destfile) + if os.path.exists(destfile): + file_size, file_digest = asset_files.get(libfn, (None, None)) + if file_size and os.path.getsize(destfile) == file_size and read_file_digest(destfile) == file_digest: + print('Using local copy of "{}"'.format(srcfile)) + continue + + 
print('Retrieving "%s" to "%s"' % (srcfile, destfile)) + urlretrieve(srcfile, destfile) + + for libname, libfn in libs.items(): + destfile = os.path.join(destdir, libfn) d = unpack_zipfile(destfile, destdir) libs[libname] = d @@ -84,7 +112,7 @@ def download_and_extract_windows_binaries(destdir): def find_top_dir_of_zipfile(zipfile): topdir = None - files = [f.filename for f in zipfile.filelist] + files = (f.filename for f in zipfile.filelist) dirs = [d for d in files if d.endswith('/')] if dirs: dirs.sort(key=len) @@ -103,13 +131,12 @@ def find_top_dir_of_zipfile(zipfile): def unpack_zipfile(zipfn, destdir): assert zipfn.endswith('.zip') import zipfile - print('Unpacking %s into %s' % (os.path.basename(zipfn), destdir)) - f = zipfile.ZipFile(zipfn) - try: + + print(f'Unpacking {os.path.basename(zipfn)} into {destdir}') + with zipfile.ZipFile(zipfn) as f: extracted_dir = os.path.join(destdir, find_top_dir_of_zipfile(f)) f.extractall(path=destdir) - finally: - f.close() + assert os.path.exists(extracted_dir), 'missing: %s' % extracted_dir return extracted_dir @@ -250,16 +277,6 @@ def tryint(s): return s -@contextmanager -def py2_tarxz(filename): - import tempfile - with tempfile.TemporaryFile() as tmp: - subprocess.check_call(["xz", "-dc", filename], stdout=tmp.fileno()) - tmp.seek(0) - with closing(tarfile.TarFile(fileobj=tmp)) as tf: - yield tf - - def download_libxml2(dest_dir, version=None): """Downloads libxml2, returning the filename where the library was downloaded""" #version_re = re.compile(r'LATEST_LIBXML2_IS_([0-9.]+[0-9](?:-[abrc0-9]+)?)') @@ -371,31 +388,65 @@ def download_library(dest_dir, location, name, version_re, filename, version=Non return dest_filename -def unpack_tarball(tar_filename, dest): +def unpack_tarball(tar_filename, dest) -> str: print('Unpacking %s into %s' % (os.path.basename(tar_filename), dest)) - if sys.version_info[0] < 3 and tar_filename.endswith('.xz'): - # Py 2.7 lacks lzma support - tar_cm = py2_tarxz(tar_filename) - else: - 
tar_cm = closing(tarfile.open(tar_filename)) + os_path = os.path + abs_dest = os_path.abspath(dest) + + tar_cm = tarfile.open(tar_filename) + + if hasattr(tarfile, 'data_filter'): + tar_cm.extraction_filter = tarfile.data_filter base_dir = None - with tar_cm as tar: + with closing(tar_cm) as tar: + directories = [] for member in tar: - base_name = member.name.split('/')[0] + # Guard against malicious tar file content. + path = os_path.join(dest, member.name) + abs_path = os_path.abspath(path) + if not os_path.commonpath([abs_dest, abs_path]).startswith(abs_dest): + raise RuntimeError('Unexpected path in %s: %s' % (tar_filename, member.name)) + + if member.isdir(): + directories.append(member) + continue + elif member.issym() or member.islnk(): + link_path = os_path.abspath(os_path.join( + os_path.dirname(abs_path) if member.issym() else abs_dest, + member.linkname)) + if not os_path.commonpath([abs_dest, link_path]).startswith(abs_dest): + raise RuntimeError('Unexpected path in %s: %s' % (tar_filename, member.name)) + elif member.islnk(): + link_path = os_path.abspath(os_path.join(abs_dest, member.linkname)) + elif not member.isfile(): + raise RuntimeError('Unexpected path in %s: %s' % (tar_filename, member.name)) + + # Find common base directory. + first_dir = member.name.split('/')[0] if base_dir is None: - base_dir = base_name - elif base_dir != base_name: - print('Unexpected path in %s: %s' % (tar_filename, base_name)) - tar.extractall(dest) - return os.path.join(dest, base_dir) + base_dir = first_dir + elif base_dir != first_dir: + print('Unexpected path in %s: %s' % (tar_filename, first_dir)) + continue + + # Extract only new files. + if os_path.exists(abs_path) and os_path.getsize(abs_path) == member.size: + continue + tar.extract(member, abs_dest) + + # Update directory properties/times/etc. 
+ for member in directories: + tar.extract(member, abs_dest) + + return os_path.join(dest, base_dir) def call_subprocess(cmd, **kw): import subprocess cwd = kw.get('cwd', '.') cmd_desc = ' '.join(cmd) - log.info('Running "%s" in %s' % (cmd_desc, cwd)) + print(f'Running "{cmd_desc}" in {cwd}') returncode = subprocess.call(cmd, **kw) if returncode: raise Exception('Command "%s" returned code %s' % (cmd_desc, returncode)) @@ -437,15 +488,34 @@ def configure_darwin_env(env_setup): env_setup['env'] = env_default -def build_libxml2xslt(download_dir, build_dir, - static_include_dirs, static_library_dirs, - static_cflags, static_binaries, - libxml2_version=None, - libxslt_version=None, - libiconv_version=None, - zlib_version=None, - multicore=None, - with_zlib=True): +def build_libxml2xslt( + download_dir, build_dir, + static_include_dirs, static_library_dirs, + static_cflags, static_binaries, + libxml2_version=None, + libxslt_version=None, + libiconv_version=None, + zlib_version=None, + multicore=None, + with_zlib=True): + lib_dirs = download_libs(download_dir, build_dir, + libxml2_version, libxslt_version, libiconv_version, zlib_version, with_zlib=with_zlib) + return build_libs( + build_dir, lib_dirs, + static_include_dirs, static_library_dirs, static_cflags, static_binaries, + libxml2_version=libxml2_version, + multicore=multicore, + with_zlib=with_zlib, + ) + + +def download_libs( + download_dir, build_dir, + libxml2_version=None, + libxslt_version=None, + libiconv_version=None, + zlib_version=None, + with_zlib=True): safe_mkdir(download_dir) safe_mkdir(build_dir) @@ -457,6 +527,18 @@ def build_libxml2xslt(download_dir, build_dir, libxml2_dir = unpack_tarball(download_libxml2(download_dir, libxml2_version), build_dir) libxslt_dir = unpack_tarball(download_libxslt(download_dir, libxslt_version), build_dir) + return zlib_dir, libiconv_dir, libxml2_dir, libxslt_dir + + +def build_libs( + build_dir, lib_dirs, + static_include_dirs, static_library_dirs, + static_cflags, 
static_binaries, + libxml2_version=None, + multicore=None, + with_zlib=True): + zlib_dir, libiconv_dir, libxml2_dir, libxslt_dir = lib_dirs + prefix = os.path.join(os.path.abspath(build_dir), 'libxml2') lib_dir = os.path.join(prefix, 'lib') safe_mkdir(prefix) @@ -566,25 +648,52 @@ def has_current_lib(name, build_dir, _build_all_following=[False]): return xml2_config, xslt_config -def main(): +def main(with_zlib=True, download_only=False, platform=None): static_include_dirs = [] static_library_dirs = [] download_dir = "libs" + if platform is None: + platform = sys_platform + if sys_platform.startswith('win'): return get_prebuilt_libxml2xslt( download_dir, static_include_dirs, static_library_dirs) - else: - return build_libxml2xslt( - download_dir, 'build/tmp', - static_include_dirs, static_library_dirs, - static_cflags=[], - static_binaries=[] - ) + + get_env = os.environ.get + zlib_version = get_env('ZLIB_VERSION') + libiconv_version = get_env('LIBICONV_VERSION') + libxml2_version = get_env('LIBXML2_VERSION') + libxslt_version = get_env('LIBXSLT_VERSION') + + build_dir = 'build/tmp' + lib_dirs = download_libs( + download_dir, build_dir, + libxml2_version=libxml2_version, + libxslt_version=libxslt_version, + libiconv_version=libiconv_version, + zlib_version=zlib_version, + with_zlib=with_zlib, + ) + if download_only: + return None, None + + return build_libs( + build_dir, lib_dirs, + static_include_dirs, static_library_dirs, + static_cflags=[], + static_binaries=[], + libxml2_version=libxml2_version, + with_zlib=with_zlib, + ) if __name__ == '__main__': - if len(sys.argv) > 1: + args = sys.argv[1:] + download_only = '--download-only' in args + if download_only: + args.remove('--download-only') + if args: # change global sys_platform setting - sys_platform = sys.argv[1] - main() + sys_platform = args[0] + main(download_only=download_only, platform=sys_platform) diff --git a/doc/extensions.txt b/doc/extensions.txt index 45bcf9795..5d14247eb 100644 --- 
a/doc/extensions.txt +++ b/doc/extensions.txt @@ -249,7 +249,7 @@ the global mapping of the FunctionNamespace objects: >>> e2('/foo:a') Traceback (most recent call last): ... - lxml.etree.XPathEvalError: Undefined namespace prefix + lxml.etree.XPathEvalError: Undefined namespace prefix... Evaluator-local extensions diff --git a/doc/main.txt b/doc/main.txt index b142ca86c..b68c8e2d8 100644 --- a/doc/main.txt +++ b/doc/main.txt @@ -160,8 +160,8 @@ Index `_ (PyPI). It has the source that compiles on various platforms. The source distribution is signed with `this key `_. -The latest version is `lxml 6.0.0`_, released 2025-06-26 -(`changes for 6.0.0`_). `Older versions <#old-versions>`_ +The latest version is `lxml 6.0.1`_, released 2025-08-22 +(`changes for 6.0.1`_). `Older versions <#old-versions>`_ are listed below. Please take a look at the @@ -264,6 +264,8 @@ See the websites of lxml .. _`PDF documentation`: lxmldoc-6.0.0.pdf +* `lxml 6.0.1`_, released 2025-08-22 (`changes for 6.0.1`_) + * `lxml 6.0.0`_, released 2025-06-26 (`changes for 6.0.0`_) * `lxml 5.4.0`_, released 2025-04-22 (`changes for 5.4.0`_) @@ -276,30 +278,16 @@ See the websites of lxml * `older releases `_ +.. _`lxml 6.0.1`: /files/lxml-6.0.1.tgz .. _`lxml 6.0.0`: /files/lxml-6.0.0.tgz .. _`lxml 5.4.0`: /files/lxml-5.4.0.tgz .. _`lxml 5.3.2`: /files/lxml-5.3.2.tgz .. _`lxml 5.3.1`: /files/lxml-5.3.1.tgz .. _`lxml 5.3.0`: /files/lxml-5.3.0.tgz -.. _`lxml 5.2.2`: /files/lxml-5.2.2.tgz -.. _`lxml 5.2.1`: /files/lxml-5.2.1.tgz -.. _`lxml 5.2.0`: /files/lxml-5.2.0.tgz -.. _`lxml 5.1.1`: /files/lxml-5.1.1.tgz -.. _`lxml 5.1.0`: /files/lxml-5.1.0.tgz -.. _`lxml 5.0.2`: /files/lxml-5.0.2.tgz -.. _`lxml 5.0.1`: /files/lxml-5.0.1.tgz -.. _`lxml 5.0.0`: /files/lxml-5.0.0.zip +.. _`changes for 6.0.1`: /changes-6.0.1.html .. _`changes for 6.0.0`: /changes-6.0.0.html .. _`changes for 5.4.0`: /changes-5.4.0.html .. _`changes for 5.3.2`: /changes-5.3.2.html .. _`changes for 5.3.1`: /changes-5.3.1.html .. 
_`changes for 5.3.0`: /changes-5.3.0.html -.. _`changes for 5.2.2`: /changes-5.2.2.html -.. _`changes for 5.2.1`: /changes-5.2.1.html -.. _`changes for 5.2.0`: /changes-5.2.0.html -.. _`changes for 5.1.1`: /changes-5.1.1.html -.. _`changes for 5.1.0`: /changes-5.1.0.html -.. _`changes for 5.0.2`: /changes-5.0.2.html -.. _`changes for 5.0.1`: /changes-5.0.1.html -.. _`changes for 5.0.0`: /changes-5.0.0.html diff --git a/doc/xpathxslt.txt b/doc/xpathxslt.txt index 3b0b899c4..d2480f03c 100644 --- a/doc/xpathxslt.txt +++ b/doc/xpathxslt.txt @@ -412,7 +412,7 @@ During evaluation, lxml will emit an XPathEvalError on errors: >>> find(root) Traceback (most recent call last): ... - lxml.etree.XPathEvalError: Undefined namespace prefix + lxml.etree.XPathEvalError: Undefined namespace prefix... This works for the ``XPath`` class, however, the other evaluators (including the ``xpath()`` method) are one-shot operations that do parsing and evaluation @@ -429,7 +429,7 @@ in one step. They therefore raise evaluation exceptions in all cases: >>> find = root.xpath("//ns:a") Traceback (most recent call last): ... - lxml.etree.XPathEvalError: Undefined namespace prefix + lxml.etree.XPathEvalError: Undefined namespace prefix... 
>>> find = root.xpath("\\") Traceback (most recent call last): diff --git a/pyproject.toml b/pyproject.toml index 770a68c7d..42adeaf02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [build-system] -requires = ["Cython>=3.1.2", "setuptools", "wheel"] +requires = ["Cython>=3.1.2", "setuptools"] [tool.cibuildwheel] build-verbosity = 1 -environment = {STATIC_DEPS="true", LIBXML2_VERSION = "2.14.4", LIBXSLT_VERSION = "1.1.43"} -enable = "pypy cpython-prerelease" +environment = {STATIC_DEPS="true", ZLIB_VERSION = "1.3.1", LIBICONV_VERSION = "1.18", LIBXML2_VERSION = "2.14.5", LIBXSLT_VERSION = "1.1.43"} +enable = ["pypy", "cpython-prerelease"] # "pypy" # "cpython-prerelease" # "cpython-freethreading" @@ -23,10 +23,25 @@ skip = [ "cp38-musllinux_aarch64", "cp38-manylinux_armv7l", "cp38-musllinux_armv7l", + "cp38-manylinux_ppc64le", "cp39-manylinux_ppc64le", + "cp310-manylinux_ppc64le", + "cp311-manylinux_ppc64le", "cp38-musllinux_ppc64le", "cp39-musllinux_ppc64le", + "cp310-musllinux_ppc64le", + "cp311-musllinux_ppc64le", + + "cp38-manylinux_riscv64", + "cp39-manylinux_riscv64", + "cp310-manylinux_riscv64", + "cp311-manylinux_riscv64", + "cp38-musllinux_riscv64", + "cp39-musllinux_riscv64", + "cp310-musllinux_riscv64", + "cp311-musllinux_riscv64", + "cp38-manylinux_s390x", "cp39-manylinux_s390x", "cp38-musllinux_s390x", @@ -36,7 +51,7 @@ skip = [ [tool.cibuildwheel.linux] #archs = ["x86_64", "aarch64", "i686", "ppc64le", "s390x", "armv7l"] -archs = ["x86_64", "aarch64", "i686", "armv7l"] +archs = ["x86_64", "aarch64", "i686", "ppc64le", "armv7l", "riscv64"] repair-wheel-command = "auditwheel repair --strip -w {dest_dir} {wheel}" [tool.cibuildwheel.linux.environment] @@ -46,7 +61,7 @@ NM = "gcc-nm" RANLIB = "gcc-ranlib" LDFLAGS = "-fPIC -flto" STATIC_DEPS = "true" -LIBXML2_VERSION = "2.14.4" +LIBXML2_VERSION = "2.14.5" LIBXSLT_VERSION = "1.1.43" [[tool.cibuildwheel.overrides]] diff --git a/setup.py b/setup.py index c63225644..432b80ab0 100644 --- 
a/setup.py +++ b/setup.py @@ -240,6 +240,7 @@ def build_packages(files): 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', 'Programming Language :: C', 'Operating System :: OS Independent', 'Topic :: Text Processing :: Markup :: HTML', diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py index 8443a3498..aceb0ca38 100644 --- a/src/lxml/__init__.py +++ b/src/lxml/__init__.py @@ -1,6 +1,6 @@ # this is a package -__version__ = "6.0.0" +__version__ = "6.0.1" def get_include(): diff --git a/src/lxml/html/tests/test_elementsoup.py b/src/lxml/html/tests/test_elementsoup.py index 94d21636b..7e4aa899b 100644 --- a/src/lxml/html/tests/test_elementsoup.py +++ b/src/lxml/html/tests/test_elementsoup.py @@ -50,7 +50,7 @@ def test_head_body(self): def test_wrap_html(self): # outside , parser should fix that - html = 'title</test></head><html><body/></html>' + html = '<head><title>title' res = b'title' tree = self.soupparser.fromstring(html) self.assertEqual(tostring(tree), res) diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd index 04caf8e79..3a721c1dc 100644 --- a/src/lxml/includes/xmlparser.pxd +++ b/src/lxml/includes/xmlparser.pxd @@ -293,11 +293,18 @@ cdef extern from "libxml/parser.h" nogil: cdef extern from "libxml/parserInternals.h" nogil: + """ + #if LIBXML_VERSION < 21400 + #define xmlNewInputFromMemory(url, mem, size, flags) (NULL) + #endif + """ cdef xmlParserInput* xmlNewInputStream(xmlParserCtxt* ctxt) cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, char* buffer) cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, char* filename) + cdef xmlParserInput* xmlNewInputFromMemory( + const char *url, const void *mem, size_t size, int flags) # actually "xmlParserInputFlags flags" cdef void xmlFreeInputStream(xmlParserInput* input) cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) cdef bint 
xmlCtxtIsStopped(xmlParserCtxt* ctxt) diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi index 93b6ef5ae..a3fbef399 100644 --- a/src/lxml/parser.pxi +++ b/src/lxml/parser.pxi @@ -479,14 +479,19 @@ cdef xmlparser.xmlParserInput* _local_resolver(const_char* c_url, const_char* c_ if not isinstance(filename, bytes): filename = None - c_input = xmlparser.xmlNewInputStream(c_context) - if c_input is not NULL: - if filename is not None: - c_input.filename = tree.xmlStrdup(_xcstr(filename)) - c_input.base = _xcstr(data) - c_input.length = python.PyBytes_GET_SIZE(data) - c_input.cur = c_input.base - c_input.end = c_input.base + c_input.length + if tree.LIBXML_VERSION >= 21400: + c_filename = tree.xmlStrdup(_xcstr(filename)) if filename is not None else NULL + c_input = xmlparser.xmlNewInputFromMemory( + c_filename, _xcstr(data), python.PyBytes_GET_SIZE(data), 0) + else: + c_input = xmlparser.xmlNewInputStream(c_context) + if c_input is not NULL: + if filename is not None: + c_input.filename = tree.xmlStrdup(_xcstr(filename)) + c_input.base = _xcstr(data) + c_input.length = python.PyBytes_GET_SIZE(data) + c_input.cur = c_input.base + c_input.end = c_input.base + c_input.length elif doc_ref._type == PARSER_DATA_FILENAME: data = None c_filename = _cstr(doc_ref._filename) diff --git a/src/lxml/sax.py b/src/lxml/sax.py index 12088880e..db77f6f29 100644 --- a/src/lxml/sax.py +++ b/src/lxml/sax.py @@ -1,5 +1,3 @@ -# cython: language_level=2 - """ SAX-based adapter to copy trees from/to the Python standard library. 
@@ -32,7 +30,7 @@ class SaxError(etree.LxmlError): def _getNsTag(tag): - if tag[0] == '{': + if tag[0] == '{' and '}' in tag: return tuple(tag[1:].split('}', 1)) else: return None, tag @@ -152,10 +150,11 @@ def characters(self, data): try: # if there already is a child element, we must append to its tail last_element = last_element[-1] - last_element.tail = (last_element.tail or '') + data except IndexError: # otherwise: append to the text last_element.text = (last_element.text or '') + data + else: + last_element.tail = (last_element.tail or '') + data ignorableWhitespace = characters diff --git a/src/lxml/tests/common_imports.py b/src/lxml/tests/common_imports.py index 4ef6e770e..44916c273 100644 --- a/src/lxml/tests/common_imports.py +++ b/src/lxml/tests/common_imports.py @@ -104,7 +104,7 @@ def BytesIO(*args): def make_doctest(filename): file_path = os.path.join(DOC_DIR, filename) - return doctest.DocFileSuite(file_path, module_relative=False, encoding='utf-8') + return doctest.DocFileSuite(file_path, module_relative=False, encoding='utf-8', optionflags=doctest.ELLIPSIS) class HelperTestCase(unittest.TestCase): diff --git a/src/lxml/tests/dummy_http_server.py b/src/lxml/tests/dummy_http_server.py index 4e8a4ca19..64b59ec90 100644 --- a/src/lxml/tests/dummy_http_server.py +++ b/src/lxml/tests/dummy_http_server.py @@ -3,14 +3,9 @@ """ import sys +import urllib.parse as urlparse from contextlib import contextmanager -try: - import urlparse -except ImportError: - # Python 3 - import urllib.parse as urlparse - @contextmanager def webserver(app, port=0, host=None): @@ -39,13 +34,9 @@ def webserver(app, port=0, host=None): thread.join(timeout=1) -try: - from SocketServer import ThreadingMixIn -except ImportError: - # Python 3 - from socketserver import ThreadingMixIn - +from socketserver import ThreadingMixIn import wsgiref.simple_server as wsgiserver + class WebServer(wsgiserver.WSGIServer, ThreadingMixIn): """A web server that starts a new thread for each request. 
""" diff --git a/src/lxml/tests/test_sax.py b/src/lxml/tests/test_sax.py index e2d03c255..2c8379497 100644 --- a/src/lxml/tests/test_sax.py +++ b/src/lxml/tests/test_sax.py @@ -7,8 +7,8 @@ from xml.dom import pulldom from xml.sax.handler import ContentHandler -from .common_imports import HelperTestCase, make_doctest, BytesIO, _bytes -from lxml import sax +from .common_imports import HelperTestCase, make_doctest, BytesIO +from lxml import etree, sax class ETreeSaxTestCase(HelperTestCase): @@ -121,6 +121,32 @@ def test_sax_to_pulldom_multiple_namespaces(self): self.assertEqual('a', dom.firstChild.prefix) + def test_sax_non_html(self): + # https://bugs.launchpad.net/lxml/+bug/2116333 + events = [] + + from xml.sax.handler import ContentHandler + class MyContentHandler(ContentHandler): + def startElementNS(self, name, qname, attributes): + events.append(("START", name, qname, attributes.items())) + + def characters(self, data): + events.append(("DATA", data)) + + markup = ( + '' + '' + '' + ) + + parser = etree.HTMLParser(recover=True) + tree = etree.fromstring(markup, parser) + + self.assertFalse(events) + sax.saxify(tree, MyContentHandler()) + # The exact list of parsed attributes depends on the libxml2 parser version. + self.assertTrue(events) + def test_element_sax(self): tree = self.parse('') a = tree.getroot() diff --git a/tox.ini b/tox.ini index a68b40c67..d4889e12f 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py38, py39, py310, py311, py312, py313 +envlist = py38, py39, py310, py311, py312, py313, py314 [testenv] allowlist_externals = make