diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e96753ad8..09dc7c9d7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -70,6 +70,8 @@ jobs: exclude: - image: manylinux_2_24_aarch64 pyversion: "*" + - image: musllinux_1_1_aarch64 + pyversion: "*" include: - image: manylinux2014_aarch64 pyversion: "cp36*" @@ -82,6 +84,17 @@ jobs: - image: manylinux_2_24_aarch64 pyversion: "cp310*" + - image: musllinux_1_1_aarch64 + pyversion: "cp36*" + - image: musllinux_1_1_aarch64 + pyversion: "cp37*" + - image: musllinux_1_1_aarch64 + pyversion: "cp38*" + - image: musllinux_1_1_aarch64 + pyversion: "cp39*" + - image: musllinux_1_1_aarch64 + pyversion: "cp310*" + steps: - uses: actions/checkout@v2 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9d8a9f424..000000000 --- a/.travis.yml +++ /dev/null @@ -1,86 +0,0 @@ -os: linux -language: python - -cache: - pip: true - directories: - - $HOME/.ccache - - libs - -python: - - nightly - - 3.10 - - 2.7 - - 3.9 - - 3.8 - - 3.7 - - 3.6 - - 3.5 - -env: - global: - - USE_CCACHE=1 - - CCACHE_SLOPPINESS=pch_defines,time_macros - - CCACHE_COMPRESS=1 - - CCACHE_MAXSIZE=70M - - PATH="/usr/lib/ccache:$PATH" - - LIBXML2_VERSION=2.9.10 - - LIBXSLT_VERSION=1.1.34 - matrix: - - STATIC_DEPS=false - - STATIC_DEPS=true - -matrix: - include: - - python: 3.8 - env: - - STATIC_DEPS=false - - EXTRA_DEPS="docutils pygments sphinx sphinx-rtd-theme" - script: make html - - python: 3.8 - env: - - STATIC_DEPS=false - - EXTRA_DEPS="coverage<5" - - python: 3.8 - env: - - STATIC_DEPS=true - - LIBXML2_VERSION=2.9.2 # minimum version requirements - - LIBXSLT_VERSION=1.1.27 - - python: pypy - env: STATIC_DEPS=false - - python: pypy3 - env: STATIC_DEPS=false - - python: 3.8 - env: STATIC_DEPS=false - arch: arm64 - - python: 3.8 - env: STATIC_DEPS=true - arch: arm64 - - python: 3.8 - env: STATIC_DEPS=false - arch: ppc64le - - python: 3.8 - env: STATIC_DEPS=true - arch: ppc64le - allow_failures: - - python: nightly - - python: pypy - - python: pypy3 - -install: - - pip install -U pip wheel - - if [ -z "${TRAVIS_PYTHON_VERSION##*-dev}" ]; - then pip install --install-option=--no-cython-compile https://github.com/cython/cython/archive/master.zip; - else pip install -r requirements.txt; - fi - - pip install -U beautifulsoup4 cssselect html5lib rnc2rng==2.6.5 ${EXTRA_DEPS} - -script: - - CFLAGS="-O0 -g -fPIC" python -u setup.py build_ext --inplace - $(if [ -n "${TRAVIS_PYTHON_VERSION##2.*}" -a -n "${TRAVIS_PYTHON_VERSION##3.[34]*}" ]; then echo -n " -j7 "; fi ) - $(if [ -n "$EXTRA_DEPS" -a -z "${EXTRA_DEPS##*coverage*}" ]; then echo -n "--with-coverage"; fi ) - - ccache -s || true - - CFLAGS="-O0 -g -fPIC" PYTHONUNBUFFERED=x make test - - ccache -s || true - - python setup.py install - - python -c "from lxml import etree" diff --git a/CHANGES.txt b/CHANGES.txt index b2e0c8f03..64bba1c22 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,18 @@ lxml changelog ============== +4.9.1 (2022-07-01) +================== + +Bugs fixed +---------- + +* A crash was resolved when using ``iterwalk()`` (or ``canonicalize()``) + after parsing certain incorrect input. Note that ``iterwalk()`` can crash + on *valid* input parsed with the same parser *after* failing to parse the + incorrect input. + + 4.9.0 (2022-06-01) ================== diff --git a/README.rst b/README.rst index e8705ab92..a0434b379 100644 --- a/README.rst +++ b/README.rst @@ -63,7 +63,7 @@ Crypto currencies do not fit into that ambition. .. _`doc/main.txt`: https://github.com/lxml/lxml/blob/master/doc/main.txt .. _`INSTALL.txt`: http://lxml.de/installation.html -`Travis-CI `_ and `AppVeyor `_ +`AppVeyor `_ and `GitHub Actions `_ support the lxml project with their build and CI servers. Jetbrains supports the lxml project by donating free licenses of their `PyCharm IDE `_. diff --git a/doc/main.txt b/doc/main.txt index e9a0a4637..578f92dcf 100644 --- a/doc/main.txt +++ b/doc/main.txt @@ -160,8 +160,8 @@ Index `_ (PyPI). It has the source that compiles on various platforms. The source distribution is signed with `this key `_. -The latest version is `lxml 4.9.0`_, released 2022-06-01 -(`changes for 4.9.0`_). `Older versions <#old-versions>`_ +The latest version is `lxml 4.9.1`_, released 2022-07-01 +(`changes for 4.9.1`_). `Older versions <#old-versions>`_ are listed below. Please take a look at the @@ -256,7 +256,9 @@ See the websites of lxml .. and the `latest in-development version `_. -.. _`PDF documentation`: lxmldoc-4.9.0.pdf +.. _`PDF documentation`: lxmldoc-4.9.1.pdf + +* `lxml 4.9.1`_, released 2022-07-01 (`changes for 4.9.1`_) * `lxml 4.9.0`_, released 2022-06-01 (`changes for 4.9.0`_) @@ -280,6 +282,7 @@ See the websites of lxml * `older releases `_ +.. _`lxml 4.9.1`: /files/lxml-4.9.1.tgz .. _`lxml 4.9.0`: /files/lxml-4.9.0.tgz .. _`lxml 4.8.0`: /files/lxml-4.8.0.tgz .. _`lxml 4.7.1`: /files/lxml-4.7.1.tgz @@ -291,6 +294,7 @@ See the websites of lxml .. _`lxml 4.6.1`: /files/lxml-4.6.1.tgz .. _`lxml 4.6.0`: /files/lxml-4.6.0.tgz +.. _`changes for 4.9.1`: /changes-4.9.1.html .. _`changes for 4.9.0`: /changes-4.9.0.html .. _`changes for 4.8.0`: /changes-4.8.0.html .. _`changes for 4.7.1`: /changes-4.7.1.html diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py index 0e0083413..f8be68f71 100644 --- a/src/lxml/__init__.py +++ b/src/lxml/__init__.py @@ -1,6 +1,6 @@ # this is a package -__version__ = "4.9.0" +__version__ = "4.9.1" def get_include(): diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index c16627629..9fae9fb12 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node): while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: c_ns = c_node.nsDef while c_ns is not NULL: - prefix = funicodeOrNone(c_ns.prefix) - if prefix not in nsmap: - nsmap[prefix] = funicodeOrNone(c_ns.href) + if c_ns.prefix or c_ns.href: + prefix = funicodeOrNone(c_ns.prefix) + if prefix not in nsmap: + nsmap[prefix] = funicodeOrNone(c_ns.href) c_ns = c_ns.next c_node = c_node.parent return nsmap diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi index 138c23a6a..a7299da6d 100644 --- a/src/lxml/iterparse.pxi +++ b/src/lxml/iterparse.pxi @@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node): count = 0 c_ns = c_node.nsDef while c_ns is not NULL: - count += 1 + count += (c_ns.href is not NULL) c_ns = c_ns.next return count @@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1: count = 0 c_ns = c_node.nsDef while c_ns is not NULL: - ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '', - funicode(c_ns.href)) - event_list.append( (u"start-ns", ns_tuple) ) - count += 1 + if c_ns.href: + ns_tuple = (funicodeOrEmpty(c_ns.prefix), + funicode(c_ns.href)) + event_list.append( (u"start-ns", ns_tuple) ) + count += 1 c_ns = c_ns.next return count diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index e5f084692..3e52258ed 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -1460,6 +1460,28 @@ def test_iterwalk_getiterator(self): [1,2,1,4], counts) + def test_walk_after_parse_failure(self): + # This used to be an issue because libxml2 can leak empty namespaces + # between failed parser runs. iterwalk() failed to handle such a tree. + parser = etree.XMLParser() + + try: + etree.XML('''''', parser=parser) + except etree.XMLSyntaxError: + pass + else: + assert False, "invalid input did not fail to parse" + + et = etree.XML(''' ''', parser=parser) + try: + ns = next(etree.iterwalk(et, events=('start-ns',))) + except StopIteration: + # This would be the expected result, because there was no namespace + pass + else: + # This is a bug in libxml2 + assert not ns, repr(ns) + def test_itertext_comment_pi(self): # https://bugs.launchpad.net/lxml/+bug/1844674 XML = self.etree.XML diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py index acbde4212..2f3186ff1 100644 --- a/src/lxml/tests/test_htmlparser.py +++ b/src/lxml/tests/test_htmlparser.py @@ -661,7 +661,6 @@ def test_xhtml_as_html_as_xml(self): b'' ) root = html.fromstring(xhtml) - print(root.attrib) result = etree.tostring(root) self.assertEqual(result, b'') @@ -673,7 +672,6 @@ def test_xhtml_as_html_as_xml(self): b'' ) root = html.fromstring(xhtml) - print(root.attrib) result = etree.tostring(root) self.assertEqual(result, b'') """