From b9f7074430594b95824059eef931dfbb27a7645e Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Tue, 31 May 2022 22:49:19 +0200
Subject: [PATCH 1/6] Remove debug print from test.
---
src/lxml/tests/test_htmlparser.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py
index acbde4212..2f3186ff1 100644
--- a/src/lxml/tests/test_htmlparser.py
+++ b/src/lxml/tests/test_htmlparser.py
@@ -661,7 +661,6 @@ def test_xhtml_as_html_as_xml(self):
b''
)
root = html.fromstring(xhtml)
- print(root.attrib)
result = etree.tostring(root)
self.assertEqual(result, b'')
@@ -673,7 +672,6 @@ def test_xhtml_as_html_as_xml(self):
b''
)
root = html.fromstring(xhtml)
- print(root.attrib)
result = etree.tostring(root)
self.assertEqual(result, b'
')
"""
From 8f0bf2d158f2dd3f98d410c8a38fcd536fd11b53 Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Tue, 31 May 2022 23:18:38 +0200
Subject: [PATCH 2/6] Try to speed up the musllinux AArch64 build by splitting
the different CPython versions into separate GHA jobs.
---
.github/workflows/wheels.yml | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index e96753ad8..09dc7c9d7 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -70,6 +70,8 @@ jobs:
exclude:
- image: manylinux_2_24_aarch64
pyversion: "*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "*"
include:
- image: manylinux2014_aarch64
pyversion: "cp36*"
@@ -82,6 +84,17 @@ jobs:
- image: manylinux_2_24_aarch64
pyversion: "cp310*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp36*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp37*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp38*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp39*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp310*"
+
steps:
- uses: actions/checkout@v2
From 50c276412880c1a3dde8a6d6c909e3ed8ef47e43 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Wed, 22 Jun 2022 09:10:10 +0200
Subject: [PATCH 3/6] Delete unused Travis CI config and reference in docs
(GH-345)
---
.travis.yml | 86 -----------------------------------------------------
README.rst | 2 +-
2 files changed, 1 insertion(+), 87 deletions(-)
delete mode 100644 .travis.yml
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 9d8a9f424..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,86 +0,0 @@
-os: linux
-language: python
-
-cache:
- pip: true
- directories:
- - $HOME/.ccache
- - libs
-
-python:
- - nightly
- - 3.10
- - 2.7
- - 3.9
- - 3.8
- - 3.7
- - 3.6
- - 3.5
-
-env:
- global:
- - USE_CCACHE=1
- - CCACHE_SLOPPINESS=pch_defines,time_macros
- - CCACHE_COMPRESS=1
- - CCACHE_MAXSIZE=70M
- - PATH="/usr/lib/ccache:$PATH"
- - LIBXML2_VERSION=2.9.10
- - LIBXSLT_VERSION=1.1.34
- matrix:
- - STATIC_DEPS=false
- - STATIC_DEPS=true
-
-matrix:
- include:
- - python: 3.8
- env:
- - STATIC_DEPS=false
- - EXTRA_DEPS="docutils pygments sphinx sphinx-rtd-theme"
- script: make html
- - python: 3.8
- env:
- - STATIC_DEPS=false
- - EXTRA_DEPS="coverage<5"
- - python: 3.8
- env:
- - STATIC_DEPS=true
- - LIBXML2_VERSION=2.9.2 # minimum version requirements
- - LIBXSLT_VERSION=1.1.27
- - python: pypy
- env: STATIC_DEPS=false
- - python: pypy3
- env: STATIC_DEPS=false
- - python: 3.8
- env: STATIC_DEPS=false
- arch: arm64
- - python: 3.8
- env: STATIC_DEPS=true
- arch: arm64
- - python: 3.8
- env: STATIC_DEPS=false
- arch: ppc64le
- - python: 3.8
- env: STATIC_DEPS=true
- arch: ppc64le
- allow_failures:
- - python: nightly
- - python: pypy
- - python: pypy3
-
-install:
- - pip install -U pip wheel
- - if [ -z "${TRAVIS_PYTHON_VERSION##*-dev}" ];
- then pip install --install-option=--no-cython-compile https://github.com/cython/cython/archive/master.zip;
- else pip install -r requirements.txt;
- fi
- - pip install -U beautifulsoup4 cssselect html5lib rnc2rng==2.6.5 ${EXTRA_DEPS}
-
-script:
- - CFLAGS="-O0 -g -fPIC" python -u setup.py build_ext --inplace
- $(if [ -n "${TRAVIS_PYTHON_VERSION##2.*}" -a -n "${TRAVIS_PYTHON_VERSION##3.[34]*}" ]; then echo -n " -j7 "; fi )
- $(if [ -n "$EXTRA_DEPS" -a -z "${EXTRA_DEPS##*coverage*}" ]; then echo -n "--with-coverage"; fi )
- - ccache -s || true
- - CFLAGS="-O0 -g -fPIC" PYTHONUNBUFFERED=x make test
- - ccache -s || true
- - python setup.py install
- - python -c "from lxml import etree"
diff --git a/README.rst b/README.rst
index e8705ab92..a0434b379 100644
--- a/README.rst
+++ b/README.rst
@@ -63,7 +63,7 @@ Crypto currencies do not fit into that ambition.
.. _`doc/main.txt`: https://github.com/lxml/lxml/blob/master/doc/main.txt
.. _`INSTALL.txt`: http://lxml.de/installation.html
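-`Travis-CI <https://travis-ci.org/>`_ and `AppVeyor <https://www.appveyor.com/>`_
+`AppVeyor <https://www.appveyor.com/>`_ and `GitHub Actions <https://github.com/features/actions>`_
support the lxml project with their build and CI servers.
Jetbrains supports the lxml project by donating free licenses of their
`PyCharm IDE <https://www.jetbrains.com/pycharm/>`_.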
From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Fri, 1 Jul 2022 21:06:10 +0200
Subject: [PATCH 4/6] Fix a crash when incorrect parser input occurs together
with usages of iterwalk() on trees generated by the same parser.
---
src/lxml/apihelpers.pxi | 7 ++++---
src/lxml/iterparse.pxi | 11 ++++++-----
src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++
3 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index c16627629..9fae9fb12 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
c_ns = c_node.nsDef
while c_ns is not NULL:
- prefix = funicodeOrNone(c_ns.prefix)
- if prefix not in nsmap:
- nsmap[prefix] = funicodeOrNone(c_ns.href)
+ if c_ns.prefix or c_ns.href:
+ prefix = funicodeOrNone(c_ns.prefix)
+ if prefix not in nsmap:
+ nsmap[prefix] = funicodeOrNone(c_ns.href)
c_ns = c_ns.next
c_node = c_node.parent
return nsmap
diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
index 138c23a6a..a7299da6d 100644
--- a/src/lxml/iterparse.pxi
+++ b/src/lxml/iterparse.pxi
@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
count = 0
c_ns = c_node.nsDef
while c_ns is not NULL:
- count += 1
+ count += (c_ns.href is not NULL)
c_ns = c_ns.next
return count
@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
count = 0
c_ns = c_node.nsDef
while c_ns is not NULL:
- ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
- funicode(c_ns.href))
- event_list.append( (u"start-ns", ns_tuple) )
- count += 1
+ if c_ns.href:
+ ns_tuple = (funicodeOrEmpty(c_ns.prefix),
+ funicode(c_ns.href))
+ event_list.append( (u"start-ns", ns_tuple) )
+ count += 1
c_ns = c_ns.next
return count
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index e5f084692..285313f6e 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1460,6 +1460,26 @@ def test_iterwalk_getiterator(self):
[1,2,1,4],
counts)
+ def test_walk_after_parse_failure(self):
+ # This used to be an issue because libxml2 can leak empty namespaces
+ # between failed parser runs. iterwalk() failed to handle such a tree.
+ try:
+ etree.XML('''''')
+ except etree.XMLSyntaxError:
+ pass
+ else:
+ assert False, "invalid input did not fail to parse"
+
+ et = etree.XML(''' ''')
+ try:
+ ns = next(etree.iterwalk(et, events=('start-ns',)))
+ except StopIteration:
+ # This would be the expected result, because there was no namespace
+ pass
+ else:
+ # This is a bug in libxml2
+ assert not ns, repr(ns)
+
def test_itertext_comment_pi(self):
# https://bugs.launchpad.net/lxml/+bug/1844674
XML = self.etree.XML
From d65e63229e8958bc08344a85cd3f09ceeef933c3 Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Fri, 1 Jul 2022 21:09:05 +0200
Subject: [PATCH 5/6] Prepare release of lxml 4.9.1.
---
CHANGES.txt | 12 ++++++++++++
doc/main.txt | 10 +++++++---
src/lxml/__init__.py | 2 +-
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index b2e0c8f03..64bba1c22 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,18 @@
lxml changelog
==============
+4.9.1 (2022-07-01)
+==================
+
+Bugs fixed
+----------
+
+* A crash was resolved when using ``iterwalk()`` (or ``canonicalize()``)
+ after parsing certain incorrect input. Note that ``iterwalk()`` could crash
+ even on *valid* input if that input was parsed with the same parser *after*
+ the parser had failed to parse the incorrect input.
+
+
4.9.0 (2022-06-01)
==================
diff --git a/doc/main.txt b/doc/main.txt
index e9a0a4637..578f92dcf 100644
--- a/doc/main.txt
+++ b/doc/main.txt
@@ -160,8 +160,8 @@ Index `_ (PyPI). It has the source
that compiles on various platforms. The source distribution is signed
with `this key `_.
-The latest version is `lxml 4.9.0`_, released 2022-06-01
-(`changes for 4.9.0`_). `Older versions <#old-versions>`_
+The latest version is `lxml 4.9.1`_, released 2022-07-01
+(`changes for 4.9.1`_). `Older versions <#old-versions>`_
are listed below.
Please take a look at the
@@ -256,7 +256,9 @@ See the websites of lxml
..
and the `latest in-development version `_.
-.. _`PDF documentation`: lxmldoc-4.9.0.pdf
+.. _`PDF documentation`: lxmldoc-4.9.1.pdf
+
+* `lxml 4.9.1`_, released 2022-07-01 (`changes for 4.9.1`_)
* `lxml 4.9.0`_, released 2022-06-01 (`changes for 4.9.0`_)
@@ -280,6 +282,7 @@ See the websites of lxml
* `older releases `_
+.. _`lxml 4.9.1`: /files/lxml-4.9.1.tgz
.. _`lxml 4.9.0`: /files/lxml-4.9.0.tgz
.. _`lxml 4.8.0`: /files/lxml-4.8.0.tgz
.. _`lxml 4.7.1`: /files/lxml-4.7.1.tgz
@@ -291,6 +294,7 @@ See the websites of lxml
.. _`lxml 4.6.1`: /files/lxml-4.6.1.tgz
.. _`lxml 4.6.0`: /files/lxml-4.6.0.tgz
+.. _`changes for 4.9.1`: /changes-4.9.1.html
.. _`changes for 4.9.0`: /changes-4.9.0.html
.. _`changes for 4.8.0`: /changes-4.8.0.html
.. _`changes for 4.7.1`: /changes-4.7.1.html
diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py
index 0e0083413..f8be68f71 100644
--- a/src/lxml/__init__.py
+++ b/src/lxml/__init__.py
@@ -1,6 +1,6 @@
# this is a package
-__version__ = "4.9.0"
+__version__ = "4.9.1"
def get_include():
From d01872ccdf7e1e5e825b6c6292b43e7d27ae5fc4 Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Fri, 1 Jul 2022 21:19:44 +0200
Subject: [PATCH 6/6] Prevent parse failure in new test from leaking into later
test runs.
---
src/lxml/tests/test_etree.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 285313f6e..3e52258ed 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1463,14 +1463,16 @@ def test_iterwalk_getiterator(self):
def test_walk_after_parse_failure(self):
# This used to be an issue because libxml2 can leak empty namespaces
# between failed parser runs. iterwalk() failed to handle such a tree.
+ parser = etree.XMLParser()
+
try:
- etree.XML('''''')
+ etree.XML('''''', parser=parser)
except etree.XMLSyntaxError:
pass
else:
assert False, "invalid input did not fail to parse"
- et = etree.XML(''' ''')
+ et = etree.XML(''' ''', parser=parser)
try:
ns = next(etree.iterwalk(et, events=('start-ns',)))
except StopIteration: