Strip every w:hyperlink start and end tag from the XML handed to
docx.oxml.parse_xml() before lxml parses it; the content between the tags is
left in place. Input may be a byte string (decoded/re-encoded as UTF-8) or a
text string, and the same type is passed on. Adds `six` to requirements.txt
for the byte-string check.

diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py
index 528b1eac7..14a96261b 100644
--- a/docx/oxml/__init__.py
+++ b/docx/oxml/__init__.py
@@ -7,6 +7,8 @@
 
 from __future__ import absolute_import
 
+import re
+import six
 from lxml import etree
 
 from .ns import NamespacePrefixedTag, nsmap
@@ -18,6 +20,17 @@
 oxml_parser.set_element_class_lookup(element_class_lookup)
 
 
+def remove_hyperlink_tags(xml):
+    is_bytestring = isinstance(xml, six.binary_type)
+    if is_bytestring:
+        xml = xml.decode('utf-8')
+    xml = xml.replace('</w:hyperlink>', '')
+    xml = re.sub('<w:hyperlink[^>]*>', '', xml)
+    if is_bytestring:
+        xml = xml.encode('utf-8')
+    return xml
+
+
 def parse_xml(xml):
     """
     Return root lxml element obtained by parsing XML character string in
@@ -25,7 +38,7 @@ def parse_xml(xml):
     parser is used, so custom element classes are produced for elements in
     *xml* that have them.
     """
-    root_element = etree.fromstring(xml, oxml_parser)
+    root_element = etree.fromstring(remove_hyperlink_tags(xml), oxml_parser)
     return root_element
 
 
diff --git a/requirements.txt b/requirements.txt
index de244afa3..a335bbc0f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ lxml>=3.1.0
 mock>=1.0.1
 pyparsing>=2.0.1
 pytest>=2.5
+six>=1.10.0
\ No newline at end of file