python-openxml · desecho · Mar 17, 2017
diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py
@@ -7,6 +7,8 @@
 
 from __future__ import absolute_import
 
+import re
+import six
 from lxml import etree
 
 from .ns import NamespacePrefixedTag, nsmap
@@ -18,14 +20,43 @@
 oxml_parser.set_element_class_lookup(element_class_lookup)
 
 
+# Matches a start, end, or empty-element ``w:hyperlink`` tag. The lookahead
+# requires whitespace, "/" or ">" right after the name, so longer element
+# names that merely begin with ``w:hyperlink`` are left untouched.
+_HYPERLINK_TAG_BYTES = br'</?w:hyperlink(?=[\s/>])[^>]*>'
+_HYPERLINK_TAG_TEXT = _HYPERLINK_TAG_BYTES.decode('ascii')
+
+
+def remove_hyperlink_tags(xml):
+    """
+    Return *xml* with every ``w:hyperlink`` start, end, and empty-element
+    tag removed; whatever sat between a start and end tag is kept.
+
+    *xml* may be a byte string or a unicode string and the result has the
+    same type. Byte strings are searched as-is rather than being decoded, so
+    input that is not valid UTF-8 does not raise `UnicodeDecodeError`. The
+    tag is matched as ASCII bytes, so byte input in an encoding that is not
+    ASCII-compatible (e.g. UTF-16) will generally be returned unchanged.
+
+    Only the literal ``w:`` prefix is recognized; this is a textual
+    substitution and does not resolve XML namespaces.
+    """
+    if isinstance(xml, six.binary_type):
+        return re.sub(_HYPERLINK_TAG_BYTES, b'', xml)
+    return re.sub(_HYPERLINK_TAG_TEXT, u'', xml)
+
+
 def parse_xml(xml):
     """
     Return root lxml element obtained by parsing XML character string in
     *xml*, which can be either a Python 2.x string or unicode. The custom
     parser is used, so custom element classes are produced for elements in
     *xml* that have them.
+
+    ``w:hyperlink`` tags are stripped from *xml* before it is parsed, so the
+    content they enclosed is parsed as part of the enclosing element.
     """
-    root_element = etree.fromstring(xml, oxml_parser)
+    root_element = etree.fromstring(remove_hyperlink_tags(xml), oxml_parser)
     return root_element
 
 

diff --git a/requirements.txt b/requirements.txt
@@ -4,3 +4,4 @@ lxml>=3.1.0
 mock>=1.0.1
 pyparsing>=2.0.1
 pytest>=2.5
+six>=1.10.0