Skip to content

Commit b783545

Browse files
committed
Feature: Paragraph.text includes hyperlink text
Fixes issue #85
1 parent e784a73 commit b783545

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

docx/oxml/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
from __future__ import absolute_import
99

10+
import re
11+
import six
1012
from lxml import etree
1113

1214
from .ns import NamespacePrefixedTag, nsmap
@@ -18,14 +20,25 @@
1820
oxml_parser.set_element_class_lookup(element_class_lookup)
1921

2022

23+
def remove_hyperlink_tags(xml):
    """
    Return *xml* with every ``w:hyperlink`` start tag and end tag removed.

    Only the tags themselves are stripped; whatever appears between a start
    tag and its end tag (typically the runs holding the link text) is left
    in place. A self-closing ``<w:hyperlink .../>`` is removed entirely.

    *xml* may be a byte string or a text (unicode) string; the return value
    has the same type as the argument. Byte strings are matched directly,
    without a decode/encode round trip, so input that is not valid UTF-8
    no longer raises ``UnicodeDecodeError``.
    """
    # The lookahead requires the tag name to end right after "w:hyperlink",
    # so an element whose name merely starts with that prefix is left alone.
    # "</?" lets one pattern cover both start and end tags, including an end
    # tag written with whitespace before the closing ">".
    if isinstance(xml, bytes):
        return re.sub(br'</?w:hyperlink(?=[\s/>])[^>]*>', b'', xml)
    return re.sub(r'</?w:hyperlink(?=[\s/>])[^>]*>', '', xml)
32+
33+
2134
def parse_xml(xml):
    """
    Parse the XML in *xml* and return its root lxml element.

    *xml* is first passed through `remove_hyperlink_tags()` and the result
    is handed to `etree.fromstring()` together with the module's custom
    parser, so elements that have a custom element class registered are
    returned as instances of that class.
    """
    cleaned_xml = remove_hyperlink_tags(xml)
    return etree.fromstring(cleaned_xml, oxml_parser)
3043

3144

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def text_of(relpath):
3636
PACKAGES = find_packages(exclude=['tests', 'tests.*'])
3737
PACKAGE_DATA = {'docx': ['templates/*']}
3838

39-
INSTALL_REQUIRES = ['lxml>=2.3.2']
39+
INSTALL_REQUIRES = ['lxml>=2.3.2', 'six>=1.10.0']
4040
TEST_SUITE = 'tests'
4141
TESTS_REQUIRE = ['behave', 'mock', 'pyparsing', 'pytest']
4242

0 commit comments

Comments
 (0)