Skip to content

Commit be51b95

Browse files
authored
feature: Paragraph.text includes hyperlink text
Fixes issue #85
1 parent bc34ec9 commit be51b95

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

docx/oxml/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from __future__ import absolute_import
99

10+
import re
1011
from lxml import etree
1112

1213
from .ns import NamespacePrefixedTag, nsmap
@@ -18,14 +19,20 @@
1819
oxml_parser.set_element_class_lookup(element_class_lookup)
1920

2021

22+
def remove_hyperlink_tags(xml):
    """
    Return *xml* with every ``<w:hyperlink ...>`` start tag and
    ``</w:hyperlink>`` end tag removed. The content between the tags (e.g.
    the runs that carry the link text) is left in place, so it becomes a
    direct child of the element that enclosed the hyperlink.

    *xml* may be a text string or a byte string; the return value has the
    same type as the argument. The substitution is purely textual and is
    applied to the whole string, so it does not distinguish markup from
    matching text inside comments or CDATA sections.
    """
    # ``(?=[\s/>])`` ends the tag name, so an element whose name merely
    # starts with "w:hyperlink" is not stripped. Quoted attribute values are
    # consumed as a unit because a literal ">" is legal inside them and would
    # otherwise terminate the match early.
    if isinstance(xml, bytes):
        # A bytes subject needs a bytes pattern and replacement; mixing
        # ``str`` and ``bytes`` raises TypeError on Python 3.
        return re.sub(
            br'''</?w:hyperlink(?=[\s/>])(?:[^>"']|"[^"]*"|'[^']*')*>''',
            b'',
            xml,
        )
    return re.sub(
        r'''</?w:hyperlink(?=[\s/>])(?:[^>"']|"[^"]*"|'[^']*')*>''',
        '',
        xml,
    )
26+
27+
2128
def parse_xml(xml):
    """
    Parse the XML character string *xml* and return its root lxml element.
    *xml* can be either a Python 2.x string or unicode. Hyperlink start and
    end tags are first removed from *xml* by `remove_hyperlink_tags()`. The
    result is then parsed with the custom parser, so elements that have a
    custom element class are produced as instances of that class.
    """
    stripped_xml = remove_hyperlink_tags(xml)
    return etree.fromstring(stripped_xml, oxml_parser)
3037

3138

0 commit comments

Comments
 (0)