Skip to content

Commit afd9fef

Browse files
committed
Feature: Paragraph.text includes hyperlink text
Fixes issue #85
1 parent e784a73 commit afd9fef

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

docx/oxml/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
from __future__ import absolute_import
99

10+
import re
11+
import sys
1012
from lxml import etree
1113

1214
from .ns import NamespacePrefixedTag, nsmap
@@ -18,14 +20,24 @@
1820
oxml_parser.set_element_class_lookup(element_class_lookup)
1921

2022

23+
def remove_hyperlink_tags(xml):
    """
    Return *xml* with every ``<w:hyperlink ...>`` start tag and
    ``</w:hyperlink>`` end tag stripped out, leaving whatever was between
    them in place.

    *xml* may be a byte string or a text (unicode) string; the return value
    has the same type as the argument. The tags are removed by plain string
    substitution before parsing, not by manipulating an element tree.
    """
    # Select literals matching the type of the input rather than guessing
    # from the interpreter version: on Python 3 a `str` argument has no
    # `.decode()`, and mixing bytes with text patterns raises TypeError.
    if isinstance(xml, bytes):
        close_tag = b'</w:hyperlink>'
        open_tag_pattern = b'<w:hyperlink[^>]*>'
        empty = b''
    else:
        close_tag = u'</w:hyperlink>'
        open_tag_pattern = u'<w:hyperlink[^>]*>'
        empty = u''

    xml = xml.replace(close_tag, empty)
    # NOTE: `[^>]*` stops at the first '>', so an attribute value containing
    # a literal '>' would leave part of the start tag behind.
    return re.sub(open_tag_pattern, empty, xml)
31+
32+
2133
def parse_xml(xml):
    """
    Parse the XML in *xml* and return its root lxml element.

    The input is first passed through `remove_hyperlink_tags()` and the
    result is parsed with the module's custom `oxml_parser`, so elements
    that have a registered custom element class are returned as instances
    of that class.
    """
    cleaned_xml = remove_hyperlink_tags(xml)
    return etree.fromstring(cleaned_xml, oxml_parser)
3042

3143

0 commit comments

Comments
 (0)