From 6086f5ca0e96b5a756cbd91c081326a66fcb84a6 Mon Sep 17 00:00:00 2001 From: yunshi Date: Mon, 14 Mar 2016 21:10:40 +0100 Subject: [PATCH 1/2] docs: document hyperlink analysis --- docs/dev/analysis/features/text/hyperlink.rst | 301 ++++++++++++++++++ docs/dev/analysis/features/text/index.rst | 3 + 2 files changed, 304 insertions(+) create mode 100644 docs/dev/analysis/features/text/hyperlink.rst diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 000000000..aa8788da3 --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,301 @@ + +Hyperlink +========= + +Word allows hyperlinks to be placed in a document. + +The target of a hyperlink may be external, such as a web site, or internal, +to another location in the document. + +A hyperlink can contain multiple runs of text, each with its own distinct +text formatting (font). + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal +hyperlink has only an anchor. + +.. highlight:: python + +**Add the external hyperlink** `http://us.com#about`:: + + >>> hyperlink = paragraph.add_hyperlink('About', address='http://us.com', anchor='about') + >>> hyperlink + <docx.text.hyperlink.Hyperlink object at 0x...> + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.anchor + 'about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', anchor='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.anchor + 'Section_1' + >>> hyperlink.address + None + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' 
+ >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [<docx.text.run.Run object at 0x...>, + <docx.text.run.Run object at 0x...>, + <docx.text.run.Run object at 0x...>] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()] + [<docx.text.run.Run object at 0x...>, <docx.text.hyperlink.Hyperlink object at 0x...>] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. 
highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other +stuff, including nested hyperlinks, at least as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/index.rst b/docs/dev/analysis/features/text/index.rst index 3b7694731..a9cb92bf8 100644 --- a/docs/dev/analysis/features/text/index.rst +++ b/docs/dev/analysis/features/text/index.rst @@ -5,6 +5,7 @@ Text .. 
toctree:: :titlesonly: + hyperlink font-highlight-color paragraph-format font @@ -12,3 +13,5 @@ Text underline run-content breaks + hyperlink + From 9b71461f6211c88157443f23b0cabbcbdbe0cb6b Mon Sep 17 00:00:00 2001 From: yunshi Date: Wed, 30 Mar 2016 21:51:29 +0200 Subject: [PATCH 2/2] _wip_: implement hyperlink --- docx/oxml/__init__.py | 3 ++ docx/oxml/text/hyperlink.py | 40 +++++++++++++++++++++++++ docx/oxml/text/paragraph.py | 1 + docx/text/hyperlink.py | 60 +++++++++++++++++++++++++++++++++++++ docx/text/paragraph.py | 21 ++++++++++++- 5 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 docx/oxml/text/hyperlink.py create mode 100644 docx/text/hyperlink.py diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py index 3e320a217..a928c28d4 100644 --- a/docx/oxml/__init__.py +++ b/docx/oxml/__init__.py @@ -181,6 +181,9 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): from .text.paragraph import CT_P register_element_cls('w:p', CT_P) +from .text.hyperlink import CT_Hyperlink +register_element_cls('w:hyperlink', CT_Hyperlink) + from .text.parfmt import CT_Ind, CT_Jc, CT_PPr, CT_Spacing register_element_cls('w:ind', CT_Ind) register_element_cls('w:jc', CT_Jc) diff --git a/docx/oxml/text/hyperlink.py b/docx/oxml/text/hyperlink.py new file mode 100644 index 000000000..b66b9c31c --- /dev/null +++ b/docx/oxml/text/hyperlink.py @@ -0,0 +1,40 @@ +# encoding: utf-8 + +""" +Custom element classes related to hyperlinks (CT_Hyperlink). +""" + +from ..ns import qn +from ..simpletypes import ST_String, ST_RelationshipId +from ..xmlchemy import ( + BaseOxmlElement, OptionalAttribute, ZeroOrMore +) + + +class CT_Hyperlink(BaseOxmlElement): + """ + ```` element, containing the properties and text for a external hyperlink. + """ + r = ZeroOrMore('w:r') + rid = OptionalAttribute('r:id', ST_RelationshipId) + anchor = OptionalAttribute('w:anchor', ST_String) + + @property + def relationship(self): + """ + String contained in ``r:id`` attribute of . 
It should + point to a URL in the document's relationships. + """ + val = self.get(qn('r:id')) + return val + + @relationship.setter + def relationship(self, rId): + self.set(qn('r:id'), rId) + + def clear_content(self): + """ + Remove all child elements. + """ + for child in self[:]: + self.remove(child) diff --git a/docx/oxml/text/paragraph.py b/docx/oxml/text/paragraph.py index 5e4213776..7a29adc13 100644 --- a/docx/oxml/text/paragraph.py +++ b/docx/oxml/text/paragraph.py @@ -14,6 +14,7 @@ class CT_P(BaseOxmlElement): """ pPr = ZeroOrOne('w:pPr') r = ZeroOrMore('w:r') + hyperlink = ZeroOrMore('w:hyperlink') def _insert_pPr(self, pPr): self.insert(0, pPr) diff --git a/docx/text/hyperlink.py b/docx/text/hyperlink.py new file mode 100644 index 000000000..64e3055ab --- /dev/null +++ b/docx/text/hyperlink.py @@ -0,0 +1,60 @@ +# encoding: utf-8 + +""" +Hyperlink proxy objects. +""" + +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) + +from ..opc.constants import RELATIONSHIP_TYPE as RT +from ..shared import Parented +from .run import Run + + +class Hyperlink(Parented): + """ + Proxy object wrapping ```` element. 
+ """ + def __init__(self, hyperlink, parent): + super(Hyperlink, self).__init__(parent) + self._hyperlink = self.element = hyperlink + + @property + def address(self): + rId = self._hyperlink.relationship + return self.part.target_ref(rId) if rId else None + + @address.setter + def address(self, url): + rId = self.part.relate_to(url, RT.HYPERLINK, is_external=True) + self._hyperlink.relationship = rId + + @property + def anchor(self): + return self._hyperlink.anchor + + @anchor.setter + def anchor(self, anchor): + self._hyperlink.anchor = anchor + + def iter_runs(self): + return [Run(r, self) for r in self._hyperlink.r_lst] + + def insert_run(self, text, style=None): + _r = self._hyperlink.add_r() + run = Run(_r, self) + run.text = text + if style: + run.style = style + return run + + @property + def text(self): + return ''.join([run.text for run in self.iter_runs()]) + + @text.setter + def text(self, text): + self._hyperlink.clear_content() + self.insert_run(text) diff --git a/docx/text/paragraph.py b/docx/text/paragraph.py index 4fb583b94..0ca91bf5a 100644 --- a/docx/text/paragraph.py +++ b/docx/text/paragraph.py @@ -9,9 +9,10 @@ ) from ..enum.style import WD_STYLE_TYPE +from ..shared import Parented +from .hyperlink import Hyperlink from .parfmt import ParagraphFormat from .run import Run -from ..shared import Parented class Paragraph(Parented): @@ -22,6 +23,24 @@ def __init__(self, p, parent): super(Paragraph, self).__init__(parent) self._p = self._element = p + def add_hyperlink(self, text, address=None, anchor=None, style=None): + + _h = self._p.add_hyperlink() + _r = _h.add_r() + hyperlink = Hyperlink(_h, self) + run = Run(_r, hyperlink) + + run.text = text + if style: + run.style = style + + if address: + hyperlink.address = address + if anchor: + hyperlink.anchor = anchor + + return hyperlink + def add_run(self, text=None, style=None): """ Append a run to this paragraph containing *text* and having character