From 436e8a2f54b23860fdc029769085e787bc0b985d Mon Sep 17 00:00:00 2001 From: yunshi Date: Mon, 14 Mar 2016 21:10:40 +0100 Subject: [PATCH 1/2] docs: document hyperlink analysis --- docs/dev/analysis/features/text/hyperlink.rst | 301 ++++++++++++++++++ docs/dev/analysis/features/text/index.rst | 3 + 2 files changed, 304 insertions(+) create mode 100644 docs/dev/analysis/features/text/hyperlink.rst diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 000000000..aa8788da3 --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,301 @@ + +Hyperlink +========= + +Word allows hyperlinks to be placed in a document. + +The target of a hyperlink may be external, such as a web site, or internal, +to another location in the document. + +A hyperlink can contain multiple runs of text, each with its own distinct +text formatting (font). + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal +hyperlink has only an anchor. + +.. highlight:: python + +**Add the external hyperlink** `http://us.com#about`:: + + >>> hyperlink = paragraph.add_hyperlink('About', address='http://us.com', anchor='about') + >>> hyperlink + + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.anchor + 'about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', anchor='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.anchor + 'Section_1' + >>> hyperlink.address + None + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' + >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [, + , + ] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()]: + [, ] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other +stuff, including nested hyperlinks, at least as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/index.rst b/docs/dev/analysis/features/text/index.rst index 3b7694731..a9cb92bf8 100644 --- a/docs/dev/analysis/features/text/index.rst +++ b/docs/dev/analysis/features/text/index.rst @@ -5,6 +5,7 @@ Text .. toctree:: :titlesonly: + hyperlink font-highlight-color paragraph-format font @@ -12,3 +13,5 @@ Text underline run-content breaks + hyperlink + From 256e2e5305f2e0318c4b9c6258758ff817807d4c Mon Sep 17 00:00:00 2001 From: yunshi Date: Mon, 14 Mar 2016 21:10:40 +0100 Subject: [PATCH 2/2] docs: document hyperlink analysis --- docs/dev/analysis/features/text/hyperlink.rst | 301 ++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 docs/dev/analysis/features/text/hyperlink.rst diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 000000000..aa8788da3 --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,301 @@ + +Hyperlink +========= + +Word allows hyperlinks to be placed in a document. + +The target of a hyperlink may be external, such as a web site, or internal, +to another location in the document. + +A hyperlink can contain multiple runs of text, each with its own distinct +text formatting (font). + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal +hyperlink has only an anchor. + +.. highlight:: python + +**Add the external hyperlink** `http://us.com#about`:: + + >>> hyperlink = paragraph.add_hyperlink('About', address='http://us.com', anchor='about') + >>> hyperlink + + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.anchor + 'about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', anchor='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.anchor + 'Section_1' + >>> hyperlink.address + None + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' + >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [, + , + ] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()]: + [, ] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I'm + suspecting this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other +stuff, including nested hyperlinks, at least as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +