Skip to content

Commit 60d2a7c

Browse files
authored
Libdoc refactoring
Also better tests for roundtrip when saving Libspec using XML:HTML format and reading it again.
1 parent b920166 commit 60d2a7c

File tree

6 files changed

+224
-274
lines changed

6 files changed

+224
-274
lines changed
Lines changed: 43 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,117 +1,73 @@
11
*** Settings ***
22
Resource libdoc_resource.robot
3-
Suite Setup Run Libdoc to XML:HTML and to HTML and Parse Model
4-
Test Template Should Be Equal Multiline
3+
Suite Setup Run Libdoc to XML:HTML and to HTML and Parse Models ${TESTDATADIR}/module.py
4+
Test Template Should Be Equal As Strings
55

66
*** Keywords ***
7-
Run Libdoc to XML:HTML and to HTML and Parse Model
8-
Run Libdoc And Set Output --format XML:HTML ${TESTDATADIR}/module.py ${OUTXML}
7+
Run Libdoc to XML:HTML and to HTML and Parse Models
8+
[Arguments] ${library_path}
9+
Run Libdoc And Set Output --format XML:HTML ${library_path} ${OUTXML}
910
Run Libdoc And Parse Model From HTML ${OUTXML}
10-
11-
*** Comments ***
12-
This test suite will be changed with one of the next Tasks to contain a check for the roundtrip from library into XML then from XML to html.
11+
Set Suite Variable ${XML-MODEL} ${MODEL}
12+
Run Libdoc And Parse Model From HTML ${library_path}
1313

1414
*** Test Cases ***
1515
Name
16-
${MODEL}[name] module
16+
${XML-MODEL}[name] ${MODEL}[name]
1717

1818
Documentation
19-
${MODEL}[doc] <p>Module test library.</p>
19+
${XML-MODEL}[doc] ${MODEL}[doc]
2020

2121
Version
22-
${MODEL}[version] 0.1-alpha
23-
24-
Generated
25-
[Template] Should Match Regexp
26-
${MODEL}[generated] \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}
22+
${XML-MODEL}[version] ${MODEL}[version]
2723

2824
Scope
29-
${MODEL}[scope] GLOBAL
25+
${XML-MODEL}[scope] ${MODEL}[scope]
3026

3127
Named Args
32-
[Template] Should Be Equal
33-
${MODEL}[named_args] ${True}
28+
${XML-MODEL}[named_args] ${MODEL}[named_args]
3429

3530
Inits
36-
[Template] Should Be Empty
37-
${MODEL}[inits]
31+
${XML-MODEL}[inits] ${MODEL}[inits]
3832

3933
Keyword Names
40-
${MODEL}[keywords][0][name] Get Hello
41-
${MODEL}[keywords][1][name] Keyword
42-
${MODEL}[keywords][13][name] Set Name Using Robot Name Attribute
34+
${XML-MODEL}[keywords][0][name] ${MODEL}[keywords][0][name]
35+
${XML-MODEL}[keywords][1][name] ${MODEL}[keywords][1][name]
36+
${XML-MODEL}[keywords][13][name] ${MODEL}[keywords][13][name]
4337

4438
Keyword Arguments
45-
[Template] Should Be Equal As Strings
46-
${MODEL}[keywords][0][args] []
47-
${MODEL}[keywords][1][args] ['a1=d', '*a2']
48-
${MODEL}[keywords][6][args] ['arg=hyv\\\\xe4']
49-
${MODEL}[keywords][10][args] ['arg=hyvä']
50-
${MODEL}[keywords][12][args] ['a=1', 'b=True', 'c=(1, 2, None)']
51-
${MODEL}[keywords][13][args] ['a', 'b', '*args', '**kwargs']
39+
${XML-MODEL}[keywords][0][args] ${MODEL}[keywords][0][args]
40+
${XML-MODEL}[keywords][1][args] ${MODEL}[keywords][1][args]
41+
${XML-MODEL}[keywords][6][args] ${MODEL}[keywords][6][args]
42+
${XML-MODEL}[keywords][10][args] ${MODEL}[keywords][10][args]
43+
${XML-MODEL}[keywords][12][args] ${MODEL}[keywords][12][args]
44+
${XML-MODEL}[keywords][13][args] ${MODEL}[keywords][13][args]
5245

5346
Embedded Arguments
54-
[Template] NONE
55-
Should Be Equal ${MODEL}[keywords][14][name] Takes \${embedded} \${args}
56-
Should Be Empty ${MODEL}[keywords][14][args]
47+
${XML-MODEL}[keywords][14][name] ${MODEL}[keywords][14][name]
48+
${XML-MODEL}[keywords][14][args] ${MODEL}[keywords][14][args]
5749

5850
Keyword Documentation
59-
${MODEL}[keywords][1][doc]
60-
... <p>A keyword.</p>
61-
... <p>See <a href="#Get%20Hello" class="name">get hello</a> for details.</p>
62-
${MODEL}[keywords][0][doc]
63-
... <p>Get hello.</p>
64-
... <p>See <a href="#Importing" class="name">importing</a> for explanation of nothing and <a href="#Introduction" class="name">introduction</a> for no more information.</p>
65-
${MODEL}[keywords][5][doc]
66-
... <p>This is short doc. It can span multiple physical lines.</p>
67-
... <p>This is body. It can naturally also contain multiple lines.</p>
68-
... <p>And paragraphs.</p>
69-
70-
Non-ASCII Keyword Documentation
71-
${MODEL}[keywords][8][doc] <p>Hyvää yötä.</p>
72-
${MODEL}[keywords][11][doc] <p>Hyvää yötä.</p>\n<p>Спасибо!</p>
51+
${XML-MODEL}[keywords][1][doc] ${MODEL}[keywords][1][doc]
52+
${XML-MODEL}[keywords][0][doc] ${MODEL}[keywords][0][doc]
53+
${XML-MODEL}[keywords][5][doc] ${MODEL}[keywords][5][doc]
54+
${XML-MODEL}[keywords][8][doc] ${MODEL}[keywords][8][doc]
55+
${XML-MODEL}[keywords][11][doc] ${MODEL}[keywords][11][doc]
7356

7457
Keyword Short Doc
75-
${MODEL}[keywords][1][shortdoc] A keyword.
76-
${MODEL}[keywords][0][shortdoc] Get hello.
77-
${MODEL}[keywords][8][shortdoc] Hyvää yötä.
78-
${MODEL}[keywords][11][shortdoc] Hyvää yötä.
79-
80-
Keyword Short Doc Spanning Multiple Physical Lines
81-
${MODEL}[keywords][5][shortdoc] This is short doc. It can span multiple physical lines.
58+
${XML-MODEL}[keywords][1][shortdoc] ${MODEL}[keywords][1][shortdoc]
59+
${XML-MODEL}[keywords][0][shortdoc] ${MODEL}[keywords][0][shortdoc]
60+
${XML-MODEL}[keywords][8][shortdoc] ${MODEL}[keywords][8][shortdoc]
61+
${XML-MODEL}[keywords][11][shortdoc] ${MODEL}[keywords][11][shortdoc]
62+
${XML-MODEL}[keywords][5][shortdoc] ${MODEL}[keywords][5][shortdoc]
8263

8364
Keyword tags
84-
[Template] Should Be Equal As Strings
85-
${MODEL}[keywords][1][tags] []
86-
${MODEL}[keywords][2][tags] ['1', 'one', 'yksi']
87-
${MODEL}[keywords][3][tags] ['2', 'kaksi', 'two']
88-
${MODEL}[keywords][4][tags] ['tag1', 'tag2']
89-
90-
User keyword documentation formatting
91-
[Setup] Run Libdoc And Parse Model From HTML ${TESTDATADIR}/resource.robot
92-
${MODEL}[keywords][0][doc] <p>$\{CURDIR}</p>
93-
${MODEL}[keywords][1][doc] <p><b>DEPRECATED</b> for some reason.</p>
94-
${MODEL}[keywords][2][doc]
95-
${MODEL}[keywords][10][doc]
96-
... <p>Hyvää yötä.</p>
97-
... <p>Спасибо!</p>
98-
${MODEL}[keywords][8][doc]
99-
... <p>foo bar <a href="#kw" class="name">kw</a>.</p>
100-
... <p>FIRST <span class="name">\${a1}</span> alskdj alskdjlajd askf laskdjf asldkfj alsdkfj alsdkfjasldkfj END</p>
101-
... <p>SECOND askf laskdjf <i>asldkfj</i> alsdkfj alsdkfjasldkfj askf <b>laskdjf</b> END</p>
102-
... <p>THIRD asldkfj <a href="#Introduction" class="name">introduction</a> alsdkfj <a href="http://foo.bar">http://foo.bar</a> END</p>
103-
... <ul>
104-
... <li>aaa</li>
105-
... <li>bbb</li>
106-
... </ul>
107-
... <hr>
108-
... <table border="1">
109-
... <tr>
110-
... <th>first</th>
111-
... <th>second</th>
112-
... </tr>
113-
... <tr>
114-
... <td>foo</td>
115-
... <td>bar</td>
116-
... </tr>
117-
... </table>
65+
${XML-MODEL}[keywords][1][tags] ${MODEL}[keywords][1][tags]
66+
${XML-MODEL}[keywords][2][tags] ${MODEL}[keywords][2][tags]
67+
${XML-MODEL}[keywords][3][tags] ${MODEL}[keywords][3][tags]
68+
${XML-MODEL}[keywords][4][tags] ${MODEL}[keywords][4][tags]
69+
70+
TOC doc
71+
[Template] None
72+
Run Libdoc to XML:HTML and to HTML and Parse Models ${TESTDATADIR}/TOCWithInitsAndKeywords.py
73+
Should Be Equal As Strings ${XML-MODEL}[doc] ${MODEL}[doc]

src/robot/libdocpkg/htmlutils.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import re
2+
try:
3+
from urllib import quote
4+
except ImportError:
5+
from urllib.parse import quote
6+
7+
from robot.errors import DataError
8+
from robot.utils import html_escape, html_format, NormalizedDict
9+
from robot.utils.htmlformatters import HeaderFormatter
10+
11+
12+
class DocFormatter(object):
    """Format documentation as HTML and turn `name` references into links.

    Link targets are collected once at construction time: a fixed set of
    section names, every keyword name and, when the documentation format is
    ``ROBOT``, the header titles found in the introduction.
    """
    _header_regexp = re.compile(r'<h([234])>(.+?)</h\1>')
    _name_regexp = re.compile('`(.+?)`')

    def __init__(self, keywords, introduction, doc_format='ROBOT'):
        """Create the HTML converter for `doc_format` and collect link targets.

        `keywords` is iterated for objects having a ``name`` attribute and
        `introduction` is the introduction text scanned for headers.
        """
        self._doc_to_html = DocToHtml(doc_format)
        is_robot_format = doc_format == 'ROBOT'
        self._targets = self._get_targets(keywords, introduction,
                                          robot_format=is_robot_format)

    def _get_targets(self, keywords, introduction, robot_format):
        """Return the mapping from linkable names to encoded link targets."""
        targets = {
            'introduction': 'Introduction',
            'library introduction': 'Introduction',
            'importing': 'Importing',
            'library importing': 'Importing',
            'keywords': 'Keywords'
        }
        # Keyword names and header titles link to themselves.
        targets.update((kw.name, kw.name) for kw in keywords)
        if robot_format:
            targets.update((title, title) for title
                           in self._yield_header_targets(introduction))
        return self._escape_and_encode_targets(targets)

    def _yield_header_targets(self, introduction):
        """Yield the title of every introduction line matched as a header."""
        formatter = HeaderFormatter()
        for line in introduction.splitlines():
            found = formatter.match(line.strip())
            if found:
                yield found.group(2)

    def _escape_and_encode_targets(self, targets):
        """Return targets with HTML escaped keys and URI encoded values."""
        escaped_and_encoded = (
            (html_escape(name), self._encode_uri_component(target))
            for name, target in targets.items()
        )
        return NormalizedDict(escaped_and_encoded)

    def _encode_uri_component(self, value):
        """Percent-encode `value` as UTF-8, keeping ``-_.!~*'()`` as is."""
        # Emulates encodeURIComponent javascript function
        utf8_bytes = value.encode('UTF-8')
        return quote(utf8_bytes, safe="-_.!~*'()")

    def html(self, doc, intro=False):
        """Return `doc` converted to HTML with `name` references resolved.

        When `intro` is true, ``h2``-``h4`` headers additionally get an
        ``id`` attribute equal to their content so that they can be linked.
        """
        html = self._doc_to_html(doc)
        if intro:
            html = self._header_regexp.sub(r'<h\1 id="\2">\2</h\1>', html)
        return self._name_regexp.sub(self._link_keywords, html)

    def _link_keywords(self, match):
        """Return a link for a known name and a plain span for others."""
        name = match.group(1)
        if name not in self._targets:
            return '<span class="name">%s</span>' % name
        return '<a href="#%s" class="name">%s</a>' % (self._targets[name], name)
62+
63+
64+
class DocToHtml(object):
    """Callable converting documentation in the given format to HTML.

    Supported formats are ``ROBOT``, ``TEXT``, ``HTML`` and ``REST``.
    Creating an instance with any other format raises `DataError`.
    """

    def __init__(self, doc_format):
        self._formatter = self._get_formatter(doc_format)

    def _get_formatter(self, doc_format):
        """Return the conversion function registered for `doc_format`."""
        formatters = {
            'ROBOT': html_format,
            'TEXT': self._format_text,
            'HTML': lambda doc: doc,    # Already HTML, passed through as is.
            'REST': self._format_rest
        }
        try:
            return formatters[doc_format]
        except KeyError:
            raise DataError("Invalid documentation format '%s'." % doc_format)

    def _format_text(self, doc):
        """Escape `doc` and wrap it into a whitespace preserving paragraph."""
        escaped = html_escape(doc)
        return '<p style="white-space: pre-wrap">%s</p>' % escaped

    def _format_rest(self, doc):
        """Return the HTML body docutils generates from reST `doc`.

        Raises `DataError` if docutils cannot be imported.
        """
        # Imported lazily so that docutils is needed only with reST docs.
        try:
            from docutils.core import publish_parts
        except ImportError:
            raise DataError("reST format requires 'docutils' module to be installed.")
        overrides = {'syntax_highlight': 'short'}
        parts = publish_parts(doc, writer_name='html',
                              settings_overrides=overrides)
        return parts['html_body']

    def __call__(self, doc):
        """Convert `doc` to HTML using the formatter selected at creation."""
        return self._formatter(doc)
92+
93+
94+
class HtmlToText(object):
    """Convert simple HTML documentation fragments to plain text.

    Inline formatting tags are replaced with the plain text markers listed
    in `html_tags` and line breaks and character entities are decoded as
    listed in `html_chars`.
    """
    # Maps a regexp matching a tag name to the marker placed on both sides
    # of the tag content.
    html_tags = {
        'b': '*',
        'i': '_',
        'strong': '*',
        'em': '_',
        'code': '``',
        'div.*?': ''
    }
    # Maps a regexp matching an HTML construct to its plain text replacement.
    html_chars = {
        '<br */?>': '\n',
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&quot;': '"',
        '&apos;': "'"
    }

    def get_shortdoc_from_html(self, doc):
        """Return the content of the first paragraph of `doc` as plain text.

        If `doc` contains no ``<p>`` element, the whole `doc` is converted.
        """
        # `\b` prevents matching other tags starting with `p` such as <pre>.
        # The paragraph ends at `</p>` or at the start of the next `<p>`.
        match = re.search(r'<p\b[^>]*>(.*?)</?p>', doc, re.DOTALL)
        if match:
            doc = match.group(1)
        return self.html_to_plain_text(doc)

    def html_to_plain_text(self, doc):
        """Return `doc` with known tags and entities converted to plain text."""
        for tag, repl in self.html_tags.items():
            doc = re.sub(r'<%(tag)s>(.*?)</%(tag)s>' % {'tag': tag},
                         r'%(repl)s\1%(repl)s' % {'repl': repl}, doc,
                         flags=re.DOTALL)
        return self._replace_html_chars(doc)

    def _replace_html_chars(self, doc):
        """Apply all `html_chars` replacements to `doc` in a single pass.

        Replacing sequentially would decode already decoded text again
        (e.g. `&amp;lt;` -> `&lt;` -> `<`) and make the result depend on
        the iteration order of the dictionary.
        """
        replacements = list(self.html_chars.items())
        if not replacements:
            return doc
        # Each pattern gets its own named group so that the callback can
        # tell which of the alternatives matched.
        combined = '|'.join('(?P<c%d>%s)' % (index, regexp)
                            for index, (regexp, _) in enumerate(replacements))

        def replace(match):
            return replacements[int(match.lastgroup[1:])][1]

        return re.sub(combined, replace, doc)

0 commit comments

Comments
 (0)