Skip to content

Commit 83b78fd

Browse files
committed
Merge again
--HG-- rename : python/src/html5lib/html5parser.py => python3/src/html5lib/html5parser.py
2 parents d21797e + ef2c545 commit 83b78fd

File tree

17 files changed

+1383
-824
lines changed

17 files changed

+1383
-824
lines changed

parse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ def parse():
8181
t1 = time.time()
8282
printOutput(p, document, opts)
8383
t2 = time.time()
84-
print "\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1)
84+
sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
8585
else:
8686
document = parseMethod(f, encoding=encoding)
8787
printOutput(p, document, opts)

src/html5lib/constants.py

Lines changed: 120 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,10 @@
7272
_(u"Unexpected end of file in attribute value (')."),
7373
"eof-in-attribute-value-no-quotes":
7474
_(u"Unexpected end of file in attribute value."),
75+
"unexpected-EOF-after-solidus-in-tag":
76+
_(u"Unexpected end of file in tag. Expected >"),
77+
"unexpected-character-after-soldius-in-tag":
78+
_(u"Unexpected character after / in tag. Expected >"),
7579
"expected-dashes-or-doctype":
7680
_(u"Expected '--' or 'DOCTYPE'. Not found."),
7781
"incorrect-comment":
@@ -250,6 +254,10 @@
250254
_(u"Unexpected end of file. Expected select content."),
251255
"eof-in-frameset":
252256
_(u"Unexpected end of file. Expected frameset content."),
257+
"non-void-element-with-trailing-solidus":
258+
_(u"Trailing solidus not allowed on element %(name)s"),
259+
"unexpected-html-element-in-foreign-content":
260+
_(u"Element %(name)s not allowed in a non-html context"),
253261
"XXX-undefined-error":
254262
(u"Undefined error (this sucks and should be fixed)"),
255263
}
@@ -261,110 +269,121 @@
261269
"PLAINTEXT":3
262270
}
263271

272+
namespaces = {
273+
"html":"http://www.w3.org/1999/xhtml",
274+
"mathml":"http://www.w3.org/1998/Math/MathML",
275+
"svg":"http://www.w3.org/2000/svg",
276+
"xlink":"http://www.w3.org/1999/xlink",
277+
"xml":"http://www.w3.org/XML/1998/namespace",
278+
"xmlns":"http://www.w3.org/2000/xmlns/"
279+
}
280+
264281
scopingElements = frozenset((
265-
"applet",
266-
"button",
267-
"caption",
268-
"html",
269-
"marquee",
270-
"object",
271-
"table",
272-
"td",
273-
"th"
282+
(namespaces["html"], "applet"),
283+
(namespaces["html"], "button"),
284+
(namespaces["html"], "caption"),
285+
(namespaces["html"], "html"),
286+
(namespaces["html"], "marquee"),
287+
(namespaces["html"], "object"),
288+
(namespaces["html"], "table"),
289+
(namespaces["html"], "td"),
290+
(namespaces["html"], "th"),
291+
(namespaces["svg"], "foreignObject")
274292
))
275293

276294
formattingElements = frozenset((
277-
"a",
278-
"b",
279-
"big",
280-
"em",
281-
"font",
282-
"i",
283-
"nobr",
284-
"s",
285-
"small",
286-
"strike",
287-
"strong",
288-
"tt",
289-
"u"
295+
(namespaces["html"], "a"),
296+
(namespaces["html"], "b"),
297+
(namespaces["html"], "big"),
298+
(namespaces["html"], "code"),
299+
(namespaces["html"], "em"),
300+
(namespaces["html"], "font"),
301+
(namespaces["html"], "i"),
302+
(namespaces["html"], "nobr"),
303+
(namespaces["html"], "s"),
304+
(namespaces["html"], "small"),
305+
(namespaces["html"], "strike"),
306+
(namespaces["html"], "strong"),
307+
(namespaces["html"], "tt"),
308+
(namespaces["html"], "u")
290309
))
291310

292311
specialElements = frozenset((
293-
"address",
294-
"area",
295-
"article",
296-
"aside",
297-
"base",
298-
"basefont",
299-
"bgsound",
300-
"blockquote",
301-
"body",
302-
"br",
303-
"center",
304-
"col",
305-
"colgroup",
306-
"command",
307-
"datagrid",
308-
"dd",
309-
"details",
310-
"dialog",
311-
"dir",
312-
"div",
313-
"dl",
314-
"dt",
315-
"embed",
316-
"event-source",
317-
"fieldset",
318-
"figure",
319-
"footer",
320-
"form",
321-
"frame",
322-
"frameset",
323-
"h1",
324-
"h2",
325-
"h3",
326-
"h4",
327-
"h5",
328-
"h6",
329-
"head",
330-
"header",
331-
"hr",
332-
"iframe",
312+
(namespaces["html"], "address"),
313+
(namespaces["html"], "area"),
314+
(namespaces["html"], "article"),
315+
(namespaces["html"], "aside"),
316+
(namespaces["html"], "base"),
317+
(namespaces["html"], "basefont"),
318+
(namespaces["html"], "bgsound"),
319+
(namespaces["html"], "blockquote"),
320+
(namespaces["html"], "body"),
321+
(namespaces["html"], "br"),
322+
(namespaces["html"], "center"),
323+
(namespaces["html"], "col"),
324+
(namespaces["html"], "colgroup"),
325+
(namespaces["html"], "command"),
326+
(namespaces["html"], "datagrid"),
327+
(namespaces["html"], "dd"),
328+
(namespaces["html"], "details"),
329+
(namespaces["html"], "dialog"),
330+
(namespaces["html"], "dir"),
331+
(namespaces["html"], "div"),
332+
(namespaces["html"], "dl"),
333+
(namespaces["html"], "dt"),
334+
(namespaces["html"], "embed"),
335+
(namespaces["html"], "event-source"),
336+
(namespaces["html"], "fieldset"),
337+
(namespaces["html"], "figure"),
338+
(namespaces["html"], "footer"),
339+
(namespaces["html"], "form"),
340+
(namespaces["html"], "frame"),
341+
(namespaces["html"], "frameset"),
342+
(namespaces["html"], "h1"),
343+
(namespaces["html"], "h2"),
344+
(namespaces["html"], "h3"),
345+
(namespaces["html"], "h4"),
346+
(namespaces["html"], "h5"),
347+
(namespaces["html"], "h6"),
348+
(namespaces["html"], "head"),
349+
(namespaces["html"], "header"),
350+
(namespaces["html"], "hr"),
351+
(namespaces["html"], "iframe"),
333352
# Note that image is commented out in the spec as "this isn't an
334-
# element that can end up on the stack, so it doesn't matter"
335-
"image",
336-
"img",
337-
"input",
338-
"isindex",
339-
"li",
340-
"link",
341-
"listing",
342-
"menu",
343-
"meta",
344-
"nav",
345-
"noembed",
346-
"noframes",
347-
"noscript",
348-
"ol",
349-
"optgroup",
350-
"option",
351-
"p",
352-
"param",
353-
"plaintext",
354-
"pre",
355-
"script",
356-
"section",
357-
"select",
358-
"spacer",
359-
"style",
360-
"tbody",
361-
"textarea",
362-
"tfoot",
363-
"thead",
364-
"title",
365-
"tr",
366-
"ul",
367-
"wbr"
353+
# element that can end up on the stack, so it doesn't matter"
354+
(namespaces["html"], "image"),
355+
(namespaces["html"], "img"),
356+
(namespaces["html"], "input"),
357+
(namespaces["html"], "isindex"),
358+
(namespaces["html"], "li"),
359+
(namespaces["html"], "link"),
360+
(namespaces["html"], "listing"),
361+
(namespaces["html"], "menu"),
362+
(namespaces["html"], "meta"),
363+
(namespaces["html"], "nav"),
364+
(namespaces["html"], "noembed"),
365+
(namespaces["html"], "noframes"),
366+
(namespaces["html"], "noscript"),
367+
(namespaces["html"], "ol"),
368+
(namespaces["html"], "optgroup"),
369+
(namespaces["html"], "option"),
370+
(namespaces["html"], "p"),
371+
(namespaces["html"], "param"),
372+
(namespaces["html"], "plaintext"),
373+
(namespaces["html"], "pre"),
374+
(namespaces["html"], "script"),
375+
(namespaces["html"], "section"),
376+
(namespaces["html"], "select"),
377+
(namespaces["html"], "spacer"),
378+
(namespaces["html"], "style"),
379+
(namespaces["html"], "tbody"),
380+
(namespaces["html"], "textarea"),
381+
(namespaces["html"], "tfoot"),
382+
(namespaces["html"], "thead"),
383+
(namespaces["html"], "title"),
384+
(namespaces["html"], "tr"),
385+
(namespaces["html"], "ul"),
386+
(namespaces["html"], "wbr")
368387
))
369388

370389
spaceCharacters = frozenset((
@@ -1097,6 +1116,11 @@
10971116
"ParseError":7
10981117
}
10991118

1119+
1120+
1121+
prefixes = dict([(v,k) for k,v in namespaces.iteritems()])
1122+
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
1123+
11001124
class DataLossWarning(UserWarning):
11011125
pass
11021126

0 commit comments

Comments
 (0)