Skip to content

Commit 0c6743c

Browse files
committed
some small improvements to error messages and trim trailing space
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40479
1 parent d4e8f1d commit 0c6743c

File tree

2 files changed

+31
-31
lines changed

2 files changed

+31
-31
lines changed

src/html5parser.py

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,10 @@ class HTMLParser(object):
3737

3838
def __init__(self, strict = False, tree=simpletree.TreeBuilder):
3939
"""
40-
strict - raise an exception when a parse error is encountered
41-
42-
tree - a treebuilder class controlling the type of tree that will be
43-
returned. This class is almost always a subclass of
40+
strict - raise an exception when a parse error is encountered
41+
42+
tree - a treebuilder class controlling the type of tree that will be
43+
returned. This class is almost always a subclass of
4444
html5lib.treebuilders._base.TreeBuilder
4545
"""
4646

@@ -72,10 +72,10 @@ def __init__(self, strict = False, tree=simpletree.TreeBuilder):
7272

7373
def parse(self, stream, encoding=None, innerHTML=False):
7474
"""Parse a HTML document into a well-formed tree
75-
75+
7676
stream - a filelike object or string containing the HTML to be parsed
77-
78-
innerHTML - Are we parsing in innerHTML mode (note innerHTML=True
77+
78+
innerHTML - Are we parsing in innerHTML mode (note innerHTML=True
7979
is not yet supported)
8080
8181
The optional encoding parameter must be a string that indicates
@@ -131,7 +131,7 @@ def atheistParseError(self):
131131

132132
def normalizeToken(self, token):
133133
""" HTML5 specific normalizations to the token stream """
134-
134+
135135
if token["type"] == "EmptyTag":
136136
# When a solidus (/) is encountered within a tag name what happens
137137
# depends on whether the current tag name matches that of a void
@@ -249,7 +249,7 @@ def processComment(self, data):
249249
self.tree.insertComment(data, self.tree.openElements[-1])
250250

251251
def processDoctype(self, name, error):
252-
self.parser.parseError()
252+
self.parser.parseError(_("Unexpected DOCTYPE. Ignored."))
253253

254254
def processSpaceCharacters(self, data):
255255
self.tree.insertText(data)
@@ -436,7 +436,7 @@ def startTagTitle(self, name, attributes):
436436
self.appendToHead(element)
437437
self.tree.openElements.append(element)
438438
self.parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]
439-
439+
440440
def startTagStyle(self, name, attributes):
441441
element = self.tree.createElement(name, attributes)
442442
if self.tree.headPointer is not None and\
@@ -596,11 +596,12 @@ def __init__(self, parser, tree):
596596
(("a", "b", "big", "em", "font", "i", "nobr", "s", "small",
597597
"strike", "strong", "tt", "u"), self.endTagFormatting),
598598
(("marquee", "object", "button"), self.endTagButtonMarqueeObject),
599-
(("caption", "col", "colgroup", "frame", "frameset", "head",
600-
"option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
601-
"tr", "area", "basefont", "bgsound", "br", "embed", "hr",
602-
"image", "img", "input", "isindex", "param", "select", "spacer",
603-
"table", "wbr"),self.endTagMisplacedNone),
599+
(("head", "frameset", "select", "optgroup", "option", "table",
600+
"caption", "colgroup", "col", "thead", "tfoot", "tbody", "tr",
601+
"td", "th"), self.endTagMisplaced),
602+
(("area", "basefont", "bgsound", "br", "embed", "hr", "image",
603+
"img", "input", "isindex", "param", "spacer", "wbr", "frame"),
604+
self.endTagNone),
604605
(("noframes", "noscript", "noembed", "textarea", "xmp", "iframe"),
605606
self.endTagCdataTextAreaXmp),
606607
(("event-source", "section", "nav", "article", "aside", "header",
@@ -647,7 +648,7 @@ def startTagCloseP(self, name, attributes):
647648

648649
def startTagForm(self, name, attributes):
649650
if self.tree.formPointer:
650-
self.parser.parseError()
651+
self.parser.parseError("Unexpected start tag (form). Ignored.")
651652
else:
652653
if self.tree.elementInScope("p"):
653654
self.endTagP("p")
@@ -685,7 +686,8 @@ def startTagHeading(self, name, attributes):
685686
self.endTagP("p")
686687
for item in headingElements:
687688
if self.tree.elementInScope(item):
688-
self.parser.parseError()
689+
self.parser.parseError(_("Unexpected start tag (" + name +\
690+
")."))
689691
item = self.tree.openElements.pop()
690692
while item.name not in headingElements:
691693
item = self.tree.openElements.pop()
@@ -818,7 +820,7 @@ def startTagOther(self, name, attributes):
818820
def endTagP(self, name):
819821
self.tree.generateImpliedEndTags("p")
820822
if self.tree.openElements[-1].name != "p":
821-
self.parser.parseError()
823+
self.parser.parseError("Unexpected end tag (p).")
822824
while self.tree.elementInScope("p"):
823825
self.tree.openElements.pop()
824826

@@ -1007,25 +1009,23 @@ def endTagButtonMarqueeObject(self, name):
10071009
if self.tree.elementInScope(name):
10081010
self.tree.generateImpliedEndTags()
10091011
if self.tree.openElements[-1].name != name:
1010-
self.parser.parseError()
1012+
self.parser.parseError(_(u"Unexpected end tag (" + name +\
1013+
"). Expected other end tag first."))
10111014

10121015
if self.tree.elementInScope(name):
10131016
element = self.tree.openElements.pop()
10141017
while element.name != name:
10151018
element = self.tree.openElements.pop()
10161019
self.tree.clearActiveFormattingElements()
10171020

1018-
def endTagMisplacedNone(self, name):
1019-
""" Elements that should be children of other elements that have a
1020-
different insertion mode or elements that have no end tag;
1021-
here they are ignored
1022-
"caption", "col", "colgroup", "frame", "frameset", "head",
1023-
"option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
1024-
"tr", "noscript, "area", "basefont", "bgsound", "br", "embed",
1025-
"hr", "iframe", "image", "img", "input", "isindex", "noembed",
1026-
"noframes", "param", "select", "spacer", "table", "textarea", "wbr""
1027-
"""
1028-
self.parser.parseError()
1021+
def endTagMisplaced(self, name):
1022+
# This handles elements with end tags in other insertion modes.
1023+
self.parser.parseError(_(u"Unexpected end tag (" + name +\
1024+
u"). Ignored."))
1025+
1026+
def endTagNone(self, name):
1027+
# This handles elements with no end tag.
1028+
self.parser.parseError(_(u"This tag (" + name + u")has no end tag"))
10291029

10301030
def endTagCdataTextAreaXmp(self, name):
10311031
if self.tree.openElements[-1].name == name:

tests/test_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525

2626
#Run the parse error checks
2727
#XXX - ideally want this to be a command line argument
28-
checkParseErrors = False
28+
checkParseErrors = True
2929

3030
def parseTestcase(testString):
3131
testString = testString.split("\n")

0 commit comments

Comments
 (0)