Skip to content

Commit 0fcb607

Browse files
committed
A first attempt at fixing html5lib#100.
1 parent e4d12b9 commit 0fcb607

File tree

2 files changed

+91
-91
lines changed

2 files changed

+91
-91
lines changed

html5lib/html5parser.py

Lines changed: 86 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -649,8 +649,8 @@ def __init__(self, parser, tree):
649649
Phase.__init__(self, parser, tree)
650650

651651
self.startTagHandler = utils.MethodDispatcher([
652-
("html", self.startTagHtml),
653-
("head", self.startTagHead)
652+
(("html",), self.startTagHtml),
653+
(("head",), self.startTagHead)
654654
])
655655
self.startTagHandler.default = self.startTagOther
656656

@@ -695,19 +695,19 @@ def __init__(self, parser, tree):
695695
Phase.__init__(self, parser, tree)
696696

697697
self.startTagHandler = utils.MethodDispatcher([
698-
("html", self.startTagHtml),
699-
("title", self.startTagTitle),
698+
(("html",), self.startTagHtml),
699+
(("title",), self.startTagTitle),
700700
(("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
701-
("script", self.startTagScript),
701+
(("script",), self.startTagScript),
702702
(("base", "basefont", "bgsound", "command", "link"),
703703
self.startTagBaseLinkCommand),
704-
("meta", self.startTagMeta),
705-
("head", self.startTagHead)
704+
(("meta",), self.startTagMeta),
705+
(("head",), self.startTagHead)
706706
])
707707
self.startTagHandler.default = self.startTagOther
708708

709709
self. endTagHandler = utils.MethodDispatcher([
710-
("head", self.endTagHead),
710+
(("head",), self.endTagHead),
711711
(("br", "html", "body"), self.endTagHtmlBodyBr)
712712
])
713713
self.endTagHandler.default = self.endTagOther
@@ -794,13 +794,13 @@ def __init__(self, parser, tree):
794794
Phase.__init__(self, parser, tree)
795795

796796
self.startTagHandler = utils.MethodDispatcher([
797-
("html", self.startTagHtml),
798-
("body", self.startTagBody),
799-
("frameset", self.startTagFrameset),
797+
(("html",), self.startTagHtml),
798+
(("body",), self.startTagBody),
799+
(("frameset",), self.startTagFrameset),
800800
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
801801
"style", "title"),
802802
self.startTagFromHead),
803-
("head", self.startTagHead)
803+
(("head",), self.startTagHead)
804804
])
805805
self.startTagHandler.default = self.startTagOther
806806
self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"),
@@ -866,66 +866,66 @@ def __init__(self, parser, tree):
866866
self.processSpaceCharactersNonPre = self.processSpaceCharacters
867867

868868
self.startTagHandler = utils.MethodDispatcher([
869-
("html", self.startTagHtml),
869+
(("html",), self.startTagHtml),
870870
(("base", "basefont", "bgsound", "command", "link", "meta",
871871
"noframes", "script", "style", "title"),
872872
self.startTagProcessInHead),
873-
("body", self.startTagBody),
874-
("frameset", self.startTagFrameset),
873+
(("body",), self.startTagBody),
874+
(("frameset",), self.startTagFrameset),
875875
(("address", "article", "aside", "blockquote", "center", "details",
876-
"details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
877-
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
878-
"section", "summary", "ul"),
876+
"dir", "div", "dl", "fieldset", "figcaption", "figure", "footer",
877+
"header", "hgroup", "main", "menu", "nav", "ol", "p", "section",
878+
"summary", "ul"),
879879
self.startTagCloseP),
880880
(headingElements, self.startTagHeading),
881881
(("pre", "listing"), self.startTagPreListing),
882-
("form", self.startTagForm),
882+
(("form",), self.startTagForm),
883883
(("li", "dd", "dt"), self.startTagListItem),
884-
("plaintext", self.startTagPlaintext),
885-
("a", self.startTagA),
884+
(("plaintext",), self.startTagPlaintext),
885+
(("a",), self.startTagA),
886886
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
887887
"strong", "tt", "u"), self.startTagFormatting),
888-
("nobr", self.startTagNobr),
889-
("button", self.startTagButton),
888+
(("nobr",), self.startTagNobr),
889+
(("button",), self.startTagButton),
890890
(("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
891-
("xmp", self.startTagXmp),
892-
("table", self.startTagTable),
891+
(("xmp",), self.startTagXmp),
892+
(("table",), self.startTagTable),
893893
(("area", "br", "embed", "img", "keygen", "wbr"),
894894
self.startTagVoidFormatting),
895895
(("param", "source", "track"), self.startTagParamSource),
896-
("input", self.startTagInput),
897-
("hr", self.startTagHr),
898-
("image", self.startTagImage),
899-
("isindex", self.startTagIsIndex),
900-
("textarea", self.startTagTextarea),
901-
("iframe", self.startTagIFrame),
902-
(("noembed", "noframes", "noscript"), self.startTagRawtext),
903-
("select", self.startTagSelect),
896+
(("input",), self.startTagInput),
897+
(("hr",), self.startTagHr),
898+
(("image",), self.startTagImage),
899+
(("isindex",), self.startTagIsIndex),
900+
(("textarea",), self.startTagTextarea),
901+
(("iframe",), self.startTagIFrame),
902+
(("noembed", "noscript"), self.startTagRawtext),
903+
(("select",), self.startTagSelect),
904904
(("rp", "rt"), self.startTagRpRt),
905905
(("option", "optgroup"), self.startTagOpt),
906-
(("math"), self.startTagMath),
907-
(("svg"), self.startTagSvg),
906+
(("math",), self.startTagMath),
907+
(("svg",), self.startTagSvg),
908908
(("caption", "col", "colgroup", "frame", "head",
909909
"tbody", "td", "tfoot", "th", "thead",
910910
"tr"), self.startTagMisplaced)
911911
])
912912
self.startTagHandler.default = self.startTagOther
913913

914914
self.endTagHandler = utils.MethodDispatcher([
915-
("body", self.endTagBody),
916-
("html", self.endTagHtml),
915+
(("body",), self.endTagBody),
916+
(("html",), self.endTagHtml),
917917
(("address", "article", "aside", "blockquote", "button", "center",
918918
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
919919
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
920920
"section", "summary", "ul"), self.endTagBlock),
921-
("form", self.endTagForm),
922-
("p", self.endTagP),
921+
(("form",), self.endTagForm),
922+
(("p",), self.endTagP),
923923
(("dd", "dt", "li"), self.endTagListItem),
924924
(headingElements, self.endTagHeading),
925925
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
926926
"strike", "strong", "tt", "u"), self.endTagFormatting),
927927
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
928-
("br", self.endTagBr),
928+
(("br",), self.endTagBr),
929929
])
930930
self.endTagHandler.default = self.endTagOther
931931

@@ -1587,7 +1587,7 @@ def __init__(self, parser, tree):
15871587
self.startTagHandler = utils.MethodDispatcher([])
15881588
self.startTagHandler.default = self.startTagOther
15891589
self.endTagHandler = utils.MethodDispatcher([
1590-
("script", self.endTagScript)])
1590+
(("script",), self.endTagScript)])
15911591
self.endTagHandler.default = self.endTagOther
15921592

15931593
def processCharacters(self, token):
@@ -1619,21 +1619,21 @@ class InTablePhase(Phase):
16191619
def __init__(self, parser, tree):
16201620
Phase.__init__(self, parser, tree)
16211621
self.startTagHandler = utils.MethodDispatcher([
1622-
("html", self.startTagHtml),
1623-
("caption", self.startTagCaption),
1624-
("colgroup", self.startTagColgroup),
1625-
("col", self.startTagCol),
1622+
(("html",), self.startTagHtml),
1623+
(("caption",), self.startTagCaption),
1624+
(("colgroup",), self.startTagColgroup),
1625+
(("col",), self.startTagCol),
16261626
(("tbody", "tfoot", "thead"), self.startTagRowGroup),
16271627
(("td", "th", "tr"), self.startTagImplyTbody),
1628-
("table", self.startTagTable),
1628+
(("table",), self.startTagTable),
16291629
(("style", "script"), self.startTagStyleScript),
1630-
("input", self.startTagInput),
1631-
("form", self.startTagForm)
1630+
(("input",), self.startTagInput),
1631+
(("form",), self.startTagForm)
16321632
])
16331633
self.startTagHandler.default = self.startTagOther
16341634

16351635
self.endTagHandler = utils.MethodDispatcher([
1636-
("table", self.endTagTable),
1636+
(("table",), self.endTagTable),
16371637
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
16381638
"tfoot", "th", "thead", "tr"), self.endTagIgnore)
16391639
])
@@ -1810,15 +1810,15 @@ def __init__(self, parser, tree):
18101810
Phase.__init__(self, parser, tree)
18111811

18121812
self.startTagHandler = utils.MethodDispatcher([
1813-
("html", self.startTagHtml),
1813+
(("html",), self.startTagHtml),
18141814
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
18151815
"thead", "tr"), self.startTagTableElement)
18161816
])
18171817
self.startTagHandler.default = self.startTagOther
18181818

18191819
self.endTagHandler = utils.MethodDispatcher([
1820-
("caption", self.endTagCaption),
1821-
("table", self.endTagTable),
1820+
(("caption",), self.endTagCaption),
1821+
(("table",), self.endTagTable),
18221822
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
18231823
"thead", "tr"), self.endTagIgnore)
18241824
])
@@ -1882,14 +1882,14 @@ def __init__(self, parser, tree):
18821882
Phase.__init__(self, parser, tree)
18831883

18841884
self.startTagHandler = utils.MethodDispatcher([
1885-
("html", self.startTagHtml),
1886-
("col", self.startTagCol)
1885+
(("html",), self.startTagHtml),
1886+
(("col",), self.startTagCol)
18871887
])
18881888
self.startTagHandler.default = self.startTagOther
18891889

18901890
self.endTagHandler = utils.MethodDispatcher([
1891-
("colgroup", self.endTagColgroup),
1892-
("col", self.endTagCol)
1891+
(("colgroup",), self.endTagColgroup),
1892+
(("col",), self.endTagCol)
18931893
])
18941894
self.endTagHandler.default = self.endTagOther
18951895

@@ -1945,8 +1945,8 @@ class InTableBodyPhase(Phase):
19451945
def __init__(self, parser, tree):
19461946
Phase.__init__(self, parser, tree)
19471947
self.startTagHandler = utils.MethodDispatcher([
1948-
("html", self.startTagHtml),
1949-
("tr", self.startTagTr),
1948+
(("html",), self.startTagHtml),
1949+
(("tr",), self.startTagTr),
19501950
(("td", "th"), self.startTagTableCell),
19511951
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
19521952
self.startTagTableOther)
@@ -1955,7 +1955,7 @@ def __init__(self, parser, tree):
19551955

19561956
self.endTagHandler = utils.MethodDispatcher([
19571957
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
1958-
("table", self.endTagTable),
1958+
(("table",), self.endTagTable),
19591959
(("body", "caption", "col", "colgroup", "html", "td", "th",
19601960
"tr"), self.endTagIgnore)
19611961
])
@@ -2043,16 +2043,16 @@ class InRowPhase(Phase):
20432043
def __init__(self, parser, tree):
20442044
Phase.__init__(self, parser, tree)
20452045
self.startTagHandler = utils.MethodDispatcher([
2046-
("html", self.startTagHtml),
2046+
(("html",), self.startTagHtml),
20472047
(("td", "th"), self.startTagTableCell),
20482048
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
20492049
"tr"), self.startTagTableOther)
20502050
])
20512051
self.startTagHandler.default = self.startTagOther
20522052

20532053
self.endTagHandler = utils.MethodDispatcher([
2054-
("tr", self.endTagTr),
2055-
("table", self.endTagTable),
2054+
(("tr",), self.endTagTr),
2055+
(("table",), self.endTagTable),
20562056
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
20572057
(("body", "caption", "col", "colgroup", "html", "td", "th"),
20582058
self.endTagIgnore)
@@ -2132,7 +2132,7 @@ class InCellPhase(Phase):
21322132
def __init__(self, parser, tree):
21332133
Phase.__init__(self, parser, tree)
21342134
self.startTagHandler = utils.MethodDispatcher([
2135-
("html", self.startTagHtml),
2135+
(("html",), self.startTagHtml),
21362136
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
21372137
"thead", "tr"), self.startTagTableOther)
21382138
])
@@ -2208,19 +2208,19 @@ def __init__(self, parser, tree):
22082208
Phase.__init__(self, parser, tree)
22092209

22102210
self.startTagHandler = utils.MethodDispatcher([
2211-
("html", self.startTagHtml),
2212-
("option", self.startTagOption),
2213-
("optgroup", self.startTagOptgroup),
2214-
("select", self.startTagSelect),
2211+
(("html",), self.startTagHtml),
2212+
(("option",), self.startTagOption),
2213+
(("optgroup",), self.startTagOptgroup),
2214+
(("select",), self.startTagSelect),
22152215
(("input", "keygen", "textarea"), self.startTagInput),
2216-
("script", self.startTagScript)
2216+
(("script",), self.startTagScript)
22172217
])
22182218
self.startTagHandler.default = self.startTagOther
22192219

22202220
self.endTagHandler = utils.MethodDispatcher([
2221-
("option", self.endTagOption),
2222-
("optgroup", self.endTagOptgroup),
2223-
("select", self.endTagSelect)
2221+
(("option",), self.endTagOption),
2222+
(("optgroup",), self.endTagOptgroup),
2223+
(("select",), self.endTagSelect)
22242224
])
22252225
self.endTagHandler.default = self.endTagOther
22262226

@@ -2462,11 +2462,13 @@ def __init__(self, parser, tree):
24622462
Phase.__init__(self, parser, tree)
24632463

24642464
self.startTagHandler = utils.MethodDispatcher([
2465-
("html", self.startTagHtml)
2465+
(("html",), self.startTagHtml)
24662466
])
24672467
self.startTagHandler.default = self.startTagOther
24682468

2469-
self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
2469+
self.endTagHandler = utils.MethodDispatcher([
2470+
(("html",), self.endTagHtml)
2471+
])
24702472
self.endTagHandler.default = self.endTagOther
24712473

24722474
def processEOF(self):
@@ -2510,15 +2512,15 @@ def __init__(self, parser, tree):
25102512
Phase.__init__(self, parser, tree)
25112513

25122514
self.startTagHandler = utils.MethodDispatcher([
2513-
("html", self.startTagHtml),
2514-
("frameset", self.startTagFrameset),
2515-
("frame", self.startTagFrame),
2516-
("noframes", self.startTagNoframes)
2515+
(("html",), self.startTagHtml),
2516+
(("frameset",), self.startTagFrameset),
2517+
(("frame",), self.startTagFrame),
2518+
(("noframes",), self.startTagNoframes)
25172519
])
25182520
self.startTagHandler.default = self.startTagOther
25192521

25202522
self.endTagHandler = utils.MethodDispatcher([
2521-
("frameset", self.endTagFrameset)
2523+
(("frameset",), self.endTagFrameset)
25222524
])
25232525
self.endTagHandler.default = self.endTagOther
25242526

@@ -2567,13 +2569,13 @@ def __init__(self, parser, tree):
25672569
Phase.__init__(self, parser, tree)
25682570

25692571
self.startTagHandler = utils.MethodDispatcher([
2570-
("html", self.startTagHtml),
2571-
("noframes", self.startTagNoframes)
2572+
(("html",), self.startTagHtml),
2573+
(("noframes",), self.startTagNoframes)
25722574
])
25732575
self.startTagHandler.default = self.startTagOther
25742576

25752577
self.endTagHandler = utils.MethodDispatcher([
2576-
("html", self.endTagHtml)
2578+
(("html",), self.endTagHtml)
25772579
])
25782580
self.endTagHandler.default = self.endTagOther
25792581

@@ -2603,7 +2605,7 @@ def __init__(self, parser, tree):
26032605
Phase.__init__(self, parser, tree)
26042606

26052607
self.startTagHandler = utils.MethodDispatcher([
2606-
("html", self.startTagHtml)
2608+
(("html",), self.startTagHtml)
26072609
])
26082610
self.startTagHandler.default = self.startTagOther
26092611

@@ -2641,8 +2643,8 @@ def __init__(self, parser, tree):
26412643
Phase.__init__(self, parser, tree)
26422644

26432645
self.startTagHandler = utils.MethodDispatcher([
2644-
("html", self.startTagHtml),
2645-
("noframes", self.startTagNoFrames)
2646+
(("html",), self.startTagHtml),
2647+
(("noframes",), self.startTagNoFrames)
26462648
])
26472649
self.startTagHandler.default = self.startTagOther
26482650

html5lib/utils.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,12 @@ def __init__(self, items=()):
2929
# Using _dictEntries instead of directly assigning to self is about
3030
# twice as fast. Please do careful performance testing before changing
3131
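# anything here. NOTE(review): the loop added in this change assigns to self directly and no longer builds _dictEntries, so the "twice as fast" claim above should be re-measured or this comment updated.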
32-
_dictEntries = []
32+
dict.__init__(self)
3333
for name, value in items:
34-
if type(name) in (list, tuple, frozenset, set):
35-
for item in name:
36-
_dictEntries.append((item, value))
37-
else:
38-
_dictEntries.append((name, value))
39-
dict.__init__(self, _dictEntries)
34+
assert isinstance(name, (list, tuple, frozenset, set)), repr(name)
35+
for item in name:
36+
assert item not in self, "%s duplicated" % item
37+
self[item] = value
4038
self.default = None
4139

4240
def __getitem__(self, key):

0 commit comments

Comments
 (0)