Skip to content

Commit 002347d

Browse files
committed
Fix handling of <li> to new spec
1 parent 7fd10d9 commit 002347d

File tree

2 files changed

+63
-68
lines changed

2 files changed

+63
-68
lines changed

src/html5lib/html5parser.py

Lines changed: 49 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,32 +1006,24 @@ def startTagForm(self, token):
10061006

10071007
def startTagListItem(self, token):
    """Handle an ``<li>``, ``<dd>`` or ``<dt>`` start tag.

    Marks the document as no longer frameset-ok, implicitly closes the
    nearest open list item of the same family, closes an open ``<p>`` if
    one is in scope, and finally inserts an element for *token*.

    *token* is the start-tag token; ``token["name"]`` must be ``"li"``,
    ``"dd"`` or ``"dt"`` (any other name raises ``KeyError`` from the
    lookup below).
    """
    self.parser.framesetOK = False

    # Names that terminate the upward search: an <li> only closes another
    # <li>, while <dd> and <dt> close each other.  Lists are used (not a
    # bare string) so the ``in`` test below is a membership test rather
    # than a substring test.
    stopNamesMap = {"li": ["li"],
                    "dt": ["dt", "dd"],
                    "dd": ["dt", "dd"]}
    stopNames = stopNamesMap[token["name"]]

    # Walk the stack of open elements from the most recently opened one.
    for node in reversed(self.tree.openElements):
        if node.name in stopNames:
            # Close the earlier item by feeding an implied end tag to the
            # current phase.  That call pops from the stack being
            # iterated, so the loop must stop immediately afterwards.
            self.parser.phase.processEndTag(
                impliedTagToken(node.name, "EndTag"))
            break
        # A scoping or special element ends the search, except for
        # address, div and p, which are looked through.
        if (node.nameTuple in (scopingElements | specialElements) and
                node.name not in ("address", "div", "p")):
            break

    if self.tree.elementInScope("p"):
        self.parser.phase.processEndTag(
            impliedTagToken("p", "EndTag"))

    self.tree.insertElement(token)
10361028

10371029
def startTagPlaintext(self, token):
@@ -1335,14 +1327,18 @@ def endTagForm(self, token):
13351327
self.tree.openElements.remove(node)
13361328

13371329
def endTagListItem(self, token):
1338-
# AT Could merge this with the Block case
1339-
if self.tree.elementInScope(token["name"]):
1340-
self.tree.generateImpliedEndTags(token["name"])
1341-
1342-
if self.tree.openElements[-1].name != token["name"]:
1343-
self.parser.parseError("end-tag-too-early", {"name": token["name"]})
1344-
1345-
if self.tree.elementInScope(token["name"]):
1330+
if token["name"] == "li":
1331+
variant = "list"
1332+
else:
1333+
variant = None
1334+
if not self.tree.elementInScope(token["name"], variant=variant):
1335+
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1336+
else:
1337+
self.tree.generateImpliedEndTags(exclude = token["name"])
1338+
if self.tree.openElements[-1].name != token["name"]:
1339+
self.parser.parseError(
1340+
"end-tag-too-early",
1341+
{"name": token["name"]})
13461342
node = self.tree.openElements.pop()
13471343
while node.name != token["name"]:
13481344
node = self.tree.openElements.pop()
@@ -1667,7 +1663,7 @@ def startTagOther(self, token):
16671663
self.tree.insertFromTable = False
16681664

16691665
def endTagTable(self, token):
1670-
if self.tree.elementInScope("table", True):
1666+
if self.tree.elementInScope("table", variant="table"):
16711667
self.tree.generateImpliedEndTags()
16721668
if self.tree.openElements[-1].name != "table":
16731669
self.parser.parseError("end-tag-too-early-named",
@@ -1759,7 +1755,7 @@ def __init__(self, parser, tree):
17591755
self.endTagHandler.default = self.endTagOther
17601756

17611757
def ignoreEndTagCaption(self):
    """Return True when no ``caption`` element is in table scope.

    The result is simply the negation of
    ``self.tree.elementInScope("caption", variant="table")``.
    """
    return not self.tree.elementInScope("caption", variant="table")
17631759

17641760
def processEOF(self):
17651761
self.parser.phases["inBody"].processEOF()
@@ -1930,9 +1926,9 @@ def startTagTableCell(self, token):
19301926

19311927
def startTagTableOther(self, token):
19321928
# XXX AT Any ideas on how to share this with endTagTable?
1933-
if (self.tree.elementInScope("tbody", True) or
1934-
self.tree.elementInScope("thead", True) or
1935-
self.tree.elementInScope("tfoot", True)):
1929+
if (self.tree.elementInScope("tbody", variant="table") or
1930+
self.tree.elementInScope("thead", variant="table") or
1931+
self.tree.elementInScope("tfoot", variant="table")):
19361932
self.clearStackToTableBodyContext()
19371933
self.endTagTableRowGroup(
19381934
impliedTagToken(self.tree.openElements[-1].name))
@@ -1945,7 +1941,7 @@ def startTagOther(self, token):
19451941
self.parser.phases["inTable"].processStartTag(token)
19461942

19471943
def endTagTableRowGroup(self, token):
1948-
if self.tree.elementInScope(token["name"], True):
1944+
if self.tree.elementInScope(token["name"], variant="table"):
19491945
self.clearStackToTableBodyContext()
19501946
self.tree.openElements.pop()
19511947
self.parser.phase = self.parser.phases["inTable"]
@@ -1954,9 +1950,9 @@ def endTagTableRowGroup(self, token):
19541950
{"name": token["name"]})
19551951

19561952
def endTagTable(self, token):
1957-
if (self.tree.elementInScope("tbody", True) or
1958-
self.tree.elementInScope("thead", True) or
1959-
self.tree.elementInScope("tfoot", True)):
1953+
if (self.tree.elementInScope("tbody", variant="table") or
1954+
self.tree.elementInScope("thead", variant="table") or
1955+
self.tree.elementInScope("tfoot", variant="table")):
19601956
self.clearStackToTableBodyContext()
19611957
self.endTagTableRowGroup(
19621958
impliedTagToken(self.tree.openElements[-1].name))
@@ -2002,7 +1998,7 @@ def clearStackToTableRowContext(self):
20021998
self.tree.openElements.pop()
20031999

20042000
def ignoreEndTagTr(self):
    """Return True when no ``tr`` element is in table scope.

    The result is simply the negation of
    ``self.tree.elementInScope("tr", variant="table")``.
    """
    return not self.tree.elementInScope("tr", variant="table")
20062002

20072003
# the rest
20082004
def processEOF(self):
@@ -2049,7 +2045,7 @@ def endTagTable(self, token):
20492045
self.parser.phase.processEndTag(token)
20502046

20512047
def endTagTableRowGroup(self, token):
2052-
if self.tree.elementInScope(token["name"], True):
2048+
if self.tree.elementInScope(token["name"], variant="table"):
20532049
self.endTagTr("tr")
20542050
self.parser.phase.processEndTag(token)
20552051
else:
@@ -2083,9 +2079,9 @@ def __init__(self, parser, tree):
20832079

20842080
# helper
20852081
def closeCell(self):
    """Close the open table cell, if there is one.

    Checks for ``td`` and then ``th`` in table scope; for the first one
    found, an implied end tag of that name is passed to
    ``endTagTableCell``.  Does nothing when neither is in table scope.
    """
    for cellName in ("td", "th"):
        if self.tree.elementInScope(cellName, variant="table"):
            self.endTagTableCell(impliedTagToken(cellName))
            # Only one cell is closed per call (td takes precedence).
            break
20902086

20912087
# the rest
@@ -2096,8 +2092,8 @@ def processCharacters(self, token):
20962092
self.parser.phases["inBody"].processCharacters(token)
20972093

20982094
def startTagTableOther(self, token):
2099-
if (self.tree.elementInScope("td", True) or
2100-
self.tree.elementInScope("th", True)):
2095+
if (self.tree.elementInScope("td", variant="table") or
2096+
self.tree.elementInScope("th", variant="table")):
21012097
self.closeCell()
21022098
self.parser.phase.processStartTag(token)
21032099
else:
@@ -2112,7 +2108,7 @@ def startTagOther(self, token):
21122108
self.parser.phases["inBody"].processStartTag
21132109

21142110
def endTagTableCell(self, token):
2115-
if self.tree.elementInScope(token["name"], True):
2111+
if self.tree.elementInScope(token["name"], variant="table"):
21162112
self.tree.generateImpliedEndTags(token["name"])
21172113
if self.tree.openElements[-1].name != token["name"]:
21182114
self.parser.parseError("unexpected-cell-end-tag",
@@ -2132,7 +2128,7 @@ def endTagIgnore(self, token):
21322128
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
21332129

21342130
def endTagImply(self, token):
2135-
if self.tree.elementInScope(token["name"], True):
2131+
if self.tree.elementInScope(token["name"], variant="table"):
21362132
self.closeCell()
21372133
self.parser.phase.processEndTag(token)
21382134
else:
@@ -2197,7 +2193,7 @@ def startTagSelect(self, token):
21972193

21982194
def startTagInput(self, token):
21992195
self.parser.parseError("unexpected-input-in-select")
2200-
if self.tree.elementInScope("select", True):
2196+
if self.tree.elementInScope("select", variant="table"):
22012197
self.endTagSelect("select")
22022198
self.parser.phase.processStartTag(token)
22032199

@@ -2226,7 +2222,7 @@ def endTagOptgroup(self, token):
22262222
{"name": "optgroup"})
22272223

22282224
def endTagSelect(self, token):
2229-
if self.tree.elementInScope("select", True):
2225+
if self.tree.elementInScope("select", variant="table"):
22302226
node = self.tree.openElements.pop()
22312227
while node.name != "select":
22322228
node = self.tree.openElements.pop()
@@ -2238,7 +2234,7 @@ def endTagSelect(self, token):
22382234
def endTagTableElements(self, token):
22392235
self.parser.parseError("unexpected-end-tag-in-select",
22402236
{"name": token["name"]})
2241-
if self.tree.elementInScope(token["name"], True):
2237+
if self.tree.elementInScope(token["name"], variant="table"):
22422238
self.endTagSelect("select")
22432239
self.parser.phase.processEndTag(token)
22442240

@@ -2279,7 +2275,7 @@ def startTagOther(self, token):
22792275

22802276
def endTagTable(self, token):
22812277
self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
2282-
if self.tree.elementInScope(token["name"], tableVariant=True):
2278+
if self.tree.elementInScope(token["name"], variant="table"):
22832279
self.endTagOther(impliedTagToken("select"))
22842280
self.parser.phase.processEndTag(token)
22852281

src/html5lib/treebuilders/_base.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import warnings
2-
from html5lib.constants import scopingElements, tableInsertModeElements
2+
from html5lib.constants import scopingElements, tableInsertModeElements, namespaces
33
try:
44
frozenset
55
except NameError:
@@ -130,24 +130,23 @@ def reset(self):
130130

131131
self.document = self.documentClass()
132132

133-
def elementInScope(self, target, variant=None):
    """Return True if an open element named *target* is in scope.

    The stack of open elements is searched from the most recently opened
    element downwards.  The search succeeds at the first element whose
    name equals *target* and fails at the first element that is a scope
    boundary for the requested *variant*:

    * ``None``    -- the elements in ``scopingElements``
    * ``"list"``  -- ``scopingElements`` plus the HTML ``ol`` and ``ul``
    * ``"table"`` -- only the HTML ``html`` and ``table`` elements

    For compatibility with the earlier boolean ``tableVariant`` argument,
    ``True`` is treated as ``"table"`` and ``False`` as ``None``.  Any
    other value raises ``KeyError``.
    """
    # Callers written against the previous boolean flag passed True/False
    # positionally; map those onto the new variant names.
    if variant is True:
        variant = "table"
    elif variant is False:
        variant = None

    # Pick the boundary set for this variant only, rather than building
    # every variant's set on each call.
    htmlNamespace = namespaces["html"]
    if variant is None:
        boundaries = scopingElements
    elif variant == "list":
        boundaries = scopingElements | set([(htmlNamespace, "ol"),
                                            (htmlNamespace, "ul")])
    elif variant == "table":
        boundaries = set([(htmlNamespace, "html"),
                          (htmlNamespace, "table")])
    else:
        raise KeyError(variant)

    for node in reversed(self.openElements):
        if node.name == target:
            return True
        elif node.nameTuple in boundaries:
            return False

    # The walk is expected to hit either the target or a boundary element
    # before the stack is exhausted.
    assert False  # We should never reach this point
152151

153152
def reconstructActiveFormattingElements(self):

0 commit comments

Comments
 (0)