Skip to content

Commit 786225f

Browse files
committed
Small optimization to avoid reconstructing a tuple all the time
1 parent 04efd59 commit 786225f

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

src/html5lib/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,8 @@
11161116
"ParseError":7
11171117
}
11181118

1119+
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
1120+
tokenTypes["EmptyTag"]))
11191121

11201122

11211123
prefixes = dict([(v,k) for k,v in namespaces.iteritems()])

src/html5lib/tokenizer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from constants import entitiesWindows1252, entities
1414
from constants import asciiLowercase, asciiLetters, asciiUpper2Lower
1515
from constants import digits, hexDigits, EOF
16-
from constants import tokenTypes
16+
from constants import tokenTypes, tagTokenTypes
1717

1818
from inputstream import HTMLInputStream
1919

@@ -276,8 +276,7 @@ def emitCurrentToken(self):
276276
"""
277277
token = self.currentToken
278278
# Add token to the queue to be yielded
279-
if (token["type"] in (tokenTypes["StartTag"], tokenTypes["EndTag"],
280-
tokenTypes["EmptyTag"])):
279+
if (token["type"] in tagTokenTypes):
281280
if self.lowercaseElementName:
282281
token["name"] = token["name"].translate(asciiUpper2Lower)
283282
if token["type"] == tokenTypes["EndTag"]:
@@ -294,7 +293,7 @@ def emitCurrentToken(self):
294293
# Below are the various tokenizer states worked out.
295294

296295
def dataState(self):
297-
296+
#XXX - consider splitting this state based on the content model flag
298297
data = self.stream.char()
299298

300299
# Keep a charbuffer to handle the escapeFlag

0 commit comments

Comments
 (0)