Skip to content

Commit 786225f

Browse files
committed
Small optimization to avoid reconstructing a tuple all the time
1 parent 04efd59 commit 786225f

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

src/html5lib/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,8 @@
11161116
"ParseError":7
11171117
}
11181118

1119+
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
1120+
tokenTypes["EmptyTag"]))
11191121

11201122

11211123
prefixes = dict([(v,k) for k,v in namespaces.iteritems()])

src/html5lib/tokenizer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from constants import entitiesWindows1252, entities
1414
from constants import asciiLowercase, asciiLetters, asciiUpper2Lower
1515
from constants import digits, hexDigits, EOF
16-
from constants import tokenTypes
16+
from constants import tokenTypes, tagTokenTypes
1717

1818
from inputstream import HTMLInputStream
1919

@@ -276,8 +276,7 @@ def emitCurrentToken(self):
276276
"""
277277
token = self.currentToken
278278
# Add token to the queue to be yielded
279-
if (token["type"] in (tokenTypes["StartTag"], tokenTypes["EndTag"],
280-
tokenTypes["EmptyTag"])):
279+
if (token["type"] in tagTokenTypes):
281280
if self.lowercaseElementName:
282281
token["name"] = token["name"].translate(asciiUpper2Lower)
283282
if token["type"] == tokenTypes["EndTag"]:
@@ -294,7 +293,7 @@ def emitCurrentToken(self):
294293
# Below are the various tokenizer states worked out.
295294

296295
def dataState(self):
297-
296+
#XXX - consider splitting this state based on the content model flag
298297
data = self.stream.char()
299298

300299
# Keep a charbuffer to handle the escapeFlag

0 commit comments

Comments
 (0)