Move ASCII-lowercasing to a single function

gsnedders · gsnedders · commit 333e9b92ae6a · 2016-06-07T17:35:26.000+01:00
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
@@ -16,7 +16,7 @@
 
 from . import utils
 from .constants import (
-    spaceCharacters, asciiUpper2Lower,
+    spaceCharacters,
     specialElements, headingElements, cdataElements, rcdataElements,
     tokenTypes, tagTokenTypes,
     namespaces,
@@ -152,8 +152,7 @@ def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
                 element.namespace == namespaces["mathml"]):
             return ("encoding" in element.attributes and
-                    element.attributes["encoding"].translate(
-                        asciiUpper2Lower) in
+                    utils.ascii_lowercase(element.attributes["encoding"]) in
                     ("text/html", "application/xhtml+xml"))
         else:
             return (element.namespace, element.name) in htmlIntegrationPointElements
@@ -456,7 +455,7 @@ def processDoctype(self, token):
             self.tree.insertDoctype(token)
 
             if publicId != "":
-                publicId = publicId.translate(asciiUpper2Lower)
+                publicId = utils.ascii_lowercase(publicId)
 
             if (not correct or token["name"] != "html" or
                     publicId.startswith(
@@ -1167,7 +1166,7 @@ def startTagInput(self, token):
             framesetOK = self.parser.framesetOK
             self.startTagVoidFormatting(token)
             if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                    utils.ascii_lowercase(token["data"]["type"]) == "hidden"):
                 # input type=hidden doesn't change framesetOK
                 self.parser.framesetOK = framesetOK
 
@@ -1729,7 +1728,7 @@ def startTagStyleScript(self, token):
 
         def startTagInput(self, token):
             if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                    utils.ascii_lowercase(token["data"]["type"]) == "hidden"):
                 self.parser.parseError("unexpected-hidden-input-in-table")
                 self.tree.insertElement(token)
                 # XXX associate with form
@@ -2452,11 +2451,11 @@ def processStartTag(self, token):
         def processEndTag(self, token):
             nodeIndex = len(self.tree.openElements) - 1
             node = self.tree.openElements[-1]
-            if node.name.translate(asciiUpper2Lower) != token["name"]:
+            if utils.ascii_lowercase(node.name) != token["name"]:
                 self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
             while True:
-                if node.name.translate(asciiUpper2Lower) == token["name"]:
+                if utils.ascii_lowercase(node.name) == token["name"]:
                     # XXX this isn't in the spec but it seems necessary
                     if self.parser.phase == self.parser.phases["inTableText"]:
                         self.parser.phase.flushCharacters()
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
@@ -4,9 +4,11 @@
 
 from collections import deque
 
+from .utils import ascii_lowercase
+
 from .constants import spaceCharacters
 from .constants import entities
-from .constants import asciiLetters, asciiUpper2Lower
+from .constants import asciiLetters
 from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
@@ -233,7 +235,7 @@ def emitCurrentToken(self):
         # Add token to the queue to be yielded
         if (token["type"] in tagTokenTypes):
             if self.lowercaseElementName:
-                token["name"] = token["name"].translate(asciiUpper2Lower)
+                token["name"] = ascii_lowercase(token["name"])
             if token["type"] == tokenTypes["EndTag"]:
                 if token["data"]:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -920,7 +922,7 @@ def attributeNameState(self):
             # to attributes, but we do want to report the parse error in time.
             if self.lowercaseAttrName:
                 self.currentToken["data"][-1][0] = (
-                    self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
+                    ascii_lowercase(self.currentToken["data"][-1][0]))
             for name, _ in self.currentToken["data"][:-1]:
                 if self.currentToken["data"][-1][0] == name:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
@@ -1341,10 +1343,10 @@ def beforeDoctypeNameState(self):
     def doctypeNameState(self):
         data = self.stream.char()
         if data in spaceCharacters:
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = ascii_lowercase(self.currentToken["name"])
             self.state = self.afterDoctypeNameState
         elif data == ">":
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = ascii_lowercase(self.currentToken["name"])
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data == "\u0000":
@@ -1356,7 +1358,7 @@ def doctypeNameState(self):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                     "eof-in-doctype-name"})
             self.currentToken["correct"] = False
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = ascii_lowercase(self.currentToken["name"])
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
diff --git a/html5lib/utils.py b/html5lib/utils.py
@@ -10,6 +10,7 @@
 except ImportError:
     import xml.etree.ElementTree as default_etree
 
+from .constants import asciiUpper2Lower
 
 __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
            "surrogatePairToCodepoint", "moduleFactoryFactory",
@@ -125,3 +126,7 @@ def wrapped(*args, **kwargs):
         return cache[key]
 
     return wrapped
+
+
+def ascii_lowercase(s):  # single shared helper for ASCII-lowercasing; returns s.translate(...) result
+    return s.translate(asciiUpper2Lower)  # table imported from .constants; presumably maps only A-Z to a-z -- confirm there