Skip to content

Commit 333e9b9

Browse files
committed
Move ASCII-lowercasing to a single function
1 parent fbce67d commit 333e9b9

File tree

3 files changed

+20 -14 lines changed (20 additions, 14 deletions)

3 files changed

+20 -14 lines changed (20 additions, 14 deletions)

html5lib/html5parser.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
from . import utils
1818
from .constants import (
19-
spaceCharacters, asciiUpper2Lower,
19+
spaceCharacters,
2020
specialElements, headingElements, cdataElements, rcdataElements,
2121
tokenTypes, tagTokenTypes,
2222
namespaces,
@@ -152,8 +152,7 @@ def isHTMLIntegrationPoint(self, element):
152152
if (element.name == "annotation-xml" and
153153
element.namespace == namespaces["mathml"]):
154154
return ("encoding" in element.attributes and
155-
element.attributes["encoding"].translate(
156-
asciiUpper2Lower) in
155+
utils.ascii_lowercase(element.attributes["encoding"]) in
157156
("text/html", "application/xhtml+xml"))
158157
else:
159158
return (element.namespace, element.name) in htmlIntegrationPointElements
@@ -456,7 +455,7 @@ def processDoctype(self, token):
456455
self.tree.insertDoctype(token)
457456

458457
if publicId != "":
459-
publicId = publicId.translate(asciiUpper2Lower)
458+
publicId = utils.ascii_lowercase(publicId)
460459

461460
if (not correct or token["name"] != "html" or
462461
publicId.startswith(
@@ -1167,7 +1166,7 @@ def startTagInput(self, token):
11671166
framesetOK = self.parser.framesetOK
11681167
self.startTagVoidFormatting(token)
11691168
if ("type" in token["data"] and
1170-
token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
1169+
utils.ascii_lowercase(token["data"]["type"]) == "hidden"):
11711170
# input type=hidden doesn't change framesetOK
11721171
self.parser.framesetOK = framesetOK
11731172

@@ -1729,7 +1728,7 @@ def startTagStyleScript(self, token):
17291728

17301729
def startTagInput(self, token):
17311730
if ("type" in token["data"] and
1732-
token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
1731+
utils.ascii_lowercase(token["data"]["type"]) == "hidden"):
17331732
self.parser.parseError("unexpected-hidden-input-in-table")
17341733
self.tree.insertElement(token)
17351734
# XXX associate with form
@@ -2452,11 +2451,11 @@ def processStartTag(self, token):
24522451
def processEndTag(self, token):
24532452
nodeIndex = len(self.tree.openElements) - 1
24542453
node = self.tree.openElements[-1]
2455-
if node.name.translate(asciiUpper2Lower) != token["name"]:
2454+
if utils.ascii_lowercase(node.name) != token["name"]:
24562455
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
24572456

24582457
while True:
2459-
if node.name.translate(asciiUpper2Lower) == token["name"]:
2458+
if utils.ascii_lowercase(node.name) == token["name"]:
24602459
# XXX this isn't in the spec but it seems necessary
24612460
if self.parser.phase == self.parser.phases["inTableText"]:
24622461
self.parser.phase.flushCharacters()

html5lib/tokenizer.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44

55
from collections import deque
66

7+
from .utils import ascii_lowercase
8+
79
from .constants import spaceCharacters
810
from .constants import entities
9-
from .constants import asciiLetters, asciiUpper2Lower
11+
from .constants import asciiLetters
1012
from .constants import digits, hexDigits, EOF
1113
from .constants import tokenTypes, tagTokenTypes
1214
from .constants import replacementCharacters
@@ -233,7 +235,7 @@ def emitCurrentToken(self):
233235
# Add token to the queue to be yielded
234236
if (token["type"] in tagTokenTypes):
235237
if self.lowercaseElementName:
236-
token["name"] = token["name"].translate(asciiUpper2Lower)
238+
token["name"] = ascii_lowercase(token["name"])
237239
if token["type"] == tokenTypes["EndTag"]:
238240
if token["data"]:
239241
self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -920,7 +922,7 @@ def attributeNameState(self):
920922
# to attributes, but we do want to report the parse error in time.
921923
if self.lowercaseAttrName:
922924
self.currentToken["data"][-1][0] = (
923-
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
925+
ascii_lowercase(self.currentToken["data"][-1][0]))
924926
for name, _ in self.currentToken["data"][:-1]:
925927
if self.currentToken["data"][-1][0] == name:
926928
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
@@ -1341,10 +1343,10 @@ def beforeDoctypeNameState(self):
13411343
def doctypeNameState(self):
13421344
data = self.stream.char()
13431345
if data in spaceCharacters:
1344-
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
1346+
self.currentToken["name"] = ascii_lowercase(self.currentToken["name"])
13451347
self.state = self.afterDoctypeNameState
13461348
elif data == ">":
1347-
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
1349+
self.currentToken["name"] = ascii_lowercase(self.currentToken["name"])
13481350
self.tokenQueue.append(self.currentToken)
13491351
self.state = self.dataState
13501352
elif data == "\u0000":
@@ -1356,7 +1358,7 @@ def doctypeNameState(self):
13561358
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
13571359
"eof-in-doctype-name"})
13581360
self.currentToken["correct"] = False
1359-
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
1361+
self.currentToken["name"] = ascii_lowercase(self.currentToken["name"])
13601362
self.tokenQueue.append(self.currentToken)
13611363
self.state = self.dataState
13621364
else:

html5lib/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
except ImportError:
1111
import xml.etree.ElementTree as default_etree
1212

13+
from .constants import asciiUpper2Lower
1314

1415
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
1516
"surrogatePairToCodepoint", "moduleFactoryFactory",
@@ -125,3 +126,7 @@ def wrapped(*args, **kwargs):
125126
return cache[key]
126127

127128
return wrapped
129+
130+
131+
def ascii_lowercase(s):
132+
return s.translate(asciiUpper2Lower)

0 commit comments

Comments
 (0)