
Commit 47df02b (parent: 81b3aaf)

Move lowercasing to _ascii module

The current _ascii module is a placeholder, because I accidentally deleted its original implementation (but I needed to rewrite it to be even quicker anyway!)
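
For context, asciiUpper2Lower (imported from html5lib.constants in the diff below) is a table for str.translate that maps only the code points A–Z to a–z. It is presumably built along these lines; the construction here is illustrative, not copied from the library:

    import string

    # Maps the code points of "A".."Z" to "a".."z"; every other character
    # (including non-ASCII letters) is left untouched by str.translate.
    upper_to_lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}

    assert "DOCTYPE".translate(upper_to_lower) == "doctype"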

2 files changed: 13 additions (+), 6 deletions (−)

html5lib/_ascii.py (5 additions, 0 deletions)

@@ -0,0 +1,5 @@
+from .constants import asciiUpper2Lower
+
+
+def ascii_lower(s):
+    return s.translate(asciiUpper2Lower)
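
A short usage note: HTML matches tag, attribute, and doctype names ASCII-case-insensitively, so the helper must lowercase only A–Z. A plain str.lower() would also fold non-ASCII characters such as U+0130 (İ). A quick check, assuming the package layout from this commit:

    from html5lib._ascii import ascii_lower

    assert ascii_lower("DOCTYPE") == "doctype"
    # Non-ASCII code points pass through unchanged...
    assert ascii_lower("SCRİPT") == "scrİpt"
    # ...whereas str.lower() folds U+0130 to "i" plus a combining dot above.
    assert "SCRİPT".lower() != "scrİpt"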

html5lib/_tokenizer.py (8 additions, 6 deletions)

@@ -5,9 +5,11 @@
 from collections import deque, OrderedDict
 from sys import version_info
 
+from ._ascii import ascii_lower
+
 from .constants import spaceCharacters
 from .constants import entities
-from .constants import asciiLetters, asciiUpper2Lower
+from .constants import asciiLetters
 from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
@@ -245,7 +247,7 @@ def emitCurrentToken(self):
         token = self.currentToken
         # Add token to the queue to be yielded
         if (token["type"] in tagTokenTypes):
-            token["name"] = token["name"].translate(asciiUpper2Lower)
+            token["name"] = ascii_lower(token["name"])
             if token["type"] == tokenTypes["StartTag"]:
                 raw = token["data"]
                 data = attributeMap(raw)
@@ -939,7 +941,7 @@ def attributeNameState(self):
             # start tag token is emitted so values can still be safely appended
             # to attributes, but we do want to report the parse error in time.
             self.currentToken["data"][-1][0] = (
-                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
+                ascii_lower(self.currentToken["data"][-1][0]))
             for name, _ in self.currentToken["data"][:-1]:
                 if self.currentToken["data"][-1][0] == name:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
@@ -1360,10 +1362,10 @@ def beforeDoctypeNameState(self):
     def doctypeNameState(self):
         data = self.stream.char()
         if data in spaceCharacters:
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = ascii_lower(self.currentToken["name"])
             self._state = self.afterDoctypeNameState
         elif data == ">":
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = ascii_lower(self.currentToken["name"])
             self.tokenQueue.append(self.currentToken)
             self._state = self.dataState
         elif data == "\u0000":
@@ -1375,7 +1377,7 @@
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                     "eof-in-doctype-name"})
             self.currentToken["correct"] = False
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = ascii_lower(self.currentToken["name"])
             self.tokenQueue.append(self.currentToken)
             self._state = self.dataState
         else:
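
The commit message promises a quicker rewrite of the helper. One plausible shape for that, purely as a sketch and not the author's actual follow-up: CPython's str.lower() has a fast path for ASCII-only strings and agrees with the translate table there, so the table is needed only as the fallback (str.isascii() requires Python 3.7+):

    import string

    # Illustrative stand-in for html5lib.constants.asciiUpper2Lower.
    _upper_to_lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}

    def ascii_lower(s):
        # On a pure-ASCII string, str.lower() touches exactly A-Z, so it
        # matches the translate table while avoiding a per-character dict
        # lookup; non-ASCII input falls back to the table.
        if s.isascii():
            return s.lower()
        return s.translate(_upper_to_lower)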
