Skip to content

Commit 2a332b9

Browse files
committed
Save some calls to len() in the input stream (improves tokeniser performance by maybe 1-2%)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401234
1 parent 43a2727 commit 2a332b9

File tree

1 file changed

+5 -2 lines changed

1 file changed

+5
-2
lines changed

src/html5lib/inputstream.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
7070
'replace')
7171

7272
self.chunk = u""
73+
self.chunkSize = 0
7374
self.chunkOffset = 0
7475
self.ungetBuffer = [] # reversed list of chars from unget()
7576
self.readChars = []
@@ -255,7 +256,7 @@ def char(self):
255256
self.readChars.append(char)
256257
return char
257258

258-
if self.chunkOffset >= len(self.chunk):
259+
if self.chunkOffset >= self.chunkSize:
259260
if not self.readChunk():
260261
return EOF
261262

@@ -267,6 +268,7 @@ def char(self):
267268

268269
def readChunk(self, chunkSize=_defaultChunkSize):
269270
self.chunk = u""
271+
self.chunkSize = 0
270272
self.chunkOffset = 0
271273

272274
data = self.dataStream.read(chunkSize)
@@ -291,6 +293,7 @@ def readChunk(self, chunkSize=_defaultChunkSize):
291293

292294
data = unicode(data)
293295
self.chunk = data
296+
self.chunkSize = len(data)
294297

295298
self.updatePosition()
296299
return True
@@ -329,7 +332,7 @@ def charsUntil(self, characters, opposite = False):
329332
m = chars.match(self.chunk, self.chunkOffset)
330333
# If not everything matched, return everything up to the part that didn't match
331334
end = m.end()
332-
if end != len(self.chunk):
335+
if end != self.chunkSize:
333336
rv.append(self.chunk[self.chunkOffset:end])
334337
self.chunkOffset = end
335338
break

0 commit comments

Comments
 (0)