Skip to content

Commit cfb1e85

Browse files
committed
Compare against EOF (None) using 'is' instead of '==', for a ~3% performance improvement
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401239
1 parent 362c648 commit cfb1e85

File tree

1 file changed

+31
-31
lines changed

1 file changed

+31
-31
lines changed

src/html5lib/tokenizer.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def processSolidusInTag(self):
121121

122122
if self.currentToken["type"] == "StartTag" and data == u">":
123123
self.currentToken["type"] = "EmptyTag"
124-
elif data == EOF:
124+
elif data is EOF:
125125
self.tokenQueue.append({"type": "ParseError", "data":
126126
"EOF following solidus"})
127127
self.state = self.states["data"]
@@ -248,7 +248,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
248248
def entitiesStartingWith(name):
249249
return [e for e in filteredEntityList if e.startswith(name)]
250250

251-
while charStack[-1] != EOF and\
251+
while charStack[-1] is not EOF and\
252252
entitiesStartingWith("".join(charStack)):
253253
charStack.append(self.stream.char())
254254

@@ -344,7 +344,7 @@ def dataState(self):
344344
self.escapeFlag and "".join(self.lastFourChars)[1:] == "-->":
345345
self.escapeFlag = False
346346
self.tokenQueue.append({"type": "Characters", "data":data})
347-
elif data == EOF:
347+
elif data is EOF:
348348
# Tokenization ends.
349349
return False
350350
elif data in spaceCharacters:
@@ -430,7 +430,7 @@ def closeTagOpenState(self):
430430
for x in xrange(len(self.currentToken["name"]) + 1):
431431
charStack.append(self.stream.char())
432432
# Make sure we don't get hit by EOF
433-
if charStack[-1] == EOF:
433+
if charStack[-1] is EOF:
434434
break
435435

436436
# Since this is just for checking. We put the characters back on
@@ -461,7 +461,7 @@ def closeTagOpenState(self):
461461
self.tokenQueue.append({"type": "ParseError", "data":
462462
"expected-closing-tag-but-got-right-bracket"})
463463
self.state = self.states["data"]
464-
elif data == EOF:
464+
elif data is EOF:
465465
self.tokenQueue.append({"type": "ParseError", "data":
466466
"expected-closing-tag-but-got-eof"})
467467
self.tokenQueue.append({"type": "Characters", "data": u"</"})
@@ -484,7 +484,7 @@ def tagNameState(self):
484484
self.stream.charsUntil(asciiLetters, True)
485485
elif data == u">":
486486
self.emitCurrentToken()
487-
elif data == EOF:
487+
elif data is EOF:
488488
self.tokenQueue.append({"type": "ParseError", "data":
489489
"eof-in-tag-name"})
490490
self.emitCurrentToken()
@@ -511,7 +511,7 @@ def beforeAttributeNameState(self):
511511
"invalid-character-in-attribute-name"})
512512
self.currentToken["data"].append([data, ""])
513513
self.state = self.states["attributeName"]
514-
elif data == EOF:
514+
elif data is EOF:
515515
self.tokenQueue.append({"type": "ParseError", "data":
516516
"expected-attribute-name-but-got-eof"})
517517
self.emitCurrentToken()
@@ -545,7 +545,7 @@ def attributeNameState(self):
545545
"invalid-character-in-attribute-name"})
546546
self.currentToken["data"][-1][0] += data
547547
leavingThisState = False
548-
elif data == EOF:
548+
elif data is EOF:
549549
self.tokenQueue.append({"type": "ParseError", "data":
550550
"eof-in-attribute-name"})
551551
self.state = self.states["data"]
@@ -585,7 +585,7 @@ def afterAttributeNameState(self):
585585
elif data == u"/":
586586
if not self.processSolidusInTag():
587587
self.state = self.states["beforeAttributeName"]
588-
elif data == EOF:
588+
elif data is EOF:
589589
self.tokenQueue.append({"type": "ParseError", "data":
590590
"expected-end-of-tag-but-got-eof"})
591591
self.emitCurrentToken()
@@ -612,7 +612,7 @@ def beforeAttributeValueState(self):
612612
"equals-in-unquoted-attribute-value"})
613613
self.currentToken["data"][-1][1] += data
614614
self.state = self.states["attributeValueUnQuoted"]
615-
elif data == EOF:
615+
elif data is EOF:
616616
self.tokenQueue.append({"type": "ParseError", "data":
617617
"expected-attribute-value-but-got-eof"})
618618
self.emitCurrentToken()
@@ -627,7 +627,7 @@ def attributeValueDoubleQuotedState(self):
627627
self.state = self.states["afterAttributeValue"]
628628
elif data == u"&":
629629
self.processEntityInAttribute(u'"')
630-
elif data == EOF:
630+
elif data is EOF:
631631
self.tokenQueue.append({"type": "ParseError", "data":
632632
"eof-in-attribute-value-double-quote"})
633633
self.emitCurrentToken()
@@ -642,7 +642,7 @@ def attributeValueSingleQuotedState(self):
642642
self.state = self.states["afterAttributeValue"]
643643
elif data == u"&":
644644
self.processEntityInAttribute(u"'")
645-
elif data == EOF:
645+
elif data is EOF:
646646
self.tokenQueue.append({"type": "ParseError", "data":
647647
"eof-in-attribute-value-single-quote"})
648648
self.emitCurrentToken()
@@ -663,7 +663,7 @@ def attributeValueUnQuotedState(self):
663663
self.tokenQueue.append({"type": "ParseError", "data":
664664
"unexpected-character-in-unquoted-attribute-value"})
665665
self.currentToken["data"][-1][1] += data
666-
elif data == EOF:
666+
elif data is EOF:
667667
self.tokenQueue.append({"type": "ParseError", "data":
668668
"eof-in-attribute-value-no-quotes"})
669669
self.emitCurrentToken()
@@ -682,7 +682,7 @@ def afterAttributeValueState(self):
682682
elif data == u"/":
683683
if not self.processSolidusInTag():
684684
self.state = self.states["beforeAttributeName"]
685-
elif data == EOF:
685+
elif data is EOF:
686686
self.tokenQueue.append({"type": "ParseError", "data":
687687
"unexpected-EOF-after-attribute-value"})
688688
self.emitCurrentToken()
@@ -738,7 +738,7 @@ def commentStartState(self):
738738
"incorrect-comment"})
739739
self.tokenQueue.append(self.currentToken)
740740
self.state = self.states["data"]
741-
elif data == EOF:
741+
elif data is EOF:
742742
self.tokenQueue.append({"type": "ParseError", "data":
743743
"eof-in-comment"})
744744
self.tokenQueue.append(self.currentToken)
@@ -757,7 +757,7 @@ def commentStartDashState(self):
757757
"incorrect-comment"})
758758
self.tokenQueue.append(self.currentToken)
759759
self.state = self.states["data"]
760-
elif data == EOF:
760+
elif data is EOF:
761761
self.tokenQueue.append({"type": "ParseError", "data":
762762
"eof-in-comment"})
763763
self.tokenQueue.append(self.currentToken)
@@ -772,7 +772,7 @@ def commentState(self):
772772
data = self.stream.char()
773773
if data == u"-":
774774
self.state = self.states["commentEndDash"]
775-
elif data == EOF:
775+
elif data is EOF:
776776
self.tokenQueue.append({"type": "ParseError", "data":
777777
"eof-in-comment"})
778778
self.tokenQueue.append(self.currentToken)
@@ -785,7 +785,7 @@ def commentEndDashState(self):
785785
data = self.stream.char()
786786
if data == u"-":
787787
self.state = self.states["commentEnd"]
788-
elif data == EOF:
788+
elif data is EOF:
789789
self.tokenQueue.append({"type": "ParseError", "data":
790790
"eof-in-comment-end-dash"})
791791
self.tokenQueue.append(self.currentToken)
@@ -808,7 +808,7 @@ def commentEndState(self):
808808
self.tokenQueue.append({"type": "ParseError", "data":
809809
"unexpected-dash-after-double-dash-in-comment"})
810810
self.currentToken["data"] += data
811-
elif data == EOF:
811+
elif data is EOF:
812812
self.tokenQueue.append({"type": "ParseError", "data":
813813
"eof-in-comment-double-dash"})
814814
self.tokenQueue.append(self.currentToken)
@@ -842,7 +842,7 @@ def beforeDoctypeNameState(self):
842842
self.currentToken["correct"] = False
843843
self.tokenQueue.append(self.currentToken)
844844
self.state = self.states["data"]
845-
elif data == EOF:
845+
elif data is EOF:
846846
self.tokenQueue.append({"type": "ParseError", "data":
847847
"expected-doctype-name-but-got-eof"})
848848
self.currentToken["correct"] = False
@@ -860,7 +860,7 @@ def doctypeNameState(self):
860860
elif data == u">":
861861
self.tokenQueue.append(self.currentToken)
862862
self.state = self.states["data"]
863-
elif data == EOF:
863+
elif data is EOF:
864864
self.tokenQueue.append({"type": "ParseError", "data":
865865
"eof-in-doctype-name"})
866866
self.currentToken["correct"] = False
@@ -877,7 +877,7 @@ def afterDoctypeNameState(self):
877877
elif data == u">":
878878
self.tokenQueue.append(self.currentToken)
879879
self.state = self.states["data"]
880-
elif data == EOF:
880+
elif data is EOF:
881881
self.currentToken["correct"] = False
882882
self.stream.unget(data)
883883
self.tokenQueue.append({"type": "ParseError", "data":
@@ -919,7 +919,7 @@ def beforeDoctypePublicIdentifierState(self):
919919
self.currentToken["correct"] = False
920920
self.tokenQueue.append(self.currentToken)
921921
self.state = self.states["data"]
922-
elif data == EOF:
922+
elif data is EOF:
923923
self.tokenQueue.append({"type": "ParseError", "data":
924924
"eof-in-doctype"})
925925
self.currentToken["correct"] = False
@@ -942,7 +942,7 @@ def doctypePublicIdentifierDoubleQuotedState(self):
942942
self.currentToken["correct"] = False
943943
self.tokenQueue.append(self.currentToken)
944944
self.state = self.states["data"]
945-
elif data == EOF:
945+
elif data is EOF:
946946
self.tokenQueue.append({"type": "ParseError", "data":
947947
"eof-in-doctype"})
948948
self.currentToken["correct"] = False
@@ -962,7 +962,7 @@ def doctypePublicIdentifierSingleQuotedState(self):
962962
self.currentToken["correct"] = False
963963
self.tokenQueue.append(self.currentToken)
964964
self.state = self.states["data"]
965-
elif data == EOF:
965+
elif data is EOF:
966966
self.tokenQueue.append({"type": "ParseError", "data":
967967
"eof-in-doctype"})
968968
self.currentToken["correct"] = False
@@ -985,7 +985,7 @@ def afterDoctypePublicIdentifierState(self):
985985
elif data == ">":
986986
self.tokenQueue.append(self.currentToken)
987987
self.state = self.states["data"]
988-
elif data == EOF:
988+
elif data is EOF:
989989
self.tokenQueue.append({"type": "ParseError", "data":
990990
"eof-in-doctype"})
991991
self.currentToken["correct"] = False
@@ -1014,7 +1014,7 @@ def beforeDoctypeSystemIdentifierState(self):
10141014
self.currentToken["correct"] = False
10151015
self.tokenQueue.append(self.currentToken)
10161016
self.state = self.states["data"]
1017-
elif data == EOF:
1017+
elif data is EOF:
10181018
self.tokenQueue.append({"type": "ParseError", "data":
10191019
"eof-in-doctype"})
10201020
self.currentToken["correct"] = False
@@ -1037,7 +1037,7 @@ def doctypeSystemIdentifierDoubleQuotedState(self):
10371037
self.currentToken["correct"] = False
10381038
self.tokenQueue.append(self.currentToken)
10391039
self.state = self.states["data"]
1040-
elif data == EOF:
1040+
elif data is EOF:
10411041
self.tokenQueue.append({"type": "ParseError", "data":
10421042
"eof-in-doctype"})
10431043
self.currentToken["correct"] = False
@@ -1057,7 +1057,7 @@ def doctypeSystemIdentifierSingleQuotedState(self):
10571057
self.currentToken["correct"] = False
10581058
self.tokenQueue.append(self.currentToken)
10591059
self.state = self.states["data"]
1060-
elif data == EOF:
1060+
elif data is EOF:
10611061
self.tokenQueue.append({"type": "ParseError", "data":
10621062
"eof-in-doctype"})
10631063
self.currentToken["correct"] = False
@@ -1074,7 +1074,7 @@ def afterDoctypeSystemIdentifierState(self):
10741074
elif data == ">":
10751075
self.tokenQueue.append(self.currentToken)
10761076
self.state = self.states["data"]
1077-
elif data == EOF:
1077+
elif data is EOF:
10781078
self.tokenQueue.append({"type": "ParseError", "data":
10791079
"eof-in-doctype"})
10801080
self.currentToken["correct"] = False
@@ -1091,7 +1091,7 @@ def bogusDoctypeState(self):
10911091
if data == u">":
10921092
self.tokenQueue.append(self.currentToken)
10931093
self.state = self.states["data"]
1094-
elif data == EOF:
1094+
elif data is EOF:
10951095
# XXX EMIT
10961096
self.stream.unget(data)
10971097
self.tokenQueue.append(self.currentToken)

0 commit comments

Comments
 (0)