diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
index c49eee0d..79774578 100644
--- a/html5lib/tokenizer.py
+++ b/html5lib/tokenizer.py
@@ -306,7 +306,7 @@ def rcdataState(self):
             # have already been appended to lastFourChars and will have broken
             # any <!-- or --> sequences
         else:
-            chars = self.stream.charsUntil(("&", "<"))
+            chars = self.stream.charsUntil(("&", "<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                     data + chars})
         return True
@@ -1016,7 +1016,7 @@ def attributeValueDoubleQuotedState(self):
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data +\
-                self.stream.charsUntil(("\"", "&"))
+                self.stream.charsUntil(("\"", "&", "\u0000"))
         return True
 
     def attributeValueSingleQuotedState(self):
@@ -1035,7 +1035,7 @@ def attributeValueSingleQuotedState(self):
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data +\
-                self.stream.charsUntil(("'", "&"))
+                self.stream.charsUntil(("'", "&", "\u0000"))
         return True
 
     def attributeValueUnQuotedState(self):
@@ -1060,7 +1060,7 @@ def attributeValueUnQuotedState(self):
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
-                frozenset(("&", ">", '"', "'", "=", "<", "`")) | spaceCharacters)
+                frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters)
         return True
 
     def afterAttributeValueState(self):
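
Rationale for the change above: charsUntil() is the tokenizer's fast path for consuming a run of ordinary characters in a single call. If U+0000 is not in the stop set, any NULL in the run is swallowed by that bulk read and never reaches the state's own per-character handling, which per the HTML specification must treat U+0000 as a parse error and substitute U+FFFD in these states. The standalone sketch below models that interaction; chars_until and rcdata_like_scan are illustrative helpers for this note, not html5lib APIs.

# Illustrative model (not html5lib's actual code): why "\u0000" has to be a
# stop character for the bulk charsUntil()-style read in these states.

def chars_until(stream, position, stop_chars):
    """Consume characters from `stream` starting at `position` until a
    character in `stop_chars` is seen; return (consumed, new_position)."""
    start = position
    while position < len(stream) and stream[position] not in stop_chars:
        position += 1
    return stream[start:position], position

def rcdata_like_scan(stream, stop_chars):
    """Walk `stream` the way an RCDATA-style fast path does: one character at
    a time, with a bulk read for runs of ordinary characters."""
    out, pos = [], 0
    while pos < len(stream):
        char = stream[pos]
        pos += 1
        if char == "\u0000":
            # Per the HTML spec, U+0000 is a parse error here and is emitted
            # as U+FFFD REPLACEMENT CHARACTER.
            out.append("\ufffd")
        elif char in ("&", "<"):
            out.append(char)  # markup handling elided in this sketch
        else:
            chunk, pos = chars_until(stream, pos, stop_chars)
            out.append(char + chunk)
    return "".join(out)

text = "abc\u0000def"
# Without "\u0000" in the stop set, the bulk read swallows the NULL, so the
# per-character branch that should replace it never sees it:
print(repr(rcdata_like_scan(text, ("&", "<"))))            # 'abc\x00def'
# With "\u0000" in the stop set (what this patch adds), the NULL is handled
# individually and replaced with U+FFFD:
print(repr(rcdata_like_scan(text, ("&", "<", "\u0000"))))  # 'abc\ufffddef'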