@@ -955,7 +955,7 @@ def afterDoctypeNameState(self):
955
955
matched = False
956
956
break
957
957
if matched :
958
- self .state = self .beforeDoctypePublicIdentifierState
958
+ self .state = self .afterDoctypePublicKeywordState
959
959
return True
960
960
elif data in (u"s" , u"S" ):
961
961
matched = True
@@ -966,7 +966,7 @@ def afterDoctypeNameState(self):
966
966
matched = False
967
967
break
968
968
if matched :
969
- self .state = self .beforeDoctypeSystemIdentifierState
969
+ self .state = self .afterDoctypeSystemKeywordState
970
970
return True
971
971
972
972
# All the characters read before the current 'data' will be
@@ -981,6 +981,26 @@ def afterDoctypeNameState(self):
981
981
self .state = self .bogusDoctypeState
982
982
983
983
return True
984
+
985
def afterDoctypePublicKeywordState(self):
    """Consume the character that follows a matched DOCTYPE keyword.

    afterDoctypeNameState switches to this state once its keyword match
    succeeds (presumably the PUBLIC keyword -- the matching loop itself
    is not visible here; confirm against that state).

    * A space character moves on to beforeDoctypePublicIdentifierState.
    * EOF queues an "eof-in-doctype" parse error, marks the current
      token as not correct, queues that token and returns to dataState.
    * Any other character is pushed back onto the stream and handed to
      beforeDoctypePublicIdentifierState; when that character is a quote
      an "unexpected-char-in-doctype" parse error is queued first.

    Always returns True.
    """
    char = self.stream.char()

    if char in spaceCharacters:
        self.state = self.beforeDoctypePublicIdentifierState
        return True

    starts_identifier = char in ("'", '"')
    if not starts_identifier and char is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "eof-in-doctype"})
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if starts_identifier:
        # A quote directly after the keyword is reported as an error,
        # but the character is still reprocessed by the next state.
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "unexpected-char-in-doctype"})
    self.stream.unget(char)
    self.state = self.beforeDoctypePublicIdentifierState
    return True
984
1004
985
1005
def beforeDoctypePublicIdentifierState (self ):
986
1006
data = self .stream .char ()
@@ -1054,17 +1074,47 @@ def doctypePublicIdentifierSingleQuotedState(self):
1054
1074
def afterDoctypePublicIdentifierState(self):
    """Consume the character that follows a DOCTYPE public identifier.

    * A space character switches to
      betweenDoctypePublicAndSystemIdentifiersState.
    * ">" queues the current token and returns to dataState.
    * A double or single quote queues an "unexpected-char-in-doctype"
      parse error, resets the token's "systemId" to the empty string and
      switches to the matching quoted system identifier state.
    * EOF queues an "eof-in-doctype" parse error, marks the token as not
      correct, queues it and returns to dataState.
    * Anything else queues an "unexpected-char-in-doctype" parse error,
      marks the token as not correct and switches to bogusDoctypeState.

    Always returns True.
    """
    char = self.stream.char()

    if char in spaceCharacters:
        self.state = self.betweenDoctypePublicAndSystemIdentifiersState
        return True

    if char == ">":
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if char == '"' or char == "'":
        # The system identifier starts immediately, with no separating
        # space: report the error, then read the identifier as usual.
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "unexpected-char-in-doctype"})
        self.currentToken["systemId"] = u""
        if char == '"':
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        else:
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        return True

    if char is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "eof-in-doctype"})
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    self.tokenQueue.append({"type": tokenTypes["ParseError"],
                            "data": "unexpected-char-in-doctype"})
    self.currentToken["correct"] = False
    self.state = self.bogusDoctypeState
    return True
1103
+
1104
+ def betweenDoctypePublicAndSystemIdentifiersState (self ):
1105
+ data = self .stream .char ()
1106
+ if data in spaceCharacters :
1107
+ pass
1064
1108
elif data == ">" :
1065
1109
self .tokenQueue .append (self .currentToken )
1066
1110
self .state = self .dataState
1067
- elif data is EOF :
1111
+ elif data == '"' :
1112
+ self .currentToken ["systemId" ] = u""
1113
+ self .state = self .doctypeSystemIdentifierDoubleQuotedState
1114
+ elif data == "'" :
1115
+ self .currentToken ["systemId" ] = u""
1116
+ self .state = self .doctypeSystemIdentifierSingleQuotedState
1117
+ elif data == EOF :
1068
1118
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
1069
1119
"eof-in-doctype" })
1070
1120
self .currentToken ["correct" ] = False
@@ -1077,6 +1127,26 @@ def afterDoctypePublicIdentifierState(self):
1077
1127
self .state = self .bogusDoctypeState
1078
1128
return True
1079
1129
1130
def afterDoctypeSystemKeywordState(self):
    """Consume the character that follows a matched DOCTYPE keyword.

    afterDoctypeNameState switches to this state once its second keyword
    match succeeds (presumably the SYSTEM keyword -- the matching loop
    itself is not visible here; confirm against that state).

    * A space character moves on to beforeDoctypeSystemIdentifierState.
    * EOF queues an "eof-in-doctype" parse error, marks the current
      token as not correct, queues that token and returns to dataState.
    * Any other character is pushed back onto the stream and handed to
      beforeDoctypeSystemIdentifierState; when that character is a quote
      an "unexpected-char-in-doctype" parse error is queued first.

    Always returns True.
    """
    char = self.stream.char()

    if char in spaceCharacters:
        self.state = self.beforeDoctypeSystemIdentifierState
        return True

    starts_identifier = char in ("'", '"')
    if not starts_identifier and char is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "eof-in-doctype"})
        self.currentToken["correct"] = False
        self.tokenQueue.append(self.currentToken)
        self.state = self.dataState
        return True

    if starts_identifier:
        # A quote directly after the keyword is reported as an error,
        # but the character is still reprocessed by the next state.
        self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                "data": "unexpected-char-in-doctype"})
    self.stream.unget(char)
    self.state = self.beforeDoctypeSystemIdentifierState
    return True
1149
+
1080
1150
def beforeDoctypeSystemIdentifierState (self ):
1081
1151
data = self .stream .char ()
1082
1152
if data in spaceCharacters :
0 commit comments