Skip to content

Commit 67262f8

Browse files
committed
Introduce type hierarchy for tag-related tokens
1 parent bcee8bd commit 67262f8

File tree

2 files changed: 16 additions and 16 deletions.

html5lib/_tokenizer.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -41,21 +41,25 @@ class Characters(Token):
4141
class SpaceCharacters(Token):
4242
pass
4343

44-
class StartTag(Token):
45-
def __init__(self, name, data, self_closing, self_closing_acknowledged=False):
44+
45+
class Tag(Token):
46+
def __init__(self, name, data, self_closing):
4647
self.name = name
4748
self.data = data
4849
self.self_closing = self_closing
50+
51+
class StartTag(Tag):
52+
def __init__(self, name, data, self_closing, self_closing_acknowledged=False):
53+
super(StartTag, self).__init__(name, data, self_closing)
4954
self.self_closing_acknowledged = self_closing_acknowledged
5055

51-
class EndTag(Token):
56+
class EndTag(Tag):
5257
def __init__(self, name, data, self_closing):
53-
self.name = name
54-
self.data = data
55-
self.self_closing = self_closing
58+
super(EndTag, self).__init__(name, data, self_closing)
5659

57-
class EmptyTag(Token):
58-
pass
60+
class EmptyTag(Tag):
61+
def __init__(self, name, data):
62+
super(EmptyTag, self).__init__(name, data, self_closing)
5963

6064
class Comment(Token):
6165
pass
@@ -66,9 +70,6 @@ def __init__(self, data, datavars=None):
6670
self.datavars = datavars or {}
6771

6872

69-
tagTokenTypes = frozenset([StartTag, EndTag, EmptyTag])
70-
71-
7273
class HTMLTokenizer(object):
7374
""" This class takes care of tokenizing HTML.
7475
@@ -266,7 +267,7 @@ def emitCurrentToken(self):
266267
"""
267268
token = self.currentToken
268269
# Add token to the queue to be yielded
269-
if (type(token) in tagTokenTypes):
270+
if isinstance(token, Tag):
270271
token.name = token.name.translate(asciiUpper2Lower)
271272
if isinstance(token, StartTag):
272273
raw = token.data

html5lib/html5parser.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
Comment,
1414
Doctype,
1515
ParseError as TokenizerParseError,
16-
tagTokenTypes,
16+
Tag,
1717
)
1818

1919
from . import treebuilders
@@ -400,9 +400,8 @@ def log(function):
400400
def wrapped(self, *args, **kwargs):
401401
if function.__name__.startswith("process") and len(args) > 0:
402402
token = args[0]
403-
token_type = type(token)
404-
info = {"type": token_type.__name__}
405-
if token_type in tagTokenTypes:
403+
info = {"type": token.__class__.__name__}
404+
if isinstance(token, Tag):
406405
info["name"] = token.name
407406

408407
self.parser.log.append((self.parser.tokenizer.state.__name__,

Comments (0): there are no comments on this commit.