Skip to content

Commit 10b9010

Browse files
committed
Work in progress
--HG-- branch : svgmathml rename : python/README => python3/README rename : python/html5-tests.patch => python3/html5-tests.patch rename : python/parse.py => python3/parse.py rename : python/setup.py => python3/setup.py rename : python/setup_base.py => python3/setup_base.py rename : python/src/html5lib/__init__.py => python3/src/html5lib/__init__.py rename : python/src/html5lib/constants.py => python3/src/html5lib/constants.py rename : python/src/html5lib/filters/__init__.py => python3/src/html5lib/filters/__init__.py rename : python/src/html5lib/filters/_base.py => python3/src/html5lib/filters/_base.py rename : python/src/html5lib/filters/formfiller.py => python3/src/html5lib/filters/formfiller.py rename : python/src/html5lib/filters/inject_meta_charset.py => python3/src/html5lib/filters/inject_meta_charset.py rename : python/src/html5lib/filters/iso639codes.py => python3/src/html5lib/filters/iso639codes.py rename : python/src/html5lib/filters/lint.py => python3/src/html5lib/filters/lint.py rename : python/src/html5lib/filters/optionaltags.py => python3/src/html5lib/filters/optionaltags.py rename : python/src/html5lib/filters/rfc2046.py => python3/src/html5lib/filters/rfc2046.py rename : python/src/html5lib/filters/rfc3987.py => python3/src/html5lib/filters/rfc3987.py rename : python/src/html5lib/filters/sanitizer.py => python3/src/html5lib/filters/sanitizer.py rename : python/src/html5lib/filters/validator.py => python3/src/html5lib/filters/validator.py rename : python/src/html5lib/filters/whitespace.py => python3/src/html5lib/filters/whitespace.py rename : python/src/html5lib/html5parser.py => python3/src/html5lib/html5parser.py rename : python/src/html5lib/ihatexml.py => python3/src/html5lib/ihatexml.py rename : python/src/html5lib/inputstream.py => python3/src/html5lib/inputstream.py rename : python/src/html5lib/liberalxmlparser.py => python3/src/html5lib/liberalxmlparser.py rename : python/src/html5lib/sanitizer.py => python3/src/html5lib/sanitizer.py rename 
: python/src/html5lib/serializer/__init__.py => python3/src/html5lib/serializer/__init__.py rename : python/src/html5lib/serializer/htmlserializer.py => python3/src/html5lib/serializer/htmlserializer.py rename : python/src/html5lib/serializer/xhtmlserializer.py => python3/src/html5lib/serializer/xhtmlserializer.py rename : python/src/html5lib/tokenizer.py => python3/src/html5lib/tokenizer.py rename : python/src/html5lib/treebuilders/__init__.py => python3/src/html5lib/treebuilders/__init__.py rename : python/src/html5lib/treebuilders/_base.py => python3/src/html5lib/treebuilders/_base.py rename : python/src/html5lib/treebuilders/dom.py => python3/src/html5lib/treebuilders/dom.py rename : python/src/html5lib/treebuilders/etree.py => python3/src/html5lib/treebuilders/etree.py rename : python/src/html5lib/treebuilders/etree_lxml.py => python3/src/html5lib/treebuilders/etree_lxml.py rename : python/src/html5lib/treebuilders/simpletree.py => python3/src/html5lib/treebuilders/simpletree.py rename : python/src/html5lib/treebuilders/soup.py => python3/src/html5lib/treebuilders/soup.py rename : python/src/html5lib/treewalkers/__init__.py => python3/src/html5lib/treewalkers/__init__.py rename : python/src/html5lib/treewalkers/_base.py => python3/src/html5lib/treewalkers/_base.py rename : python/src/html5lib/treewalkers/dom.py => python3/src/html5lib/treewalkers/dom.py rename : python/src/html5lib/treewalkers/etree.py => python3/src/html5lib/treewalkers/etree.py rename : python/src/html5lib/treewalkers/genshistream.py => python3/src/html5lib/treewalkers/genshistream.py rename : python/src/html5lib/treewalkers/lxmletree.py => python3/src/html5lib/treewalkers/lxmletree.py rename : python/src/html5lib/treewalkers/pulldom.py => python3/src/html5lib/treewalkers/pulldom.py rename : python/src/html5lib/treewalkers/simpletree.py => python3/src/html5lib/treewalkers/simpletree.py rename : python/src/html5lib/treewalkers/soup.py => python3/src/html5lib/treewalkers/soup.py rename : 
python/src/html5lib/utils.py => python3/src/html5lib/utils.py rename : python/tests/README => python3/tests/README rename : python/tests/__init__.py => python3/tests/__init__.py rename : python/tests/mockParser.py => python3/tests/mockParser.py rename : python/tests/performance/concatenation.py => python3/tests/performance/concatenation.py rename : python/tests/runparsertests.py => python3/tests/runparsertests.py rename : python/tests/runtests.py => python3/tests/runtests.py rename : python/tests/support.py => python3/tests/support.py rename : python/tests/test_encoding.py => python3/tests/test_encoding.py rename : python/tests/test_formfiller.py => python3/tests/test_formfiller.py rename : python/tests/test_lxp.py => python3/tests/test_lxp.py rename : python/tests/test_parser.py => python3/tests/test_parser.py rename : python/tests/test_parser2.py => python3/tests/test_parser2.py rename : python/tests/test_sanitizer.py => python3/tests/test_sanitizer.py rename : python/tests/test_sax.py => python3/tests/test_sax.py rename : python/tests/test_serializer.py => python3/tests/test_serializer.py rename : python/tests/test_stream.py => python3/tests/test_stream.py rename : python/tests/test_tokenizer.py => python3/tests/test_tokenizer.py rename : python/tests/test_treewalkers.py => python3/tests/test_treewalkers.py rename : python/tests/test_validator.py => python3/tests/test_validator.py rename : python/tests/test_whitespace_filter.py => python3/tests/test_whitespace_filter.py rename : python/tests/us-ascii.html => python3/tests/us-ascii.html rename : python/tests/utf-8-bom.html => python3/tests/utf-8-bom.html rename : python/utils/encodings.py => python3/utils/encodings.py rename : python/utils/extract-entities.py => python3/utils/extract-entities.py rename : python/utils/iana_parse.py => python3/utils/iana_parse.py rename : python/utils/package.py => python3/utils/package.py rename : python/utils/spider.py => python3/utils/spider.py rename : python/validate.py => 
python3/validate.py extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/branches/svgmathml%401265
1 parent b3a5385 commit 10b9010

File tree

8 files changed

+723
-742
lines changed

8 files changed

+723
-742
lines changed

src/html5lib/__init__.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,12 @@
88
99
import html5lib
1010
f = open("my_document.html")
11-
p = html5lib.HTMLParser()
12-
tree = p.parse(f)
11+
tree = html5lib.parse(f)
1312
"""
14-
from html5parser import HTMLParser, parse
15-
from treebuilders import getTreeBuilder
13+
print(__path__)
14+
15+
#from .html5parser import HTMLParser, parse
16+
#from treebuilders import getTreeBuilder
17+
18+
#from .liberalxmlparser import XMLParser, XHTMLParser
1619

17-
from liberalxmlparser import XMLParser, XHTMLParser

src/html5lib/constants.py

Lines changed: 506 additions & 513 deletions
Large diffs are not rendered by default.

src/html5lib/html5parser.py

Lines changed: 16 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,19 @@
1-
try:
2-
frozenset
3-
except NameError:
4-
# Import from the sets module for python 2.3
5-
from sets import Set as set
6-
from sets import ImmutableSet as frozenset
71
import sys
82

9-
import inputstream
10-
import tokenizer
3+
from . import inputstream
4+
from . import tokenizer
115

12-
import treebuilders
13-
from treebuilders._base import Marker
14-
from treebuilders import simpletree
6+
from . import treebuilders
7+
from .treebuilders._base import Marker
8+
from .treebuilders import simpletree
159

16-
import utils
17-
from constants import contentModelFlags, spaceCharacters, asciiUpper2Lower
18-
from constants import scopingElements, formattingElements, specialElements
19-
from constants import headingElements, tableInsertModeElements
20-
from constants import cdataElements, rcdataElements, voidElements
21-
from constants import tokenTypes
10+
from . import utils
11+
12+
from .constants import contentModelFlags, spaceCharacters, asciiUpper2Lower
13+
from .constants import scopingElements, formattingElements, specialElements
14+
from .constants import headingElements, tableInsertModeElements
15+
from .constants import cdataElements, rcdataElements, voidElements
16+
from .constants import tokenTypes
2217

2318
def parse(doc, treebuilderName="simpletree", encoding=None):
2419
tb = treebuilders.getTreeBuilder(treebuilderName)
@@ -307,7 +302,7 @@ def startTagHtml(self, token):
307302
self.parser.parseError("non-html-root")
308303
# XXX Need a check here to see if the first start tag token emitted is
309304
# this token... If it's not, invoke self.parser.parseError().
310-
for attr, value in token["data"].iteritems():
305+
for attr, value in token["data"].items():
311306
if attr not in self.tree.openElements[0].attributes:
312307
self.tree.openElements[0].attributes[attr] = value
313308
self.parser.firstStartTag = False
@@ -821,7 +816,7 @@ def startTagBody(self, token):
821816
or self.tree.openElements[1].name != "body"):
822817
assert self.parser.innerHTML
823818
else:
824-
for attr, value in token["data"].iteritems():
819+
for attr, value in token["data"].items():
825820
if attr not in self.tree.openElements[1].attributes:
826821
self.tree.openElements[1].attributes[attr] = value
827822

@@ -834,7 +829,7 @@ def startTagCloseP(self, token):
834829

835830
def startTagForm(self, token):
836831
if self.tree.formPointer:
837-
self.parser.parseError(u"unexpected-start-tag", {"name": "form"})
832+
self.parser.parseError("unexpected-start-tag", {"name": "form"})
838833
else:
839834
if self.tree.elementInScope("p"):
840835
self.endTagP("p")
@@ -855,7 +850,7 @@ def startTagListItem(self, token):
855850
if i >= 1:
856851
self.parser.parseError(
857852
i == 1 and "missing-end-tag" or "missing-end-tags",
858-
{"name": u", ".join([item.name
853+
{"name": ", ".join([item.name
859854
for item
860855
in poppedNodes[:-1]])})
861856
break

0 commit comments

Comments
 (0)