Skip to content

Commit 91ae089

Browse files
committed
A little more documentation
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40406
1 parent 1ce4e2d commit 91ae089

File tree

2 files changed

+19
-6
lines changed

2 files changed

+19
-6
lines changed

README

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,17 @@ f = open("mydocument.html")
2424
parser = html5lib.HTMLParser()
2525
document = parser.parse(f)
2626

27-
The returned document is a simple DOM-like structure which can be
28-
navigated using the .parent and .childNode attributes on each
29-
element. Future releases will support a more substantial tree
30-
implementation.
27+
By default, the returned document is a simple DOM-like structure which
28+
can be navigated using the .parent and .childNode attributes on each
29+
element.
30+
31+
It is also possible to generate an ElementTree tree; this requires the use of the "tree" argument to the parser:
32+
33+
from html5lib.treebuilders import etree
34+
parser = html5lib.HTMLParser(tree=etree.TreeBuilder)
35+
36+
Intrepid users may write their own treebuilder implementations - see
37+
help(html5lib.treebuilders) for more information.
3138

3239
More documentation is available in the docstrings.
3340

src/parser.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,17 @@ class HTMLParser(object):
3535
"""Main parser class"""
3636

3737
def __init__(self, strict = False, tree=simpletree.TreeBuilder):
38+
"""HTML parser. Generates a tree structure from a stream of (possibly
39+
malformed) HTML.
40+
strict - raise an exception when a parse error is encountered
41+
tree - a treebuilder class controlling the type of tree that will be
42+
returned (default - html5lib.simpletree.TreeBuilder)"""
43+
3844
# Raise an exception on the first error encountered
3945
self.strict = strict
4046
self.errors = []
4147

42-
self.tree = tree()
48+
self._treeCls = tree
4349

4450
self.phases = {
4551
"initial": InitialPhase(self, self.tree),
@@ -70,7 +76,7 @@ def parse(self, stream, innerHTML=False):
7076
issues have not yet been dealt with."""
7177

7278
# XXX - need to ensure the tree is reset here
73-
# Why? -- anne
79+
self.tree = self._treeCls()
7480

7581
# We don't actually support innerHTML yet but this should allow
7682
# assertions

0 commit comments

Comments
 (0)