diff --git a/MANIFEST.in b/MANIFEST.in index 0bad7a6c..33b31140 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include LICENSE +include README.rst +include requirements*.txt graft html5lib/tests/testdata recursive-include html5lib/tests *.py diff --git a/README b/README deleted file mode 100644 index 12a48f30..00000000 --- a/README +++ /dev/null @@ -1,39 +0,0 @@ -html5lib is a pure-python library for parsing HTML. It is designed to -conform to the HTML 5 specification, which has formalized the error handling -algorithms of popular web browsers. - - = Installation = - -html5lib is packaged with distutils. To install it use: - $ python setup.py install - - = Tests = - -You may wish to check that your installation has been a success by -running the testsuite. All the tests can be run by invoking -runtests.py in the html5lib/tests/ directory - - = Usage = - -Simple usage follows this pattern: - -import html5lib -f = open("mydocument.html") -parser = html5lib.HTMLParser() -document = parser.parse(f) - - -More documentation is avaliable in the docstrings or from -http://code.google.com/p/html5lib/wiki/UserDocumentation - - = Bugs = - -Please report any bugs on the issue tracker: -http://code.google.com/p/html5lib/issues/list - - = Get Involved = - -Contributions to code or documenation are actively encouraged. Submit -patches to the issue tracker or discuss changes on irc in the #whatwg -channel on freenode.net - diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..1c25df37 --- /dev/null +++ b/README.rst @@ -0,0 +1,113 @@ +html5lib +======== + +html5lib is a pure-python library for parsing HTML. It is designed to +conform to the HTML specification, as is implemented by all major web +browsers. + + +Requirements +------------ + +Python 2.6 and above as well as Python 3.0 and above are +supported. Implementations known to work are CPython (as the reference +implementation) and PyPy. 
Jython is known *not* to work due to various +bugs in its implementation of the language. Others such as IronPython +may or may not work; if you wish to try, you are strongly encouraged +to run the testsuite and report back! + +The only required library dependency is ``six``, this can be found +packaged in PyPI. + +Optionally: + +- ``datrie`` can be used to improve parsing performance (though in + almost all cases the improvement is marginal); + +- ``lxml`` is supported as a tree format (for both building and + walking) under CPython (but *not* PyPy where it is known to cause + segfaults); + +- ``genshi`` has a treewalker (but not builder); and + +- ``chardet`` can be used as a fallback when character encoding cannot + be determined (note currently this is only packaged on PyPI for + Python 2, though several package managers include unofficial ports + to Python 3). + + +Installation +------------ + +html5lib is packaged with distutils. To install it use:: + + $ python setup.py install + + +Usage +----- + +Simple usage follows this pattern:: + + import html5lib + with open("mydocument.html", "r") as fp: + document = html5lib.parse(fp) + +or:: + + import html5lib + document = html5lib.parse("<p>Hello World!")
+
+More documentation is available in the docstrings.
+
+
+Bugs
+----
+
+Please report any bugs on the `issue tracker
+