groklearning
diff --git a/parse.py b/parse.py
Lines changed: 22 additions & 16 deletions
diff --git a/src/html5lib/__init__.py b/src/html5lib/__init__.py
Lines changed: 2 additions & 3 deletions
diff --git a/src/html5lib/filters/formfiller.py b/src/html5lib/filters/formfiller.py
Lines changed: 12 additions & 12 deletions
diff --git a/src/html5lib/filters/inject_meta_charset.py b/src/html5lib/filters/inject_meta_charset.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/html5lib/filters/iso639codes.py b/src/html5lib/filters/iso639codes.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/html5lib/filters/lint.py b/src/html5lib/filters/lint.py
Lines changed: 21 additions & 21 deletions
diff --git a/src/html5lib/filters/optionaltags.py b/src/html5lib/filters/optionaltags.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/html5lib/filters/sanitizer.py b/src/html5lib/filters/sanitizer.py
Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3.0
 """usage: %prog [options] filename
 
 Parse a document to a simpletree tree, with optional profiling
@@ -9,11 +9,16 @@
 import os
 from optparse import OptionParser
 
+print(sys.stdout.encoding)
+
 #RELEASE remove
 sys.path.insert(0,os.path.abspath(os.path.join(__file__,'../src')))
 #END RELEASE
-from html5lib import html5parser, liberalxmlparser, sanitizer
+print(sys.path)
+import html5lib
+import html5lib.html5parser as html5parser
 from html5lib.tokenizer import HTMLTokenizer
+from html5lib import treebuilders
 from html5lib import treebuilders, serializer, treewalkers
 from html5lib import constants
 
@@ -27,8 +32,8 @@ def parse():
         # Try opening from the internet
         if f.startswith('http://'):
             try:
-                import urllib, cgi
-                f = urllib.urlopen(f)
+                from urllib import request
+                f = request.urlopen(f)
                 contentType = f.headers.get('content-type')
                 if contentType:
                     (mediaType, params) = cgi.parse_header(contentType)
@@ -39,7 +44,7 @@ def parse():
         else:
             try:
                 # Try opening from file system
-                f = open(f)
+                f = open(f, "rb")
             except IOError: pass
     except IndexError:
         sys.stderr.write("No filename provided. Use -h for help\n")
@@ -64,16 +69,16 @@ def parse():
 
     if opts.profile:
         #XXX should import cProfile instead and use that
-        import hotshot
-        import hotshot.stats
-        prof = hotshot.Profile('stats.prof')
-        prof.runcall(parseMethod, f, encoding=encoding)
+        try:
+            import cProfile as profile
+        except ImportError:
+            import profile
+        import pstats
+        prof = profile.run('parseMethod(f, encoding=encoding)', 'prof.out')
         prof.close()
         # XXX - We should use a temp file here
-        stats = hotshot.stats.load('stats.prof')
-        stats.strip_dirs()
-        stats.sort_stats('time')
-        stats.print_stats()
+        stats = pstats.stats('prof.out')
+        stats.strip_dirs().sort_stats('time').print_stats()
     elif opts.time:
         import time
         t0 = time.time()
@@ -88,13 +93,14 @@ def parse():
 
 def printOutput(parser, document, opts):
     if opts.encoding:
-        print "Encoding:", parser.tokenizer.stream.charEncoding
+        print("Encoding:", parser.tokenizer.stream.charEncoding)
     if opts.xml:
         sys.stdout.write(document.toxml("utf-8"))
     elif opts.tree:
         if not hasattr(document,'__getitem__'): document = [document]
         for fragment in document:
-            print parser.tree.testSerializer(fragment).encode("utf-8")
+            sys.stdout.write(parser.tree.testSerializer(fragment))
+        sys.stdout.write("\n")
     elif opts.hilite:
         sys.stdout.write(document.hilite("utf-8"))
     elif opts.html:
@@ -103,7 +109,7 @@ def printOutput(parser, document, opts):
             kwargs[opt] = getattr(opts,opt)
         if not kwargs['quote_char']: del kwargs['quote_char']
         tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
-        for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding='utf-8'):
+        for text in serializer.HTMLSerializer(**kwargs).serialize(tokens):
             sys.stdout.write(text)
         if not text.endswith('\n'): sys.stdout.write('\n')
     if opts.error:
 
@@ -10,10 +10,9 @@
 f = open("my_document.html")
 tree = html5lib.parse(f) 
 """
-print(__path__)
 
-#from .html5parser import HTMLParser, parse
-#from treebuilders import getTreeBuilder
+from .html5parser import HTMLParser, parse
+from .treebuilders import getTreeBuilder
 
 #from .liberalxmlparser import XMLParser, XHTMLParser
 
@@ -4,10 +4,10 @@
 # See http://www.whatwg.org/specs/web-forms/current-work/#seeding
 #
 
-import _base
+from . import _base
 
 from html5lib.constants import spaceCharacters
-spaceCharacters = u"".join(spaceCharacters)
+spaceCharacters = "".join(spaceCharacters)
 
 class SimpleFilter(_base.Filter):
     def __init__(self, source, fieldStorage):
@@ -29,13 +29,13 @@ def __iter__(self):
                     input_checked_index = -1
                     for i,(n,v) in enumerate(token["data"]):
                         n = n.lower()
-                        if n == u"name":
+                        if n == "name":
                             field_name = v.strip(spaceCharacters)
-                        elif n == u"type":
+                        elif n == "type":
                             field_type = v.strip(spaceCharacters)
-                        elif n == u"checked":
+                        elif n == "checked":
                             input_checked_index = i
-                        elif n == u"value":
+                        elif n == "value":
                             input_value_index = i
 
                     value_list = self.fieldStorage.getlist(field_name)
@@ -45,20 +45,20 @@ def __iter__(self):
                     else:
                         value = ""
 
-                    if field_type in (u"checkbox", u"radio"):
+                    if field_type in ("checkbox", "radio"):
                         if value_list:
                             if token["data"][input_value_index][1] == value:
                                 if input_checked_index < 0:
-                                    token["data"].append((u"checked", u""))
+                                    token["data"].append(("checked", ""))
                                 field_indices[field_name] = field_index + 1
                             elif input_checked_index >= 0:
                                 del token["data"][input_checked_index]
 
-                    elif field_type not in (u"button", u"submit", u"reset"):
+                    elif field_type not in ("button", "submit", "reset"):
                         if input_value_index >= 0:
-                            token["data"][input_value_index] = (u"value", value)
+                            token["data"][input_value_index] = ("value", value)
                         else:
-                            token["data"].append((u"value", value))
+                            token["data"].append(("value", value))
                         field_indices[field_name] = field_index + 1
 
                     field_type = None
@@ -96,7 +96,7 @@ def __iter__(self):
                                 value = ""
                             if (is_select_multiple or not is_selected_option_found) and option_value == value:
                                 if option_selected_index < 0:
-                                    token["data"].append((u"selected", u""))
+                                    token["data"].append(("selected", ""))
                                 field_indices[field_name] = field_index + 1
                                 is_selected_option_found = True
                             elif option_selected_index >= 0:
 
@@ -1,4 +1,4 @@
-import _base
+from . import _base
 
 class Filter(_base.Filter):
     def __init__(self, source, encoding):
@@ -23,7 +23,7 @@ def __iter__(self):
                    content_index = -1
                    for i,(name,value) in enumerate(token["data"]):
                        if name.lower() == 'charset':
-                          token["data"][i] = (u'charset', self.encoding)
+                          token["data"][i] = ('charset', self.encoding)
                           meta_found = True
                           break
                        elif name == 'http-equiv' and value.lower() == 'content-type':
@@ -32,7 +32,7 @@ def __iter__(self):
                            content_index = i
                    else:
                        if has_http_equiv_content_type and content_index >= 0:
-                           token["data"][content_index] = (u'content', u'text/html; charset=%s' % self.encoding)
+                           token["data"][content_index] = ('content', 'text/html; charset=%s' % self.encoding)
                            meta_found = True
 
                 elif token["name"].lower() == "head" and not meta_found:
 
@@ -746,4 +746,4 @@ def isValidLangCode(value):
         lang, sublang = value.split('-', 1)
     else:
         lang = value
-    return isoLang.has_key(unicode.lower(unicode(lang)))
+    return str.lower(str(lang)) in isoLang
@@ -1,11 +1,11 @@
 from gettext import gettext
 _ = gettext
 
-import _base
+from . import _base
 from html5lib.constants import cdataElements, rcdataElements, voidElements
 
 from html5lib.constants import spaceCharacters
-spaceCharacters = u"".join(spaceCharacters)
+spaceCharacters = "".join(spaceCharacters)
 
 class LintError(Exception): pass
 
@@ -19,22 +19,22 @@ def __iter__(self):
                 name = token["name"]
                 if contentModelFlag != "PCDATA":
                     raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
-                if not isinstance(name, unicode):
-                    raise LintError(_(u"Tag name is not a string: %r") % name)
+                if not isinstance(name, str):
+                    raise LintError(_("Tag name is not a string: %r") % name)
                 if not name:
-                    raise LintError(_(u"Empty tag name"))
+                    raise LintError(_("Empty tag name"))
                 if type == "StartTag" and name in voidElements:
-                    raise LintError(_(u"Void element reported as StartTag token: %s") % name)
+                    raise LintError(_("Void element reported as StartTag token: %s") % name)
                 elif type == "EmptyTag" and name not in voidElements:
-                    raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"])
+                    raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"])
                 if type == "StartTag":
                     open_elements.append(name)
                 for name, value in token["data"]:
-                    if not isinstance(name, unicode):
+                    if not isinstance(name, str):
                         raise LintError(_("Attribute name is not a string: %r") % name)
                     if not name:
-                        raise LintError(_(u"Empty attribute name"))
-                    if not isinstance(value, unicode):
+                        raise LintError(_("Empty attribute name"))
+                    if not isinstance(value, str):
                         raise LintError(_("Attribute value is not a string: %r") % value)
                 if name in cdataElements:
                     contentModelFlag = "CDATA"
@@ -45,15 +45,15 @@ def __iter__(self):
 
             elif type == "EndTag":
                 name = token["name"]
-                if not isinstance(name, unicode):
-                    raise LintError(_(u"Tag name is not a string: %r") % name)
+                if not isinstance(name, str):
+                    raise LintError(_("Tag name is not a string: %r") % name)
                 if not name:
-                    raise LintError(_(u"Empty tag name"))
+                    raise LintError(_("Empty tag name"))
                 if name in voidElements:
-                    raise LintError(_(u"Void element reported as EndTag token: %s") % name)
+                    raise LintError(_("Void element reported as EndTag token: %s") % name)
                 start_name = open_elements.pop()
                 if start_name != name:
-                    raise LintError(_(u"EndTag (%s) does not match StartTag (%s)") % (name, start_name))
+                    raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name))
                 contentModelFlag = "PCDATA"
 
             elif type == "Comment":
@@ -62,27 +62,27 @@ def __iter__(self):
 
             elif type in ("Characters", "SpaceCharacters"):
                 data = token["data"]
-                if not isinstance(data, unicode):
+                if not isinstance(data, str):
                     raise LintError(_("Attribute name is not a string: %r") % data)
                 if not data:
-                    raise LintError(_(u"%s token with empty data") % type)
+                    raise LintError(_("%s token with empty data") % type)
                 if type == "SpaceCharacters":
                     data = data.strip(spaceCharacters)
                     if data:
-                        raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: ") % data)
+                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data)
 
             elif type == "Doctype":
                 name = token["name"]
                 if contentModelFlag != "PCDATA":
                     raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
-                if not isinstance(name, unicode):
-                    raise LintError(_(u"Tag name is not a string: %r") % name)
+                if not isinstance(name, str):
+                    raise LintError(_("Tag name is not a string: %r") % name)
                 # XXX: what to do with token["data"] ?
 
             elif type in ("ParseError", "SerializeError"):
                 pass
 
             else:
-                raise LintError(_(u"Unknown token type: %s") % type)
+                raise LintError(_("Unknown token type: %s") % type)
 
             yield token
@@ -1,4 +1,4 @@
-import _base
+from . import _base
 
 class Filter(_base.Filter):
     def slider(self):
 
@@ -1,4 +1,4 @@
-import _base
+from . import _base
 from html5lib.sanitizer import HTMLSanitizerMixin
 
 class Filter(_base.Filter, HTMLSanitizerMixin):
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-import _base`
	`1`	`+from . import _base`
`2`	`2`
`3`	`3`	`class Filter(_base.Filter):`
`4`	`4`	`def slider(self):`