Skip to content

Commit 7a06a5a

Browse files
committed
Placate pyflakes and fix tests after fixing issues.
This is mostly just removing dead variables; however, there are a few substantial changes in here: - Move to using try/except ImportError/else in tests where we check whether some module exists, as the previous form was hiding genuine bugs that manifested themselves as ImportError (the ElementTree treewalker was throwing ImportError when being imported). - Fix the ImportError the ElementTree treewalker was throwing (this, too, was reported as a bug by pyflakes, thereby showing its value). - Fix the assertion failure then shown in the ElementTree treewalker (this is simply a case of it having been missed in the move to Python 3, due to the tests not being run). - Fix the ElementTree treewalker with xml.etree.cElementTree under 2.6, where the ElementTree Comment factory isn't the tag attribute on Comment objects (this is effectively the treewalker equivalent of 3e50aad). - The tokenizer defined scriptDataDoubleEscapedDashState twice; therefore everything that should have been run in this state was in fact run in the scriptDataDoubleEscapedDashDashState. This also adds flake8 to Travis, albeit running it without any PEP 8 errors showing.
1 parent b72b001 commit 7a06a5a

28 files changed

+68
-113
lines changed

.travis.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ env:
1010
- USE_OPTIONAL=true
1111
- USE_OPTIONAL=false
1212

13+
matrix:
14+
exclude:
15+
- python: "2.7"
16+
env: USE_OPTIONAL=false
17+
include:
18+
- python: "2.7"
19+
env: USE_OPTIONAL=false FLAKE=true
20+
1321
before_install:
1422
- git submodule update --init --recursive
1523

@@ -19,9 +27,11 @@ install:
1927
- if [[ $TRAVIS_PYTHON_VERSION != 3.* && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-2.txt --use-mirrors; fi
2028
- if [[ $TRAVIS_PYTHON_VERSION == 3.* && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-3.txt --use-mirrors; fi
2129
- if [[ $TRAVIS_PYTHON_VERSION != "pypy" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-cpython.txt --use-mirrors; fi
30+
- if [[ $FLAKE == "true" ]]; then pip install --use-mirrors flake8; fi
2231

2332
script:
2433
- nosetests
34+
- if [[ $FLAKE == "true" ]]; then flake8 --exclude=E,W html5lib; fi
2535

2636
after_script:
2737
- python debug-info.py

html5lib/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@
1818
from .treewalkers import getTreeWalker
1919
from .serializer import serialize
2020

21+
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
22+
"getTreeWalker", "serialize"]
2123
__version__ = "1.0b1"

html5lib/html5parser.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import absolute_import, division, unicode_literals
22
from six import with_metaclass
33

4-
import sys
54
import types
65

76
from . import inputstream
@@ -14,10 +13,10 @@
1413
from . import utils
1514
from . import constants
1615
from .constants import spaceCharacters, asciiUpper2Lower
17-
from .constants import formattingElements, specialElements
18-
from .constants import headingElements, tableInsertModeElements
19-
from .constants import cdataElements, rcdataElements, voidElements
20-
from .constants import tokenTypes, ReparseException, namespaces, spaceCharacters
16+
from .constants import specialElements
17+
from .constants import headingElements
18+
from .constants import cdataElements, rcdataElements
19+
from .constants import tokenTypes, ReparseException, namespaces
2120
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
2221

2322
def parse(doc, treebuilder="simpletree", encoding=None,
@@ -88,7 +87,7 @@ def _parse(self, stream, innerHTML=False, container="div",
8887
try:
8988
self.mainLoop()
9089
break
91-
except ReparseException as e:
90+
except ReparseException:
9291
self.reset()
9392

9493
def reset(self):
@@ -405,7 +404,7 @@ def parseRCDataRawtext(self, token, contentType):
405404
"""
406405
assert contentType in ("RAWTEXT", "RCDATA")
407406

408-
element = self.tree.insertElement(token)
407+
self.tree.insertElement(token)
409408

410409
if contentType == "RAWTEXT":
411410
self.tokenizer.state = self.tokenizer.rawtextState
@@ -1402,7 +1401,6 @@ def endTagFormatting(self, token):
14021401
"""The much-feared adoption agency algorithm"""
14031402
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
14041403
# XXX Better parseError messages appreciated.
1405-
name = token["name"]
14061404

14071405
# Step 1
14081406
outerLoopCounter = 0
@@ -1620,7 +1618,7 @@ def endTagScript(self, token):
16201618
#document.write works
16211619

16221620
def endTagOther(self, token):
1623-
node = self.tree.openElements.pop()
1621+
self.tree.openElements.pop()
16241622
self.parser.phase = self.parser.originalPhase
16251623

16261624
class InTablePhase(Phase):

html5lib/inputstream.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import codecs
55
import re
6-
import types
76
import sys
87

98
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
@@ -107,8 +106,7 @@ def _readFromBuffer(self, bytes):
107106
bytesToRead = len(bufferedData) - bufferOffset
108107
self.position = [bufferIndex, len(bufferedData)]
109108
bufferIndex += 1
110-
data = rv.append(bufferedData[bufferOffset:
111-
bufferOffset + bytesToRead])
109+
rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
112110
remainingBytes -= bytesToRead
113111

114112
bufferOffset = 0
@@ -290,7 +288,6 @@ def characterErrorsUCS2(self, data):
290288
#Someone picked the wrong compile option
291289
#You lose
292290
skip = False
293-
import sys
294291
for match in invalid_unicode_re.finditer(data):
295292
if skip:
296293
continue

html5lib/serializer/htmlserializer.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
else:
2525
unicode_encode_errors = "htmlentityreplace"
2626

27-
from html5lib.constants import entities
28-
2927
encode_entity_map = {}
3028
is_ucs4 = len("\U0010FFFF") == 1
3129
for k, v in list(entities.items()):
@@ -228,7 +226,6 @@ def serialize(self, treewalker, encoding=None):
228226
in_cdata = True
229227
elif in_cdata:
230228
self.serializeError(_("Unexpected child element of a CDATA element"))
231-
attributes = []
232229
for (attr_namespace,attr_name),attr_value in sorted(token["data"].items()):
233230
#TODO: Add namespace support here
234231
k = attr_name

html5lib/tests/__init__.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1 @@
11
from __future__ import absolute_import, division, unicode_literals
2-
3-
import sys
4-
import os
5-
6-
parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
7-
8-
if not parent_path in sys.path:
9-
sys.path.insert(0, parent_path)
10-
del parent_path
11-
12-
from . import support

html5lib/tests/support.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
os.path.pardir,
1313
os.path.pardir)))
1414

15-
import html5lib
16-
from html5lib import html5parser, treebuilders
15+
from html5lib import treebuilders
1716
del base_path
1817

1918
#Build a dict of avaliable trees
@@ -43,10 +42,11 @@
4342
pass
4443

4544
try:
46-
import lxml.etree as lxml
47-
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
45+
import lxml.etree as lxml # flake8: noqa
4846
except ImportError:
4947
pass
48+
else:
49+
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
5050

5151
def get_data_files(subdirectory, files='*.dat'):
5252
return glob.glob(os.path.join(test_dir,subdirectory,files))

html5lib/tests/test_encoding.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
import re
43
import os
54
import unittest
65

@@ -27,7 +26,7 @@ def test_codec_name_d(self):
2726

2827
def runParserEncodingTest(data, encoding):
2928
p = HTMLParser()
30-
t = p.parse(data, useChardet=False)
29+
p.parse(data, useChardet=False)
3130
encoding = encoding.lower().decode("ascii")
3231

3332
assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage(data, encoding, p.tokenizer.stream.charEncoding[0])
@@ -44,18 +43,17 @@ def runPreScanEncodingTest(data, encoding):
4443

4544
def test_encoding():
4645
for filename in get_data_files("encoding"):
47-
test_name = os.path.basename(filename).replace('.dat',''). \
48-
replace('-','')
4946
tests = TestData(filename, b"data", encoding=None)
5047
for idx, test in enumerate(tests):
5148
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
5249
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
5350

5451
try:
55-
import chardet
52+
import chardet # flake8: noqa
53+
except ImportError:
54+
print("chardet not found, skipping chardet tests")
55+
else:
5656
def test_chardet():
5757
with open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb") as fp:
5858
encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
5959
assert encoding[0].lower() == "big5"
60-
except ImportError:
61-
print("chardet not found, skipping chardet tests")

html5lib/tests/test_parser.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,14 @@
33
import os
44
import sys
55
import traceback
6-
import io
76
import warnings
87
import re
98

109
warnings.simplefilter("error")
1110

1211
from .support import get_data_files
1312
from .support import TestData, convert, convertExpected, treeTypes
14-
import html5lib
15-
from html5lib import html5parser, treebuilders, constants
13+
from html5lib import html5parser, constants
1614

1715
#Run the parse error checks
1816
checkParseErrors = False

html5lib/tests/test_parser2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import io
44

5-
from . import support
5+
from . import support # flake8: noqa
66
from html5lib import html5parser
77
from html5lib.constants import namespaces
88
from html5lib.treebuilders import dom

html5lib/tests/test_sanitizer.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
import os
4-
import sys
5-
import unittest
6-
73
try:
84
import json
95
except ImportError:

html5lib/tests/test_serializer.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
import os
43
import unittest
54
from .support import get_data_files
65

@@ -15,7 +14,7 @@
1514
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
1615

1716
import html5lib
18-
from html5lib import html5parser, serializer, constants
17+
from html5lib import serializer, constants
1918
from html5lib.treewalkers._base import TreeWalker
2019

2120
optionals_loaded = []
@@ -172,6 +171,5 @@ def test_serializer():
172171
for filename in get_data_files('serializer', '*.test'):
173172
with open(filename) as fp:
174173
tests = json.load(fp)
175-
test_name = os.path.basename(filename).replace('.test','')
176174
for index, test in enumerate(tests['tests']):
177175
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})

html5lib/tests/test_stream.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
from . import support
3+
from . import support # flake8: noqa
44
import unittest, codecs
55

66
from html5lib.inputstream import HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream

html5lib/tests/test_tokenizer.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33

44

5-
import sys
6-
import os
7-
import io
85
import warnings
96
import re
107

@@ -176,7 +173,6 @@ def testTokenizer():
176173
for filename in get_data_files('tokenizer', '*.test'):
177174
with open(filename) as fp:
178175
tests = json.load(fp)
179-
testName = os.path.basename(filename).replace(".test","")
180176
if 'tests' in tests:
181177
for index,test in enumerate(tests['tests']):
182178
if 'initialStates' not in test:

html5lib/tests/test_treewalkers.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from .support import get_data_files, TestData, convertExpected
1515

1616
from html5lib import html5parser, treewalkers, treebuilders, constants
17-
from html5lib.filters.lint import Filter as LintFilter, LintError
1817

1918
def PullDOMAdapter(node):
2019
from xml.dom import Node
@@ -58,42 +57,35 @@ def PullDOMAdapter(node):
5857
#"supposed" to work
5958
try:
6059
import xml.etree.ElementTree as ElementTree
60+
except ImportError:
61+
pass
62+
else:
6163
treeTypes['ElementTree'] = \
6264
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
6365
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
64-
except ImportError:
65-
try:
66-
import elementtree.ElementTree as ElementTree
67-
treeTypes['ElementTree'] = \
68-
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
69-
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
70-
except ImportError:
71-
pass
7266

7367
try:
7468
import xml.etree.cElementTree as ElementTree
69+
except ImportError:
70+
pass
71+
else:
7572
treeTypes['cElementTree'] = \
7673
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
7774
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
78-
except ImportError:
79-
try:
80-
import cElementTree as ElementTree
81-
treeTypes['cElementTree'] = \
82-
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
83-
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
84-
except ImportError:
85-
pass
75+
8676

8777
try:
88-
import lxml.etree as ElementTree
78+
import lxml.etree as ElementTree # flake8: noqa
79+
except ImportError:
80+
pass
81+
else:
8982
# treeTypes['lxml_as_etree'] = \
9083
# {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
9184
# "walker": treewalkers.getTreeWalker("etree", ElementTree)}
9285
treeTypes['lxml_native'] = \
9386
{"builder": treebuilders.getTreeBuilder("lxml"),
9487
"walker": treewalkers.getTreeWalker("lxml")}
95-
except ImportError:
96-
pass
88+
9789

9890
#Try whatever etree implementations are available from a list that are
9991
#"supposed" to work

html5lib/tests/tokenizertotree.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import html5lib
99
from . import support
10-
from . import test_parser
1110
from . import test_tokenizer
1211

1312
p = html5lib.HTMLParser()

html5lib/tokenizer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
from collections import deque
99

1010
from .constants import spaceCharacters
11-
from .constants import entitiesWindows1252, entities
12-
from .constants import asciiLowercase, asciiLetters, asciiUpper2Lower
11+
from .constants import entities
12+
from .constants import asciiLetters, asciiUpper2Lower
1313
from .constants import digits, hexDigits, EOF
1414
from .constants import tokenTypes, tagTokenTypes
1515
from .constants import replacementCharacters
@@ -798,7 +798,7 @@ def scriptDataDoubleEscapedDashState(self):
798798
self.state = self.scriptDataDoubleEscapedState
799799
return True
800800

801-
def scriptDataDoubleEscapedDashState(self):
801+
def scriptDataDoubleEscapedDashDashState(self):
802802
data = self.stream.char()
803803
if data == "-":
804804
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})

html5lib/treebuilders/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@
3434

3535
treeBuilderCache = {}
3636

37-
import sys
38-
3937
def getTreeBuilder(treeType, implementation=None, **kwargs):
4038
"""Get a TreeBuilder class for various types of tree with built-in support
4139

0 commit comments

Comments
 (0)