1
1
from . import support
2
2
import unittest , codecs
3
3
4
- from html5lib .inputstream import HTMLInputStream
4
+ from html5lib .inputstream import HTMLInputStream , HTMLUnicodeInputStream , HTMLBinaryInputStream
5
5
6
- class HTMLInputStreamShortChunk (HTMLInputStream ):
6
+ class HTMLUnicodeInputStreamShortChunk (HTMLUnicodeInputStream ):
7
+ _defaultChunkSize = 2
8
+
9
+ class HTMLBinaryInputStreamShortChunk (HTMLBinaryInputStream ):
7
10
_defaultChunkSize = 2
8
11
9
12
class HTMLInputStreamTest (unittest .TestCase ):
10
13
11
14
def test_char_ascii (self ):
12
- stream = HTMLInputStream ("'" , encoding = 'ascii' )
15
+ stream = HTMLInputStream (b "'" , encoding = 'ascii' )
13
16
self .assertEquals (stream .charEncoding [0 ], 'ascii' )
14
17
self .assertEquals (stream .char (), "'" )
15
18
16
- def test_char_null (self ):
17
- stream = HTMLInputStream ("\x00 " )
18
- self .assertEquals (stream .char (), '\ufffd ' )
19
-
20
19
def test_char_utf8 (self ):
21
20
stream = HTMLInputStream ('\u2018 ' .encode ('utf-8' ), encoding = 'utf-8' )
22
21
self .assertEquals (stream .charEncoding [0 ], 'utf-8' )
@@ -30,7 +29,7 @@ def test_char_win1252(self):
30
29
self .assertEquals (stream .char (), "\u2019 " )
31
30
32
31
def test_bom (self ):
33
- stream = HTMLInputStream (codecs .BOM_UTF8 + "'" )
32
+ stream = HTMLInputStream (codecs .BOM_UTF8 + b "'" )
34
33
self .assertEquals (stream .charEncoding [0 ], 'utf-8' )
35
34
self .assertEquals (stream .char (), "'" )
36
35
@@ -40,7 +39,7 @@ def test_utf_16(self):
40
39
self .assertEquals (len (stream .charsUntil (' ' , True )), 1025 )
41
40
42
41
def test_newlines (self ):
43
- stream = HTMLInputStreamShortChunk (codecs .BOM_UTF8 + "a\n bb\r \n ccc\r ddddxe" )
42
+ stream = HTMLBinaryInputStreamShortChunk (codecs .BOM_UTF8 + b "a\n bb\r \n ccc\r ddddxe" )
44
43
self .assertEquals (stream .position (), (1 , 0 ))
45
44
self .assertEquals (stream .charsUntil ('c' ), "a\n bb\n " )
46
45
self .assertEquals (stream .position (), (3 , 0 ))
@@ -50,12 +49,12 @@ def test_newlines(self):
50
49
self .assertEquals (stream .position (), (4 , 5 ))
51
50
52
51
def test_newlines2 (self ):
53
- size = HTMLInputStream ._defaultChunkSize
52
+ size = HTMLUnicodeInputStream ._defaultChunkSize
54
53
stream = HTMLInputStream ("\r " * size + "\n " )
55
54
self .assertEquals (stream .charsUntil ('x' ), "\n " * size )
56
55
57
56
def test_position (self ):
58
- stream = HTMLInputStreamShortChunk (codecs .BOM_UTF8 + "a\n bb\n ccc\n ddde\n f\n gh" )
57
+ stream = HTMLBinaryInputStreamShortChunk (codecs .BOM_UTF8 + b "a\n bb\n ccc\n ddde\n f\n gh" )
59
58
self .assertEquals (stream .position (), (1 , 0 ))
60
59
self .assertEquals (stream .charsUntil ('c' ), "a\n bb\n " )
61
60
self .assertEquals (stream .position (), (3 , 0 ))
@@ -73,7 +72,7 @@ def test_position(self):
73
72
self .assertEquals (stream .position (), (6 , 1 ))
74
73
75
74
def test_position2 (self ):
76
- stream = HTMLInputStreamShortChunk ("abc\n d" )
75
+ stream = HTMLUnicodeInputStreamShortChunk ("abc\n d" )
77
76
self .assertEquals (stream .position (), (1 , 0 ))
78
77
self .assertEquals (stream .char (), "a" )
79
78
self .assertEquals (stream .position (), (1 , 1 ))
0 commit comments