@@ -61,9 +61,9 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
61
61
'replace' )
62
62
63
63
self .queue = deque ([])
64
+ self .readChars = []
64
65
self .errors = []
65
66
66
- self .line = self .col = 0
67
67
self .lineLengths = []
68
68
69
69
#Flag to indicate we may have a CR LF broken across a data chunk
@@ -202,10 +202,33 @@ def detectEncodingMeta(self):
202
202
self .seek (buffer , 0 )
203
203
return parser .getEncoding ()
204
204
205
def updatePosition(self):
    """Fold the characters buffered in self.readChars into self.lineLengths.

    The buffered characters are joined and split on newlines. The first
    piece extends the last entry of self.lineLengths (or starts the list
    when it is empty) and every further piece is appended as a new
    entry. self.readChars is emptied afterwards, so calling this again
    with nothing buffered is a no-op.
    """
    if not self.readChars:
        return
    if self.readChars[-1] is EOF:
        # More than one EOF marker may have been buffered, so the first
        # one is not necessarily the last element; keep only what
        # precedes the first EOF.
        self.readChars = self.readChars[:self.readChars.index(EOF)]
    lines = "".join(self.readChars).split("\n")
    if self.lineLengths:
        # Still on the line that was current at the previous update.
        self.lineLengths[-1] += len(lines[0])
    else:
        self.lineLengths.append(len(lines[0]))
    self.lineLengths.extend(len(line) for line in lines[1:])
    self.readChars = []
223
+
205
224
def position(self):
    """Returns (line, col) of the current position in the stream.

    Pending read characters are flushed via updatePosition() first. The
    line is the number of entries in self.lineLengths and the column is
    its last entry; with no entries recorded the result is (1, 0).
    """
    self.updatePosition()
    if not self.lineLengths:
        return (1, 0)
    return (len(self.lineLengths), self.lineLengths[-1])
209
232
210
233
def char (self ):
211
234
""" Read one character from the stream or queue if available. Return
@@ -219,13 +242,7 @@ def char(self):
219
242
220
243
char = self .queue .popleft ()
221
244
222
- # update position in stream
223
- if char == '\n ' :
224
- self .lineLengths .append (self .col )
225
- self .line += 1
226
- self .col = 0
227
- else :
228
- self .col += 1
245
+ self .readChars .append (char )
229
246
return char
230
247
231
248
def readChunk (self , chunkSize = 10240 ):
@@ -246,6 +263,8 @@ def readChunk(self, chunkSize=10240):
246
263
data = unicode (data )
247
264
self .queue .extend ([char for char in data ])
248
265
266
+ self .updatePosition ()
267
+
249
268
def charsUntil (self , characters , opposite = False ):
250
269
""" Returns a string of characters from the stream up to but not
251
270
including any character in characters or EOF. characters can be
@@ -273,60 +292,27 @@ def charsUntil(self, characters, opposite = False):
273
292
#If the queue doesn't grow we have reached EOF
274
293
if i == len (self .queue ) or self .queue [i ] is EOF :
275
294
break
276
- #XXX- wallpaper over bug in calculation below
277
- #Otherwise change the stream position
278
- if self .queue [i ] == '\n ' :
279
- self .lineLengths .append (self .col )
280
- self .line += 1
281
- self .col = 0
282
- else :
283
- self .col += 1
284
295
285
- rv = u"" .join ([ self .queue .popleft () for c in range (i ) ])
286
-
287
- #Calculate where we now are in the stream
288
- #One possible optimisation would be to store all read characters and
289
- #Calculate this on an as-needed basis (perhaps flushing the read data
290
- #every time we read a new chunk) rather than once per call here and
291
- #in .char()
292
-
293
- #XXX Temporarily disable this because there is a bug
296
+ rv = [self .queue .popleft () for c in range (i )]
294
297
295
- #lines = rv.split("\n")
296
- #
297
- #if lines:
298
- # #Add number of lines passed onto positon
299
- # oldCol = self.col
300
- # self.line += len(lines)-1
301
- # if len(lines) > 1:
302
- # self.col = len(lines[-1])
303
- # else:
304
- # self.col += len(lines[0])
305
- #
306
- # if self.lineLengths and oldCol > 0:
307
- # self.lineLengths[-1] += len(lines[0])
308
- # lines = lines[1:-1]
309
- # else:
310
- # lines = lines[:-1]
311
- #
312
- # for line in lines:
313
- # self.lineLengths.append(len(line))
314
- #
298
+ self .readChars .extend (rv )
315
299
300
+ rv = u"" .join (rv )
316
301
return rv
317
302
318
303
def unget(self, chars):
    """Push chars back onto the front of the queue and rewind the position.

    Pending read characters are flushed via updatePosition() first so
    that self.lineLengths is current. A falsy chars leaves the queue and
    the recorded line lengths untouched.
    """
    self.updatePosition()
    if not chars:
        return
    # extendleft inserts its items one at a time at the left end, so
    # feeding the characters last-to-first leaves them in original order.
    rewound = chars[::-1]
    self.queue.extendleft(rewound)
    # Undo the position bookkeeping, most recently read character first.
    for ch in rewound:
        if ch == '\n':
            # Stepping back over a newline: the line being left must
            # have no characters counted on it.
            assert self.lineLengths[-1] == 0
            self.lineLengths.pop()
        else:
            self.lineLengths[-1] -= 1
330
316
331
317
class EncodingBytes (str ):
332
318
"""String-like object with an assosiated position and various extra methods
0 commit comments