@@ -107,8 +107,9 @@ def __init__(self, urls, users, reply, lists, tags, html):
107
107
class Parser (object ):
108
108
'''A Tweet Parser'''
109
109
110
- def __init__ (self , max_url_length = 30 ):
110
+ def __init__ (self , max_url_length = 30 , include_spans = False ):
111
111
self ._max_url_length = max_url_length
112
+ self ._include_spans = include_spans
112
113
113
114
def parse (self , text , html = True ):
114
115
'''Parse the text and return a ParseResult instance.'''
@@ -171,7 +172,10 @@ def _parse_urls(self, match):
171
172
pre , url = mat [:pos ], mat [pos :]
172
173
full_url = 'http://%s' % url
173
174
174
- self ._urls .append (url )
175
+ if self ._include_spans :
176
+ self ._urls .append ((url , match .span (0 )))
177
+ else :
178
+ self ._urls .append (url )
175
179
176
180
if self ._html :
177
181
return '%s%s' % (pre , self .format_url (full_url ,
@@ -185,7 +189,10 @@ def _parse_users(self, match):
185
189
return match .group (0 )
186
190
187
191
mat = match .group (0 )
188
- self ._users .append (mat [1 :])
192
+ if self ._include_spans :
193
+ self ._users .append ((mat [1 :], match .span (0 )))
194
+ else :
195
+ self ._users .append (mat [1 :])
189
196
190
197
if self ._html :
191
198
return self .format_username (mat [0 :1 ], mat [1 :])
@@ -199,7 +206,10 @@ def _parse_lists(self, match):
199
206
200
207
pre , at_char , user , list_name = match .groups ()
201
208
list_name = list_name [1 :]
202
- self ._lists .append ((user , list_name ))
209
+ if self ._include_spans :
210
+ self ._lists .append ((user , list_name , match .span (0 )))
211
+ else :
212
+ self ._lists .append ((user , list_name ))
203
213
204
214
if self ._html :
205
215
return '%s%s' % (pre , self .format_list (at_char , user , list_name ))
@@ -218,7 +228,10 @@ def _parse_tags(self, match):
218
228
break
219
229
220
230
pre , text = mat [:pos ], mat [pos + 1 :]
221
- self ._tags .append (text )
231
+ if self ._include_spans :
232
+ self ._tags .append ((text , match .span (0 )))
233
+ else :
234
+ self ._tags .append (text )
222
235
223
236
if self ._html :
224
237
return '%s%s' % (pre , self .format_tag (tag , text ))
0 commit comments