Skip to content

Commit 71b793a

Browse files
committed
Applied schwa's span addition
(https://github.com/schwa/twitter-text-python/commit/b81cef33a6fc12c837936d60a0b4a86222d45a4f) to add an option to extract the span of the matched parts of the message for URLs, users, etc.
1 parent 82cf864 commit 71b793a

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

ttp.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,9 @@ def __init__(self, urls, users, reply, lists, tags, html):
107107
class Parser(object):
108108
'''A Tweet Parser'''
109109

110-
def __init__(self, max_url_length=30):
110+
def __init__(self, max_url_length=30, include_spans = False):
111111
self._max_url_length = max_url_length
112+
self._include_spans = include_spans
112113

113114
def parse(self, text, html=True):
114115
'''Parse the text and return a ParseResult instance.'''
@@ -171,7 +172,10 @@ def _parse_urls(self, match):
171172
pre, url = mat[:pos], mat[pos:]
172173
full_url = 'http://%s' % url
173174

174-
self._urls.append(url)
175+
if self._include_spans:
176+
self._urls.append((url, match.span(0)))
177+
else:
178+
self._urls.append(url)
175179

176180
if self._html:
177181
return '%s%s' % (pre, self.format_url(full_url,
@@ -185,7 +189,10 @@ def _parse_users(self, match):
185189
return match.group(0)
186190

187191
mat = match.group(0)
188-
self._users.append(mat[1:])
192+
if self._include_spans:
193+
self._users.append((mat[1:], match.span(0)))
194+
else:
195+
self._users.append(mat[1:])
189196

190197
if self._html:
191198
return self.format_username(mat[0:1], mat[1:])
@@ -199,7 +206,10 @@ def _parse_lists(self, match):
199206

200207
pre, at_char, user, list_name = match.groups()
201208
list_name = list_name[1:]
202-
self._lists.append((user, list_name))
209+
if self._include_spans:
210+
self._lists.append((user, list_name, match.span(0)))
211+
else:
212+
self._lists.append((user, list_name))
203213

204214
if self._html:
205215
return '%s%s' % (pre, self.format_list(at_char, user, list_name))
@@ -218,7 +228,10 @@ def _parse_tags(self, match):
218228
break
219229

220230
pre, text = mat[:pos], mat[pos + 1:]
221-
self._tags.append(text)
231+
if self._include_spans:
232+
self._tags.append((text, match.span(0)))
233+
else:
234+
self._tags.append(text)
222235

223236
if self._html:
224237
return '%s%s' % (pre, self.format_tag(tag, text))

0 commit comments

Comments
 (0)