Skip to content

Commit 19e2368

Browse files
committed
Bump the version number for this new working version; add a shortlink follower in utils.py
1 parent a9973f9 commit 19e2368

File tree

3 files changed

+38
-1
lines changed

3 files changed

+38
-1
lines changed

README.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ You can also ask for the span tags to be returned for each entity::
4444
[('https://github.com/ianozsvald/', (57, 87))]
4545

4646

47+
To use the shortlink follower:
48+
49+
>>> from ttp import utils
50+
>>> # assume that result.urls == ['http://t.co/8o0z9BbEMu', u'http://bbc.in/16dClPF']
51+
>>> print utils.follow_shortlinks(result.urls) # pass in list of shortlink URLs
52+
{'http://t.co/8o0z9BbEMu': [u'http://t.co/8o0z9BbEMu', u'http://bbc.in/16dClPF', u'http://www.bbc.co.uk/sport/0/21711199#TWEET650562'], u'http://bbc.in/16dClPF': [u'http://bbc.in/16dClPF', u'http://www.bbc.co.uk/sport/0/21711199#TWEET650562']}
53+
>>> # note that bad shortlink URLs have a key to an empty list (lost/forgotten shortlink URLs don't generate any error)
54+
4755

4856
Installation
4957
------------
@@ -61,6 +69,7 @@ Changelog
6169
---------
6270

6371
* 2013/2/11 1.0.0.2 released to PyPI
72+
* 2013/4/? 1.0.1.0 new working version
6473

6574

6675
Tests

ttp/ttp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import re
2424
import urllib
2525

26-
__version__ = "1.0.0.2"
26+
__version__ = "1.0.1.0"
2727

2828
# Some of this code has been translated from the twitter-text-java library:
2929
# <http://github.com/mzsanford/twitter-text-java>

ttp/utils.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""Unwind short-links e.g. bit.ly, t.co etc to their canonical links"""
4+
import requests
5+
6+
7+
def follow_shortlinks(shortlinks, timeout=10):
    """Follow redirects in list of shortlinks, return dict of resulting URLs.

    Each shortlink is fetched with ``requests.get`` and the chain of
    redirects recorded by the response is collected.

    :param shortlinks: iterable of shortlink URLs to resolve.
    :param timeout: seconds to wait on a request before giving up on that
        shortlink; passed straight to ``requests.get`` (``None`` waits
        indefinitely).
    :returns: dict keyed by the shortlink as given. Each value is the list
        of URLs visited, in order: the URL of every redirect response in
        the history, followed by the URL of the final response. A shortlink
        whose request fails (connection error, timeout, too many
        redirects, malformed URL) maps to an empty list.
    """
    links_followed = {}
    for shortlink in shortlinks:
        try:
            request_result = requests.get(shortlink, timeout=timeout)
        except requests.exceptions.RequestException:
            # A dead or unreachable shortlink must not abort the whole
            # batch; record it with an empty list and carry on.
            links_followed[shortlink] = []
            continue
        # history might look like:
        # (<Response [301]>, <Response [301]>)
        # where each response object has a URL
        all_urls = [redirect.url for redirect in request_result.history]
        # append the final URL that we finish with
        all_urls.append(request_result.url)
        links_followed[shortlink] = all_urls
    return links_followed
24+
25+
26+
if __name__ == "__main__":
    # Manual smoke test: resolve two sample shortlinks over the network and
    # print the resulting mapping.
    shortlinks = ['http://t.co/8o0z9BbEMu', u'http://bbc.in/16dClPF']
    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print(follow_shortlinks(shortlinks))

0 commit comments

Comments
 (0)