Skip to content

Commit 013b627

Browse files
authored
Add files via upload
Adapted the module for Python 3 and added some compatibility with the official Twitter API. The Tweet class now also has the author ID, a date formatted the same way as in the official API's responses, and a list of expanded URLs from the tweet (if present).
1 parent 25d56a5 commit 013b627

File tree

7 files changed

+174
-0
lines changed

7 files changed

+174
-0
lines changed

got3/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from . import models
2+
from . import manager

got3/manager/TweetCriteria.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
class TweetCriteria:
    """Chainable holder for the filters of one tweet search.

    A fresh instance only carries ``maxTweets`` (0). Every other filter
    attribute (``username``, ``since``, ``until``, ``querySearch``, ``lang``)
    comes into existence when its setter is called, so "not set" means
    "attribute absent" -- TweetManager.getJsonReponse relies on ``hasattr``
    to tell the two apart.
    """

    def __init__(self):
        # TweetManager.getTweets only enforces a limit when this is > 0.
        self.maxTweets = 0

    def _assign(self, field, value):
        """Store ``value`` under ``field`` and hand back ``self`` for chaining."""
        setattr(self, field, value)
        return self

    def setUsername(self, username):
        """Set the ``username`` filter; returns ``self``."""
        return self._assign('username', username)

    def setSince(self, since):
        """Set the ``since`` filter; returns ``self``."""
        return self._assign('since', since)

    def setUntil(self, until):
        """Set the ``until`` filter; returns ``self``."""
        return self._assign('until', until)

    def setQuerySearch(self, querySearch):
        """Set the free-text ``querySearch`` filter; returns ``self``."""
        return self._assign('querySearch', querySearch)

    def setMaxTweets(self, maxTweets):
        """Set the ``maxTweets`` limit; returns ``self``."""
        return self._assign('maxTweets', maxTweets)

    def setLang(self, Lang):
        """Set the ``lang`` filter; returns ``self``."""
        return self._assign('lang', Lang)

got3/manager/TweetManager.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import urllib.request, urllib.parse, urllib.error,urllib.request,urllib.error,urllib.parse,json,re,datetime,sys,http.cookiejar
2+
from .. import models
3+
from pyquery import PyQuery
4+
5+
class TweetManager:
    """Scrape tweets from Twitter's ``/i/search/timeline`` web endpoint.

    Everything is exposed through static methods, so no instance is needed.
    """

    def __init__(self):
        pass

    @staticmethod
    def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100):
        """Collect tweets matching ``tweetCriteria``, one result page at a time.

        Args:
            tweetCriteria: Object carrying the search filters. ``maxTweets``
                must exist (a value <= 0 disables the limit); ``username``,
                ``since``, ``until``, ``querySearch`` and ``lang`` are only
                used when present as attributes.
            receiveBuffer: Optional callable. It is called with a list of
                tweets whenever at least ``bufferLength`` tweets have
                accumulated, and once more with any remainder at the end.
            bufferLength: Batch size that triggers ``receiveBuffer``.

        Returns:
            list: every parsed tweet, in the order it was scraped.
        """
        refreshCursor = ''
        results = []
        resultsAux = []
        cookieJar = http.cookiejar.CookieJar()
        active = True

        while active:
            # Called ``page`` (not ``json``) so the json module is not shadowed.
            page = TweetManager.getJsonReponse(tweetCriteria, refreshCursor, cookieJar)
            if not page['items_html'].strip():
                break

            refreshCursor = page['min_position']
            tweets = PyQuery(page['items_html'])('div.js-stream-tweet')
            if len(tweets) == 0:
                break

            for tweetHTML in tweets:
                tweet = TweetManager._parseTweet(PyQuery(tweetHTML))
                results.append(tweet)
                resultsAux.append(tweet)

                if receiveBuffer and len(resultsAux) >= bufferLength:
                    receiveBuffer(resultsAux)
                    resultsAux = []

                if tweetCriteria.maxTweets > 0 and len(results) >= tweetCriteria.maxTweets:
                    active = False
                    break

        # Flush whatever did not fill a complete batch.
        if receiveBuffer and len(resultsAux) > 0:
            receiveBuffer(resultsAux)

        return results

    @staticmethod
    def _parseTweet(tweetPQ):
        """Build a ``models.Tweet`` from the PyQuery node of a single tweet."""
        rawText = tweetPQ("p.js-tweet-text").text().replace('# ', '#').replace('@ ', '@')
        dateSec = int(tweetPQ("small.time span.js-short-timestamp").attr("data-time"))

        geo = ''
        geoSpan = tweetPQ('span.Tweet-geo')
        if len(geoSpan) > 0:
            geo = geoSpan.attr('title')

        # Only anchors that carry an expanded URL contribute to ``urls``.
        urls = [
            link.attrib["data-expanded-url"]
            for link in tweetPQ("a")
            if "data-expanded-url" in link.attrib
        ]

        tweet = models.Tweet()
        tweet.id = tweetPQ.attr("data-tweet-id")
        tweet.permalink = 'https://twitter.com' + tweetPQ.attr("data-permalink-path")
        tweet.username = tweetPQ("span.username.js-action-profile-name b").text()
        tweet.text = re.sub(r"\s+", " ", rawText)
        # Naive local-time datetime, kept as-is for existing callers.
        tweet.date = datetime.datetime.fromtimestamp(dateSec)
        tweet.formatted_date = TweetManager._formatDate(dateSec)
        tweet.retweets = TweetManager._parseCount(
            tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count"))
        tweet.favorites = TweetManager._parseCount(
            tweetPQ("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count"))
        tweet.mentions = " ".join(re.findall(r'(@\w*)', tweet.text))
        tweet.hashtags = " ".join(re.findall(r'(#\w*)', tweet.text))
        tweet.geo = geo
        tweet.urls = ",".join(urls)
        tweet.author_id = int(tweetPQ("a.js-user-profile-link").attr("data-user-id"))
        return tweet

    @staticmethod
    def _formatDate(dateSec):
        """Render epoch seconds as ``"Wed Oct 10 20:19:24 +0000 2018"``.

        The string advertises a ``+0000`` offset, so the timestamp is
        converted in UTC rather than in the machine's local zone, and the
        time is written with explicit ``%H:%M:%S`` instead of the
        locale-dependent ``%X``.
        """
        utcMoment = datetime.datetime.fromtimestamp(dateSec, tz=datetime.timezone.utc)
        return utcMoment.strftime("%a %b %d %H:%M:%S +0000 %Y")

    @staticmethod
    def _parseCount(rawCount):
        """Turn a counter attribute such as ``"1,234"`` into an int.

        A missing or empty attribute yields 0 instead of raising.
        """
        if not rawCount:
            return 0
        return int(rawCount.replace(",", ""))

    @staticmethod
    def _buildQuery(tweetCriteria):
        """Assemble the unquoted search query from the filters that are set."""
        filters = (
            ('username', ' from:'),
            ('since', ' since:'),
            ('until', ' until:'),
            ('querySearch', ' '),
        )
        return ''.join(
            prefix + getattr(tweetCriteria, attribute)
            for attribute, prefix in filters
            if hasattr(tweetCriteria, attribute)
        )

    @staticmethod
    def getJsonReponse(tweetCriteria, refreshCursor, cookieJar):
        """Fetch one page of search results and return the decoded JSON.

        Args:
            tweetCriteria: Object carrying the optional search filters.
            refreshCursor: Value sent as ``max_position`` ('' for the first page).
            cookieJar: Cookie jar shared between successive requests.

        Returns:
            The parsed JSON payload of the response.

        Raises:
            SystemExit: if the request fails; a hint is printed first.
        """
        url = "https://twitter.com/i/search/timeline?f=realtime&q=%s&src=typd&%smax_position=%s"

        urlGetData = TweetManager._buildQuery(tweetCriteria)
        if hasattr(tweetCriteria, 'lang'):
            urlLang = 'lang=' + tweetCriteria.lang + '&'
        else:
            urlLang = ''
        url = url % (urllib.parse.quote(urlGetData), urlLang, refreshCursor)

        headers = [
            ('Host', "twitter.com"),
            ('User-Agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"),
            ('Accept', "application/json, text/javascript, */*; q=0.01"),
            ('Accept-Language', "de,en-US;q=0.7,en;q=0.3"),
            ('X-Requested-With', "XMLHttpRequest"),
            ('Referer', url),
            ('Connection', "keep-alive"),
        ]

        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookieJar))
        opener.addheaders = headers

        try:
            # The context manager closes the connection even if read() fails.
            with opener.open(url) as response:
                jsonResponse = response.read()
        except Exception:
            # ``Exception`` (not a bare except) lets Ctrl-C propagate normally.
            print("Twitter weird response. Try to see on browser: https://twitter.com/search?q=%s&src=typd" % urllib.parse.quote(urlGetData))
            print("Unexpected error:", sys.exc_info()[0])
            sys.exit()

        return json.loads(jsonResponse.decode())

got3/manager/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .TweetCriteria import TweetCriteria
2+
from .TweetManager import TweetManager

got3/manager/__init__.py.bak

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from TweetCriteria import TweetCriteria
2+
from TweetManager import TweetManager

got3/models/Tweet.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
class Tweet:
    """Plain record for one scraped tweet.

    The constructor sets nothing. The scraper attaches the fields afterwards
    as ordinary attributes: ``id``, ``permalink``, ``username``, ``text``,
    ``date``, ``formatted_date``, ``retweets``, ``favorites``, ``mentions``,
    ``hashtags``, ``geo``, ``urls`` and ``author_id``.
    """

    def __init__(self):
        """Create an empty tweet with no attributes set."""

got3/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .Tweet import Tweet

0 commit comments

Comments
 (0)