Skip to content

Commit 9b688a8

Browse files
committed
add proxyCheck
1 parent c61d4a7 commit 9b688a8

File tree

2 files changed

+159
-0
lines changed

2 files changed

+159
-0
lines changed

SQLAlchemyUsage.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
from sqlalchemy import create_engine
3+
from sqlalchemy.ext.declarative import declarative_base
4+
from sqlalchemy import Column,Integer,String,DateTime
5+
from sqlalchemy.orm import sessionmaker
6+
7+
# Database engine; the URL selects the MySQL dialect with the mysql-python
# (MySQLdb) driver and a utf8 connection charset.
engine = create_engine(
    'mysql+mysqldb://root:123456@localhost:3306/scrapy?charset=utf8'
)

# Base class that the declarative ORM models inherit from.
Base = declarative_base()

# Session factory bound to the engine, and one module-level session created
# from it.
Session = sessionmaker(bind=engine)
session = Session()
15+
class ProxyIP(Base):
    """Declarative ORM model mapped to the ``ProxyIPs`` table."""

    __tablename__ = "ProxyIPs"

    # Table structure.
    # Primary key; presumably the proxy endpoint as "host:port" -- confirm
    # against the code that populates this table.
    IpPort = Column(String(20), primary_key=True)
    Country = Column(String(20))
    # NOTE(review): unit of Speed is not established in this file.
    Speed = Column(Integer)
    Type = Column(String(10))
    Level = Column(String(20))
    LastCheck = Column(DateTime)
    # Single-character flag column.
    GoogleProxy = Column(String(1))
26+
27+
28+
# Local import: this script section is the only user of ``datetime``.
import datetime

# Find every subclass of Base and create the corresponding tables in the
# database according to each subclass's structure.
Base.metadata.create_all(engine)

# Sample row. LastCheck is a real datetime (the column type is DateTime)
# rather than a string, so the value does not depend on driver-side string
# coercion.
#
# merge() inserts the row, or updates the existing row with the same primary
# key, so re-running this script no longer fails with a duplicate-key error.
# It returns the session-attached instance, which is what ``ip`` refers to.
try:
    ip = session.merge(ProxyIP(
        IpPort='127.0.0.1:8080',
        Country='CN',
        Speed=500,
        Type='HTTP',
        Level='Anonymous',
        LastCheck=datetime.datetime(2016, 11, 20),
        GoogleProxy='Y',
    ))
    session.commit()
except Exception:
    # Leave the module-level session usable after a failed flush/commit,
    # then let the error surface.
    session.rollback()
    raise

proxyCheck.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# -*- coding: utf-8 -*-
2+
import httplib
3+
import urllib2
4+
import random
5+
import datetime
6+
from lxml import etree as ET
7+
8+
# Pool of browser User-Agent strings; one is chosen at random per request.
# Built once at import time instead of on every call.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 "
    "(KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 "
    "(KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 "
    "(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 "
    "(KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 "
    "(KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 "
    "(KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 "
    "(KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 "
    "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 "
    "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
)

# XPath expressions that pull the two <code> values out of the result page.
_RESULT_IP_XPATH = "//div[@id='result']/div[@class='well']/p[1]/code/text()"
_RESULT_LOCATION_XPATH = "//div[@id='result']/div[@class='well']/p[2]/code/text()"


def checkProxy(proxyIP, protocol="http", retry_times=3, timeout=5):
    """Fetch http://ip.cn through a proxy and report what the page shows.

    The request is sent through ``proxyIP`` with a randomly chosen
    User-Agent. On success the two ``<code>`` values of the page's result
    box are returned together with the elapsed time.

    Args:
        proxyIP: Proxy address, formatted into ``"<protocol>://<proxyIP>"``
            (e.g. ``"173.35.56.183:3128"``).
        protocol: URL scheme the proxy is registered for and reached with.
        retry_times: Currently unused; accepted for interface compatibility.
        timeout: Timeout in seconds applied to this request only.

    Returns:
        On success, a dict with keys ``"rstIP"``, ``"rstLocation"`` (``None``
        when the page has no second value), ``"cost"`` (whole seconds spent
        fetching) and ``"status" == "ok"``.
        On a transport or HTTP failure, a dict with ``"status" == "error"``
        and ``"msg"``, plus ``"reason"`` for URL errors and ``"code"`` and
        ``"reason"`` for HTTP errors.
        ``-1`` when the page was fetched but no IP could be extracted.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "ip.cn",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(_USER_AGENTS),
    }
    proxy_handler = urllib2.ProxyHandler(
        {protocol: "%s://%s" % (protocol, proxyIP)}
    )
    # Use a private opener and a per-request timeout. Installing the opener
    # globally or changing the default socket timeout would silently route
    # every later urllib2 call in the process through this proxy.
    opener = urllib2.build_opener(proxy_handler)
    req = urllib2.Request("http://ip.cn", headers=headers)
    result = {}

    try:
        starttime = datetime.datetime.now()
        response = opener.open(req, timeout=timeout)
        try:
            body = response.read()
        finally:
            response.close()
        cost = (datetime.datetime.now() - starttime).seconds
    except urllib2.HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first;
        # otherwise the status code is never reported.
        result["status"] = "error"
        result["code"] = e.code
        result["reason"] = e.msg
        result["msg"] = "The server couldn't fulfill the request!"
        return result
    except urllib2.URLError as e:
        result["status"] = "error"
        result["reason"] = e.reason
        result["msg"] = "Failed to reach the server!"
        return result
    except (urllib2.socket.error, httplib.HTTPException) as e:
        # socket.timeout is a socket.error and BadStatusLine is an
        # HTTPException; these can surface un-wrapped while reading the
        # response. str(e) is used because ``e.message`` is deprecated and
        # empty for BadStatusLine.
        result["status"] = "error"
        result["msg"] = str(e)
        return result

    try:
        html = ET.HTML(body)
    except ET.LxmlError:
        # Empty or unparsable body: treat it like a page without a result.
        return -1
    if html is None:
        return -1

    ip_nodes = html.xpath(_RESULT_IP_XPATH)
    location_nodes = html.xpath(_RESULT_LOCATION_XPATH)
    # Check for an empty match list before indexing; a page that lacks the
    # result box must yield -1 rather than an IndexError.
    if not ip_nodes or not ip_nodes[0]:
        return -1

    result["rstIP"] = ip_nodes[0]
    result["rstLocation"] = location_nodes[0] if location_nodes else None
    result["cost"] = cost
    result["status"] = "ok"
    return result
122+
123+
124+
125+
126+
if __name__ == "__main__":
    # Manual smoke test against a sample proxy. Guarded so that importing
    # this module does not trigger a network request. print() with a single
    # argument behaves the same on Python 2 and Python 3.
    print(checkProxy("173.35.56.183:3128", protocol="http", retry_times=3, timeout=5))

0 commit comments

Comments
 (0)