Skip to content

Commit c6b87e8

Browse files
author
zd
committed
simulate login_csdn
1 parent 95e0f02 commit c6b87e8

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

spidercsdn.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Simulate a browser login to CSDN and print the page shown after login.

Flow:
  1. GET the login page through a cookie-aware opener.
  2. Read the hidden form fields the login form carries.
  3. POST the credentials together with those hidden fields.
  4. GET the user's home page with the same opener (and therefore the
     same cookies) and print it.

Any network or HTTP failure is reported on stdout and the script exits
with status 1 instead of continuing with an unbound response object.
"""
# Import the modules used by this script.
import sys
import urllib
import urllib2
import cookielib
from bs4 import BeautifulSoup


def _open_or_exit(url_opener, target):
    """Open *target* (a URL string or urllib2.Request) with *url_opener*.

    Returns the response object on success.  On failure, prints a short
    diagnostic and terminates the script with exit status 1.
    """
    try:
        return url_opener.open(target)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError and also exposes `reason`,
        # so it has to be handled first or it would be reported as a
        # connection failure.
        print("The server couldn't fulfill the request.")
        print('Error code: %s' % (e.code,))
    except urllib2.URLError as e:
        print('We failed to reach a server.')
        # e.reason may be a string or another exception instance; wrap
        # it in a tuple so %-formatting never unpacks it.
        print('Reason: %s' % (e.reason,))
    sys.exit(1)


# Create the cookie jar that stores the session cookies.
cookie = cookielib.CookieJar()
# Create the cookie handler.
handler = urllib2.HTTPCookieProcessor(cookie)
# Create the opener; every request below goes through it so cookies persist.
opener = urllib2.build_opener(handler)
url = 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'

# Fetch the login page.
response = _open_or_exit(opener, url)
try:
    login_page = response.read()
finally:
    response.close()

# Hand the fetched page to BeautifulSoup.
soup = BeautifulSoup(login_page, 'lxml')
# Collect the hidden <input> tags of the page.
inputs = soup.find_all('input', attrs={'type': 'hidden'})
# NOTE(review): assumes the first two hidden inputs are `lt` and
# `execution`, in that order -- confirm against the live login form.
if len(inputs) < 2:
    print('Login page does not contain the expected hidden form fields.')
    sys.exit(1)
lt = inputs[0]['value']
execution = inputs[1]['value']

# The page above was fetched without custom headers, which suggests the
# server does not check request headers for the login page itself.
# Fill in real credentials before running; do not commit them.
username = ''
password = ''
values = {
    'username': username,
    'password': password,
    'lt': lt,
    'execution': execution,
    '_eventId': 'submit',
}
# URL-encode the form fields into a POST body.
datas = urllib.urlencode(values)
accept_language = 'zh-CN,zh;q=0.8'
referer = 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'
userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36'
headers = {
    'User-Agent': userAgent,
    'Accept-Language': accept_language,
    'Referer': referer,
}

# Submit the login form (supplying a body makes urllib2 send a POST).
request = urllib2.Request(url, datas, headers)
res = _open_or_exit(opener, request)
try:
    print("Response's info: %s" % (res.info(),))
    # Report whether the login response has a non-empty body.
    print(bool(res.read()))
finally:
    res.close()

new_url = 'http://my.csdn.net'
# Without these headers the server answers 403 Forbidden.
# The request is a plain GET: the session cookie already identifies the
# user, and re-sending the login form here would transmit the password
# over unencrypted HTTP.
req = urllib2.Request(new_url, headers=headers)
# Fetch the page shown after login; it contains the user name and other
# account information.
r = _open_or_exit(opener, req)
try:
    print(r.read())
finally:
    r.close()

0 commit comments

Comments
 (0)