|
| 1 | +#! usr/bin/env python |
| 2 | +# _*_ coding:utf-8 _*_ |
| 3 | +#导入要使用的模块 |
| 4 | +import urllib, urllib2, cookielib |
| 5 | +from bs4 import BeautifulSoup |
| 6 | + |
| 7 | +#创建cookie对象 |
| 8 | +cookie = cookielib.CookieJar() |
| 9 | +#创建cookie处理器 |
| 10 | +handler = urllib2.HTTPCookieProcessor(cookie) |
| 11 | +#创建opener对象 |
| 12 | +opener = urllib2.build_opener(handler) |
| 13 | +url = 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn' |
| 14 | +#获取登录页面,捕获异常 |
| 15 | +try: |
| 16 | + response = opener.open(url) |
| 17 | +except urllib2.URLError as e: |
| 18 | + if hasattr(e,'reason'): |
| 19 | + print 'We failed to reach a server.' |
| 20 | + print 'Reason:',e.reason |
| 21 | + elif hasattr(e,'code'): |
| 22 | + print "The server couldn't fulfill the request." |
| 23 | + print 'Error code:',e.code |
| 24 | +#将获取的页面传给BeautifulSoup构造BeautifulSoup对象 |
| 25 | +soup = BeautifulSoup(response.read(),'lxml') |
| 26 | +#根据标签和属性获得input标签 |
| 27 | +inputs = soup.find_all('input', attrs={'type':'hidden'}) |
| 28 | +#根据属性获得值 |
| 29 | +lt = inputs[0]['value'] |
| 30 | +execution=inputs[1]['value'] |
| 31 | +#前面页面获取成功,说明服务器没有检测头部 |
| 32 | +username = '' |
| 33 | +password = '' |
| 34 | +values={'username':username, 'password':password, 'lt':lt, 'execution':execution, '_eventId':'submit'} |
| 35 | +#把数据编程特定格式 |
| 36 | +datas=urllib.urlencode(values) |
| 37 | +AccpetLanguage='zh-CN,zh;q=0.8' |
| 38 | +referer = 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn' |
| 39 | +userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36' |
| 40 | +headers = {'User-Agent':userAgent, 'Accept-Language':AccpetLanguage, 'Referer':referer} |
| 41 | +request = urllib2.Request(url, datas, headers) |
| 42 | +try: |
| 43 | + res = opener.open(request) |
| 44 | +except urllib2.URLError as e: |
| 45 | + if hasattr(e, 'reason'): |
| 46 | + print 'We failed to reach a server.' |
| 47 | + print 'Reason:', e.reason |
| 48 | + elif hasattr(e, 'code'): |
| 49 | + print "The server couldn't fulfill the request." |
| 50 | + print 'Error code:', e.code |
| 51 | +print "Response's info:", res.info() |
| 52 | +#判断对象是否为空 |
| 53 | +print any(res) |
| 54 | +new_url = 'http://my.csdn.net' |
| 55 | +#不加headers,会保403拒绝访问 |
| 56 | +req = urllib2.Request(new_url,datas, headers=headers) |
| 57 | +#获取登录后的页面,可以看到自己的用户名等等信息 |
| 58 | +r = opener.open(req) |
| 59 | +print r.read() |
# 0 commit comments