Skip to content

Commit cad1420

Browse files
committed
加入三种方法爬取豌豆荚设计奖速度对比
1 parent e62a7d1 commit cad1420

File tree

2 files changed: 48 additions and 48 deletions.

.idea/workspace.xml

Lines changed: 36 additions & 34 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python爬虫日记系列/Python爬虫日记九:豌豆荚设计奖三种爬取方法速度对比.py

Lines changed: 12 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
import time
1+
import asyncio
22
import random
3-
import requests
4-
import pymongo
3+
import time
54
import aiohttp
6-
import asyncio
7-
from bs4 import BeautifulSoup
5+
import pymongo
6+
import requests
87
import multiprocessing
8+
from bs4 import BeautifulSoup
99

1010
# 共用部分
1111
clients = pymongo.MongoClient('localhost')
@@ -26,7 +26,6 @@
2626
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
2727
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
2828
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
29-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
3029
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
3130
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
3231
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
@@ -115,13 +114,13 @@ def method_2(url):
115114
print('成功插入一组数据' + str(content))
116115

117116

118-
# if __name__ == '__main__':
119-
# start = time.time()
120-
# pool = multiprocessing.Pool(4)
121-
# pool.map(method_2, urls)
122-
# pool.close()
123-
# pool.join()
124-
# print('一共用时:' + str(time.time() - start))
117+
# if __name__ == '__main__':
118+
# start = time.time()
119+
# pool = multiprocessing.Pool(4)
120+
# pool.map(method_2, urls)
121+
# pool.close()
122+
# pool.join()
123+
# print('一共用时:' + str(time.time() - start))
125124

126125

127126
# 方式三:使用Asyncio + Aiohttp python3.4之后出的异步io模块
@@ -156,6 +155,5 @@ async def parser(url):
156155
loop.run_until_complete(asyncio.gather(*tasks))
157156
print(time.time() - start)
158157

159-
160158
if __name__ == '__main__':
161159
method_3()

0 commit comments

Comments
 (0)