|
1 |
| -import time |
| 1 | +import asyncio |
2 | 2 | import random
|
3 |
| -import requests |
4 |
| -import pymongo |
| 3 | +import time |
5 | 4 | import aiohttp
|
6 |
| -import asyncio |
7 |
| -from bs4 import BeautifulSoup |
| 5 | +import pymongo |
| 6 | +import requests |
8 | 7 | import multiprocessing
|
| 8 | +from bs4 import BeautifulSoup |
9 | 9 |
|
10 | 10 | # 共用部分
|
11 | 11 | clients = pymongo.MongoClient('localhost')
|
|
26 | 26 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
|
27 | 27 | "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
|
28 | 28 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
|
29 |
| - "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)", |
30 | 29 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
31 | 30 | "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
32 | 31 | "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
|
@@ -115,13 +114,13 @@ def method_2(url):
|
115 | 114 | print('成功插入一组数据' + str(content))
|
116 | 115 |
|
117 | 116 |
|
118 |
| - # if __name__ == '__main__': |
119 |
| - # start = time.time() |
120 |
| - # pool = multiprocessing.Pool(4) |
121 |
| - # pool.map(method_2, urls) |
122 |
| - # pool.close() |
123 |
| - # pool.join() |
124 |
| - # print('一共用时:' + str(time.time() - start)) |
| 117 | +# if __name__ == '__main__': |
| 118 | +# start = time.time() |
| 119 | +# pool = multiprocessing.Pool(4) |
| 120 | +# pool.map(method_2, urls) |
| 121 | +# pool.close() |
| 122 | +# pool.join() |
| 123 | +# print('一共用时:' + str(time.time() - start)) |
125 | 124 |
|
126 | 125 |
|
127 | 126 | # 方式三:使用Asyncio + Aiohttp python3.4之后出的异步io模块
|
@@ -156,6 +155,5 @@ async def parser(url):
|
156 | 155 | loop.run_until_complete(asyncio.gather(*tasks))
|
157 | 156 | print(time.time() - start)
|
158 | 157 |
|
159 |
| - |
160 | 158 | if __name__ == '__main__':
|
161 | 159 | method_3()
|
0 commit comments