File tree Expand file tree Collapse file tree 5 files changed +116
-0
lines changed Expand file tree Collapse file tree 5 files changed +116
-0
lines changed Original file line number Diff line number Diff line change @@ -10,6 +10,8 @@ Python技术 公众号文章代码库
10
10
11
11
## 实例代码
12
12
13
+ [知乎热门:如何提高爬虫速度?](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/spiderspeed):知乎热门:如何提高爬虫速度?
14
+
13
15
[Python异常还能写得如此优雅!](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/retry):Python异常还能写得如此优雅!
14
16
15
17
[神器 Spider!几分钟入门分布式爬虫!](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/disspider):神器 Spider!几分钟入门分布式爬虫!
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ @author: 闲欢
5
+ """
6
+ import aiohttp
7
+ import asyncio
8
+ import time
9
+
10
+
11
async def fetch(client, url='http://httpbin.org/get'):
    """Download *url* through an existing session and return the body text.

    Args:
        client: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``text()``
            (the script passes an ``aiohttp.ClientSession``).
        url: Address to request. Defaults to the httpbin endpoint this
            script has always used, so ``fetch(client)`` keeps working.

    Returns:
        The response body as returned by ``resp.text()``.

    Raises:
        RuntimeError: If the response status is anything other than 200.
    """
    async with client.get(url) as resp:
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let non-200 responses through.
        if resp.status != 200:
            raise RuntimeError(
                'unexpected HTTP status %s for %s' % (resp.status, url)
            )
        return await resp.text()
15
+
16
+
17
async def main():
    """Open a new aiohttp session, fetch the page once and print its body."""
    async with aiohttp.ClientSession() as session:
        # The session is only needed for this single request.
        print(await fetch(session))
21
+
22
# Create and install a dedicated event loop. Calling
# asyncio.get_event_loop() when no loop is current is deprecated and
# raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Schedule 100 crawler coroutines; they start running once the loop is driven.
tasks = [loop.create_task(main()) for _ in range(100)]

t1 = time.time()

try:
    # Wait for *all* scheduled tasks. Driving the loop with a single extra
    # main() would stop as soon as that one coroutine finished, leaving
    # tasks pending and making the measured time meaningless.
    loop.run_until_complete(asyncio.gather(*tasks))
finally:
    loop.close()

print("aiohttp版爬虫耗时:", time.time() - t1)
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ @author: 闲欢
5
+ """
6
+ import requests
7
+ import time
8
+ import multiprocessing
9
+ from multiprocessing import Pool
10
+
11
# Pool size: one worker process per CPU reported by multiprocessing.
MAX_WORKER_NUM = multiprocessing.cpu_count()
12
+
13
def fetch():
    """Request the httpbin endpoint once and print the response body."""
    response = requests.get('http://httpbin.org/get')
    body = response.text
    print(body)
16
+
17
if __name__ == '__main__':
    # Run 100 fetches on a process pool and report the elapsed wall time.
    t1 = time.time()

    pool = Pool(MAX_WORKER_NUM)
    for _ in range(100):
        # Fire-and-forget submission; the returned AsyncResult is not kept.
        pool.apply_async(fetch)

    # No more work will be submitted; wait for the workers to drain the queue.
    pool.close()
    pool.join()

    print('多进程爬虫耗时:', time.time() - t1)
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ @author: 闲欢
5
+ """
6
+ import time
7
+ import requests
8
+ from datetime import datetime
9
+
10
+
11
def fetch(url):
    """Send a GET request to *url* and print the body of the response."""
    print(requests.get(url).text)
14
+
15
# Time 100 requests issued one after another.
# (The previously assigned ``start`` timestamp was never read and is gone.)
t1 = time.time()
for _ in range(100):
    fetch('http://httpbin.org/get')

print('requests版爬虫耗时:', time.time() - t1)
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ @author: 闲欢
5
+ """
6
+ import threading
7
+ import time
8
+ import requests
9
+
10
+
11
def fetch():
    """Request the httpbin endpoint once and print the response body."""
    print(requests.get('http://httpbin.org/get').text)
14
+
15
t1 = time.time()

# Launch one thread per request, keeping each handle so it can be joined.
t_list = []
for _ in range(100):
    worker = threading.Thread(target=fetch)
    t_list.append(worker)
    worker.start()

# Wait for every request thread to finish before reading the clock.
for worker in t_list:
    worker.join()

print("多线程版爬虫耗时:", time.time() - t1)
You can’t perform that action at this time.
0 commit comments