
Commit 79997e2

committed
Commit code
1 parent 9e943e5 commit 79997e2

File tree

5 files changed: +116 -0 lines changed


xianhuan/README.md

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ Python技术 WeChat public account article code repo

 ## Example code

+[Zhihu trending: how to speed up your crawler?](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/spiderspeed): Zhihu trending: how to speed up your crawler?
+
 [Python exceptions can be written so elegantly!](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/retry): Python exceptions can be written so elegantly!

 [The amazing Spider! Get started with distributed crawling in minutes!](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/disspider): The amazing Spider! Get started with distributed crawling in minutes!

xianhuan/spiderspeed/async_demo.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import aiohttp
import asyncio
import time


async def fetch(client):
    async with client.get('http://httpbin.org/get') as resp:
        assert resp.status == 200
        return await resp.text()


async def main():
    # Each task opens its own session, fetches one page, and prints it.
    async with aiohttp.ClientSession() as client:
        html = await fetch(client)
        print(html)


loop = asyncio.get_event_loop()

t1 = time.time()

# Schedule 100 concurrent fetches, then run the loop until all of them finish.
tasks = [loop.create_task(main()) for _ in range(100)]
loop.run_until_complete(asyncio.wait(tasks))

print("aiohttp crawler elapsed:", time.time() - t1)
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import multiprocessing
import time
from multiprocessing import Pool

import requests

MAX_WORKER_NUM = multiprocessing.cpu_count()


def fetch():
    r = requests.get('http://httpbin.org/get')
    print(r.text)


if __name__ == '__main__':
    t1 = time.time()

    # Dispatch 100 fetches to a pool of cpu_count() worker processes.
    p = Pool(MAX_WORKER_NUM)
    for _ in range(100):
        p.apply_async(fetch, args=())
    p.close()
    p.join()

    print('multiprocessing crawler elapsed:', time.time() - t1)
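Processes are a heavyweight tool for an I/O-bound job like this, since each worker pays process spawn and inter-process communication costs. For comparison, a sketch of the same benchmark via concurrent.futures.ProcessPoolExecutor (not part of this commit; the fetch signature here takes a URL for use with map):

```python
import time
from concurrent.futures import ProcessPoolExecutor

import requests


def fetch(url):
    # Each worker process issues one blocking GET.
    return requests.get(url).status_code


if __name__ == '__main__':
    t1 = time.time()
    with ProcessPoolExecutor() as pool:  # defaults to cpu_count() workers
        results = list(pool.map(fetch, ['http://httpbin.org/get'] * 100))
    print('ProcessPoolExecutor elapsed:', time.time() - t1)
```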

xianhuan/spiderspeed/simple_demo.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import time

import requests


def fetch(url):
    r = requests.get(url)
    print(r.text)


t1 = time.time()

# Baseline: 100 sequential, blocking requests.
for _ in range(100):
    fetch('http://httpbin.org/get')

print('requests crawler elapsed:', time.time() - t1)
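Even without concurrency, the sequential baseline can often be sped up by reusing one TCP connection across requests. A hedged sketch using requests.Session (an assumption of this note, not something the commit includes):

```python
import time

import requests

# A Session keeps the underlying connection alive (HTTP keep-alive),
# avoiding a fresh TCP handshake for every one of the 100 requests.
session = requests.Session()

t1 = time.time()
for _ in range(100):
    session.get('http://httpbin.org/get')
print('requests.Session elapsed:', time.time() - t1)
```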

xianhuan/spiderspeed/thread_demo.py

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: 闲欢
"""
import threading
import time

import requests


def fetch():
    r = requests.get('http://httpbin.org/get')
    print(r.text)


t1 = time.time()

# Start 100 threads, then join them all so the timing covers every request.
t_list = []
for _ in range(100):
    t = threading.Thread(target=fetch, args=())
    t_list.append(t)
    t.start()

for t in t_list:
    t.join()

print("multithreaded crawler elapsed:", time.time() - t1)
