Skip to content

Commit 04f2783

Browse files
authored
Merge pull request Jack-Cherish#45 from sys0613/master
20180731unsplash壁纸爬虫接口,供参考
2 parents 922dbe3 + aa1cdda commit 04f2783

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

one_hour_spider/unsplash20180731.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# -*- coding:utf-8 -*-
2+
import requests
3+
import json
4+
import os
5+
from contextlib import closing
6+
7+
"""
8+
从https://unsplash.com/爬取壁纸代码,使用时我是开启了代理软件
9+
国内网速貌似有些限制,很慢
10+
2018-07-31
11+
"""
12+
13+
# Root directory for saved images. It no longer has to exist beforehand:
# missing directories are created by ensure_output_dir().
save_path = 'G:/pythonlearn'
dir_path = save_path + '/' + 'unsplash-image'

# First page to fetch. Pages are walked downwards from here and the crawl stops
# before reaching page 2 (the original author noted that low page numbers may
# not honour per_page=12).
n = 10

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 60


def build_page_url(page):
    """Return the listing URL for the given page (12 photos, newest first)."""
    return ('https://unsplash.com/napi/photos?page=' + str(page)
            + '&per_page=12&order_by=latest')


def image_file_path(photo_id):
    """Return the local path under dir_path where a photo id is stored."""
    return dir_path + '/' + photo_id + '.jpg'


def ensure_output_dir():
    """Create dir_path (and any missing parent directories) if needed."""
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)


def download_image(download_url, target_path):
    """Stream one image from download_url into target_path.

    The image is requested exactly once. The file is opened in 'wb' mode so
    that re-running the script overwrites an earlier copy instead of appending
    a second copy of the bytes to it. A partially written file is removed when
    the transfer fails.

    Raises:
        requests.RequestException: on network errors or a non-2xx status.
        IOError: when the target file cannot be written.
    """
    with closing(requests.get(url=download_url, stream=True,
                              timeout=REQUEST_TIMEOUT)) as response:
        # Refuse to store an HTML error page under a .jpg name.
        response.raise_for_status()
        try:
            with open(target_path, 'wb') as image_file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        image_file.write(chunk)
        except Exception:
            # Do not leave a truncated image behind.
            if os.path.exists(target_path):
                os.remove(target_path)
            raise


def crawl_pages(first_page=n, stop_page=2):
    """Download every photo listed on pages first_page down to stop_page + 1.

    Args:
        first_page: highest page number to fetch (fetched first).
        stop_page: exclusive lower bound; this page itself is not fetched.

    A failure on a single photo is reported and skipped so that the rest of
    the crawl continues. A failure to fetch a listing page is raised.
    """
    ensure_output_dir()
    for page in range(first_page, stop_page, -1):
        print('当前爬取第'+str(page)+'次加载图片(本次共12张)')
        listing = requests.get(url=build_page_url(page), timeout=REQUEST_TIMEOUT)
        listing.raise_for_status()
        for photo in listing.json():
            try:
                download_image(photo['links']['download'],
                               image_file_path(photo['id']))
            except (requests.RequestException, IOError) as error:
                print('Skipping photo {}: {}'.format(photo['id'], error))


if __name__ == '__main__':
    crawl_pages()

0 commit comments

Comments
 (0)