Skip to content

Commit fb48367

Browse files
committed
1/16
1 parent aa62b08 commit fb48367

File tree

3 files changed: 4 additions and 12 deletions.

3 files changed

+4
-12
lines changed

mm131/.idea/mm131.iml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mm131/mm131/settings.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
# Obey robots.txt rules
1010
ROBOTSTXT_OBEY = False
1111
#设置下载延迟
12-
DOWNLOAD_DELAY = 0.5
12+
DOWNLOAD_DELAY = 0.2
1313
#禁止重试
1414
RETRY_ENABLED = False
1515

mm131/mm131/spiders/spider.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -69,10 +69,7 @@ def parse_one(self, response):
6969
item['image_id'] = str(uuid.uuid1())
7070
item['category_code'] = category_code
7171
item['image_from'] = self.image_from
72-
# print 'parse_one图片Id' + item['image_id']
73-
# print 'parse_one图片标题' + item['image_title']
74-
# print 'parse_one图片图片保存在数据库的目录' + item['image_url_dir']
75-
# print 'parse_one图片存储的目录' + item['dir_path']
72+
7673
headers = {
7774
"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
7875
"Accept-Language":"zh-CN,zh;q=0.8",
@@ -85,7 +82,6 @@ def parse_one(self, response):
8582
#1.获取每个image的标题,url入口,分类
8683
#2.随机生成一个image_id
8784
def parse_two(self, response):
88-
print response.text
8985
item = response.meta['item']
9086
category_code = item['category_code']
9187
is_page_last = response.xpath(u'.//div/a[@class="page-ch"]/text()="下一页"').extract()[0].encode('utf-8')
@@ -105,9 +101,5 @@ def parse_two(self, response):
105101
item['file_path'] = item['dir_path'] + '/' + file_name + '.jpg'
106102
item['image_url'] = item['image_url_dir'] + '/' + file_name + '.jpg'
107103
item['image_html'] = response.url
108-
print 'parse_one图片Id' + item['image_id']
109-
print 'parse_one图片标题' + item['image_title']
110-
print 'parse_one图片图片保存在数据库的目录' + item['image_url_dir']
111-
print 'parse_one图片存储的目录' + item['dir_path']
112-
print 'parse_one图片下载路径' + item['image_down_url']
104+
113105
yield item

0 commit comments

Comments
 (0)