Skip to content

Commit bb79f2b

Browse files
committed
爬虫测试
1 parent d047853 commit bb79f2b

File tree

2 files changed

+85
-0
lines changed

2 files changed

+85
-0
lines changed

nodejs/标准模块/http-reptile.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/**
2+
* Created by chenhao on 15/6/7.
3+
*/
4+
5+
var http = require('http');
6+
var url = 'http://www.imooc.com/learn/348';
7+
8+
/*获取页面所有的html*/
9+
//http.get(url,function(res){
10+
// var html='';
11+
//
12+
// res.on('data',function(data){
13+
// html+=data;
14+
// });
15+
//
16+
// res.on('end',function(){
17+
// console.log(html);
18+
// });
19+
//}).on('error',function(){
20+
// console.log('获取课程出错');
21+
//});
22+
23+
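/*
 * Extract selected data from the page.
 * Note: relies on the third-party cheerio module. Install it locally in the project;
 * a global-only install fails with `Error: Cannot find module 'cheerio'`.
 */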
27+
var cheerio = require('cheerio');
28+
29+
/**
 * Extract the chapter/video outline from the markup of a course page.
 *
 * Every element matching `.learnchapter` produces one entry. Its title is the
 * text of the `strong` descendants; its videos come from the `li` children of
 * the `.video` element, each read through its `.studyvideo` link.
 *
 * @param {string} html - Markup of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{videoTitle: string, id: (string|undefined)}>}>}
 *     One entry per chapter, in document order. `id` is the part of the link
 *     after `video/`, or undefined when the link does not contain `video/`.
 */
function filterChapters(html) {
    var $ = cheerio.load(html),
        courseData = [];

    $('.learnchapter').each(function () {
        var chapter = $(this),
            chapterData = {
                chapterTitle: chapter.find('strong').text(),
                videos: []
            };

        chapter.find('.video').children('li').each(function () {
            var video = $(this).find('.studyvideo'),
                href = video.attr('href');

            // A list item without a video link has no href; calling split on it would throw
            if (typeof href !== 'string') {
                return;
            }

            chapterData.videos.push({
                videoTitle: video.text(),
                id: href.split('video/')[1]
            });
        });

        courseData.push(chapterData);
    });

    return courseData;
}
58+
59+
/**
 * Print a course outline to stdout.
 *
 * Each chapter title is logged followed by a newline, then every video of that
 * chapter as `[id]title` followed by a newline.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{id: string, videoTitle: string}>}>} courseData
 *     Chapters to print; a missing list, or a chapter without a `videos`
 *     array, is treated as empty instead of throwing.
 * @returns {void}
 */
function printChapterInfo(courseData) {
    (courseData || []).forEach(function (item) {
        console.log(item.chapterTitle + '\n');

        (item.videos || []).forEach(function (video) {
            console.log('[' + video.id + ']' + video.videoTitle + '\n');
        });
    });
}
70+
71+
/*
 * Fetch the course page and print its chapter outline once the whole body
 * has been received.
 */
http.get(url, function (res) {
    var html = '';

    // Anything but 200 (redirect, error page) carries no course markup worth parsing
    if (res.statusCode !== 200) {
        console.log('获取课程出错: HTTP ' + res.statusCode);
        res.resume(); // discard the body so the connection is released
        return;
    }

    // Decode while streaming so a multi-byte character split across two chunks stays intact
    res.setEncoding('utf8');

    res.on('data', function (data) {
        html += data;
    });

    res.on('end', function () {
        printChapterInfo(filterChapters(html));
    });
}).on('error', function (err) {
    // Include the reason instead of discarding the error object
    console.log('获取课程出错: ' + err.message);
});
File renamed without changes.

0 commit comments

Comments
 (0)