Skip to content

Commit 04cd62d

Browse files
committed
Closes n1k0#10 - Added console.log messages to JSON result + UI.
1 parent 4899e48 commit 04cd62d

File tree

5 files changed

+65
-34
lines changed

5 files changed

+65
-34
lines changed

phantom-scrape.js

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ var page = require("webpage").create();
33
var url = system.args[1];
44
var readabilityPath = system.args[2];
55
var userAgent = system.args[3];
6+
var consoleLogs = [];
67

78
// Prevent page JS errors from breaking JSON output
89
// XXX: should we log these instead?
@@ -17,6 +18,43 @@ function outputJSON(object) {
1718
console.log(JSON.stringify(object, null, 2));
1819
}
1920

21+
/**
22+
* Note: This function runs within the page environment.
23+
*/
24+
function runReadability(url, userAgent, pageContent) {
25+
var location = document.location;
26+
var uri = {
27+
spec: location.href,
28+
host: location.host,
29+
prePath: location.protocol + "//" + location.host, // TODO This is incomplete, needs username/password and port
30+
scheme: location.protocol.substr(0, location.protocol.indexOf(":")),
31+
pathBase: location.protocol + "//" + location.host + location.pathname.substr(0, location.pathname.lastIndexOf("/") + 1)
32+
};
33+
try {
34+
var result = new Readability(uri, document).parse();
35+
if (result) {
36+
result.userAgent = userAgent;
37+
} else {
38+
result = {
39+
error: {
40+
message: "Empty result from Readability.js.",
41+
sourceHTML: pageContent || "Empty page content."
42+
}
43+
};
44+
}
45+
return result;
46+
} catch (err) {
47+
return {
48+
error: {
49+
message: err.message,
50+
line: err.line,
51+
stack: err.stack,
52+
sourceHTML: pageContent || "Empty page content."
53+
}
54+
};
55+
}
56+
};
57+
2058
if (!url) {
2159
exitWithError("Missing url arg.");
2260
} else if (!readabilityPath) {
@@ -33,45 +71,23 @@ page.settings.loadImages = false;
3371
// ensure we don't waste time trying to load slow/missing resources
3472
page.settings.resourceTimeout = 1000;
3573

74+
page.onConsoleMessage = function(msg) {
75+
consoleLogs.push(msg);
76+
};
77+
3678
page.open(url, function(status) {
3779
if (status !== "success") {
3880
return exitWithError("Unable to access " + url);
3981
}
4082
if (!page.injectJs(readabilityPath)) {
4183
exitWithError("Couldn't inject " + readabilityPath);
4284
}
43-
outputJSON(page.evaluate(function(url, userAgent, pageContent) {
44-
var location = document.location;
45-
var uri = {
46-
spec: location.href,
47-
host: location.host,
48-
prePath: location.protocol + "//" + location.host, // TODO This is incomplete, needs username/password and port
49-
scheme: location.protocol.substr(0, location.protocol.indexOf(":")),
50-
pathBase: location.protocol + "//" + location.host + location.pathname.substr(0, location.pathname.lastIndexOf("/") + 1)
51-
};
52-
try {
53-
var result = new Readability(uri, document).parse();
54-
if (result) {
55-
result.userAgent = userAgent;
56-
} else {
57-
result = {
58-
error: {
59-
message: "Empty result from Readability.js.",
60-
sourceHTML: pageContent || "Empty page content."
61-
}
62-
};
63-
}
64-
return result;
65-
} catch (err) {
66-
return {
67-
error: {
68-
message: err.message,
69-
line: err.line,
70-
stack: err.stack,
71-
sourceHTML: pageContent || "Empty page content."
72-
}
73-
};
74-
}
75-
}, url, page.settings.userAgent, page.content));
85+
var result = page.evaluate(runReadability, url, page.settings.userAgent, page.content);
86+
if (result && result.error) {
87+
result.error.consoleLogs = consoleLogs;
88+
} else if (result && result.content) {
89+
result.consoleLogs = consoleLogs;
90+
}
91+
outputJSON(result);
7692
phantom.exit();
7793
});

scrape.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ module.exports = function scrape(url, options) {
3535
}
3636
if (error) {
3737
reject(objectAssign(new Error(), error));
38+
} else if (!response) {
39+
reject(new Error("Empty scraped response."));
3840
} else {
3941
fulfill(response);
4042
}

server.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ app.get("/api/get", function(req, res) {
4646
res.json(sanitize ? sanitizeResult(result) : result);
4747
}).catch(function(err) {
4848
console.log(err);
49-
res.status(500).json({error: JSON.parse(JSON.stringify(err))});
49+
res.status(500).json({error: {message: err.message}});
5050
});
5151
});
5252

static/index.html

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
<title>Readability.js test page</title>
66
<link rel="stylesheet" type="text/css" href="bootstrap.min.css">
77
<link rel="stylesheet" type="text/css" href="style.css">
8+
<style>
9+
#logs {
10+
width: 100%;
11+
height: 180px;
12+
font-family: monospace;
13+
}
14+
</style>
815
</head>
916
<body>
1017
<div class="container-fluid">
@@ -65,6 +72,10 @@ <h1>Readability.js <small>test page</small></h1>
6572
</div>
6673
</div>
6774
</div>
75+
<div>
76+
<h3>Console logs</h3>
77+
<textarea id="logs"></textarea>
78+
</div>
6879
</div>
6980
<script src="main.js"></script>
7081
</body>

static/main.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
q("#length").textContent = "";
2323
q("#dir").textContent = "";
2424
q("#excerpt").textContent = "";
25+
q("#logs").value = "";
2526
target.contentDocument.body.innerHTML = "";
2627
} else {
2728
q("#error").textContent = "";
@@ -30,6 +31,7 @@
3031
q("#length").textContent = jsonResponse.length;
3132
q("#dir").textContent = jsonResponse.dir;
3233
q("#excerpt").textContent = jsonResponse.excerpt;
34+
q("#logs").value = (jsonResponse.consoleLogs || []).join("\n");
3335
target.contentDocument.body.innerHTML = jsonResponse.content;
3436
}
3537
}

0 commit comments

Comments
 (0)