Skip to content

Commit a34a55d

Browse files
committed
More exported features to prepare for first release.
1 parent 51dcdb0 commit a34a55d

File tree

8 files changed

+61
-20
lines changed

8 files changed

+61
-20
lines changed

Procfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
web: node index.js
1+
web: node run.js

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,21 @@ Content sanitization disabled (default):
8181
"uri":"https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/"
8282
}
8383

84+
### Usage from node
85+
86+
#### scrape() function
87+
88+
The `scrape` function scrapes a URL and returns a Promise with the JSON result object described above:
89+
90+
```js
91+
var scrape = require("readable-proxy").scrape;
92+
var url = "https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/";
93+
94+
scrape(url, {sanitize: true, userAgent: "My custom User-Agent string"})
95+
  .then(console.log.bind(console))
96+
.catch(console.error.bind(console));
97+
```
98+
8499
Tests
85100
-----
86101

index.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
require("./server").serve();
1+
module.exports = {
2+
scrape: require("./scrape"),
3+
server: require("./server")
4+
};

package.json

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,30 @@
44
"description": "Node service attempting to fetch readable contents from any URL.",
55
"main": "index.js",
66
"scripts": {
7-
"start": "node index.js",
7+
"start": "node run.js",
88
"test": "mocha"
99
},
1010
"keywords": [
1111
"readable",
1212
"readability",
13-
"fetch"
13+
"fetch",
14+
"proxy",
15+
"scrape"
1416
],
1517
"author": "Nicolas Perriault <nperriault@mozilla.com>",
1618
"license": "MPL",
1719
"dependencies": {
18-
"bluebird": "^2.9.12",
19-
"bootstrap": "^3.3.2",
20-
"express": "^4.11.2",
21-
"html-md": "^3.0.2",
22-
"markdown": "^0.5.0",
23-
"phantomjs": "^1.9.15",
24-
"supertest": "^0.15.0"
20+
"bluebird": "^2.9.*",
21+
"bootstrap": "^3.3.*",
22+
"express": "^4.11.*",
23+
"html-md": "^3.0.*",
24+
"markdown": "^0.5.*",
25+
"phantomjs": "^1.9.*"
2526
},
2627
"devDependencies": {
27-
"chai": "^2.1.0",
28-
"mocha": "^2.1.0",
29-
"sinon": "^1.12.2"
28+
"chai": "^2.1.*",
29+
"mocha": "^2.1.*",
30+
"sinon": "^1.12.*",
31+
"supertest": "^0.15.*"
3032
}
3133
}

phantom-scrape.js

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ page.open(url, function(status) {
3434
if (!page.injectJs(readabilityPath)) {
3535
exitWithError("Couldn't inject " + readabilityPath);
3636
}
37-
outputJSON(page.evaluate(function(url, userAgent) {
37+
outputJSON(page.evaluate(function(url, userAgent, pageContent) {
3838
var location = document.location;
3939
var uri = {
4040
spec: location.href,
@@ -45,11 +45,27 @@ page.open(url, function(status) {
4545
};
4646
try {
4747
var result = new Readability(uri, document).parse();
48-
result.userAgent = userAgent;
48+
if (result) {
49+
result.userAgent = userAgent;
50+
} else {
51+
result = {
52+
error: {
53+
message: "Empty result from Readability.js.",
54+
sourceHTML: pageContent || "Empty page content."
55+
}
56+
};
57+
}
4958
return result;
5059
} catch (err) {
51-
return {error: err};
60+
return {
61+
error: {
62+
message: err.message,
63+
line: err.line,
64+
stack: err.stack,
65+
sourceHTML: pageContent || "Empty page content."
66+
}
67+
};
5268
}
53-
}, url, page.settings.userAgent));
69+
}, url, page.settings.userAgent, page.content));
5470
phantom.exit();
5571
});

run.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
require("./server").serve();

scrape.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ module.exports = function scrape(url, options) {
2323
try {
2424
response = JSON.parse(stdout);
2525
} catch (e) {
26-
error = "Unable to parse JSON proxy response.";
26+
error = {
27+
message: "Unable to parse JSON proxy response.",
28+
line: e.line,
29+
stack: e.stack
30+
};
2731
}
2832
if (response && response.error) {
2933
error = response.error;

test/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ describe("Tests", function() {
5757
});
5858

5959
scrape("http://invalid.test/").catch(function(err) {
60-
expect(err.error).to.match(/Unable to parse JSON proxy response/);
60+
expect(err.error.message).to.match(/Unable to parse JSON proxy response/);
6161
done();
6262
});
6363
});

0 commit comments

Comments
 (0)