Skip to content

Commit 96945bb

Browse files
committed
Initial import.
0 parents  commit 96945bb

File tree

7 files changed

+1664
-0
lines changed

7 files changed

+1664
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
node_modules

README.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
readable-proxy
2+
==============
3+
4+
Proxy server to retrieve a readable version of any provided URL, powered by Node,
5+
[PhantomJS](http://phantomjs.org/) and [Readability.js](https://github.com/mozilla/readability).
6+
7+
Installation
8+
------------
9+
10+
$ git clone https://github.com/n1k0/readable-proxy
11+
$ cd readable-proxy
12+
$ npm install
13+
14+
Run
15+
---
16+
17+
Starts server on `localhost:3000`:
18+
19+
$ npm start
20+
21+
Note about CORS: by design, the server will allow any origin to access it, so browsers can consume it from pages hosted on a different domain.
22+
23+
Usage
24+
-----
25+
26+
$ curl http://localhost:3000/get\?url\=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/
27+
{
28+
"byline":"Nicolas Perriault —",
29+
"content":"<div id=\"readability-page-1\" class=\"page\"><section class=\"\">\n<p><strong>So finally you're…",
30+
"length":3851,
31+
"title":"Get your Frontend JavaScript Code Covered | Code",
32+
"uri":"https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/"
33+
}
34+
35+
License
36+
-------
37+
38+
MPL 2.0.

index.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
var scrape = require("./scrape");
2+
var express = require("express");
3+
var pkgInfo = require("./package.json");
4+
5+
var app = express();
6+
7+
/**
 * Global middleware applied to every request before routing.
 *
 * Declares JSON as the response content type and opens the API to
 * cross-origin callers (any origin), so browsers can consume it from pages
 * hosted on a different domain.
 */
app.use(function(req, res, next) {
  res.header("Content-Type", "application/json");
  res.header("Access-Control-Allow-Origin", "*");
  // "X-Requested-With" is the header name AJAX libraries actually send; the
  // previous "Requested-With" entry did not match any real request header.
  res.header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept");
  next();
});
13+
14+
/**
 * GET / — service identification endpoint.
 *
 * Responds with the service name and version, both read from package.json so
 * they cannot drift from the published package metadata.
 */
app.get("/", function(req, res) {
  res.json({name: pkgInfo.name, version: pkgInfo.version});
});
17+
18+
/**
 * GET /get?url=<address> — scrapes the given page and returns its readable
 * version as JSON.
 *
 * Responses:
 *   200 — the result resolved by scrape(url)
 *   400 — the `url` query parameter is missing or is not an http(s) URL
 *   500 — scrape(url) rejected; the body carries the error message
 */
app.get("/get", function(req, res) {
  var url = req.query.url;
  if (!url) {
    return res.status(400).json({error: "Missing url parameter"});
  }
  // The value is untrusted and ends up being opened by a headless browser:
  // - repeated/bracketed parameters (?url=a&url=b) are parsed into arrays or
  //   objects rather than strings, so the type must be checked;
  // - only http(s) is accepted so that schemes such as file:// cannot be used
  //   to read resources local to the server.
  if (typeof url !== "string" || !/^https?:\/\//i.test(url)) {
    return res.status(400).json({error: "Invalid url parameter, expected an http(s) URL"});
  }
  scrape(url).then(function(result) {
    res.json(result);
  }).catch(function(err) {
    // Rejections are expected to be Error objects, but do not assume it.
    res.status(500).json({error: (err && err.message) || String(err)});
  });
});
29+
30+
// Start the HTTP server. The port can be overridden through the PORT
// environment variable and defaults to 3000 when it is not set.
var server = app.listen(process.env.PORT || 3000, function() {
  // Log the address the server actually bound to.
  var address = server.address();
  console.log("Server listening at http://%s:%s", address.address, address.port);
});

package.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"name": "readable-proxy",
3+
"version": "0.0.1",
4+
"description": "Node service attempting to fetch readable contents from any URL.",
5+
"main": "index.js",
6+
"scripts": {
7+
"start": "node index.js",
8+
"test": "echo \"Error: no test specified\" && exit 1"
9+
},
10+
"keywords": [
11+
"readable",
12+
"readability",
13+
"fetch"
14+
],
15+
"author": "Nicolas Perriault <nperriault@mozilla.com>",
16+
"license": "MPL-2.0",
17+
"dependencies": {
18+
"bluebird": "^2.9.12",
19+
"express": "^4.11.2",
20+
"phantomjs": "^1.9.15"
21+
}
22+
}

phantom-scrape.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
var system = require("system");
2+
var page = require("webpage").create();
3+
var url = system.args[1];
4+
5+
/**
 * Writes a value to standard output as pretty-printed JSON
 * (two-space indentation).
 *
 * @param {*} o Value to serialize and print.
 */
function json(o) {
  var serialized = JSON.stringify(o, null, 2);
  console.log(serialized);
}
8+
9+
// Script entry point: open the url given on the command line, run
// Readability against the loaded document and print the outcome as JSON on
// stdout. Every failure is reported as a {error: "..."} JSON object.
if (!url) {
  json({error: "Missing url"});
  phantom.exit();
} else {
  // The page is only opened from the else branch: phantom.exit() does not
  // interrupt the script, so without it execution would carry on and try to
  // open an undefined url.
  page.open(url, function(status) {
    if (status !== "success") {
      json({error: "Unable to open " + url});
      return phantom.exit();
    }
    // injectJs() returns false when the script could not be injected; without
    // it the Readability constructor would be missing from the page context.
    if (!page.injectJs("vendor/Readability.js")) {
      json({error: "Unable to inject vendor/Readability.js"});
      return phantom.exit();
    }
    // Runs inside the page context, which is why the address is read from
    // document.location rather than from the outer `url` variable.
    var article = page.evaluate(function() {
      return new Readability(document.location.href, document).parse();
    });
    // Guard against an empty result (presumably parse() found no article, or
    // the evaluation failed) so the output is always a JSON object.
    json(article || {error: "Unable to extract readable content from " + url});
    phantom.exit();
  });
}

scrape.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
var childProcess = require("child_process");
2+
var phantomjs = require("phantomjs");
3+
var binPath = phantomjs.path;
4+
var path = require("path");
5+
var Promise = require("bluebird");
6+
7+
module.exports = function scrape(url) {
8+
return new Promise(function(fulfill, reject) {
9+
var childArgs = [path.join(__dirname, "phantom-scrape.js"), url];
10+
childProcess.execFile(binPath, childArgs, function(err, stdout, stderr) {
11+
if (err) {
12+
return reject(err);
13+
}
14+
try {
15+
fulfill(JSON.parse(stdout));
16+
} catch (e) {
17+
reject(new Error("Unable to parse JSON proxy response."));
18+
}
19+
});
20+
});
21+
};

0 commit comments

Comments
 (0)