Skip to content

Commit f64f411

Browse files
committed
Added comparison view.
1 parent 9efca4c commit f64f411

File tree

5 files changed

+99
-14
lines changed

5 files changed

+99
-14
lines changed

README.md

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,45 @@ Starts server on `localhost:3000`:
2020

2121
Note about CORS: by design, the server will allow any origin to access it, so browsers can consume it from pages hosted on a different domain.
2222

23+
Configuration
24+
-------------
25+
26+
By default, the proxy server will use the Readability.js version it ships with; to override this, you can set the `READABILITY_LIB_PATH` environment variable to the absolute path to the library file on your local system:
27+
28+
$ READABILITY_LIB_PATH=/path/to/my/own/version/of/Readability.js npm start
29+
2330
Usage
2431
-----
2532

26-
### `GET /get`
33+
### Web UI
34+
35+
Just head to `http://localhost:3000/`, enter a URL and enjoy both the original and the readable renderings side by side.
36+
37+
![](https://s3.amazonaws.com/f.cl.ly/items/1L0E3W2U3N0Y25111y2i/Screen%20Shot%202015-02-24%20at%2013.31.10.png)
38+
39+
### REST/JSON API
40+
41+
The HTTP REST API is available under `/api`.
42+
43+
**Disclaimer:** This API is probably far from being a truly *RESTful* implementation.
44+
45+
#### `GET /api/get`
2746

28-
#### Required parameters
47+
##### Required parameters
2948

3049
- `url`: The URL to retrieve readable contents from, e.g. `https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/`.
3150

32-
#### Optional parameters
51+
##### Optional parameters
3352

3453
- `sanitize`: A *boolean string* to enable HTML sanitization (valid truthy boolean strings: "1", "on", "true", "yes", "y"; everything else will be considered falsy):
3554

3655
**Note:** Enabling contents sanitization loses Readability.js specific HTML semantics, though it is probably safer for users if you plan to publish retrieved contents on a public website.
3756

38-
#### Example
57+
##### Example
3958

4059
Content sanitization enabled:
4160

42-
$ curl http://0.0.0.0:3000/get\?sanitize=y&url\=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/
61+
$ curl http://0.0.0.0:3000/api/get\?sanitize=y\&url\=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/
4362
{
4463
"byline":"Nicolas Perriault —",
4564
"content":"<p><strong>So finally you&#39;re <a href=\"https://nicolas.perriault.net/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/\">testing",
@@ -50,7 +69,7 @@ Content sanitization enabled:
5069

5170
Content sanitization disabled (default):
5271

53-
$ curl http://0.0.0.0:3000/get\?url\=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/
72+
$ curl http://0.0.0.0:3000/api/get\?url\=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/
5473
{
5574
"byline":"Nicolas Perriault —",
5675
"content":"<div id=\"readability-page-1\" class=\"page\"><section class=\"\">\n<p><strong>So finally you're…",

index.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ var html2md = require("html-md");
55
var markdown = require("markdown");
66

77
var app = express();
8+
app.use(express.static('static'));
89

910
/**
1011
* Casts a qs string arg into an actual boolean.
@@ -40,7 +41,7 @@ app.use(function(req, res, next) {
4041
next();
4142
});
4243

43-
app.get("/", function(req, res) {
44+
app.get("/api", function(req, res) {
4445
res.json({
4546
name: pkgInfo.name,
4647
documentation: "https://github.com/n1k0/readable-proxy/blob/master/README.md",
@@ -49,7 +50,7 @@ app.get("/", function(req, res) {
4950
});
5051
});
5152

52-
app.get("/get", function(req, res) {
53+
app.get("/api/get", function(req, res) {
5354
var url = req.query.url, sanitize = boolArg(req.query.sanitize);
5455
if (!url) {
5556
return res.status(400).json({error: "Missing url parameter"});

phantom-scrape.js

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,34 @@
11
var system = require("system");
22
var page = require("webpage").create();
33
var url = system.args[1];
4+
var readabilityPath = system.args[2];
45

56
// Prevent page JS errors from breaking the JSON output
67
// XXX: should we log these instead?
78
phantom.onError = page.onError = function(){};
89

10+
/**
 * Reports a failure and asks PhantomJS to terminate.
 *
 * The message is wrapped as `{error: message}` and handed to `outputJSON`,
 * after which `phantom.exit()` is called.
 *
 * NOTE(review): callers that must not run any further code should still
 * `return` right after calling this — confirm whether `phantom.exit()` stops
 * the script synchronously.
 *
 * @param {string} message - Human-readable description of what went wrong.
 */
function exitWithError(message) {
  var failure = {error: message};
  outputJSON(failure);
  phantom.exit();
}
14+
915
/**
 * Serializes a value as pretty-printed JSON (two-space indentation) and
 * writes the result with `console.log`.
 *
 * @param {*} object - Value to serialize.
 */
function outputJSON(object) {
  var serialized = JSON.stringify(object, null, 2);
  console.log(serialized);
}
1218

1319
if (!url) {
14-
outputJSON({error: "Missing url"});
15-
phantom.exit();
20+
exitWithError("Missing url arg.");
21+
} else if (!readabilityPath) {
22+
exitWithError("Missing readabilityPath arg.");
1623
}
1724

1825
page.open(url, function(status) {
1926
if (status !== "success") {
20-
outputJSON({error: "Unable to access " + url});
21-
return phantom.exit();
27+
return exitWithError("Unable to access " + url);
28+
}
29+
if (!page.injectJs(readabilityPath)) {
30+
exitWithError("Couldn't inject " + readabilityPath);
2231
}
23-
page.injectJs("vendor/Readability.js");
2432
outputJSON(page.evaluate(function(url) {
2533
var location = document.location;
2634
var uri = {

scrape.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@ var binPath = phantomjs.path;
44
var path = require("path");
55
var Promise = require("bluebird");
66

7+
var readabilityPath = process.env.READABILITY_LIB_PATH ||
8+
path.normalize(path.join(__dirname, "vendor", "Readability.js"));
9+
710
module.exports = function scrape(url) {
811
return new Promise(function(fulfill, reject) {
9-
var childArgs = [path.join(__dirname, "phantom-scrape.js"), url];
12+
var childArgs = [path.join(__dirname, "phantom-scrape.js"), url, readabilityPath];
1013
childProcess.execFile(binPath, childArgs, function(err, stdout, stderr) {
1114
if (err) {
1215
return reject(err);

static/index.html

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset="utf-8">
5+
<title>Readability.js test page</title>
6+
<style>
7+
body {
8+
background-color: #ffe;
9+
font-size: 18px;
10+
}
11+
form { margin: 1em 0; }
12+
input { font-size: 16px; }
13+
iframe {
14+
width: 49%;
15+
height: 640px;
16+
background: #fff;
17+
}
18+
iframe body {
19+
font-size: 22px;
20+
}
21+
</style>
22+
</head>
23+
<body>
24+
<h1>Readability.js test page</h1>
25+
<form id="form">
26+
<p><label>URL
27+
<input type="url" id="url" size="120" placeholder="http://">
28+
</label></p>
29+
<p><label>
30+
<input type="checkbox" id="sanitize"> Sanitize output
31+
</label></p>
32+
<input type="submit">
33+
</form>
34+
<iframe id="source"></iframe>
35+
<iframe id="target"></iframe>
36+
<script>
37+
var q = document.querySelector.bind(document);
38+
39+
/**
 * Fetches the readable rendering of `url` from the `/api/get` endpoint and
 * writes it into the body of the `target` iframe document.
 *
 * The request is asynchronous, so the page is not frozen while waiting for
 * the server to answer. Failures are displayed as plain text in the target
 * frame instead of throwing or rendering "undefined". A failure is a network
 * error, a reply that is not valid JSON, a JSON reply carrying an `error`
 * field, or a reply without a string `content`.
 *
 * @param {string} url - Address of the page to render in readable form.
 * @param {boolean} sanitize - Whether to request sanitized HTML from the API.
 * @param {HTMLIFrameElement} target - Frame whose document body receives the result.
 */
function injectReadableContents(url, sanitize, target) {
  // Resolve the body lazily: it is only needed once a reply has arrived.
  function targetBody() {
    return target.contentDocument.body;
  }

  function showError(message) {
    // textContent, not innerHTML: error text must never be parsed as markup.
    targetBody().textContent = "Error: " + message;
  }

  var req = new XMLHttpRequest();
  req.open("GET", "/api/get?sanitize=" + (sanitize ? "yes" : "no") + "&url=" + encodeURIComponent(url), true);

  req.onload = function() {
    var payload;
    try {
      payload = JSON.parse(req.responseText);
    } catch (err) {
      showError("Invalid response from server (HTTP " + req.status + ")");
      return;
    }
    if (!payload || payload.error) {
      showError(String((payload && payload.error) || "Empty response from server"));
      return;
    }
    if (typeof payload.content !== "string") {
      showError("No readable content returned");
      return;
    }
    // The content is HTML produced by the API and is meant to be rendered.
    targetBody().innerHTML = payload.content;
  };

  req.onerror = function() {
    showError("Unable to reach the server");
  };

  req.send(null);
}
45+
46+
q("form").addEventListener("submit", function(event) {
47+
event.preventDefault();
48+
var url = q("#url").value;
49+
q("#source").src = url;
50+
injectReadableContents(url, q("#sanitize").checked, q("#target"));
51+
});
52+
</script>
53+
</body>
54+
</html>

0 commit comments

Comments
 (0)