Skip to content
This repository was archived by the owner on Apr 27, 2022. It is now read-only.

Commit c118978

Browse files
committed
clean up and comment code a bit
1 parent 644ca30 commit c118978

File tree

2 files changed

+54
-31
lines changed

2 files changed

+54
-31
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Html5everErlang
22

3-
NIF wrapper of html5ever using Rustler
3+
NIF wrapper of html5ever using Rustler.
44

5-
FIXME: This currently spawns a new thread on every parse call, fix this!
5+
It is currently functional, but missing some features and optimization.
66

77
## Installation
88

native/html5ever_nif/src/lib.rs

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,21 @@ extern crate html5ever;
66
extern crate tendril;
77
extern crate scoped_pool;
88

9-
use std::borrow::Cow;
10-
use std::fmt;
9+
use std::panic;
1110

1211
use rustler::{
1312
NifEnv,
1413
NifTerm,
1514
NifResult,
1615
NifEncoder,
1716
};
17+
use rustler::types::binary::NifBinary;
18+
use rustler::env::OwnedEnv;
1819

1920
use html5ever::{ QualName };
2021
use html5ever::rcdom::{ RcDom, Handle, NodeEnum };
2122
use tendril::{ TendrilSink, StrTendril };
2223

23-
//mod flat_dom;
24-
25-
use rustler::types::binary::NifBinary;
26-
use rustler::env::OwnedEnv;
27-
2824
mod atoms {
2925
rustler_atoms! {
3026
atom html5ever_nif_result;
@@ -38,6 +34,10 @@ mod atoms {
3834
}
3935
}
4036

37+
// Zero-cost wrapper types which make it possible to implement
38+
// NifEncoder for these externally defined types.
39+
// Unsure if this is a great way of doing it, but it's the way
40+
// that produced the cleanest and least noisy code.
4141
struct QNW<'a>(&'a QualName);
4242
struct STW<'a>(&'a StrTendril);
4343

@@ -54,54 +54,78 @@ impl<'b> NifEncoder for STW<'b> {
5454
}
5555
}
5656

57+
/// Takes a Handle from an RcDom and encodes it into a NifTerm.
58+
/// This follows the mochiweb encoding scheme with two exceptions:
59+
/// * A `{:doctype, name, pubid, sysid}` node.
60+
/// * Always returns a list as its root node.
5761
fn handle_to_term<'a>(env: NifEnv<'a>, handle: &Handle) -> NifTerm<'a> {
5862
let node = handle.borrow();
5963

60-
let res: Vec<NifTerm<'a>> =
61-
node.children.iter().map(|h| handle_to_term(env, h)).collect();
62-
let children = res.encode(env);
64+
// Closure so that we don't encode this when we don't need to return
65+
// it to the user.
66+
let children = || {
67+
// Encodes a Vec<Handle> to a Vec<NifTerm>
68+
let res: Vec<NifTerm<'a>> =
69+
node.children.iter().map(|h| handle_to_term(env, h)).collect();
70+
// Encodes to erlang list term.
71+
res.encode(env)
72+
};
6373

6474
match node.node {
75+
// Root document node. As far as I know, this is only located in the
76+
// root of the DOM.
6577
NodeEnum::Document =>
66-
children,
78+
children(),
79+
6780
NodeEnum::Doctype(ref name, ref pubid, ref sysid) =>
6881
(atoms::doctype(), STW(name), STW(pubid), STW(sysid)).encode(env),
82+
6983
NodeEnum::Text(ref text) =>
7084
STW(text).encode(env),
85+
7186
NodeEnum::Comment(ref text) =>
7287
(atoms::comment(), STW(text)).encode(env),
73-
NodeEnum::Element(ref name, ref elem_type, ref attr) => {
74-
let attr_terms: Vec<NifTerm<'a>> =
75-
attr.iter().map(|a| {
76-
(QNW(&a.name), STW(&a.value)).encode(env)
77-
}).collect();
7888

79-
(QNW(name), attr_terms, children).encode(env)
89+
NodeEnum::Element(ref name, ref _elem_type, ref attributes) => {
90+
let attribute_terms: Vec<NifTerm<'a>> =
91+
attributes.iter()
92+
.map(|a| (QNW(&a.name), STW(&a.value)).encode(env))
93+
.collect();
94+
95+
(QNW(name), attribute_terms, children()).encode(env)
8096
},
8197
}
8298
}
8399

84-
use std::thread;
85-
use std::panic;
100+
// Thread pool for `parse_async`.
101+
// TODO: How do we decide on pool size?
102+
lazy_static! {
103+
static ref POOL: scoped_pool::Pool = scoped_pool::Pool::new(4);
104+
}
86105

87106
fn parse_async<'a>(env: NifEnv<'a>, args: &Vec<NifTerm<'a>>) -> NifResult<NifTerm<'a>> {
88107
let mut owned_env = OwnedEnv::new();
89-
let input_term_saved = owned_env.save(args[0]);
90108

91-
let pid = env.pid();
109+
// Copies the term into the inner env. Since this term is normally a large
110+
// binary term, copying it over should be cheap, since the binary will be
111+
// refcounted within the BEAM.
112+
let input_term = owned_env.save(args[0]);
113+
114+
let return_pid = env.pid();
92115

93116
POOL.spawn(move || {
94-
owned_env.send(pid, |inner_env| {
117+
owned_env.send(return_pid, |inner_env| {
118+
// This should not really be done in user code. We (Rustler project)
119+
// need to find a better abstraction that eliminates this.
95120
match panic::catch_unwind(|| {
96-
let input_term = input_term_saved.load(inner_env);
97-
98-
let binary: NifBinary = match input_term.decode() {
121+
let binary: NifBinary = match input_term.load(inner_env).decode() {
99122
Ok(inner) => inner,
100123
Err(_) => panic!("argument is not a binary"),
101124
};
102125

103126
let sink = RcDom::default();
104127

128+
// TODO: Use Parser.from_bytes instead?
105129
let parser = html5ever::parse_document(sink, Default::default());
106130
let result = parser.one(
107131
std::str::from_utf8(binary.as_slice()).unwrap());
@@ -112,6 +136,8 @@ fn parse_async<'a>(env: NifEnv<'a>, args: &Vec<NifTerm<'a>>) -> NifResult<NifTer
112136
}) {
113137
Ok(term) => term,
114138
Err(err) => {
139+
// Try to extract a panic reason and return that. If this
140+
// fails, fail generically.
115141
let reason =
116142
if let Some(s) = err.downcast_ref::<String>() {
117143
s.encode(inner_env)
@@ -136,10 +162,7 @@ rustler_export_nifs!(
136162
Some(on_load)
137163
);
138164

139-
lazy_static! {
140-
static ref POOL: scoped_pool::Pool = scoped_pool::Pool::new(4);
141-
}
142165

143-
fn on_load<'a>(env: NifEnv<'a>, _load_info: NifTerm<'a>) -> bool {
166+
fn on_load<'a>(_env: NifEnv<'a>, _load_info: NifTerm<'a>) -> bool {
144167
true
145168
}

0 commit comments

Comments
 (0)