Skip to content

Commit 2095b90

Browse files
committed
update html5ever version, narrow version requirements
1 parent 64f03df commit 2095b90

File tree

3 files changed

+110
-121
lines changed

3 files changed

+110
-121
lines changed

native/html5ever_nif/Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ crate-type = ["dylib"]
1212
rustler = "^0.14"
1313
rustler_codegen = "^0.14"
1414

15-
html5ever = "*"
16-
tendril = "*"
17-
lazy_static = "*"
18-
scoped-pool = "*"
15+
html5ever = "0.16"
16+
tendril = "0.2"
17+
lazy_static = "0.2"
18+
scoped-pool = "1"

native/html5ever_nif/src/lib.rs

Lines changed: 41 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,19 @@ extern crate scoped_pool;
1010

1111
use std::panic;
1212

13-
use rustler::{
14-
NifEnv,
15-
NifTerm,
16-
NifResult,
17-
NifError,
18-
NifEncoder,
19-
NifDecoder,
20-
};
13+
use rustler::{NifEnv, NifTerm, NifResult, NifError, NifEncoder, NifDecoder};
2114
use rustler::types::binary::NifBinary;
2215
use rustler::env::OwnedEnv;
2316

24-
use html5ever::{ QualName };
25-
use html5ever::rcdom::{ RcDom, Handle, NodeEnum };
17+
use html5ever::rcdom::RcDom;
2618
use html5ever::driver::ParseOpts;
2719
use html5ever::tokenizer::TokenizerOpts;
2820
use html5ever::tree_builder::TreeBuilderOpts;
29-
use html5ever::tree_builder::interface::QuirksMode;
30-
use tendril::{ TendrilSink, StrTendril };
21+
use html5ever::tree_builder::QuirksMode;
22+
use tendril::TendrilSink;
23+
24+
mod rc_dom;
25+
use rc_dom::handle_to_term;
3126

3227
mod atoms {
3328
rustler_atoms! {
@@ -41,11 +36,11 @@ mod atoms {
4136
atom doctype;
4237
atom comment;
4338

44-
atom error_level;
4539
atom discard_bom;
4640
atom scripting_enabled;
4741
atom iframe_srcdoc;
4842
atom drop_doctype;
43+
atom error_level;
4944

5045
atom none;
5146
atom some;
@@ -61,10 +56,15 @@ enum ErrorLevel {
6156
}
6257
impl<'a> NifDecoder<'a> for ErrorLevel {
6358
fn decode(term: NifTerm<'a>) -> NifResult<ErrorLevel> {
64-
if atoms::none() == term { Ok(ErrorLevel::None) }
65-
else if atoms::some() == term { Ok(ErrorLevel::Some) }
66-
else if atoms::all() == term { Ok(ErrorLevel::All) }
67-
else { Err(NifError::BadArg) }
59+
if atoms::none() == term {
60+
Ok(ErrorLevel::None)
61+
} else if atoms::some() == term {
62+
Ok(ErrorLevel::Some)
63+
} else if atoms::all() == term {
64+
Ok(ErrorLevel::All)
65+
} else {
66+
Err(NifError::BadArg)
67+
}
6868
}
6969
}
7070

@@ -74,17 +74,13 @@ fn term_to_configs(term: NifTerm) -> NifResult<ParseOpts> {
7474
} else {
7575
let env = term.get_env();
7676

77-
let errors: ErrorLevel =
78-
term.map_get(atoms::error_level().to_term(env))?.decode()?;
77+
let errors: ErrorLevel = term.map_get(atoms::error_level().to_term(env))?.decode()?;
7978

80-
let discard_bom: bool =
81-
term.map_get(atoms::discard_bom().to_term(env))?.decode()?;
82-
let scripting_enabled: bool =
83-
term.map_get(atoms::scripting_enabled().to_term(env))?.decode()?;
84-
let iframe_srcdoc: bool =
85-
term.map_get(atoms::iframe_srcdoc().to_term(env))?.decode()?;
86-
let drop_doctype: bool =
87-
term.map_get(atoms::drop_doctype().to_term(env))?.decode()?;
79+
let discard_bom: bool = term.map_get(atoms::discard_bom().to_term(env))?.decode()?;
80+
let scripting_enabled: bool = term.map_get(atoms::scripting_enabled().to_term(env))?
81+
.decode()?;
82+
let iframe_srcdoc: bool = term.map_get(atoms::iframe_srcdoc().to_term(env))?.decode()?;
83+
let drop_doctype: bool = term.map_get(atoms::drop_doctype().to_term(env))?.decode()?;
8884

8985
Ok(ParseOpts {
9086
tokenizer: TokenizerOpts {
@@ -106,69 +102,6 @@ fn term_to_configs(term: NifTerm) -> NifResult<ParseOpts> {
106102
}
107103
}
108104

109-
// Zero-cost wrapper types which make it possible to implement
110-
// NifEncoder for these externally defined types.
111-
// Unsure if this is a great way of doing it, but it's the way
112-
// that produced the cleanest and least noisy code.
113-
struct QNW<'a>(&'a QualName);
114-
struct STW<'a>(&'a StrTendril);
115-
116-
impl<'b> NifEncoder for QNW<'b> {
117-
fn encode<'a>(&self, env: NifEnv<'a>) -> NifTerm<'a> {
118-
let data: &str = &*self.0.local;
119-
data.encode(env)
120-
}
121-
}
122-
impl<'b> NifEncoder for STW<'b> {
123-
fn encode<'a>(&self, env: NifEnv<'a>) -> NifTerm<'a> {
124-
let data: &str = &*self.0;
125-
data.encode(env)
126-
}
127-
}
128-
129-
/// Takes a Handle from a RcDom, encodes it into a NifTerm.
130-
/// This follows the mochiweb encoding scheme with two exceptions:
131-
/// * A `{:doctype, name, pubid, sysid}` node.
132-
/// * Always returns a list as its root node.
133-
fn handle_to_term<'a>(env: NifEnv<'a>, handle: &Handle) -> NifTerm<'a> {
134-
let node = handle.borrow();
135-
136-
// Closure so that we don't encode this when we don't need to return
137-
// it to the user.
138-
let children = || {
139-
// Encodes a Vec<Handle> to a Vec<NifTerm>
140-
let res: Vec<NifTerm<'a>> =
141-
node.children.iter().map(|h| handle_to_term(env, h)).collect();
142-
// Encodes to erlang list term.
143-
res.encode(env)
144-
};
145-
146-
match node.node {
147-
// Root document node. As far as I know, this is only located in the
148-
// root of the DOM.
149-
NodeEnum::Document =>
150-
children(),
151-
152-
NodeEnum::Doctype(ref name, ref pubid, ref sysid) =>
153-
(atoms::doctype(), STW(name), STW(pubid), STW(sysid)).encode(env),
154-
155-
NodeEnum::Text(ref text) =>
156-
STW(text).encode(env),
157-
158-
NodeEnum::Comment(ref text) =>
159-
(atoms::comment(), STW(text)).encode(env),
160-
161-
NodeEnum::Element(ref name, ref _elem_type, ref attributes) => {
162-
let attribute_terms: Vec<NifTerm<'a>> =
163-
attributes.iter()
164-
.map(|a| (QNW(&a.name), STW(&a.value)).encode(env))
165-
.collect();
166-
167-
(QNW(name), attribute_terms, children()).encode(env)
168-
},
169-
}
170-
}
171-
172105
// Thread pool for `parse_async`.
173106
// TODO: How do we decide on pool size?
174107
lazy_static! {
@@ -185,7 +118,7 @@ fn parse_async<'a>(env: NifEnv<'a>, args: &[NifTerm<'a>]) -> NifResult<NifTerm<'
185118

186119
let return_pid = env.pid();
187120

188-
//let config = term_to_configs(args[1]);
121+
// let config = term_to_configs(args[1]);
189122

190123
POOL.spawn(move || {
191124
owned_env.send_and_clear(&return_pid, |inner_env| {
@@ -201,28 +134,24 @@ fn parse_async<'a>(env: NifEnv<'a>, args: &[NifTerm<'a>]) -> NifResult<NifTerm<'
201134

202135
// TODO: Use Parser.from_bytes instead?
203136
let parser = html5ever::parse_document(sink, Default::default());
204-
let result = parser.one(
205-
std::str::from_utf8(binary.as_slice()).unwrap());
137+
let result = parser.one(std::str::from_utf8(binary.as_slice()).unwrap());
206138

207139
let result_term = handle_to_term(inner_env, &result.document);
208-
(atoms::html5ever_nif_result(), atoms::ok(), result_term)
209-
.encode(inner_env)
140+
(atoms::html5ever_nif_result(), atoms::ok(), result_term.unwrap()).encode(inner_env)
210141
}) {
211142
Ok(term) => term,
212143
Err(err) => {
213144
// Try to extract a panic reason and return that. If this
214145
// fails, fail generically.
215-
let reason =
216-
if let Some(s) = err.downcast_ref::<String>() {
217-
s.encode(inner_env)
218-
} else if let Some(&s) = err.downcast_ref::<&'static str>() {
219-
s.encode(inner_env)
220-
} else {
221-
atoms::nif_panic().encode(inner_env)
222-
};
223-
(atoms::html5ever_nif_result(), atoms::error(), reason)
224-
.encode(inner_env)
225-
},
146+
let reason = if let Some(s) = err.downcast_ref::<String>() {
147+
s.encode(inner_env)
148+
} else if let Some(&s) = err.downcast_ref::<&'static str>() {
149+
s.encode(inner_env)
150+
} else {
151+
atoms::nif_panic().encode(inner_env)
152+
};
153+
(atoms::html5ever_nif_result(), atoms::error(), reason).encode(inner_env)
154+
}
226155
}
227156
});
228157
});
@@ -236,24 +165,19 @@ fn parse_sync<'a>(env: NifEnv<'a>, args: &[NifTerm<'a>]) -> NifResult<NifTerm<'a
236165

237166
// TODO: Use Parser.from_bytes instead?
238167
let parser = html5ever::parse_document(sink, Default::default());
239-
let result = parser.one(
240-
std::str::from_utf8(binary.as_slice()).unwrap());
168+
let result = parser.one(std::str::from_utf8(binary.as_slice()).unwrap());
241169

242-
//std::thread::sleep(std::time::Duration::from_millis(10));
170+
// std::thread::sleep(std::time::Duration::from_millis(10));
243171

244172
let result_term = handle_to_term(env, &result.document);
245173

246-
Ok((atoms::html5ever_nif_result(), atoms::ok(), result_term)
247-
.encode(env))
174+
Ok((atoms::html5ever_nif_result(), atoms::ok(), result_term.unwrap()).encode(env))
248175

249176
}
250177

251-
rustler_export_nifs!(
252-
"Elixir.Html5ever.Native",
253-
[("parse_async", 1, parse_async),
254-
("parse_sync", 1, parse_sync)],
255-
Some(on_load)
256-
);
178+
rustler_export_nifs!("Elixir.Html5ever.Native",
179+
[("parse_async", 1, parse_async), ("parse_sync", 1, parse_sync)],
180+
Some(on_load));
257181

258182

259183
fn on_load<'a>(_env: NifEnv<'a>, _load_info: NifTerm<'a>) -> bool {

native/html5ever_nif/src/rc_dom.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
use ::rustler::{NifEnv, NifTerm, NifEncoder};
2+
use ::html5ever::rcdom::{Handle, NodeData};
3+
use ::html5ever::QualName;
4+
use ::tendril::StrTendril;
5+
6+
// Zero-cost wrapper types which make it possible to implement
7+
// NifEncoder for these externally defined types.
8+
// Unsure if this is a great way of doing it, but it's the way
9+
// that produced the cleanest and least noisy code.
10+
struct QNW<'a>(&'a QualName);
11+
struct STW<'a>(&'a StrTendril);
12+
13+
impl<'b> NifEncoder for QNW<'b> {
14+
fn encode<'a>(&self, env: NifEnv<'a>) -> NifTerm<'a> {
15+
let data: &str = &*self.0.local;
16+
data.encode(env)
17+
}
18+
}
19+
impl<'b> NifEncoder for STW<'b> {
20+
fn encode<'a>(&self, env: NifEnv<'a>) -> NifTerm<'a> {
21+
let data: &str = &*self.0;
22+
data.encode(env)
23+
}
24+
}
25+
26+
/// Takes a Handle from an RcDom and encodes it into a NifTerm, or returns `None` for node types that have no encoding.
27+
/// This follows the mochiweb encoding scheme with two exceptions:
28+
/// * A `{:doctype, name, pubid, sysid}` node.
29+
/// * Always returns a list as its root node.
30+
pub fn handle_to_term<'a>(env: NifEnv<'a>, handle: &Handle) -> Option<NifTerm<'a>> {
31+
let node = handle;
32+
33+
// Closure so that we don't encode this when we don't need to return
34+
// it to the user.
35+
let children = || {
36+
// Encodes a Vec<Handle> to a Vec<NifTerm>
37+
let res: Vec<NifTerm<'a>> = node.children.borrow().iter().filter_map(|h| handle_to_term(env, h)).collect();
38+
// Encodes to erlang list term.
39+
res.encode(env)
40+
};
41+
42+
match node.data {
43+
// Root document node. As far as I know, this is only located in the
44+
// root of the DOM.
45+
NodeData::Document => Some(children()),
46+
47+
NodeData::Doctype { ref name, ref public_id, ref system_id } => {
48+
Some((::atoms::doctype(), STW(name), STW(public_id), STW(system_id)).encode(env))
49+
}
50+
51+
NodeData::Text { ref contents } => Some(STW(&*contents.borrow()).encode(env)),
52+
53+
NodeData::Comment { ref contents } => Some((::atoms::comment(), STW(contents)).encode(env)),
54+
55+
NodeData::Element { ref name, ref attrs, .. } => {
56+
let attribute_terms: Vec<NifTerm<'a>> = attrs.borrow().iter()
57+
.map(|a| (QNW(&a.name), STW(&a.value)).encode(env))
58+
.collect();
59+
60+
Some((QNW(name), attribute_terms, children()).encode(env))
61+
}
62+
63+
_ => None,
64+
}
65+
}

0 commit comments

Comments
 (0)