@@ -6,25 +6,21 @@ extern crate html5ever;
6
6
extern crate tendril;
7
7
extern crate scoped_pool;
8
8
9
- use std:: borrow:: Cow ;
10
- use std:: fmt;
9
+ use std:: panic;
11
10
12
11
use rustler:: {
13
12
NifEnv ,
14
13
NifTerm ,
15
14
NifResult ,
16
15
NifEncoder ,
17
16
} ;
17
+ use rustler:: types:: binary:: NifBinary ;
18
+ use rustler:: env:: OwnedEnv ;
18
19
19
20
use html5ever:: { QualName } ;
20
21
use html5ever:: rcdom:: { RcDom , Handle , NodeEnum } ;
21
22
use tendril:: { TendrilSink , StrTendril } ;
22
23
23
- //mod flat_dom;
24
-
25
- use rustler:: types:: binary:: NifBinary ;
26
- use rustler:: env:: OwnedEnv ;
27
-
28
24
mod atoms {
29
25
rustler_atoms ! {
30
26
atom html5ever_nif_result;
@@ -38,6 +34,10 @@ mod atoms {
38
34
}
39
35
}
40
36
37
+ // Zero-cost wrapper types which make it possible to implement
38
+ // NifEncoder for these externally defined types.
39
+ // Unsure if this is a great way of doing it, but it's the way
40
+ // that produced the cleanest and least noisy code.
41
41
struct QNW < ' a > ( & ' a QualName ) ;
42
42
struct STW < ' a > ( & ' a StrTendril ) ;
43
43
@@ -54,54 +54,78 @@ impl<'b> NifEncoder for STW<'b> {
54
54
}
55
55
}
56
56
57
+ /// Takes a Handle from a RcDom, encodes it into a NifTerm.
58
+ /// This follows the mochiweb encoding scheme with two exceptions:
59
+ /// * A `{:doctype, name, pubid, sysid}` node.
60
+ /// * Always returns a list as its root node.
57
61
fn handle_to_term < ' a > ( env : NifEnv < ' a > , handle : & Handle ) -> NifTerm < ' a > {
58
62
let node = handle. borrow ( ) ;
59
63
60
- let res: Vec < NifTerm < ' a > > =
61
- node. children . iter ( ) . map ( |h| handle_to_term ( env, h) ) . collect ( ) ;
62
- let children = res. encode ( env) ;
64
+ // Closure so that we don't encode this when we don't need to return
65
+ // it to the user.
66
+ let children = || {
67
+ // Encodes a Vec<Handle> to a Vec<NifTerm>
68
+ let res: Vec < NifTerm < ' a > > =
69
+ node. children . iter ( ) . map ( |h| handle_to_term ( env, h) ) . collect ( ) ;
70
+ // Encodes to an Erlang list term.
71
+ res. encode ( env)
72
+ } ;
63
73
64
74
match node. node {
75
+ // Root document node. As far as I know, this is only located in the
76
+ // root of the DOM.
65
77
NodeEnum :: Document =>
66
- children,
78
+ children ( ) ,
79
+
67
80
NodeEnum :: Doctype ( ref name, ref pubid, ref sysid) =>
68
81
( atoms:: doctype ( ) , STW ( name) , STW ( pubid) , STW ( sysid) ) . encode ( env) ,
82
+
69
83
NodeEnum :: Text ( ref text) =>
70
84
STW ( text) . encode ( env) ,
85
+
71
86
NodeEnum :: Comment ( ref text) =>
72
87
( atoms:: comment ( ) , STW ( text) ) . encode ( env) ,
73
- NodeEnum :: Element ( ref name, ref elem_type, ref attr) => {
74
- let attr_terms: Vec < NifTerm < ' a > > =
75
- attr. iter ( ) . map ( |a| {
76
- ( QNW ( & a. name ) , STW ( & a. value ) ) . encode ( env)
77
- } ) . collect ( ) ;
78
88
79
- ( QNW ( name) , attr_terms, children) . encode ( env)
89
+ NodeEnum :: Element ( ref name, ref _elem_type, ref attributes) => {
90
+ let attribute_terms: Vec < NifTerm < ' a > > =
91
+ attributes. iter ( )
92
+ . map ( |a| ( QNW ( & a. name ) , STW ( & a. value ) ) . encode ( env) )
93
+ . collect ( ) ;
94
+
95
+ ( QNW ( name) , attribute_terms, children ( ) ) . encode ( env)
80
96
} ,
81
97
}
82
98
}
83
99
84
- use std:: thread;
85
- use std:: panic;
100
+ // Thread pool for `parse_async`.
101
+ // TODO: How do we decide on pool size?
102
+ lazy_static ! {
103
+ static ref POOL : scoped_pool:: Pool = scoped_pool:: Pool :: new( 4 ) ;
104
+ }
86
105
87
106
fn parse_async < ' a > ( env : NifEnv < ' a > , args : & Vec < NifTerm < ' a > > ) -> NifResult < NifTerm < ' a > > {
88
107
let mut owned_env = OwnedEnv :: new ( ) ;
89
- let input_term_saved = owned_env. save ( args[ 0 ] ) ;
90
108
91
- let pid = env. pid ( ) ;
109
+ // Copies the term into the inner env. Since this term is normally a large
110
+ // binary term, copying it over should be cheap, since the binary will be
111
+ // refcounted within the BEAM.
112
+ let input_term = owned_env. save ( args[ 0 ] ) ;
113
+
114
+ let return_pid = env. pid ( ) ;
92
115
93
116
POOL . spawn ( move || {
94
- owned_env. send ( pid, |inner_env| {
117
+ owned_env. send ( return_pid, |inner_env| {
118
+ // This should not really be done in user code. We (Rustler project)
119
+ // need to find a better abstraction that eliminates this.
95
120
match panic:: catch_unwind ( || {
96
- let input_term = input_term_saved. load ( inner_env) ;
97
-
98
- let binary: NifBinary = match input_term. decode ( ) {
121
+ let binary: NifBinary = match input_term. load ( inner_env) . decode ( ) {
99
122
Ok ( inner) => inner,
100
123
Err ( _) => panic ! ( "argument is not a binary" ) ,
101
124
} ;
102
125
103
126
let sink = RcDom :: default ( ) ;
104
127
128
+ // TODO: Use Parser.from_bytes instead?
105
129
let parser = html5ever:: parse_document ( sink, Default :: default ( ) ) ;
106
130
let result = parser. one (
107
131
std:: str:: from_utf8 ( binary. as_slice ( ) ) . unwrap ( ) ) ;
@@ -112,6 +136,8 @@ fn parse_async<'a>(env: NifEnv<'a>, args: &Vec<NifTerm<'a>>) -> NifResult<NifTer
112
136
} ) {
113
137
Ok ( term) => term,
114
138
Err ( err) => {
139
+ // Try to extract a panic reason and return that. If this
140
+ // fails, fail generically.
115
141
let reason =
116
142
if let Some ( s) = err. downcast_ref :: < String > ( ) {
117
143
s. encode ( inner_env)
@@ -136,10 +162,7 @@ rustler_export_nifs!(
136
162
Some ( on_load)
137
163
) ;
138
164
139
- lazy_static ! {
140
- static ref POOL : scoped_pool:: Pool = scoped_pool:: Pool :: new( 4 ) ;
141
- }
142
165
143
- fn on_load < ' a > ( env : NifEnv < ' a > , _load_info : NifTerm < ' a > ) -> bool {
166
+ fn on_load < ' a > ( _env : NifEnv < ' a > , _load_info : NifTerm < ' a > ) -> bool {
144
167
true
145
168
}
0 commit comments