Skip to content
This repository was archived by the owner on Apr 27, 2022. It is now read-only.

Commit 1723af9

Browse files
committed
do parsing inline for small html documents
1 parent 4eba284 commit 1723af9

File tree

4 files changed

+56
-2
lines changed

4 files changed

+56
-2
lines changed

lib/ex_html5ever.ex

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@ defmodule ExHtml5ever do
33
Documentation for ExHtml5ever.
44
"""
55

6+
# Largest input, in bytes, that is still parsed inline via parse_sync/1.
# Anything bigger is routed to parse_async/1.
@inline_parse_max_bytes 500

@doc """
Parses an HTML document given as a binary.

Inputs whose `byte_size/1` exceeds a small threshold are handed to the
asynchronous NIF entry point; smaller inputs are parsed inline through the
synchronous NIF, which yields `{:ok, result}` or `{:error, reason}`.
"""
def parse(html) when byte_size(html) > @inline_parse_max_bytes do
  parse_async(html)
end

def parse(html) do
  parse_sync(html)
end
12+
13+
defp parse_async(html) do
714
ExHtml5ever.Native.parse_async(html)
815
receive do
916
{:html5ever_nif_result, :ok, result} ->
@@ -13,4 +20,13 @@ defmodule ExHtml5ever do
1320
end
1421
end
1522

23+
# Calls the synchronous parser NIF directly and strips its result tag,
# turning `{:html5ever_nif_result, status, payload}` into `{status, payload}`
# for the `:ok` and `:error` statuses. Any other reply raises CaseClauseError.
defp parse_sync(html) do
  nif_reply = ExHtml5ever.Native.parse_sync(html)

  case nif_reply do
    {:html5ever_nif_result, status, payload} when status in [:ok, :error] ->
      {status, payload}
  end
end
31+
1632
end

lib/ex_html5ever/native.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ defmodule ExHtml5ever.Native do
66
use Rustler, otp_app: :ex_html5ever, crate: "html5ever_nif"
77

88
# Elixir-side stub for the `parse_async` NIF; this body only runs err/0.
# The argument is underscored because the stub never reads it.
def parse_async(_binary), do: err()
9+
# Elixir-side stub for the `parse_sync` NIF; this body only runs err/0.
# The argument is underscored because the stub never reads it.
def parse_sync(_binary), do: err()
910

1011
defp err() do
1112
throw NifNotLoadedError

native/html5ever_nif/src/lib.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,28 @@ fn parse_async<'a>(env: NifEnv<'a>, args: &Vec<NifTerm<'a>>) -> NifResult<NifTer
156156
Ok(atoms::ok().encode(env))
157157
}
158158

159+
fn parse_sync<'a>(env: NifEnv<'a>, args: &Vec<NifTerm<'a>>) -> NifResult<NifTerm<'a>> {
160+
let binary: NifBinary = args[0].decode()?;
161+
let sink = RcDom::default();
162+
163+
// TODO: Use Parser.from_bytes instead?
164+
let parser = html5ever::parse_document(sink, Default::default());
165+
let result = parser.one(
166+
std::str::from_utf8(binary.as_slice()).unwrap());
167+
168+
//std::thread::sleep(std::time::Duration::from_millis(10));
169+
170+
let result_term = handle_to_term(env, &result.document);
171+
172+
Ok((atoms::html5ever_nif_result(), atoms::ok(), result_term)
173+
.encode(env))
174+
175+
}
176+
159177
rustler_export_nifs!(
160178
"Elixir.ExHtml5ever.Native",
161-
[("parse_async", 1, parse_async)],
179+
[("parse_async", 1, parse_async),
180+
("parse_sync", 1, parse_sync)],
162181
Some(on_load)
163182
);
164183

test/ex_html5ever_test.exs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@ defmodule ExHtml5everTest do
22
use ExUnit.Case
33
doctest ExHtml5ever
44

5+
#def p() do
6+
# receive do
7+
# thing -> IO.inspect thing
8+
# end
9+
# p()
10+
#end
11+
12+
#setup_all do
13+
# pid = Process.spawn(ExHtml5everTest, :p, [], [])
14+
# :erlang.system_monitor(pid, [{:long_schedule, 1}])
15+
# :ok
16+
#end
17+
518
def read_html(name) do
619
dir = to_string(:code.priv_dir(:ex_html5ever)) <> "/test_data/"
720
File.read!(dir <> name)
@@ -14,7 +27,7 @@ defmodule ExHtml5everTest do
1427
end
1528

1629
test "parse example.com html" do
17-
html = read_html("drudgereport.html")
30+
html = read_html("example.html")
1831
assert match?({:ok, _}, ExHtml5ever.parse(html))
1932
end
2033

@@ -23,4 +36,9 @@ defmodule ExHtml5everTest do
2336
assert match?({:ok, _}, ExHtml5ever.parse(html))
2437
end
2538

39+
test "unbalanced worst case" do
  # 100 opening <div> tags with no closing tags at all.
  unclosed_divs = String.duplicate("<div>", 100)
  assert {:ok, _} = ExHtml5ever.parse(unclosed_divs)
end
43+
2644
end

0 commit comments

Comments
 (0)