Skip to content
This repository was archived by the owner on Apr 27, 2022. It is now read-only.

Commit 22a7e3d

Browse files
committed
first basic version
0 parents  commit 22a7e3d

File tree

17 files changed

+22558
-0
lines changed

17 files changed

+22558
-0
lines changed

.gitignore

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# The directory Mix will write compiled artifacts to.
2+
/_build
3+
4+
# If you run "mix test --cover", coverage assets end up here.
5+
/cover
6+
7+
# The directory Mix downloads your dependencies sources to.
8+
/deps
9+
10+
# Where 3rd-party dependencies like ExDoc output generated docs.
11+
/doc
12+
13+
# Ignore .fetch files in case you like to edit your project deps locally.
14+
/.fetch
15+
16+
# If the VM crashes, it generates a dump, let's ignore it too.
17+
erl_crash.dump
18+
19+
# Also ignore archive artifacts (built via "mix archive.build").
20+
*.ez
21+
22+
/priv/rustler

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Html5everErlang
2+
3+
An Elixir NIF wrapper around html5ever, built with Rustler.
4+
5+
FIXME: This currently spawns a new thread on every parse call, fix this!
6+
7+
## Installation
8+
9+
If [available in Hex](https://hex.pm/docs/publish), the package can be installed
10+
by adding `ex_html5ever` to your list of dependencies in `mix.exs`:
11+
12+
```elixir
13+
def deps do
14+
[{:ex_html5ever, "~> 0.1.0"}]
15+
end
16+
```
17+
18+
Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
19+
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
20+
be found at [https://hexdocs.pm/ex_html5ever](https://hexdocs.pm/ex_html5ever).
21+

config/config.exs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# This file is responsible for configuring your application
2+
# and its dependencies with the aid of the Mix.Config module.
3+
use Mix.Config
4+
5+
# This configuration is loaded before any dependency and is restricted
6+
# to this project. If another project depends on this project, this
7+
# file won't be loaded nor affect the parent project. For this reason,
8+
# if you want to provide default values for your application for
9+
# 3rd-party users, it should be done in your "mix.exs" file.
10+
11+
# You can configure for your application as:
12+
#
13+
# config :ex_html5ever, key: :value
14+
#
15+
# And access this configuration in your application as:
16+
#
17+
# Application.get_env(:ex_html5ever, :key)
18+
#
19+
# Or configure a 3rd-party app:
20+
#
21+
# config :logger, level: :info
22+
#
23+
24+
# It is also possible to import configuration files, relative to this
25+
# directory. For example, you can emulate configuration per environment
26+
# by uncommenting the line below and defining dev.exs, test.exs and such.
27+
# Configuration from the imported file will override the ones defined
28+
# here (which is why it is important to import them last).
29+
#
30+
# import_config "#{Mix.env}.exs"

lib/ex_html5ever.ex

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
defmodule ExHtml5ever do
  @moduledoc """
  Documentation for ExHtml5ever.
  """

  # Hands `html` to the native `parse_async/1` and then blocks on the calling
  # process' mailbox until a `{:html5ever_nif_result, status, payload}` message
  # with status `:ok` or `:error` shows up. There is no `after` clause, so the
  # call waits indefinitely if no such message is ever delivered.
  #
  # Returns `{:ok, result}` or `{:error, err}`.
  def parse(html) do
    ExHtml5ever.Native.parse_async(html)

    receive do
      {:html5ever_nif_result, status, payload} when status in [:ok, :error] ->
        {status, payload}
    end
  end
end

lib/ex_html5ever/native.ex

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Raised by the fallback function bodies in ExHtml5ever.Native when they are
# reached, i.e. when the native library did not take over the call.
defmodule NifNotLoadedError do
  defexception message: "nif not loaded"
end

# Thin binding module for the "html5ever_nif" native library.
defmodule ExHtml5ever.Native do
  @on_load :load_nif

  # Runs when the module is loaded and asks Rustler to load the NIF library
  # registered for the :ex_html5ever application.
  def load_nif do
    require Rustler
    Rustler.load_nif(:ex_html5ever, "html5ever_nif")
  end

  # Fallback body for the NIF entry point. The argument is unused here, hence
  # the underscore prefix (avoids an unused-variable compiler warning).
  def parse_async(_binary), do: err()

  # Raise the exception struct. The previous `throw NifNotLoadedError` threw the
  # bare module atom, which bypasses `rescue` and loses the exception message.
  defp err() do
    raise NifNotLoadedError
  end
end

mix.exs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
defmodule ExHtml5ever.Mixfile do
  use Mix.Project

  # Project definition. The :rustler compiler is placed in front of the
  # default Mix compilers, and the crates it builds come from rustler_crates/0.
  def project do
    [
      app: :ex_html5ever,
      version: "0.1.0",
      elixir: "~> 1.4",
      build_embedded: Mix.env() == :prod,
      start_permanent: Mix.env() == :prod,
      compilers: [:rustler | Mix.compilers()],
      rustler_crates: rustler_crates(),
      deps: deps()
    ]
  end

  # Crate configuration handed to the Rustler compiler.
  def rustler_crates do
    [
      html5ever_nif: [
        path: "/native/html5ever_nif",
        cargo: :system,
        default_features: false,
        features: [],
        # Always :release for now; the environment-dependent alternative was
        # (if Mix.env == :prod, do: :release, else: :debug)
        mode: :release
      ]
    ]
  end

  # Configuration for the OTP application.
  #
  # Type "mix help compile.app" for more information.
  def application do
    # Extra applications used from Erlang/Elixir.
    [extra_applications: [:logger]]
  end

  # Dependencies can be Hex packages:
  #
  #   {:my_dep, "~> 0.3.0"}
  #
  # Or git/path repositories:
  #
  #   {:my_dep, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"}
  #
  # Type "mix help deps" for more examples and options.
  defp deps do
    [
      {:rustler, github: "hansihe/Rustler", sparse: "rustler_mix"}
    ]
  end
end

mix.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
%{"rustler": {:git, "https://github.com/hansihe/Rustler.git", "aad4ecf3a7ac03e5774ed110cf8802917cc7a3eb", [sparse: "rustler_mix"]}}

native/html5ever_nif/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
target
2+
Cargo.lock

native/html5ever_nif/Cargo.toml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[package]
2+
name = "html5ever_nif"
3+
version = "0.1.0"
4+
authors = ["HansiHE <hansihe@hansihe.com>"]
5+
6+
[lib]
7+
name = "html5ever_nif"
8+
path = "src/lib.rs"
9+
crate-type = ["dylib"]
10+
11+
[build-dependencies]
12+
rustler_codegen = { git = "https://github.com/hansihe/Rustler" }
13+
14+
[dependencies]
15+
rustler = { git = "https://github.com/hansihe/Rustler" }
16+
rustler_codegen = { git = "https://github.com/hansihe/Rustler" }
17+
18+
html5ever = "*"
19+
tendril = "*"
20+
lazy_static = "*"

native/html5ever_nif/src/flat_dom.rs

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
use ::html5ever;
2+
use html5ever::{ QualName, Attribute };
3+
use html5ever::tree_builder::interface::{ TreeSink, QuirksMode, NodeOrText };
4+
5+
use tendril::{ StrTendril, TendrilSink };
6+
7+
use std::borrow::Cow;
8+
9+
#[derive(Copy, Clone, PartialEq, Debug)]
10+
/// Handle to a node held by a `FlatSink`: the wrapped `usize` is the node's
/// index into the sink's `elements` vector.
pub struct ElementHandle(usize);
11+
12+
#[derive(Debug)]
13+
struct Element {
14+
id: usize,
15+
name: Option<QualName>,
16+
children: Vec<ElementHandle>,
17+
parent: Option<ElementHandle>,
18+
last_string: bool,
19+
}
20+
impl Element {
21+
fn new(id: usize) -> Self {
22+
Element {
23+
id: id,
24+
name: None,
25+
children: Vec::with_capacity(10),
26+
parent: None,
27+
last_string: false,
28+
}
29+
}
30+
31+
fn handle(&self) -> ElementHandle {
32+
ElementHandle(self.id)
33+
}
34+
}
35+
36+
#[derive(Debug)]
37+
enum ElementType {
38+
Element(Element),
39+
Text(StrTendril),
40+
}
41+
impl ElementType {
42+
fn elem(&self) -> &Element {
43+
match self {
44+
&ElementType::Element(ref elem) => elem,
45+
&ElementType::Text(_) => unreachable!(),
46+
}
47+
}
48+
fn elem_mut(&mut self) -> &mut Element {
49+
match self {
50+
&mut ElementType::Element(ref mut elem) => elem,
51+
&mut ElementType::Text(_) => unreachable!(),
52+
}
53+
}
54+
fn text_mut(&mut self) -> &mut StrTendril {
55+
match self {
56+
&mut ElementType::Element(_) => unreachable!(),
57+
&mut ElementType::Text(ref mut st) => st,
58+
}
59+
}
60+
}
61+
62+
#[derive(Debug)]
63+
pub struct FlatSink {
64+
elements: Vec<ElementType>,
65+
}
66+
67+
impl FlatSink {
68+
69+
pub fn new() -> FlatSink {
70+
let mut sink = FlatSink {
71+
elements: Vec::with_capacity(200),
72+
};
73+
74+
// Element 0 is always root
75+
sink.elements.push(ElementType::Element(Element::new(0)));
76+
77+
sink
78+
}
79+
80+
fn elem(&self, elem: ElementHandle) -> &ElementType {
81+
&self.elements[elem.0]
82+
}
83+
fn elem_mut(&mut self, elem: ElementHandle) -> &mut ElementType {
84+
&mut self.elements[elem.0]
85+
}
86+
87+
fn new_elem(&mut self) -> &mut Element {
88+
let idx = self.elements.len();
89+
self.elements.push(ElementType::Element(Element::new(idx)));
90+
self.elements[idx].elem_mut()
91+
}
92+
fn new_text(&mut self, text: StrTendril) -> ElementHandle {
93+
let idx = self.elements.len();
94+
self.elements.push(ElementType::Text(text));
95+
ElementHandle(idx)
96+
}
97+
98+
fn append_node(&mut self, parent: ElementHandle, child: ElementHandle) {
99+
self.elem_mut(child).elem_mut().parent = Some(parent);
100+
let elem = self.elem_mut(parent).elem_mut();
101+
elem.children.push(child);
102+
elem.last_string = false;
103+
}
104+
105+
fn append_text(&mut self, parent: ElementHandle, child: StrTendril) {
106+
if self.elem(parent).elem().last_string {
107+
match self.elem(parent).elem().children.last() {
108+
Some(&handle) => self.elem_mut(handle).text_mut().push_tendril(&child),
109+
_ => unreachable!(),
110+
}
111+
} else {
112+
let st = self.new_text(child);
113+
let elem = self.elem_mut(parent).elem_mut();
114+
elem.children.push(st);
115+
elem.last_string = true;
116+
}
117+
}
118+
119+
}
120+
121+
impl TreeSink for FlatSink {
122+
type Output = u32;
123+
type Handle = ElementHandle;
124+
125+
fn finish(self) -> Self::Output {
126+
println!("{:?}", self);
127+
0
128+
}
129+
130+
// TODO: Log this or something
131+
fn parse_error(&mut self, msg: Cow<'static, str>) {}
132+
fn set_quirks_mode(&mut self, mode: QuirksMode) {}
133+
134+
fn get_document(&mut self) -> Self::Handle { ElementHandle(0) }
135+
fn get_template_contents(&mut self, target: Self::Handle) -> Self::Handle {
136+
panic!("Templates not supported");
137+
}
138+
139+
fn same_node(&self, x: Self::Handle, y: Self::Handle) -> bool { x == y }
140+
fn elem_name(&self, target: Self::Handle) -> QualName {
141+
self.elem(target).elem().name.as_ref().map(|i| i.clone()).unwrap()
142+
}
143+
144+
fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>) -> Self::Handle {
145+
let elem = self.new_elem();
146+
elem.name = Some(name);
147+
elem.handle()
148+
}
149+
150+
fn create_comment(&mut self, _text: StrTendril) -> Self::Handle {
151+
let elem = self.new_elem();
152+
elem.handle()
153+
}
154+
155+
fn append(&mut self, parent: Self::Handle, child: NodeOrText<Self::Handle>) {
156+
match child {
157+
NodeOrText::AppendNode(node) => self.append_node(parent, node),
158+
NodeOrText::AppendText(text) => self.append_text(parent, text),
159+
};
160+
}
161+
162+
fn append_before_sibling(&mut self, sibling: Self::Handle, new_node: NodeOrText<Self::Handle>) -> Result<(), NodeOrText<Self::Handle>> {
163+
panic!("unsupported");
164+
}
165+
166+
fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril) {
167+
println!("append_doctype_to_document");
168+
}
169+
170+
fn add_attrs_if_missing(&mut self, target: Self::Handle, attrs: Vec<Attribute>) {
171+
panic!("unsupported");
172+
}
173+
174+
fn remove_from_parent(&mut self, target: Self::Handle) {
175+
panic!("unsupported");
176+
}
177+
178+
fn reparent_children(&mut self, node: Self::Handle, new_parent: Self::Handle) {
179+
panic!("unsupported");
180+
}
181+
182+
fn mark_script_already_started(&mut self, elem: Self::Handle) {
183+
panic!("unsupported");
184+
}
185+
186+
}

0 commit comments

Comments
 (0)