Skip to content

Commit 29465f0

Browse files
perf(tokenizer): did some performance work (#115)
* feat: 🎸 finish * chore: 🤖 fix clippy * chore: 🤖 clean up
1 parent 3efc40d commit 29465f0

File tree

8 files changed

+117
-98
lines changed

8 files changed

+117
-98
lines changed

crates/recursive-parser/benches/parse.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use recursive_parser::parser::{Parser, Root};
44
const SMALL_CSS_FILE: &str = include_str!("../../../assets/bootstrap-reboot.css");
55
const LARGE_CSS_FILE: &str = include_str!("../../../assets/bootstrap.css");
66

7-
fn parse<'a>(css: &'a str) -> Root {
7+
fn parse(css: &str) -> Root {
88
let parser = Parser::new(css);
99
parser.parse().unwrap()
1010
}

crates/recursive-parser/examples/plugin.rs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// use mimalloc_rust::*;
22
use recursive_parser::{parser::*, visitor::VisitMut};
3-
use std::{borrow::Cow, io::Write, time::Instant};
3+
use std::fmt::Write;
4+
use std::{borrow::Cow, time::Instant};
45

56
// #[global_allocator]
67
// static GLOBAL_MIMALLOC: GlobalMiMalloc = GlobalMiMalloc;
@@ -12,17 +13,17 @@ fn main() {
1213
}
1314
}";
1415
let bootstrap = include_str!("../../../assets/bootstrap-reboot.css");
15-
let mut start = Instant::now();
16+
let start = Instant::now();
1617
let mut root = Parser::new(bootstrap).parse().unwrap();
1718
println!("parse {:?}", start.elapsed());
1819
// start = Instant::now();
1920
// ReverseProp::default().visit_root(&mut root);
2021
// println!("reverse {:?}", start.elapsed());
2122
let start = Instant::now();
22-
let mut printer = SimplePrettier::new(Vec::with_capacity(bootstrap.len()));
23+
let mut printer = SimplePrettier::new(String::with_capacity(bootstrap.len()));
2324
printer.visit_root(&mut root).unwrap();
2425
println!("stringify {:?}", start.elapsed());
25-
println!("{}", String::from_utf8(printer.writer).unwrap());
26+
println!("{}", printer.writer);
2627
}
2728

2829
#[derive(Default)]
@@ -37,8 +38,8 @@ impl<W: Write> SimplePrettier<W> {
3738
}
3839
}
3940

40-
impl<'a, W: std::io::Write> VisitMut<'a, std::io::Result<()>> for SimplePrettier<W> {
41-
fn visit_root(&mut self, root: &mut Root<'a>) -> std::io::Result<()> {
41+
impl<'a, W: Write> VisitMut<'a, std::fmt::Result> for SimplePrettier<W> {
42+
fn visit_root(&mut self, root: &mut Root<'a>) -> std::fmt::Result {
4243
for child in root.children.iter_mut() {
4344
match child {
4445
RuleOrAtRuleOrDecl::Rule(rule) => {
@@ -55,10 +56,13 @@ impl<'a, W: std::io::Write> VisitMut<'a, std::io::Result<()>> for SimplePrettier
5556
Ok(())
5657
}
5758

58-
fn visit_rule(&mut self, rule: &mut Rule<'a>) -> std::io::Result<()> {
59-
self
60-
.writer
61-
.write(format!("{}{} {}\n", " ".repeat(self.level * 2), rule.selector, "{").as_bytes())?;
59+
fn visit_rule(&mut self, rule: &mut Rule<'a>) -> std::fmt::Result {
60+
writeln!(
61+
self.writer,
62+
"{}{} {{",
63+
" ".repeat(self.level * 2),
64+
rule.selector,
65+
)?;
6266
self.level += 1;
6367
for child in rule.children.iter_mut() {
6468
match child {
@@ -74,18 +78,17 @@ impl<'a, W: std::io::Write> VisitMut<'a, std::io::Result<()>> for SimplePrettier
7478
}
7579
}
7680
self.level -= 1;
77-
write!(self.writer, "{}{}\n", " ".repeat(self.level * 2), "}")?;
81+
writeln!(self.writer, "{}}}", " ".repeat(self.level * 2),)?;
7882
Ok(())
7983
}
8084

81-
fn visit_at_rule(&mut self, at_rule: &mut AtRule<'a>) -> std::io::Result<()> {
82-
write!(
85+
fn visit_at_rule(&mut self, at_rule: &mut AtRule<'a>) -> std::fmt::Result {
86+
writeln!(
8387
self.writer,
84-
"{}@{} {} {}\n",
88+
"{}@{} {} {{",
8589
" ".repeat(self.level * 2),
8690
at_rule.name,
8791
at_rule.params,
88-
"{"
8992
)?;
9093
self.level += 1;
9194
for child in at_rule.children.iter_mut() {
@@ -102,13 +105,13 @@ impl<'a, W: std::io::Write> VisitMut<'a, std::io::Result<()>> for SimplePrettier
102105
}
103106
}
104107
self.level -= 1;
105-
write!(self.writer, "{}{}\n", " ".repeat(self.level * 2), "}")
108+
writeln!(self.writer, "{}}}", " ".repeat(self.level * 2))
106109
}
107110

108-
fn visit_declaration(&mut self, decl: &mut Declaration<'a>) -> std::io::Result<()> {
109-
write!(
111+
fn visit_declaration(&mut self, decl: &mut Declaration<'a>) -> std::fmt::Result {
112+
writeln!(
110113
self.writer,
111-
"{}{} : {};\n",
114+
"{}{} : {};",
112115
" ".repeat(self.level * 2),
113116
decl.prop,
114117
decl.value

crates/recursive-parser/tests/basic.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ mod test_ast {
2626
dbg!(&file_name);
2727
let expected_ast_path = format!(
2828
"./tests/fixtures/{}.ast",
29-
file_name.rsplit_once(".").unwrap().0
29+
file_name.rsplit_once('.').unwrap().0
3030
);
3131
let expected_ast = read_to_string(expected_ast_path)?;
3232
let parser = Parser::new(&file);
@@ -79,7 +79,7 @@ mod test_ast {
7979
let file = read_to_string(format!("./tests/official-cases/{}", file_name))?;
8080
let expected_ast_path = format!(
8181
"./tests/official-cases/{}.ast",
82-
file_name.rsplit_once(".").unwrap().0
82+
file_name.rsplit_once('.').unwrap().0
8383
);
8484
let expected_ast = read_to_string(expected_ast_path)?;
8585
let parser = Parser::new(&file);

crates/rowan-parser/benches/parse.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use rowan_parser::{parser, syntax::SyntaxNode};
44
const SMALL_CSS_FILE: &str = include_str!("../../../assets/bootstrap-reboot.css");
55
const LARGE_CSS_FILE: &str = include_str!("../../../assets/bootstrap.css");
66

7-
fn parse<'a>(css: &'a str) -> SyntaxNode {
7+
fn parse(css: &str) -> SyntaxNode {
88
let parser = parser::Parser::new(css);
99
parser.parse()
1010
}

crates/rowan-parser/examples/remove_space.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ static GLOBAL_MIMALLOC: GlobalMiMalloc = GlobalMiMalloc;
99
fn main() {
1010
let css = "#id { font-size: 12px; }";
1111
let root = Parser::new(css).parse();
12-
let root_mut = root.clone_for_update().clone();
12+
let root_mut = root.clone_for_update();
1313
remove_space_mut(&root_mut);
1414
let mut output = String::with_capacity(0);
1515
remove_space(&root, &mut output, css);
@@ -47,7 +47,7 @@ fn remove_space_mut(node: &SyntaxNode) {
4747
if child.kind() == SyntaxKind::Space {
4848
child.detach();
4949
}
50-
child.as_node().map(|n| remove_space_mut(n));
50+
child.as_node().map(remove_space_mut);
5151
}
5252
}
5353

crates/tokenizer/src/input.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ impl fmt::Display for FilePosition {
2828
}
2929

3030
static DEFAULT_INPUT: Lazy<Input> = Lazy::new(Input::default);
31-
#[derive(Debug, PartialEq, Clone, Default)]
31+
#[derive(Debug, PartialEq, Eq, Clone, Default)]
3232
pub struct Input<'a> {
3333
pub css: &'a str,
3434
// map: PreviousMap,

crates/tokenizer/src/main.rs

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,47 @@
1-
use std::env::current_exe;
2-
use std::fs::read_to_string;
1+
// use std::env::current_exe;
2+
// use std::fs::read_to_string;
33
use std::time::Instant;
4-
use tokenizer::Tokenizer;
5-
4+
use tokenizer::tokenize;
65
fn main() {
7-
let file_list = [
8-
// ("tailwind-components.css", "2.8K"),
9-
// ("bootstrap-reboot.css", "7.4K"),
10-
// ("bootstrap-grid.css", "71K"),
11-
("bootstrap.css", "201K"),
12-
// ("tailwind.css", "3.5M"),
13-
// ("tailwind-dark.css", "5.8M"),
14-
];
6+
let source = include_str!("../../../assets/bootstrap.css");
7+
let start = Instant::now();
8+
for _ in 0..100 {
9+
tokenize(source);
10+
}
11+
println!("{:?}", start.elapsed());
12+
// let vec = vec![
13+
// b'\t', b'\n', b'\r', b' ', b'"', b'#', b'\'', b'(', b')', b'/', b';', b'[', b'\\', b']', b'{',
14+
// b'}',
15+
// ];
16+
// '\t', '\n', '\u{c}', '\r', ' ', '!', '"', '#', '\'', '(', ')', ':', ';', '@', '[', '\\', ']',
17+
// '{', '}', '/',
18+
// ];
19+
// let file_list = [
20+
// // ("tailwind-components.css", "2.8K"),
21+
// // ("bootstrap-reboot.css", "7.4K"),
22+
// // ("bootstrap-grid.css", "71K"),
23+
// ("bootstrap.css", "201K"),
24+
// // ("tailwind.css", "3.5M"),
25+
// // ("tailwind-dark.css", "5.8M"),
26+
// ];
1527

16-
let assets_path = get_assets_path();
28+
// let assets_path = get_assets_path();
1729

18-
for (file, size) in file_list {
19-
let css: String = read_to_string(format!("{}/{}", assets_path, file)).unwrap();
20-
let mut vec = Vec::default();
21-
let start = Instant::now();
22-
let processor = Tokenizer::new(&css, false);
23-
while !processor.end_of_file() {
24-
vec.push(processor.next_token(false));
25-
}
26-
let end = start.elapsed();
27-
println!("rust: tokenizer/{}({}): {:?}", file, size, end);
28-
}
30+
// for (file, size) in file_list {
31+
// let css: String = read_to_string(format!("{}/{}", assets_path, file)).unwrap();
32+
// let mut vec = Vec::default();
33+
// let start = Instant::now();
34+
// let processor = Tokenizer::new(&css, false);
35+
// while !processor.end_of_file() {
36+
// vec.push(processor.next_token(false));
37+
// }
38+
// let end = start.elapsed();
39+
// println!("rust: tokenizer/{}({}): {:?}", file, size, end);
40+
// }
2941
}
3042

31-
fn get_assets_path() -> String {
32-
let mut path = current_exe().unwrap();
33-
path.push("../../../assets");
34-
path.canonicalize().unwrap().to_str().unwrap().to_string()
35-
}
43+
// fn get_assets_path() -> String {
44+
// let mut path = current_exe().unwrap();
45+
// path.push("../../../assets");
46+
// path.canonicalize().unwrap().to_str().unwrap().to_string()
47+
// }

crates/tokenizer/src/tokenizer.rs

Lines changed: 47 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,26 @@ const AT: char = '@';
3030

3131
const MAX_BUFFER: usize = 102400;
3232

33+
const INDEX_OF_WORD_END: [usize; 255] = [
34+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35+
1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
36+
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
37+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
38+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42+
];
43+
const INDEX_OF_AT_END: [usize; 255] = [
44+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45+
1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
46+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
47+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
48+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52+
];
3353
static FINDER_END_OF_COMMENT: Lazy<Finder<'static>> = Lazy::new(|| Finder::new("*/"));
3454

3555
#[derive(Debug, Clone, Eq, PartialEq, Copy)]
@@ -261,9 +281,13 @@ impl<'a> Tokenizer<'a> {
261281
self.pos.replace(next + 1);
262282
}
263283
AT => {
264-
let next = index_of_at_end(self.css, self.position() + 1) - 1;
265-
current_token = Token(TokenType::AtWord, self.position(), next + 1);
266-
self.pos.replace(next + 1);
284+
let next = index_of_at_end(&self.css[self.position() + 1..]);
285+
current_token = Token(
286+
TokenType::AtWord,
287+
self.position(),
288+
next + self.position() + 1,
289+
);
290+
self.pos.replace(next + self.position() + 1);
267291
}
268292
BACKSLASH => {
269293
let mut next = self.position();
@@ -311,11 +335,12 @@ impl<'a> Tokenizer<'a> {
311335
current_token = Token(TokenType::Comment, self.position(), next + 1);
312336
next
313337
} else {
314-
let next = index_of_word_end(self.css, self.position() + 1) - 1;
315-
let content = sub_str(self.css, self.position(), next + 1);
316-
current_token = Token::new(TokenType::Word, self.position(), next + 1);
338+
let position = self.position();
339+
let next = index_of_word_end(&self.css[position + 1..]);
340+
let content = sub_str(self.css, self.position(), next + position + 1);
341+
current_token = Token::new(TokenType::Word, self.position(), next + position + 1);
317342
self.push(content);
318-
next
343+
next + position
319344
},
320345
);
321346
self.pos_plus_one();
@@ -366,11 +391,7 @@ fn sub_str(s: &str, start: usize, end: usize) -> &str {
366391

367392
#[inline]
368393
fn char_code_at(s: &str, n: usize) -> char {
369-
if n >= s.len() {
370-
'\0'
371-
} else {
372-
s.as_bytes()[n] as char
373-
}
394+
*s.as_bytes().get(n).unwrap_or(&b'\0') as char
374395
}
375396

376397
#[inline]
@@ -398,47 +419,30 @@ fn is_bad_bracket(s: &str) -> bool {
398419
}
399420

400421
#[inline]
401-
fn index_of_at_end(s: &str, start: usize) -> usize {
402-
let bytes = s.as_bytes();
403-
let mut i = start;
404-
let len = bytes.len();
405-
406-
while i < len {
407-
match bytes[i] as char {
408-
'\t' | '\n' | '\u{c}' | '\r' | ' ' | '"' | '#' | '\'' | '(' | ')' | '/' | ';' | '['
409-
| '\\' | ']' | '{' | '}' => {
410-
return i;
411-
}
412-
_ => i += 1,
422+
fn index_of_at_end(s: &str) -> usize {
423+
for (i, ch) in s.bytes().enumerate() {
424+
if let 1 = INDEX_OF_AT_END[ch as usize] {
425+
return i;
413426
};
414427
}
415428

416-
i
429+
s.len()
417430
}
418431

419432
#[inline]
420-
fn index_of_word_end(s: &str, start: usize) -> usize {
421-
let bytes = s.as_bytes();
422-
let mut i = start;
423-
let len = bytes.len();
424-
425-
while i < len {
426-
match bytes[i] as char {
427-
'\t' | '\n' | '\u{c}' | '\r' | ' ' | '!' | '"' | '#' | '\'' | '(' | ')' | ':' | ';' | '@'
428-
| '[' | '\\' | ']' | '{' | '}' => {
429-
return i;
430-
}
431-
'/' => {
432-
if bytes[i + 1] as char == '*' {
433+
fn index_of_word_end(s: &str) -> usize {
434+
for (i, ch) in s.bytes().enumerate() {
435+
match INDEX_OF_WORD_END[ch as usize] {
436+
1 => return i,
437+
2 => {
438+
if s.as_bytes().get(i + 1) == Some(&b'*') {
433439
return i;
434-
} else {
435-
i += 1;
436440
}
437441
}
438-
_ => i += 1,
439-
};
442+
_ => continue,
443+
}
440444
}
441-
i
445+
s.len()
442446
}
443447

444448
/// SAFETY: YOU SHOULD NEVER CALL THIS FUNCTION WITH THE PARAM OTHER THAN THESE BELOW.

0 commit comments

Comments (0)