Skip to content

Commit d37643d

Browse files
committed
more efficient par_render function
1 parent 1635374 commit d37643d

File tree

3 files changed

+32
-49
lines changed

3 files changed

+32
-49
lines changed

Cargo.toml

+6-6
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ rayon = { version = "1.0.1", optional = true }
1616

1717
# uncomment if doing benchmarks
1818
# otherwise, it just slows down compilation of examples
19-
# [dev-dependencies]
20-
# criterion = "0.2.3"
21-
# handlebars = "0.32.0"
22-
# serde_json = "1.0.16"
23-
# serde_derive = "1.0.43"
24-
# serde = "1.0.43"
19+
[dev-dependencies]
20+
criterion = "0.2.3"
21+
handlebars = "0.32.0"
22+
serde_json = "1.0.16"
23+
serde_derive = "1.0.43"
24+
serde = "1.0.43"
2525

2626
[[bench]]
2727
name = "benchmark"

benches/benchmark.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ fn bench_zapper(c: &mut Criterion) {
6767

6868
// build up a group of (similar) people; the loop bound below sets the group size
6969
let mut group = vec![];
70-
for i in 0..100 {
70+
for i in 0..1000 {
7171
group.push(Person {
7272
id: 12 + i,
7373
name: "Bob".to_string(),
@@ -105,7 +105,7 @@ fn bench_zapper_par(c: &mut Criterion) {
105105

106106
// build up a group of (similar) people; the loop bound below sets the group size
107107
let mut group = vec![];
108-
for i in 0..100 {
108+
for i in 0..1000 {
109109
group.push(Person {
110110
id: 12 + i,
111111
name: "Bob".to_string(),
@@ -117,7 +117,7 @@ fn bench_zapper_par(c: &mut Criterion) {
117117
c.bench_function("zapper_par", move |b| {
118118
b.iter(|| {
119119
let mut output = Vec::new();
120-
bytecode.par_render(&group, &mut output).unwrap();
120+
bytecode.par_render(&group, &mut output, 20).unwrap();
121121
output
122122
})
123123
});

src/bytecode.rs

+23-40
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ use std::fmt::Debug;
77
use std::io::Write;
88
use tokenizer::Operator;
99

10-
#[cfg(feature = "rayon")]
11-
use rayon::iter::IntoParallelRefIterator;
1210
#[cfg(feature = "rayon")]
1311
use rayon::prelude::*;
1412

@@ -74,58 +72,43 @@ impl<
7472
}
7573

7674
/// Renders a template across multiple items in parallel using Rayon with
77-
/// convenient internally-managed buffers.
75+
/// convenient internally-managed buffers. `par_chunk_size` controls the number
76+
/// of items from `runner` that are rendered on each thread before locking the mutex
77+
/// that wraps `output` to write out the work done so far.
7878
///
79-
/// NOTE: This function makes serious trade-offs to enable the _maximum_ throughput.
80-
/// It is far less efficient, and builds up a single buffer containing all results
81-
/// prior to writing this buffer into the output, so it can consume much more memory,
82-
/// and the latency to first-write is also significantly higher.
79+
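/// NOTE: This function makes trade-offs to enable the _maximum_ throughput.
80+
/// It is less efficient, but given the right `par_chunk_size` and the right number
81+
/// of cores, it can increase total throughput. Each chunk is written to `output` as soon as it finishes, so the order of chunks in the output is not guaranteed.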
8382
///
84-
/// Only use if total throughput is the sole concern.
83+
/// A recommended starting point for `par_chunk_size` is 20; it must be non-zero, since rayon's `par_chunks` panics on a chunk size of 0.
8584
#[cfg(feature = "rayon")]
86-
pub fn par_render<'b, RunnerItem>(
85+
pub fn par_render<'b, RunnerItem, Writer>(
8786
&self,
8887
runner: &[RunnerItem],
89-
output: &mut Write,
90-
) -> Result<(), Vec<::std::io::Error>>
88+
output: &mut Writer,
89+
par_chunk_size: usize,
90+
) -> Result<(), ::std::io::Error>
9191
where
9292
RunnerItem: 'b + Runner<NumEnum, StrEnum, FilterEnum> + Send + Sync,
93+
Writer: Write + Send,
9394
{
9495
thread_local!(static STORE: (Vec<f64>, String) = (Vec::with_capacity(8), String::with_capacity(8)));
9596

96-
let results: Result<Vec<u8>, Vec<::std::io::Error>> = runner
97-
.par_iter()
98-
.map(|item| {
97+
let output = ::std::sync::Mutex::new(output);
98+
99+
runner
100+
.par_chunks(par_chunk_size)
101+
.map(|items| {
99102
STORE.with(|(ref mut stack, ref mut buffer)| {
100103
let mut write_buf = Vec::with_capacity(8);
101-
write_buf.clear();
102-
self.render_with(item, &mut write_buf, stack, buffer)
103-
.map(|_| write_buf)
104-
.map_err(|e| vec![e])
104+
for item in items {
105+
self.render_with(item, &mut write_buf, stack, buffer)?;
106+
}
107+
output.lock().unwrap().write_all(&write_buf)?;
108+
return Ok(());
105109
})
106110
})
107-
.reduce(
108-
|| Ok(Vec::with_capacity(32)),
109-
|acc, item| match (acc, item) {
110-
(Ok(mut buf), Ok(new_buf)) => {
111-
buf.extend(new_buf);
112-
Ok(buf)
113-
}
114-
(Err(mut errors), Err(new_errors)) => {
115-
errors.extend(new_errors);
116-
Err(errors)
117-
}
118-
(Err(errs), _) | (_, Err(errs)) => Err(errs),
119-
},
120-
);
121-
122-
match results {
123-
Ok(result) => match output.write_all(&result) {
124-
Ok(_) => Ok(()),
125-
Err(e) => Err(vec![e]),
126-
},
127-
Err(errors) => Err(errors),
128-
}
111+
.collect()
129112
}
130113

131114
/// Renders a template using convenient internally-managed buffers, which requires a mutable reference to self.

0 commit comments

Comments
 (0)