Skip to content

Commit c004e78

Browse files
committed
Safe implementations of uart::dec() and divisions
The previous implementation of uart::dec() forced the compiler to emit a software out-of-bounds access check into the code, bloating it a bit. Prevent that by using an iterator instead. The same is true for the ordinary division operator that was used in multiple places in the benchmark code. Here, the compiler emitted software checks for division by zero. Prevent this by using `checked_div()`, which lets us implement our own "panic" handling.
1 parent 17b582f commit c004e78

File tree

6 files changed

+62
-46
lines changed

6 files changed

+62
-46
lines changed

0D_cache_performance/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
3333
read the value, add 1, and write it back. This whole process is repeated
3434
`20_000` times.
3535

36-
### main.rs
37-
3836
The benchmark function is called twice. Once for the cacheable and once for the
3937
non-cacheable virtual addresses. Remember that both virtual addresses point to
4038
the _same_ physical DRAM, so the difference in time that we will see will

0D_cache_performance/kernel8

-6.77 KB
Binary file not shown.

0D_cache_performance/kernel8.img

-2.74 KB
Binary file not shown.

0D_cache_performance/src/benchmark.rs

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use core::sync::atomic::{compiler_fence, Ordering};
22
use cortex_a::{barrier, regs::*};
3+
use super::uart;
34

45
/// We assume that addr is cacheline aligned
5-
pub fn batch_modify(addr: u64) -> u32 {
6+
fn batch_modify_time(addr: u64) -> Option<u64> {
67
const CACHELINE_SIZE_BYTES: usize = 64; // TODO: retrieve this from a system register
78
const NUM_CACHELINES_TOUCHED: usize = 5;
89
const NUM_BENCH_ITERATIONS: usize = 20_000;
@@ -34,6 +35,62 @@ pub fn batch_modify(addr: u64) -> u32 {
3435
unsafe { barrier::dsb(barrier::SY) };
3536

3637
let t2 = CNTPCT_EL0.get();
38+
let frq = u64::from(CNTFRQ_EL0.get());
3739

38-
((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32
40+
((t2 - t1) * 1000).checked_div(frq)
41+
}
42+
43+
pub fn run(uart: &uart::Uart) {
44+
const SIZE_2MIB: u64 = 2 * 1024 * 1024;
45+
const ERROR_STRING: &str = "Something went wrong!";
46+
47+
// Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
48+
// NON-cacheable DRAM memory.
49+
let non_cacheable_addr: u64 = SIZE_2MIB;
50+
51+
// Start of the __THIRD__ virtual 2 MiB block.
52+
// Cacheable DRAM memory
53+
let cacheable_addr: u64 = 2 * SIZE_2MIB;
54+
55+
uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
56+
uart.hex(non_cacheable_addr as u32);
57+
uart.puts(", physical 0x");
58+
uart.hex(2 * SIZE_2MIB as u32);
59+
uart.puts(":\n");
60+
61+
let result_nc = match batch_modify_time(non_cacheable_addr) {
62+
Some(t) => {
63+
uart.dec(t as u32);
64+
uart.puts(" miliseconds.\n\n");
65+
t
66+
},
67+
None => {
68+
uart.puts(ERROR_STRING);
69+
return;
70+
}
71+
};
72+
73+
uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
74+
uart.hex(cacheable_addr as u32);
75+
uart.puts(", physical 0x");
76+
uart.hex(2 * SIZE_2MIB as u32);
77+
uart.puts(":\n");
78+
79+
let result_c = match batch_modify_time(cacheable_addr) {
80+
Some(t) => {
81+
uart.dec(t as u32);
82+
uart.puts(" miliseconds.\n\n");
83+
t
84+
},
85+
None => {
86+
uart.puts(ERROR_STRING);
87+
return;
88+
}
89+
};
90+
91+
if let Some(t) = (result_nc - result_c).checked_div(result_c) {
92+
uart.puts("With caching, the function is ");
93+
uart.dec((t * 100) as u32);
94+
uart.puts("% faster!\n");
95+
}
3996
}

0D_cache_performance/src/main.rs

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -43,43 +43,6 @@ mod mmu;
4343
mod uart;
4444
mod benchmark;
4545

46-
fn do_benchmarks(uart: &uart::Uart) {
47-
const SIZE_2MIB: u64 = 2 * 1024 * 1024;
48-
49-
// Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
50-
// NON-cacheable DRAM memory.
51-
let non_cacheable_addr: u64 = SIZE_2MIB;
52-
53-
// Start of the __THIRD__ virtual 2 MiB block.
54-
// Cacheable DRAM memory
55-
let cacheable_addr: u64 = 2 * SIZE_2MIB;
56-
57-
uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
58-
uart.hex(non_cacheable_addr as u32);
59-
uart.puts(", physical 0x");
60-
uart.hex(2 * SIZE_2MIB as u32);
61-
uart.puts(":\n");
62-
63-
let result_nc = benchmark::batch_modify(non_cacheable_addr);
64-
uart.dec(result_nc);
65-
uart.puts(" miliseconds.\n\n");
66-
67-
uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
68-
uart.hex(cacheable_addr as u32);
69-
uart.puts(", physical 0x");
70-
uart.hex(2 * SIZE_2MIB as u32);
71-
uart.puts(":\n");
72-
let result_c = benchmark::batch_modify(cacheable_addr);
73-
uart.dec(result_c);
74-
uart.puts(" miliseconds.\n\n");
75-
76-
let percent_diff = (result_nc - result_c) * 100 / result_c;
77-
78-
uart.puts("With caching, the function is ");
79-
uart.dec(percent_diff);
80-
uart.puts("% faster!\n");
81-
}
82-
8346
entry!(kernel_entry);
8447

8548
fn kernel_entry() -> ! {
@@ -102,7 +65,7 @@ fn kernel_entry() -> ! {
10265

10366
uart.puts("MMU is live \\o/\n\n");
10467

105-
do_benchmarks(&uart);
68+
benchmark::run(&uart);
10669

10770
// echo everything back
10871
loop {

0D_cache_performance/src/uart.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,12 +290,10 @@ impl Uart {
290290
pub fn dec(&self, d: u32) {
291291
let mut digits: [char; 10] = ['\0'; 10];
292292
let mut d = d;
293-
let mut i: usize = 0;
294293

295-
loop {
296-
digits[i] = ((d % 10) + 0x30) as u8 as char;
294+
for i in digits.iter_mut() {
295+
*i = ((d % 10) + 0x30) as u8 as char;
297296

298-
i += 1;
299297
d /= 10;
300298

301299
if d == 0 {

0 commit comments

Comments
 (0)