Skip to content

Commit 00ee4b0

Browse files
authored
Merge pull request #38 from Jules-Bertholet/refactor-tests
Refactor tests
2 parents 9c4477c + 49ef069 commit 00ee4b0

File tree

6 files changed

+124
-139
lines changed

6 files changed

+124
-139
lines changed

.github/workflows/rust.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ on:
77
branches: [ "master" ]
88

99
env:
10+
CARGO_INCREMENTAL: 0
1011
CARGO_TERM_COLOR: always
12+
RUST_BACKTRACE: 1
13+
RUSTFLAGS: -D warnings
14+
RUSTDOCFLAGS: -D warnings
1115

1216
jobs:
1317
build:
@@ -18,6 +22,12 @@ jobs:
1822
run: cargo build --verbose
1923
- name: Run tests
2024
run: cargo test --verbose
25+
- name: Build docs
26+
run: cargo doc
27+
- name: Check formatting
28+
run: cargo fmt --check
29+
- name: Check clippy
30+
run: cargo clippy --lib --tests
2131
regen:
2232
runs-on: ubuntu-latest
2333
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ Cargo.lock
33
scripts/tmp
44
scripts/*.txt
55
scripts/*.rs
6+
bench_data/*

Cargo.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
name = "unicode-width"
44
version = "0.1.11"
5-
authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
5+
authors = [
6+
"kwantam <kwantam@gmail.com>",
7+
"Manish Goregaokar <manishsmail@gmail.com>",
8+
]
69

710
homepage = "https://github.com/unicode-rs/unicode-width"
811
repository = "https://github.com/unicode-rs/unicode-width"
@@ -14,8 +17,9 @@ description = """
1417
Determine displayed width of `char` and `str` types
1518
according to Unicode Standard Annex #11 rules.
1619
"""
20+
edition = "2021"
1721

18-
exclude = [ "target/*", "Cargo.lock" ]
22+
exclude = ["target/*", "Cargo.lock"]
1923

2024
[dependencies]
2125
std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
@@ -27,7 +31,6 @@ unicode-normalization = "0.1.23"
2731

2832
[features]
2933
default = []
30-
bench = []
3134
rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
3235

3336
# Legacy, now a no-op

benches/benches.rs

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
#![feature(test)]
11+
12+
extern crate test;
13+
14+
use std::iter;
15+
16+
use test::Bencher;
17+
18+
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
19+
20+
#[bench]
21+
fn cargo(b: &mut Bencher) {
22+
let string = iter::repeat('a').take(4096).collect::<String>();
23+
24+
b.iter(|| {
25+
for c in string.chars() {
26+
test::black_box(UnicodeWidthChar::width(c));
27+
}
28+
});
29+
}
30+
31+
#[bench]
32+
#[allow(deprecated)]
33+
fn stdlib(b: &mut Bencher) {
34+
let string = iter::repeat('a').take(4096).collect::<String>();
35+
36+
b.iter(|| {
37+
for c in string.chars() {
38+
test::black_box(c.width());
39+
}
40+
});
41+
}
42+
43+
#[bench]
44+
fn simple_if(b: &mut Bencher) {
45+
let string = iter::repeat('a').take(4096).collect::<String>();
46+
47+
b.iter(|| {
48+
for c in string.chars() {
49+
test::black_box(simple_width_if(c));
50+
}
51+
});
52+
}
53+
54+
#[bench]
55+
fn simple_match(b: &mut Bencher) {
56+
let string = iter::repeat('a').take(4096).collect::<String>();
57+
58+
b.iter(|| {
59+
for c in string.chars() {
60+
test::black_box(simple_width_match(c));
61+
}
62+
});
63+
}
64+
65+
#[inline]
66+
fn simple_width_if(c: char) -> Option<usize> {
67+
let cu = c as u32;
68+
if cu < 127 {
69+
if cu > 31 {
70+
Some(1)
71+
} else if cu == 0 {
72+
Some(0)
73+
} else {
74+
None
75+
}
76+
} else {
77+
UnicodeWidthChar::width(c)
78+
}
79+
}
80+
81+
#[inline]
82+
fn simple_width_match(c: char) -> Option<usize> {
83+
match c as u32 {
84+
cu if cu == 0 => Some(0),
85+
cu if cu < 0x20 => None,
86+
cu if cu < 0x7f => Some(1),
87+
_ => UnicodeWidthChar::width(c),
88+
}
89+
}
90+
91+
#[bench]
92+
fn enwik8(b: &mut Bencher) {
93+
// To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
94+
let data_path = "bench_data/enwik8";
95+
let string = std::fs::read_to_string(data_path).unwrap_or_default();
96+
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
97+
}
98+
99+
#[bench]
100+
fn jawiki(b: &mut Bencher) {
101+
// To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
102+
// https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
103+
let data_path = "bench_data/jawiki-20220501-pages-articles-multistream-index.txt";
104+
let string = std::fs::read_to_string(data_path).unwrap_or_default();
105+
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
106+
}

src/lib.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,13 @@
4747
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
4848
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
4949
)]
50-
#![cfg_attr(feature = "bench", feature(test))]
5150
#![no_std]
5251

53-
#[cfg(test)]
54-
#[macro_use]
55-
extern crate std;
56-
57-
#[cfg(feature = "bench")]
58-
extern crate test;
59-
6052
use tables::charwidth as cw;
6153
pub use tables::UNICODE_VERSION;
6254

6355
mod tables;
6456

65-
#[cfg(test)]
66-
mod tests;
67-
6857
/// Methods for determining displayed width of Unicode characters.
6958
pub trait UnicodeWidthChar {
7059
/// Returns the character's displayed width in columns, or `None` if the

0 commit comments

Comments
 (0)