Skip to content

Commit bf8599b

Browse files
authored
Merge pull request #104 from unicode-rs/fmt
Run rustfmt
2 parents 122cd59 + deaf747 commit bf8599b

File tree

10 files changed: +5381 additions, -2093 deletions (lines changed)

.github/workflows/rust.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,13 @@ jobs:
2020
run: cargo build --verbose
2121
- name: Run tests
2222
run: cargo test --verbose
23+
fmt:
24+
25+
runs-on: ubuntu-latest
26+
27+
steps:
28+
- uses: actions/checkout@v2
29+
- name: Rustfmt
30+
run: cargo fmt --check
2331
- name: Verify regenerated files
24-
run: ./scripts/unicode.py && diff tables.rs src/tables.rs
32+
run: ./scripts/unicode.py && diff tables.rs src/tables.rs

benches/graphemes.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use unicode_segmentation::UnicodeSegmentation;
77
fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
88
let text = fs::read_to_string(path).unwrap();
99

10-
c.bench_function(&format!("graphemes_{}",lang), |bench| {
10+
c.bench_function(&format!("graphemes_{}", lang), |bench| {
1111
bench.iter(|| {
1212
for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
1313
black_box(g);
@@ -17,35 +17,35 @@ fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
1717
}
1818

1919
fn graphemes_arabic(c: &mut Criterion) {
20-
graphemes(c, "arabic" ,"benches/texts/arabic.txt");
20+
graphemes(c, "arabic", "benches/texts/arabic.txt");
2121
}
2222

2323
fn graphemes_english(c: &mut Criterion) {
24-
graphemes(c, "english" ,"benches/texts/english.txt");
24+
graphemes(c, "english", "benches/texts/english.txt");
2525
}
2626

2727
fn graphemes_hindi(c: &mut Criterion) {
28-
graphemes(c, "hindi" ,"benches/texts/hindi.txt");
28+
graphemes(c, "hindi", "benches/texts/hindi.txt");
2929
}
3030

3131
fn graphemes_japanese(c: &mut Criterion) {
32-
graphemes(c, "japanese" ,"benches/texts/japanese.txt");
32+
graphemes(c, "japanese", "benches/texts/japanese.txt");
3333
}
3434

3535
fn graphemes_korean(c: &mut Criterion) {
36-
graphemes(c, "korean" ,"benches/texts/korean.txt");
36+
graphemes(c, "korean", "benches/texts/korean.txt");
3737
}
3838

3939
fn graphemes_mandarin(c: &mut Criterion) {
40-
graphemes(c, "mandarin" ,"benches/texts/mandarin.txt");
40+
graphemes(c, "mandarin", "benches/texts/mandarin.txt");
4141
}
4242

4343
fn graphemes_russian(c: &mut Criterion) {
44-
graphemes(c, "russian" ,"benches/texts/russian.txt");
44+
graphemes(c, "russian", "benches/texts/russian.txt");
4545
}
4646

4747
fn graphemes_source_code(c: &mut Criterion) {
48-
graphemes(c, "source_code","benches/texts/source_code.txt");
48+
graphemes(c, "source_code", "benches/texts/source_code.txt");
4949
}
5050

5151
criterion_group!(

benches/unicode_words.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ extern crate bencher;
33
extern crate unicode_segmentation;
44

55
use bencher::Bencher;
6-
use unicode_segmentation::UnicodeSegmentation;
76
use std::fs;
7+
use unicode_segmentation::UnicodeSegmentation;
88

99
fn unicode_words(bench: &mut Bencher, path: &str) {
1010
let text = fs::read_to_string(path).unwrap();

benches/word_bounds.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ extern crate bencher;
33
extern crate unicode_segmentation;
44

55
use bencher::Bencher;
6-
use unicode_segmentation::UnicodeSegmentation;
76
use std::fs;
7+
use unicode_segmentation::UnicodeSegmentation;
88

99
fn word_bounds(bench: &mut Bencher, path: &str) {
1010
let text = fs::read_to_string(path).unwrap();

src/grapheme.rs

Lines changed: 92 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ impl<'a> Iterator for GraphemeIndices<'a> {
4949

5050
#[inline]
5151
fn next(&mut self) -> Option<(usize, &'a str)> {
52-
self.iter.next().map(|s| (s.as_ptr() as usize - self.start_offset, s))
52+
self.iter
53+
.next()
54+
.map(|s| (s.as_ptr() as usize - self.start_offset, s))
5355
}
5456

5557
#[inline]
@@ -61,7 +63,9 @@ impl<'a> Iterator for GraphemeIndices<'a> {
6163
impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
6264
#[inline]
6365
fn next_back(&mut self) -> Option<(usize, &'a str)> {
64-
self.iter.next_back().map(|s| (s.as_ptr() as usize - self.start_offset, s))
66+
self.iter
67+
.next_back()
68+
.map(|s| (s.as_ptr() as usize - self.start_offset, s))
6569
}
6670
}
6771

@@ -126,7 +130,11 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
126130
if end == self.cursor.cur_cursor() {
127131
return None;
128132
}
129-
let prev = self.cursor_back.prev_boundary(self.string, 0).unwrap().unwrap();
133+
let prev = self
134+
.cursor_back
135+
.prev_boundary(self.string, 0)
136+
.unwrap()
137+
.unwrap();
130138
Some(&self.string[prev..end])
131139
}
132140
}
@@ -143,7 +151,10 @@ pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
143151

144152
#[inline]
145153
pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
146-
GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended) }
154+
GraphemeIndices {
155+
start_offset: s.as_ptr() as usize,
156+
iter: new_graphemes(s, is_extended),
157+
}
147158
}
148159

149160
// maybe unify with PairResult?
@@ -215,7 +226,7 @@ pub enum GraphemeIncomplete {
215226
/// current chunk, so the chunk after that is requested. This will only be
216227
/// returned if the chunk ends before the `len` parameter provided on
217228
/// creation of the cursor.
218-
NextChunk, // requesting chunk following the one given
229+
NextChunk, // requesting chunk following the one given
219230

220231
/// An error returned when the chunk given does not contain the cursor position.
221232
InvalidOffset,
@@ -224,42 +235,42 @@ pub enum GraphemeIncomplete {
224235
// An enum describing the result from lookup of a pair of categories.
225236
#[derive(PartialEq, Eq)]
226237
enum PairResult {
227-
NotBreak, // definitely not a break
228-
Break, // definitely a break
229-
Extended, // a break iff not in extended mode
230-
Regional, // a break if preceded by an even number of RIS
231-
Emoji, // a break if preceded by emoji base and (Extend)*
238+
NotBreak, // definitely not a break
239+
Break, // definitely a break
240+
Extended, // a break iff not in extended mode
241+
Regional, // a break if preceded by an even number of RIS
242+
Emoji, // a break if preceded by emoji base and (Extend)*
232243
}
233244

234245
#[inline]
235246
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
236-
use crate::tables::grapheme::GraphemeCat::*;
237247
use self::PairResult::*;
248+
use crate::tables::grapheme::GraphemeCat::*;
238249
match (before, after) {
239-
(GC_CR, GC_LF) => NotBreak, // GB3
240-
(GC_Control, _) => Break, // GB4
241-
(GC_CR, _) => Break, // GB4
242-
(GC_LF, _) => Break, // GB4
243-
(_, GC_Control) => Break, // GB5
244-
(_, GC_CR) => Break, // GB5
245-
(_, GC_LF) => Break, // GB5
246-
(GC_L, GC_L) => NotBreak, // GB6
247-
(GC_L, GC_V) => NotBreak, // GB6
248-
(GC_L, GC_LV) => NotBreak, // GB6
249-
(GC_L, GC_LVT) => NotBreak, // GB6
250-
(GC_LV, GC_V) => NotBreak, // GB7
251-
(GC_LV, GC_T) => NotBreak, // GB7
252-
(GC_V, GC_V) => NotBreak, // GB7
253-
(GC_V, GC_T) => NotBreak, // GB7
254-
(GC_LVT, GC_T) => NotBreak, // GB8
255-
(GC_T, GC_T) => NotBreak, // GB8
256-
(_, GC_Extend) => NotBreak, // GB9
257-
(_, GC_ZWJ) => NotBreak, // GB9
258-
(_, GC_SpacingMark) => Extended, // GB9a
259-
(GC_Prepend, _) => Extended, // GB9b
260-
(GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11
261-
(GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
262-
(_, _) => Break, // GB999
250+
(GC_CR, GC_LF) => NotBreak, // GB3
251+
(GC_Control, _) => Break, // GB4
252+
(GC_CR, _) => Break, // GB4
253+
(GC_LF, _) => Break, // GB4
254+
(_, GC_Control) => Break, // GB5
255+
(_, GC_CR) => Break, // GB5
256+
(_, GC_LF) => Break, // GB5
257+
(GC_L, GC_L) => NotBreak, // GB6
258+
(GC_L, GC_V) => NotBreak, // GB6
259+
(GC_L, GC_LV) => NotBreak, // GB6
260+
(GC_L, GC_LVT) => NotBreak, // GB6
261+
(GC_LV, GC_V) => NotBreak, // GB7
262+
(GC_LV, GC_T) => NotBreak, // GB7
263+
(GC_V, GC_V) => NotBreak, // GB7
264+
(GC_V, GC_T) => NotBreak, // GB7
265+
(GC_LVT, GC_T) => NotBreak, // GB8
266+
(GC_T, GC_T) => NotBreak, // GB8
267+
(_, GC_Extend) => NotBreak, // GB9
268+
(_, GC_ZWJ) => NotBreak, // GB9
269+
(_, GC_SpacingMark) => Extended, // GB9a
270+
(GC_Prepend, _) => Extended, // GB9b
271+
(GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11
272+
(GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
273+
(_, _) => Break, // GB999
263274
}
264275
}
265276

@@ -397,17 +408,19 @@ impl GraphemeCursor {
397408
if self.is_extended && chunk_start + chunk.len() == self.offset {
398409
let ch = chunk.chars().rev().next().unwrap();
399410
if self.grapheme_category(ch) == gr::GC_Prepend {
400-
self.decide(false); // GB9b
411+
self.decide(false); // GB9b
401412
return;
402413
}
403414
}
404415
match self.state {
405416
GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
406417
GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
407-
_ => if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
408-
let ch = chunk.chars().rev().next().unwrap();
409-
self.cat_before = Some(self.grapheme_category(ch));
410-
},
418+
_ => {
419+
if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
420+
let ch = chunk.chars().rev().next().unwrap();
421+
self.cat_before = Some(self.grapheme_category(ch));
422+
}
423+
}
411424
}
412425
}
413426

@@ -515,17 +528,21 @@ impl GraphemeCursor {
515528
/// cursor.set_cursor(12);
516529
/// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
517530
/// ```
518-
pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
531+
pub fn is_boundary(
532+
&mut self,
533+
chunk: &str,
534+
chunk_start: usize,
535+
) -> Result<bool, GraphemeIncomplete> {
519536
use crate::tables::grapheme as gr;
520537
if self.state == GraphemeState::Break {
521-
return Ok(true)
538+
return Ok(true);
522539
}
523540
if self.state == GraphemeState::NotBreak {
524-
return Ok(false)
541+
return Ok(false);
525542
}
526543
if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() {
527544
if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() {
528-
return Err(GraphemeIncomplete::InvalidOffset)
545+
return Err(GraphemeIncomplete::InvalidOffset);
529546
}
530547
}
531548
if let Some(pre_context_offset) = self.pre_context_offset {
@@ -606,7 +623,11 @@ impl GraphemeCursor {
606623
/// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
607624
/// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
608625
/// ```
609-
pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
626+
pub fn next_boundary(
627+
&mut self,
628+
chunk: &str,
629+
chunk_start: usize,
630+
) -> Result<Option<usize>, GraphemeIncomplete> {
610631
if self.offset == self.len {
611632
return Ok(None);
612633
}
@@ -681,7 +702,11 @@ impl GraphemeCursor {
681702
/// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
682703
/// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
683704
/// ```
684-
pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
705+
pub fn prev_boundary(
706+
&mut self,
707+
chunk: &str,
708+
chunk_start: usize,
709+
) -> Result<Option<usize>, GraphemeIncomplete> {
685710
if self.offset == 0 {
686711
return Ok(None);
687712
}
@@ -702,7 +727,11 @@ impl GraphemeCursor {
702727
self.cat_after = self.cat_before.take();
703728
self.state = GraphemeState::Unknown;
704729
if let Some(ris_count) = self.ris_count {
705-
self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None };
730+
self.ris_count = if ris_count > 0 {
731+
Some(ris_count - 1)
732+
} else {
733+
None
734+
};
706735
}
707736
if let Some(prev_ch) = iter.next() {
708737
ch = prev_ch;
@@ -729,7 +758,10 @@ impl GraphemeCursor {
729758
fn test_grapheme_cursor_ris_precontext() {
730759
let s = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}";
731760
let mut c = GraphemeCursor::new(8, s.len(), true);
732-
assert_eq!(c.is_boundary(&s[4..], 4), Err(GraphemeIncomplete::PreContext(4)));
761+
assert_eq!(
762+
c.is_boundary(&s[4..], 4),
763+
Err(GraphemeIncomplete::PreContext(4))
764+
);
733765
c.provide_context(&s[..4], 0);
734766
assert_eq!(c.is_boundary(&s[4..], 4), Ok(true));
735767
}
@@ -738,7 +770,10 @@ fn test_grapheme_cursor_ris_precontext() {
738770
fn test_grapheme_cursor_chunk_start_require_precontext() {
739771
let s = "\r\n";
740772
let mut c = GraphemeCursor::new(1, s.len(), true);
741-
assert_eq!(c.is_boundary(&s[1..], 1), Err(GraphemeIncomplete::PreContext(1)));
773+
assert_eq!(
774+
c.is_boundary(&s[1..], 1),
775+
Err(GraphemeIncomplete::PreContext(1))
776+
);
742777
c.provide_context(&s[..1], 0);
743778
assert_eq!(c.is_boundary(&s[1..], 1), Ok(false));
744779
}
@@ -747,14 +782,20 @@ fn test_grapheme_cursor_chunk_start_require_precontext() {
747782
fn test_grapheme_cursor_prev_boundary() {
748783
let s = "abcd";
749784
let mut c = GraphemeCursor::new(3, s.len(), true);
750-
assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk));
785+
assert_eq!(
786+
c.prev_boundary(&s[2..], 2),
787+
Err(GraphemeIncomplete::PrevChunk)
788+
);
751789
assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(2)));
752790
}
753791

754792
#[test]
755793
fn test_grapheme_cursor_prev_boundary_chunk_start() {
756794
let s = "abcd";
757795
let mut c = GraphemeCursor::new(2, s.len(), true);
758-
assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk));
796+
assert_eq!(
797+
c.prev_boundary(&s[2..], 2),
798+
Err(GraphemeIncomplete::PrevChunk)
799+
);
759800
assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(1)));
760801
}

src/lib.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,10 @@
5050
//! ```
5151
5252
#![deny(missing_docs, unsafe_code)]
53-
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
54-
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
55-
53+
#![doc(
54+
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
55+
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
56+
)]
5657
#![no_std]
5758

5859
#[cfg(test)]
@@ -63,16 +64,17 @@ extern crate std;
6364
#[macro_use]
6465
extern crate quickcheck;
6566

66-
pub use grapheme::{Graphemes, GraphemeIndices};
6767
pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
68+
pub use grapheme::{GraphemeIndices, Graphemes};
69+
pub use sentence::{USentenceBoundIndices, USentenceBounds, UnicodeSentences};
6870
pub use tables::UNICODE_VERSION;
69-
pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords, UnicodeWordIndices};
70-
pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};
71+
pub use word::{UWordBoundIndices, UWordBounds, UnicodeWordIndices, UnicodeWords};
7172

7273
mod grapheme;
74+
#[rustfmt::skip]
7375
mod tables;
74-
mod word;
7576
mod sentence;
77+
mod word;
7678

7779
#[cfg(test)]
7880
mod test;

0 commit comments

Comments
 (0)