Skip to content

Commit 7c320b5

Browse files
Manishearth authored and mbrubeck committed
Cache flag indicators during reverse iteration
1 parent bc121b5 commit 7c320b5

File tree

1 file changed

+22
-7
lines changed

1 file changed

+22
-7
lines changed

src/grapheme.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pub struct Graphemes<'a> {
4848
extended: bool,
4949
cat: Option<GraphemeCat>,
5050
catb: Option<GraphemeCat>,
51+
regional_count_back: Option<usize>,
5152
}
5253

5354
// state machine for cluster boundary rules
@@ -85,6 +86,11 @@ impl<'a> Iterator for Graphemes<'a> {
8586
let mut idx = 0;
8687
let mut state = Start;
8788
let mut cat = gr::GC_Any;
89+
90+
// caching used by next_back() should be invalidated
91+
self.regional_count_back = None;
92+
self.catb = None;
93+
8894
for (curr, ch) in self.string.char_indices() {
8995
idx = curr;
9096

@@ -292,12 +298,15 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
292298
Regional => { // rule GB12/GB13
293299
// Need to scan backward to find if this is preceded by an odd or even number
294300
// of Regional_Indicator characters.
295-
//
296-
// TODO: Save this state to avoid O(n^2) re-scanning in long RI sequences?
297-
let prev_chars = self.string[..previdx].chars().rev();
298-
let count = prev_chars.take_while(|c| {
299-
gr::grapheme_category(*c) == gr::GC_Regional_Indicator
300-
}).count();
301+
let count = match self.regional_count_back {
302+
Some(count) => count,
303+
None => self.string[..previdx].chars().rev().take_while(|c| {
304+
gr::grapheme_category(*c) == gr::GC_Regional_Indicator
305+
}).count()
306+
};
307+
// Cache the count to avoid re-scanning the same chars on the next iteration.
308+
self.regional_count_back = count.checked_sub(1);
309+
301310
if count % 2 == 0 {
302311
take_curr = false;
303312
break;
@@ -372,7 +381,13 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
372381

373382
#[inline]
374383
pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
375-
Graphemes { string: s, extended: is_extended, cat: None, catb: None }
384+
Graphemes {
385+
string: s,
386+
extended: is_extended,
387+
cat: None,
388+
catb: None,
389+
regional_count_back: None
390+
}
376391
}
377392

378393
#[inline]

0 commit comments

Comments
 (0)