diff --git a/src/lib.rs b/src/lib.rs index 6f903c0..bc3204a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,6 +107,44 @@ pub trait UnicodeSegmentation { /// ``` fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>; + /// Returns an iterator over the [legacy grapheme clusters][graphemes] of `self`. + /// + /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries + /// + /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) + /// recommends extended grapheme cluster boundaries for general processing. + #[inline] + fn legacy_graphemes<'a>(&'a self) -> Graphemes<'a> { + self.graphemes(false) + } + + /// Returns an iterator over the [extended grapheme clusters][graphemes] of `self`. + /// + /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries + /// + /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) + /// recommends extended grapheme cluster boundaries for general processing. + /// + /// # Examples + /// + /// ``` + /// # use self::unicode_segmentation::UnicodeSegmentation; + /// let gr1 = UnicodeSegmentation::extended_graphemes("a\u{310}e\u{301}o\u{308}\u{332}") + /// .collect::<Vec<&str>>(); + /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"]; + /// + /// assert_eq!(&gr1[..], b); + /// + /// let gr2 = UnicodeSegmentation::extended_graphemes("a\r\nb🇷🇺🇸🇹").collect::<Vec<&str>>(); + /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺", "🇸🇹"]; + /// + /// assert_eq!(&gr2[..], b); + /// ``` + #[inline] + fn extended_graphemes<'a>(&'a self) -> Graphemes<'a> { + self.graphemes(true) + } + /// Returns an iterator over the grapheme clusters of `self` and their /// byte offsets. See `graphemes()` for more information. /// @@ -122,6 +160,31 @@ pub trait UnicodeSegmentation { /// ``` fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>; + /// Returns an iterator over the legacy grapheme clusters of `self` and their + /// byte offsets. 
See `legacy_graphemes()` for more information. + #[inline] + fn legacy_grapheme_indices<'a>(&'a self) -> GraphemeIndices<'a> { + self.grapheme_indices(false) + } + + /// Returns an iterator over the extended grapheme clusters of `self` and their + /// byte offsets. See `extended_graphemes()` for more information. + /// + /// # Examples + /// + /// ``` + /// # use self::unicode_segmentation::UnicodeSegmentation; + /// let gr_inds = UnicodeSegmentation::extended_grapheme_indices("a̐éö̲\r\n") + /// .collect::<Vec<(usize, &str)>>(); + /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; + /// + /// assert_eq!(&gr_inds[..], b); + /// ``` + #[inline] + fn extended_grapheme_indices<'a>(&'a self) -> GraphemeIndices<'a> { + self.grapheme_indices(true) + } + /// Returns an iterator over the words of `self`, separated on /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries). /// diff --git a/src/test.rs b/src/test.rs index 54493fe..4deb69f 100644 --- a/src/test.rs +++ b/src/test.rs @@ -44,20 +44,28 @@ fn test_graphemes() { // test forward iterator assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned())); assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned())); + assert!(UnicodeSegmentation::extended_graphemes(s).eq(g.iter().cloned())); + assert!(UnicodeSegmentation::legacy_graphemes(s).eq(g.iter().cloned())); // test reverse iterator assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned())); assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned())); + assert!(UnicodeSegmentation::extended_graphemes(s).rev().eq(g.iter().rev().cloned())); + assert!(UnicodeSegmentation::legacy_graphemes(s).rev().eq(g.iter().rev().cloned())); } for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) { // test forward iterator assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned())); assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned())); + 
 assert!(UnicodeSegmentation::extended_graphemes(s).eq(gt.iter().cloned())); + assert!(UnicodeSegmentation::legacy_graphemes(s).eq(gf.iter().cloned())); // test reverse iterator assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned())); assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned())); + assert!(UnicodeSegmentation::extended_graphemes(s).rev().eq(gt.iter().rev().cloned())); + assert!(UnicodeSegmentation::legacy_graphemes(s).rev().eq(gf.iter().rev().cloned())); } // test the indices iterators