|
9 | 9 | // except according to those terms.
|
10 | 10 |
|
11 | 11 | //! Determine displayed width of `char` and `str` types according to
|
12 |
| -//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
13 |
| -//! rules. |
| 12 | +//! [Unicode Standard Annex #11](https://www.unicode.org/reports/tr11/), |
| 13 | +//! other portions of the Unicode standard, and common implementations of |
| 14 | +//! POSIX [`wcwidth()`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcwidth.html). |
| 15 | +//! See the [Rules for determining width](#rules-for-determining-width) section |
| 16 | +//! for the exact rules. |
14 | 17 | //!
|
15 |
| -//! ```rust |
16 |
| -//! extern crate unicode_width; |
| 18 | +//! This crate is `#![no_std]`. |
17 | 19 | //!
|
| 20 | +//! ```rust |
18 | 21 | //! use unicode_width::UnicodeWidthStr;
|
19 | 22 | //!
|
20 |
| -//! fn main() { |
21 |
| -//! let teststr = "Hello, world!"; |
22 |
| -//! let width = UnicodeWidthStr::width(teststr); |
23 |
| -//! println!("{}", teststr); |
24 |
| -//! println!("The above string is {} columns wide.", width); |
25 |
| -//! let width = teststr.width_cjk(); |
26 |
| -//! println!("The above string is {} columns wide (CJK).", width); |
27 |
| -//! } |
| 23 | +//! let teststr = "Hello, world!"; |
| 24 | +//! let width = UnicodeWidthStr::width(teststr); |
| 25 | +//! println!("{}", teststr); |
| 26 | +//! println!("The above string is {} columns wide.", width); |
| 27 | +//! let width = teststr.width_cjk(); |
| 28 | +//! println!("The above string is {} columns wide (CJK).", width); |
28 | 29 | //! ```
|
29 | 30 | //!
|
30 |
| -//! # features |
| 31 | +//! # Rules for determining width |
31 | 32 | //!
|
32 |
| -//! unicode-width does not depend on `std`, so it can be used in crates |
33 |
| -//! with the `#![no_std]` attribute. |
| 33 | +//! This crate currently uses the following rules to determine the width of a |
| 34 | +//! character or string, in order of decreasing precedence. These may be tweaked in the future. |
34 | 35 | //!
|
35 |
| -//! # crates.io |
| 36 | +//! 1. [`'\u{00AD}'` SOFT HYPHEN](https://util.unicode.org/UnicodeJsps/character.jsp?a=00AD) has width 1. |
| 37 | +//! 2. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2. |
| 38 | +//! 3. The following have width 0: |
| 39 | +//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D) |
| 40 | +//! with the [`Default_Ignorable_Code_Point`](https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095) property. |
| 41 | +//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D) |
| 42 | +//! with the [`Grapheme_Extend`] property. |
| 43 | +//! - The following 8 characters, all of which have NFD decompositions consisting of two [`Grapheme_Extend`] characters: |
| 44 | +//! - [`'\u{0CC0}'` KANNADA VOWEL SIGN II](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC0), |
| 45 | +//! - [`'\u{0CC7}'` KANNADA VOWEL SIGN EE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC7), |
| 46 | +//! - [`'\u{0CC8}'` KANNADA VOWEL SIGN AI](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC8), |
| 47 | +//! - [`'\u{0CCA}'` KANNADA VOWEL SIGN O](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CCA), |
| 48 | +//! - [`'\u{0CCB}'` KANNADA VOWEL SIGN OO](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CCB), |
| 49 | +//! - [`'\u{1B3B}'` BALINESE VOWEL SIGN RA REPA TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B3B), |
| 50 | +//! - [`'\u{1B3D}'` BALINESE VOWEL SIGN LA LENGA TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B3D), and |
| 51 | +//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43). |
| 52 | +//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D) |
| 53 | +//! with a [`Hangul_Syllable_Type`](https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593) |
| 54 | +//! of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`). |
| 55 | +//! - [`'\0'` NUL](https://util.unicode.org/UnicodeJsps/character.jsp?a=0000). |
| 56 | +//! 4. The [control characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BCc%7D) |
| 57 | +//! have no defined width, and are ignored when determining the width of a string. |
| 58 | +//! 5. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D) |
| 59 | +//! with an [`East_Asian_Width`] of [`Fullwidth` (`F`)](https://www.unicode.org/reports/tr11/#ED2) |
| 60 | +//! or [`Wide` (`W`)](https://www.unicode.org/reports/tr11/#ED4) have width 2. |
| 61 | +//! 6. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D) |
| 62 | +//! with an [`East_Asian_Width`] of [`Ambiguous` (`A`)](https://www.unicode.org/reports/tr11/#ED6) |
| 63 | +//! have width 2 in an East Asian context, and width 1 otherwise. |
| 64 | +//! 7. All other characters have width 1. |
36 | 65 | //!
|
37 |
| -//! You can use this package in your project by adding the following |
38 |
| -//! to your `Cargo.toml`: |
| 66 | +//! [`East_Asian_Width`]: https://www.unicode.org/reports/tr11/#ED1 |
| 67 | +//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443 |
| 68 | +
|
39 | 69 | //!
|
40 |
| -//! ```toml |
41 |
| -//! [dependencies] |
42 |
| -//! unicode-width = "0.1.5" |
43 |
| -//! ``` |
| 70 | +//! ## Canonical equivalence |
| 71 | +//! |
| 72 | +//! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width. |
| 73 | +//! However, this guarantee does not currently hold for the CJK width variants. |
44 | 74 |
|
45 |
| -#![deny(missing_docs, unsafe_code)] |
| 75 | +#![forbid(unsafe_code)] |
| 76 | +#![deny(missing_docs)] |
46 | 77 | #![doc(
|
47 | 78 | html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
|
48 | 79 | html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
|
|
0 commit comments