Skip to content

Commit be1aa17

Browse files
Simplify derivation of ambiguous
Use the `Letter` general category instead of script and block checks. This changes `ℓ` to narrow, matching common fonts.
1 parent 5a5c031 commit be1aa17

File tree

3 files changed

+4
-18
lines changed

3 files changed

+4
-18
lines changed

scripts/unicode.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
# - NormalizationTest.txt (for tests only)
1919
# - PropList.txt
2020
# - ReadMe.txt
21-
# - Scripts.txt
2221
# - UnicodeData.txt
2322
# - auxiliary/GraphemeBreakProperty.txt
2423
# - emoji/emoji-data.txt
@@ -430,22 +429,10 @@ def load_east_asian_widths() -> list[EastAsianWidth]:
430429
# Catch any leftover codepoints and assign them implicit Neutral/narrow width.
431430
width_map.append(EastAsianWidth.NARROW)
432431

433-
# Characters from alphabetic scripts are narrow
434-
load_property(
435-
"Scripts.txt",
436-
r"(?:Latin|Greek|Cyrillic)",
437-
lambda cp: (
438-
operator.setitem(width_map, cp, EastAsianWidth.NARROW)
439-
if width_map[cp] == EastAsianWidth.AMBIGUOUS
440-
and not (0x2160 <= cp <= 0x217F) # Roman numerals remain ambiguous
441-
else None
442-
),
443-
)
444-
445-
# Ambiguous `Modifier_Letter`s and `Modifier_Symbol`s are narrow
432+
# Ambiguous `Letter`s and `Modifier_Symbol`s are narrow
446433
load_property(
447434
"extracted/DerivedGeneralCategory.txt",
448-
r"(:?Lm|Sk)",
435+
r"(:?Lu|Ll|Lt|Lm|Lo|Sk)",
449436
lambda cp: (
450437
operator.setitem(width_map, cp, EastAsianWidth.NARROW)
451438
if width_map[cp] == EastAsianWidth.AMBIGUOUS

src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,7 @@
122122
//! - Has an [`East_Asian_Width`] of [`Ambiguous`], or
123123
//! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or
124124
//! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and
125-
//! - Does not have a [`General_Category`] of `Modifier_Letter` or `Modifier_Symbol`, and
126-
//! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`.
125+
//! - Does not have a [`General_Category`] of `Letter` or `Modifier_Symbol`.
127126
//! 7. All other characters have width 1.
128127
//!
129128
//! [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY]: https://util.unicode.org/UnicodeJsps/character.jsp?a=0338

src/tables.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1877,7 +1877,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
18771877
],
18781878
#[cfg(feature = "cjk")]
18791879
[
1880-
0x95, 0x59, 0x59, 0x55, 0x95, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
1880+
0x95, 0x59, 0x59, 0x55, 0x55, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
18811881
0x55, 0x55, 0x55, 0x55, 0x55, 0x95, 0x56, 0x95, 0x6A, 0xAA, 0xAA, 0xAA, 0x55, 0xAA, 0xAA,
18821882
0x5A, 0x55,
18831883
],

0 commit comments

Comments
 (0)