diff --git a/Cargo.lock b/Cargo.lock index 864e90ce5a..274daba698 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -827,6 +827,7 @@ dependencies = [ "serde_derive 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", "statrs 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing?rev=90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb)", "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1102,6 +1103,11 @@ dependencies = [ "unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "unicode-casing" +version = "0.1.0" +source = "git+https://github.com/OddCoincidence/unicode-casing?rev=90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb#90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb" + [[package]] name = "unicode-normalization" version = "0.1.8" @@ -1431,6 +1437,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" "checksum unic-emoji-char 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" "checksum unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +"checksum unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing?rev=90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb)" = "" "checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" "checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" diff --git a/vm/Cargo.toml b/vm/Cargo.toml index ef1a9cbc35..5af648084a 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -27,3 +27,8 @@ unicode-segmentation = "1.2.1" lazy_static = "^1.0.1" lexical = "2.0.0" itertools = "^0.8.0" + +# TODO: release and publish to crates.io +[dependencies.unicode-casing] +git = "https://github.com/OddCoincidence/unicode-casing" +rev = "90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb" diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index 1638a712ce..b52a91409d 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -5,6 +5,7 @@ use std::str::FromStr; use std::string::ToString; use num_traits::ToPrimitive; +use unicode_casing::CharExt; use unicode_segmentation::UnicodeSegmentation; use crate::format::{FormatParseError, FormatPart, FormatString}; @@ -413,12 +414,12 @@ impl PyString { for c in self.value.chars() { if c.is_lowercase() { if !previous_is_cased { - title.extend(c.to_uppercase()); + title.extend(c.to_titlecase()); } else { title.push(c); } previous_is_cased = true; - } else if c.is_uppercase() { + } else if c.is_uppercase() || c.is_titlecase() { if previous_is_cased { title.extend(c.to_lowercase()); } else { @@ -652,7 +653,7 @@ impl PyString { let mut cased = false; let mut previous_is_cased = false; for c in self.value.chars() { - if c.is_uppercase() { + if c.is_uppercase() || c.is_titlecase() { if previous_is_cased { return false; } @@ -1050,6 +1051,7 @@ mod tests { ("Format,This-As*Title;String", "fOrMaT,thIs-aS*titLe;String"), ("Getint", "getInt"), ("Greek Ωppercases ...", "greek ωppercases ..."), + ("Greek ῼitlecases ...", "greek ῳitlecases ..."), ]; for (title, input) in tests { assert_eq!(PyString::from(input).title(&vm).as_str(), title); @@ -1066,6 +1068,7 @@ mod tests { "A\nTitlecased Line", "A Titlecased, Line", "Greek Ωppercases ...", + "Greek ῼitlecases ...", ]; for s in pos {