From 79623f4e7a3e9f3e3fdcbbe65f2dda923250b799 Mon Sep 17 00:00:00 2001 From: Joey Date: Sun, 14 Apr 2019 12:23:09 -0700 Subject: [PATCH 1/2] str: proper titlecase support --- Cargo.lock | 7 +++++++ vm/Cargo.toml | 1 + vm/src/obj/objstr.rs | 9 ++++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 864e90ce5a..100f6a3599 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -827,6 +827,7 @@ dependencies = [ "serde_derive 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", "statrs 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing)", "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1102,6 +1103,11 @@ dependencies = [ "unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "unicode-casing" +version = "0.1.0" +source = "git+https://github.com/OddCoincidence/unicode-casing#90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb" + [[package]] name = "unicode-normalization" version = "0.1.8" @@ -1431,6 +1437,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" "checksum unic-emoji-char 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" "checksum unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +"checksum unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing)" = "" "checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" "checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" diff --git a/vm/Cargo.toml b/vm/Cargo.toml index ef1a9cbc35..1ba1986b78 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -23,6 +23,7 @@ regex = "1" rustc_version_runtime = "0.1.*" statrs = "0.10.0" caseless = "0.2.1" +unicode-casing = { git = "https://github.com/OddCoincidence/unicode-casing" } unicode-segmentation = "1.2.1" lazy_static = "^1.0.1" lexical = "2.0.0" diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index 390e92d27d..b5e7a29cf7 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -5,6 +5,7 @@ use std::str::FromStr; use std::string::ToString; use num_traits::ToPrimitive; +use unicode_casing::CharExt; use unicode_segmentation::UnicodeSegmentation; use crate::format::{FormatParseError, FormatPart, FormatString}; @@ -413,12 +414,12 @@ impl PyString { for c in self.value.chars() { if c.is_lowercase() { if !previous_is_cased { - title.extend(c.to_uppercase()); + title.extend(c.to_titlecase()); } else { title.push(c); } previous_is_cased = true; - } else if c.is_uppercase() { + } else if c.is_uppercase() || c.is_titlecase() { if previous_is_cased { title.extend(c.to_lowercase()); } else { @@ -652,7 +653,7 @@ impl PyString { let mut cased = false; let mut previous_is_cased = false; for c in self.value.chars() { - if c.is_uppercase() { + if c.is_uppercase() || c.is_titlecase() { if previous_is_cased { return false; } @@ -1050,6 +1051,7 @@ mod tests { ("Format,This-As*Title;String", "fOrMaT,thIs-aS*titLe;String"), ("Getint", "getInt"), ("Greek Ωppercases ...", "greek ωppercases ..."), + ("Greek ῼitlecases ...", "greek ῳitlecases ..."), ]; for (title, input) in tests { assert_eq!(PyString::from(input).title(&vm).as_str(), title); @@ -1066,6 +1068,7 @@ mod tests { "A\nTitlecased Line", "A Titlecased, Line", "Greek Ωppercases ...", + "Greek ῼitlecases ...", ]; for s in pos { From 29ea307f4c633b61f727689c535fdf33ddc044be Mon Sep 17 00:00:00 2001 From: Joey Date: Fri, 19 Apr 2019 09:26:24 -0700 Subject: [PATCH 2/2] unicode-casing: specify rev explicitly, add todo comment --- Cargo.lock | 6 +++--- vm/Cargo.toml | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 100f6a3599..274daba698 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -827,7 +827,7 @@ dependencies = [ "serde_derive 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", "statrs 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing)", + "unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing?rev=90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb)", "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "unicode-casing" version = "0.1.0" -source = "git+https://github.com/OddCoincidence/unicode-casing#90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb" +source = "git+https://github.com/OddCoincidence/unicode-casing?rev=90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb#90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb" [[package]] name = "unicode-normalization" @@ -1437,7 +1437,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum unic-common 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" "checksum unic-emoji-char 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" "checksum unic-ucd-version 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -"checksum unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing)" = "" +"checksum unicode-casing 0.1.0 (git+https://github.com/OddCoincidence/unicode-casing?rev=90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb)" = "" "checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426" "checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 1ba1986b78..5af648084a 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -23,8 +23,12 @@ regex = "1" rustc_version_runtime = "0.1.*" statrs = "0.10.0" caseless = "0.2.1" -unicode-casing = { git = "https://github.com/OddCoincidence/unicode-casing" } unicode-segmentation = "1.2.1" lazy_static = "^1.0.1" lexical = "2.0.0" itertools = "^0.8.0" + +# TODO: release and publish to crates.io +[dependencies.unicode-casing] +git = "https://github.com/OddCoincidence/unicode-casing" +rev = "90d6d1f02b9cc04ffb55a5f1c3fa1455a84231fb"