From 1b5e4e4e47852cc1e29d6b6c3ed79cbaa2f52fe9 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Sat, 26 Jul 2025 01:55:45 +0200 Subject: [PATCH 1/3] i18n: Add localized month-parsing --- Cargo.lock | 147 +++++++++++++- Cargo.toml | 1 + fuzz/Cargo.lock | 239 +++++++++++++++++++---- src/uucore/Cargo.toml | 6 +- src/uucore/src/lib/features/i18n/date.rs | 114 +++++++++++ src/uucore/src/lib/features/i18n/mod.rs | 9 + 6 files changed, 481 insertions(+), 35 deletions(-) create mode 100644 src/uucore/src/lib/features/i18n/date.rs diff --git a/Cargo.lock b/Cargo.lock index b854e8ab80..167dafbf7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -284,6 +284,16 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "calendrical_calculations" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f6df87e869fb08be61c7e97ced8e69ab802df1d8bc612ed67dba78c07fbc12c" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cast" version = "0.3.0" @@ -483,6 +493,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core_maths" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" +dependencies = [ + "libm", +] + [[package]] name = "coreutils" version = "0.1.0" @@ -1262,6 +1281,30 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_calendar" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7a6ed1ea995a24dff839bc5ca4471ce2fa18ba14d8b09061c2527a46a1c6079" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "ixdtf", + 
"tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_calendar_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219c8639ab936713a87b571eed2bc2615aa9137e8af6eb221446ee5644acc18" + [[package]] name = "icu_collator" version = "2.0.0" @@ -1301,6 +1344,37 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_datetime" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0790c15e3d6ae3303365fa2337b4f6469de257916141110d14dcaf73f1d31ac5" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locale", + "icu_locale_core", + "icu_pattern", + "icu_plurals", + "icu_provider", + "icu_time", + "potential_utf", + "smallvec", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_datetime_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83791ac10bb7b774f130bb81fa89c4059de710dcef53caa0b86e645212d6d54c" + [[package]] name = "icu_decimal" version = "2.0.0" @@ -1380,6 +1454,38 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +[[package]] +name = "icu_pattern" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "983825f401e6bc4a13c45d552ffd9ad6f3f6b6bc0ec03f31d6835a90a46deb1f" +dependencies = [ + "displaydoc", + "either", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd83a65f58b6f28e1f3da8c6ada6b415ee3ad5cb480b75bdb669f34d72dd179" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locale", + "icu_plurals_data", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_plurals_data" +version = "2.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ec552d761eaf4a1c39ad28936e0af77a41bf01ff756ea54be4f8bfc21c265d7" + [[package]] name = "icu_properties" version = "2.0.1" @@ -1419,6 +1525,32 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_time" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10d01a4a2dcbc5e5180ef113920e7461d0e9caaddb3567d81c4eca262efe55c0" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar", + "icu_locale_core", + "icu_provider", + "icu_time_data", + "ixdtf", + "serde", + "tinystr", + "writeable", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_time_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8472be4410d26a03d7208cae3a76c798dd6766e8226ab977cd8b2d349a6dbf08" + [[package]] name = "indexmap" version = "2.9.0" @@ -1520,6 +1652,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "ixdtf" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8289f7f711a1a51f80e2e368355d023042ca55d8d554fd5e953f01464c15842d" +dependencies = [ + "displaydoc", +] + [[package]] name = "jiff" version = "0.2.15" @@ -4086,6 +4227,7 @@ dependencies = [ "glob", "hex", "icu_collator", + "icu_datetime", "icu_decimal", "icu_locale", "icu_provider", @@ -4311,7 +4453,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -4623,6 +4765,9 @@ name = "writeable" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +dependencies = [ + 
"either", +] [[package]] name = "wyz" diff --git a/Cargo.toml b/Cargo.toml index 49258db557..683e542dfa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -312,6 +312,7 @@ glob = "0.3.1" half = "2.4.1" hostname = "0.4" icu_collator = "2.0.0" +icu_datetime = "2.0.0" icu_decimal = "2.0.0" icu_locale = "2.0.0" icu_provider = "2.0.0" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index c69adf6744..0755bf4d00 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -96,9 +96,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bigdecimal" @@ -184,11 +184,21 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" +[[package]] +name = "calendrical_calculations" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f6df87e869fb08be61c7e97ced8e69ab802df1d8bc612ed67dba78c07fbc12c" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cc" -version = "1.2.27" +version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ "jobserver", "libc", @@ -221,18 +231,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.40" +version = "4.5.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" +checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882" dependencies = [ 
"clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.40" +version = "4.5.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" +checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966" dependencies = [ "anstream", "anstyle", @@ -304,6 +314,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core_maths" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" +dependencies = [ + "libm", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -315,9 +334,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -449,12 +468,12 @@ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "errno" -version = "0.3.12" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -463,6 +482,17 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fixed_decimal" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + [[package]] name = "fluent" version = "0.17.0" @@ -589,6 +619,30 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_calendar" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7a6ed1ea995a24dff839bc5ca4471ce2fa18ba14d8b09061c2527a46a1c6079" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "ixdtf", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_calendar_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219c8639ab936713a87b571eed2bc2615aa9137e8af6eb221446ee5644acc18" + [[package]] name = "icu_collator" version = "2.0.0" @@ -628,6 +682,60 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_datetime" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0790c15e3d6ae3303365fa2337b4f6469de257916141110d14dcaf73f1d31ac5" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locale", + "icu_locale_core", + "icu_pattern", + "icu_plurals", + "icu_provider", + "icu_time", + "potential_utf", + "smallvec", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_datetime_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83791ac10bb7b774f130bb81fa89c4059de710dcef53caa0b86e645212d6d54c" + +[[package]] +name = "icu_decimal" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "tinystr", + 
"writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5" + [[package]] name = "icu_locale" version = "2.0.0" @@ -684,6 +792,38 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +[[package]] +name = "icu_pattern" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "983825f401e6bc4a13c45d552ffd9ad6f3f6b6bc0ec03f31d6835a90a46deb1f" +dependencies = [ + "displaydoc", + "either", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd83a65f58b6f28e1f3da8c6ada6b415ee3ad5cb480b75bdb669f34d72dd179" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locale", + "icu_plurals_data", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_plurals_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ec552d761eaf4a1c39ad28936e0af77a41bf01ff756ea54be4f8bfc21c265d7" + [[package]] name = "icu_properties" version = "2.0.1" @@ -723,6 +863,32 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_time" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10d01a4a2dcbc5e5180ef113920e7461d0e9caaddb3567d81c4eca262efe55c0" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar", + "icu_locale_core", + "icu_provider", + "icu_time_data", + "ixdtf", + "serde", + "tinystr", + "writeable", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_time_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8472be4410d26a03d7208cae3a76c798dd6766e8226ab977cd8b2d349a6dbf08" + [[package]] name = "intl-memoizer" version = "0.5.3" @@ -757,6 +923,15 @@ dependencies = [ "either", ] +[[package]] +name = "ixdtf" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8289f7f711a1a51f80e2e368355d023042ca55d8d554fd5e953f01464c15842d" +dependencies = [ + "displaydoc", +] + [[package]] name = "jiff" version = "0.2.15" @@ -835,9 +1010,9 @@ checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "libfuzzer-sys" -version = "0.4.10" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" dependencies = [ "arbitrary", "cc", @@ -1146,13 +1321,12 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rust-ini" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" +checksum = "e7295b7ce3bf4806b419dc3420745998b447178b7005e2011947b38fc5aa6791" dependencies = [ "cfg-if", "ordered-multimap", - "trim-in-place", ] [[package]] @@ -1163,15 +1337,15 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1279,9 +1453,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" 
-version = "2.0.103" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -1361,12 +1535,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "trim-in-place" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" - [[package]] name = "type-map" version = "0.5.1" @@ -1609,7 +1777,9 @@ dependencies = [ "glob", "hex", "icu_collator", + "icu_datetime", "icu_locale", + "icu_provider", "itertools", "libc", "md-5", @@ -1995,6 +2165,9 @@ name = "writeable" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +dependencies = [ + "either", +] [[package]] name = "yoke" @@ -2028,18 +2201,18 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 381a3041eb..e796df8b5c 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -65,6 +65,9 @@ selinux = { workspace = true, 
optional = true } icu_collator = { workspace = true, optional = true, features = [ "compiled_data", ] } +icu_datetime = { workspace = true, optional = true, features = [ + "compiled_data", +] } icu_decimal = { workspace = true, optional = true, features = [ "compiled_data", ] } @@ -120,8 +123,9 @@ format = [ "num-traits", "quoting-style", ] -i18n-all = ["i18n-collator", "i18n-decimal"] +i18n-all = ["i18n-date", "i18n-collator", "i18n-decimal"] i18n-common = ["icu_locale"] +i18n-date = ["i18n-common", "icu_datetime", "icu_provider"] i18n-collator = ["i18n-common", "icu_collator"] i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"] mode = ["libc"] diff --git a/src/uucore/src/lib/features/i18n/date.rs b/src/uucore/src/lib/features/i18n/date.rs new file mode 100644 index 0000000000..5f7748daad --- /dev/null +++ b/src/uucore/src/lib/features/i18n/date.rs @@ -0,0 +1,114 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+
+use std::sync::OnceLock;
+
+use icu_datetime::provider::neo::{
+    DatetimeNamesMonthGregorianV1, MonthNames, marker_attrs::ABBR_STANDALONE,
+};
+use icu_locale::Locale;
+use icu_provider::prelude::*;
+
+use crate::i18n::{DEFAULT_LOCALE, get_time_locale};
+
+/// Sortable month enum.
+///
+/// The derived `Ord` follows declaration order, so [`Month::Unknown`] compares
+/// lower than every real month and January through December sort
+/// chronologically. Do not reorder the variants.
+#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
+pub enum Month {
+    Unknown,
+    January,
+    February,
+    March,
+    April,
+    May,
+    June,
+    July,
+    August,
+    September,
+    October,
+    November,
+    December,
+}
+
+impl Month {
+    /// Convert a 1-based month number (1 = January, 12 = December) into a
+    /// [`Month`]. Any other value yields [`Month::Unknown`].
+    #[inline]
+    fn from_1(i: usize) -> Self {
+        match i {
+            1 => Self::January,
+            2 => Self::February,
+            3 => Self::March,
+            4 => Self::April,
+            5 => Self::May,
+            6 => Self::June,
+            7 => Self::July,
+            8 => Self::August,
+            9 => Self::September,
+            10 => Self::October,
+            11 => Self::November,
+            12 => Self::December,
+            _ => Self::Unknown,
+        }
+    }
+
+    /// Convert a 0-based month index (0 = January, 11 = December) into a
+    /// [`Month`]. Any other value yields [`Month::Unknown`].
+    #[inline]
+    fn from_0(i: usize) -> Self {
+        // Saturating so that `usize::MAX` maps to `Unknown` instead of
+        // overflowing.
+        Self::from_1(i.saturating_add(1))
+    }
+}
+
+/// Return a vector containing all month names depending on the given locale, starting from January.
+///
+/// Returns `None` for [`DEFAULT_LOCALE`], and also when the compiled ICU data
+/// cannot be loaded or does not store the names as a [`MonthNames::Linear`]
+/// list, so that callers fall back to the built-in English abbreviations
+/// instead of panicking.
+fn get_abbr_month_names(loc: Locale) -> Option<Vec<String>> {
+    if loc == DEFAULT_LOCALE {
+        return None;
+    }
+
+    let data_locale = DataLocale::from(loc);
+
+    let request = DataRequest {
+        id: DataIdentifierBorrowed::for_marker_attributes_and_locale(ABBR_STANDALONE, &data_locale),
+        metadata: DataRequestMetadata::default(),
+    };
+
+    // The locale comes from the environment, so a failed lookup must not
+    // abort the program: treat it like "no localized names available".
+    let response: DataResponse<DatetimeNamesMonthGregorianV1> =
+        icu_datetime::provider::Baked.load(request).ok()?;
+
+    match response.payload.get() {
+        MonthNames::Linear(months) => Some(months.iter().map(ToString::to_string).collect()),
+        // Other storage layouts are not handled here; fall back rather than
+        // panic.
+        _ => None,
+    }
+}
+
+/// Return the abbreviated month names for the time locale (see
+/// `get_time_locale`), starting from January, or `None` when no localized
+/// names are available.
+///
+/// The names are looked up on first use and cached in a process-wide static.
+pub fn locale_abbr_month_names() -> Option<&'static [String]> {
+    static ABBR_MONTH_NAMES: OnceLock<Option<Vec<String>>> = OnceLock::new();
+
+    ABBR_MONTH_NAMES
+        .get_or_init(|| get_abbr_month_names(get_time_locale().0.clone()))
+        .as_deref()
+}
+
+/// Parse the abbreviated month name at the very start of `input`.
+///
+/// Matching starts at the first byte: leading whitespace is not skipped, so
+/// callers are expected to trim it beforehand. Returns [`Month::Unknown`] when
+/// no month name matches.
+pub fn locale_parse_abbr_month(input: &[u8]) -> Month {
+    if let Some(months) = locale_abbr_month_names() {
+        // Match case-insensitively, consistent with the locale=C branch below.
+        // Only ASCII letters are folded; non-ASCII characters (e.g. accented
+        // letters) must still match byte-for-byte.
+        months
+            .iter()
+            .position(|month| {
+                input
+                    .get(..month.len())
+                    .is_some_and(|prefix| prefix.eq_ignore_ascii_case(month.as_bytes()))
+            })
+            .map_or(Month::Unknown, Month::from_0)
+    } else {
+        // Use a match instead of a loop to improve the locale=C case.
+        // The three-byte prefix is upper-cased on the stack, so no allocation
+        // is needed per call.
+        let prefix = input
+            .first_chunk::<3>()
+            .map(|bytes| bytes.map(|b| b.to_ascii_uppercase()));
+
+        match prefix.as_ref() {
+            Some(b"JAN") => Month::January,
+            Some(b"FEB") => Month::February,
+            Some(b"MAR") => Month::March,
+            Some(b"APR") => Month::April,
+            Some(b"MAY") => Month::May,
+            Some(b"JUN") => Month::June,
+            Some(b"JUL") => Month::July,
+            Some(b"AUG") => Month::August,
+            Some(b"SEP") => Month::September,
+            Some(b"OCT") => Month::October,
+            Some(b"NOV") => Month::November,
+            Some(b"DEC") => Month::December,
+            _ => Month::Unknown,
+        }
+    }
+}
diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs index c42d41c7ea..0b1095b728 100644 --- a/src/uucore/src/lib/features/i18n/mod.rs +++ b/src/uucore/src/lib/features/i18n/mod.rs @@ -9,6 +9,8 @@ use icu_locale::{Locale, locale}; #[cfg(feature = "i18n-collator")] pub mod collator; +#[cfg(feature = "i18n-date")] +pub mod date; #[cfg(feature = "i18n-decimal")] pub 
mod decimal; @@ -77,6 +79,13 @@ pub fn get_numeric_locale() -> &'static (Locale, UEncoding) { NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC")) } +/// Get the time locale from the environment +pub fn get_time_locale() -> &'static (Locale, UEncoding) { + static TIME_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new(); + + TIME_LOCALE.get_or_init(|| get_locale_from_env("LC_TIME")) +} + /// Return the encoding deduced from the locale environment variable. pub fn get_locale_encoding() -> UEncoding { get_collating_locale().1 From 9d696f4a0079b5925424d5e185465c8855d38918 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Sat, 26 Jul 2025 01:57:38 +0200 Subject: [PATCH 2/3] sort: Use locale-aware month sorting --- src/uu/sort/Cargo.toml | 7 +++- src/uu/sort/src/sort.rs | 75 ++++++++++++----------------------------- 2 files changed, 27 insertions(+), 55 deletions(-) diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 789f6fec51..753a0dad76 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -34,8 +34,13 @@ self_cell = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } unicode-width = { workspace = true } -uucore = { workspace = true, features = ["fs", "parser", "version-cmp"] } fluent = { workspace = true } +uucore = { workspace = true, features = [ + "fs", + "parser", + "version-cmp", + "i18n-date", +] } [target.'cfg(target_os = "linux")'.dependencies] nix = { workspace = true } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 74b6253fbd..2b27c5e43c 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -18,17 +18,15 @@ mod numeric_str_cmp; mod tmp_dir; use bigdecimal::BigDecimal; -use chunks::LineData; use clap::builder::ValueParser; use clap::{Arg, ArgAction, Command}; -use custom_str_cmp::custom_str_cmp; -use ext_sort::ext_sort; use fnv::FnvHasher; #[cfg(target_os = "linux")] use nix::libc::{RLIMIT_NOFILE, getrlimit, rlimit}; -use 
numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; use rayon::prelude::*; +use thiserror::Error; + use std::cmp::Ordering; use std::env; use std::ffi::{OsStr, OsString}; @@ -37,23 +35,28 @@ use std::hash::{Hash, Hasher}; use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout}; use std::num::IntErrorKind; use std::ops::Range; -use std::path::Path; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::str::Utf8Error; -use thiserror::Error; + use uucore::display::Quotable; -use uucore::error::{FromIo, strip_errno}; -use uucore::error::{UError, UResult, USimpleError, UUsageError, set_exit_code}; +use uucore::error::{ + FromIo, UError, UResult, USimpleError, UUsageError, set_exit_code, strip_errno, +}; use uucore::extendedbigdecimal::ExtendedBigDecimal; -use uucore::format_usage; +use uucore::i18n::date::{self, locale_parse_abbr_month}; use uucore::line_ending::LineEnding; use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; use uucore::parser::parse_size::{ParseSizeError, Parser}; use uucore::parser::shortcut_value_parser::ShortcutValueParser; -use uucore::show_error; -use uucore::translate; use uucore::version_cmp::version_cmp; - +use uucore::{format_usage, show_error, translate}; + +use crate::chunks::LineData; +use crate::custom_str_cmp::custom_str_cmp; +use crate::ext_sort::ext_sort; +use crate::numeric_str_cmp::{ + NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp, +}; use crate::tmp_dir::TmpDirWrapper; mod options { @@ -599,7 +602,9 @@ impl<'a> Line<'a> { .enumerate() .skip_while(|(_, c)| c.is_ascii_whitespace()); - let month = if month_parse(initial_selection) == Month::Unknown { + let month = if locale_parse_abbr_month(initial_selection.trim_ascii_start()) + == date::Month::Unknown + { // We failed to parse a month, which is equivalent to matching nothing. // Add the "no match for key" marker to the first non-whitespace character. 
let first_non_whitespace = month_chars.next(); @@ -1871,47 +1876,9 @@ fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering { da.cmp(&db) } -#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] -enum Month { - Unknown, - January, - February, - March, - April, - May, - June, - July, - August, - September, - October, - November, - December, -} - -/// Parse the beginning string into a Month, returning [`Month::Unknown`] on errors. -fn month_parse(line: &[u8]) -> Month { - let line = line.trim_ascii_start(); - - match line.get(..3).map(|x| x.to_ascii_uppercase()).as_deref() { - Some(b"JAN") => Month::January, - Some(b"FEB") => Month::February, - Some(b"MAR") => Month::March, - Some(b"APR") => Month::April, - Some(b"MAY") => Month::May, - Some(b"JUN") => Month::June, - Some(b"JUL") => Month::July, - Some(b"AUG") => Month::August, - Some(b"SEP") => Month::September, - Some(b"OCT") => Month::October, - Some(b"NOV") => Month::November, - Some(b"DEC") => Month::December, - _ => Month::Unknown, - } -} - fn month_compare(a: &[u8], b: &[u8]) -> Ordering { - let ma = month_parse(a); - let mb = month_parse(b); + let ma = locale_parse_abbr_month(a.trim_ascii_start()); + let mb = locale_parse_abbr_month(b.trim_ascii_start()); ma.cmp(&mb) } From cc23c93fcad3c5b8a4ff35b63c169e9bc15b40b2 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Tue, 29 Jul 2025 16:29:39 +0200 Subject: [PATCH 3/3] patch sort-month.sh --- util/build-gnu.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index e9a5499865..cf8203e6bc 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -390,3 +390,9 @@ sed -i 's/echo "changing security context/echo "chcon: changing security context # * the selinux crate is handling errors # * the test says "maybe we should not fail when no context available" sed -i -e "s|returns_ 1||g" tests/cp/no-ctx.sh + +# There is a discrepancy between GNU localization and ICU (ours) when it comes +# to the abbreviated month of 
April in the French locale: +# GNU is 'avril' +# ICU is 'avr.' +sed -i -e 's|\(LC_ALL=$LOC locale abmon 2>/dev/null\)|\1 \| sed s/avril/avr./|' tests/sort/sort-month.sh