Skip to content

Commit 77faddc

Browse files
committed
transliterator
1 parent 04cc716 commit 77faddc

File tree

6 files changed

+185
-19
lines changed

6 files changed

+185
-19
lines changed

components/experimental/Cargo.toml

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,48 @@ icu_normalizer_data = { workspace = true }
7171

7272
[features]
7373
default = ["compiled_data"]
74-
compiled_data = ["dep:icu_experimental_data", "icu_decimal/compiled_data", "icu_list/compiled_data", "icu_plurals/compiled_data", "icu_properties/compiled_data", "icu_normalizer/compiled_data", "icu_casemap/compiled_data", "icu_provider/baked"]
75-
datagen = ["serde", "dep:databake", "zerovec/databake", "zerotrie/databake", "tinystr/databake", "icu_collections/databake", "log", "icu_pattern/databake", "icu_plurals/datagen", "icu_pattern/alloc", "icu_provider/export"]
74+
compiled_data = [
75+
"dep:icu_experimental_data",
76+
"icu_casemap/compiled_data",
77+
"icu_decimal/compiled_data",
78+
"icu_list/compiled_data",
79+
"icu_locale/compiled_data",
80+
"icu_normalizer/compiled_data",
81+
"icu_plurals/compiled_data",
82+
"icu_properties/compiled_data",
83+
"icu_provider/baked",
84+
]
85+
datagen = [
86+
"dep:databake",
87+
"icu_collections/databake",
88+
"icu_pattern/alloc",
89+
"icu_pattern/databake",
90+
"icu_plurals/datagen",
91+
"icu_provider/export",
92+
"log",
93+
"serde",
94+
"tinystr/databake",
95+
"zerotrie/databake",
96+
"zerovec/databake",
97+
]
7698
ryu = ["fixed_decimal/ryu"]
77-
serde = ["dep:serde", "zerovec/serde", "potential_utf/serde", "tinystr/serde", "icu_collections/serde", "icu_decimal/serde", "icu_list/serde", "icu_pattern/serde", "icu_plurals/serde", "icu_provider/alloc", "icu_provider/serde", "zerotrie/serde", "icu_normalizer/serde", "icu_casemap/serde"]
99+
serde = [
100+
"dep:serde",
101+
"icu_casemap/serde",
102+
"icu_collections/serde",
103+
"icu_decimal/serde",
104+
"icu_list/serde",
105+
"icu_locale/serde",
106+
"icu_normalizer/serde",
107+
"icu_pattern/serde",
108+
"icu_plurals/serde",
109+
"icu_provider/alloc",
110+
"icu_provider/serde",
111+
"potential_utf/serde",
112+
"tinystr/serde",
113+
"zerotrie/serde",
114+
"zerovec/serde",
115+
]
78116

79117
[[bench]]
80118
name = "transliterate"

components/experimental/src/transliterate/compile/mod.rs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use alloc::string::{String, ToString};
1010
use alloc::vec::Vec;
1111
use core::cell::RefCell;
1212
use icu_casemap::provider::CaseMapV1;
13+
use icu_locale::provider::{LocaleLikelySubtagsLanguageV1, LocaleParentsV1};
1314
use icu_locale_core::Locale;
1415
use icu_normalizer::provider::*;
1516
use icu_properties::{
@@ -156,25 +157,28 @@ impl RuleCollection {
156157
icu_properties::provider::Baked,
157158
icu_normalizer::provider::Baked,
158159
icu_casemap::provider::Baked,
160+
icu_locale::provider::Baked,
159161
> {
160162
RuleCollectionProvider {
161163
collection: self,
162164
properties_provider: &icu_properties::provider::Baked,
163165
normalizer_provider: &icu_normalizer::provider::Baked,
164166
casemap_provider: &icu_casemap::provider::Baked,
167+
locale_provider: &icu_locale::provider::Baked,
165168
xid_start: CodePointSetData::new::<XidStart>().static_to_owned(),
166169
xid_continue: CodePointSetData::new::<XidContinue>().static_to_owned(),
167170
pat_ws: CodePointSetData::new::<PatternWhiteSpace>().static_to_owned(),
168171
}
169172
}
170173

171174
#[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::as_provider)]
172-
pub fn as_provider_unstable<'a, PP, NP, NC>(
175+
pub fn as_provider_unstable<'a, PP, NP, NC, LP>(
173176
&'a self,
174177
properties_provider: &'a PP,
175178
normalizer_provider: &'a NP,
176179
casemap_provider: &'a NC,
177-
) -> Result<RuleCollectionProvider<'a, PP, NP, NC>, DataError>
180+
locale_provider: &'a LP,
181+
) -> Result<RuleCollectionProvider<'a, PP, NP, NC, LP>, DataError>
178182
where
179183
PP: ?Sized
180184
+ DataProvider<PropertyBinaryAlphabeticV1>
@@ -255,6 +259,7 @@ impl RuleCollection {
255259
properties_provider,
256260
normalizer_provider,
257261
casemap_provider,
262+
locale_provider,
258263
xid_start: CodePointSetData::try_new_unstable::<XidStart>(properties_provider)?,
259264
xid_continue: CodePointSetData::try_new_unstable::<XidContinue>(properties_provider)?,
260265
pat_ws: CodePointSetData::try_new_unstable::<PatternWhiteSpace>(properties_provider)?,
@@ -264,17 +269,19 @@ impl RuleCollection {
264269

265270
/// A provider that is usable by [`Transliterator::try_new_unstable`](crate::transliterate::Transliterator::try_new_unstable).
266271
#[derive(Debug)]
267-
pub struct RuleCollectionProvider<'a, PP: ?Sized, NP: ?Sized, NC: ?Sized> {
272+
pub struct RuleCollectionProvider<'a, PP: ?Sized, NP: ?Sized, NC: ?Sized, LP: ?Sized> {
268273
collection: &'a RuleCollection,
269274
properties_provider: &'a PP,
270275
normalizer_provider: &'a NP,
271276
casemap_provider: &'a NC,
277+
locale_provider: &'a LP,
272278
xid_start: CodePointSetData,
273279
xid_continue: CodePointSetData,
274280
pat_ws: CodePointSetData,
275281
}
276282

277-
impl<PP, NP, NC> DataProvider<TransliteratorRulesV1> for RuleCollectionProvider<'_, PP, NP, NC>
283+
impl<PP, NP, NC, LP> DataProvider<TransliteratorRulesV1>
284+
for RuleCollectionProvider<'_, PP, NP, NC, LP>
278285
where
279286
PP: ?Sized
280287
+ DataProvider<PropertyBinaryAlphabeticV1>
@@ -419,7 +426,7 @@ where
419426
macro_rules! redirect {
420427
($($marker:ty),*) => {
421428
$(
422-
impl<PP: ?Sized, NP: ?Sized + DataProvider<$marker>, NC: ?Sized> DataProvider<$marker> for RuleCollectionProvider<'_, PP, NP, NC> {
429+
impl<PP: ?Sized, NP: ?Sized + DataProvider<$marker>, NC: ?Sized, NL: ?Sized> DataProvider<$marker> for RuleCollectionProvider<'_, PP, NP, NC, NL> {
423430
fn load(&self, req: DataRequest) -> Result<DataResponse<$marker>, DataError> {
424431
self.normalizer_provider.load(req)
425432
}
@@ -436,17 +443,31 @@ redirect!(
436443
NormalizerNfcV1
437444
);
438445

439-
impl<PP: ?Sized, NP: ?Sized, NC: ?Sized + DataProvider<CaseMapV1>> DataProvider<CaseMapV1>
440-
for RuleCollectionProvider<'_, PP, NP, NC>
446+
// Forwards `CaseMapV1` requests to the wrapped `casemap_provider`.
// Only `NC` needs to implement `DataProvider<CaseMapV1>`; the other provider
// type parameters (`PP`, `NP`, `NL`) are left unconstrained apart from `?Sized`.
impl<PP: ?Sized, NP: ?Sized, NC: ?Sized + DataProvider<CaseMapV1>, NL: ?Sized>
447+
DataProvider<CaseMapV1> for RuleCollectionProvider<'_, PP, NP, NC, NL>
441448
{
442449
fn load(&self, req: DataRequest) -> Result<DataResponse<CaseMapV1>, DataError> {
443450
// Pure delegation: the request is passed through unchanged.
self.casemap_provider.load(req)
444451
}
445452
}
446453

454+
// Generates, for each listed marker type, a `DataProvider<$marker>` impl on
// `RuleCollectionProvider` that delegates `load` to `locale_provider`.
// NOTE(review): this reuses the name `redirect` of the macro defined earlier in
// this file (the one delegating to `normalizer_provider`). `macro_rules!`
// definitions are textually scoped, so invocations after this point use this
// definition; consider a distinct name to avoid confusion.
macro_rules! redirect {
455+
($($marker:ty),*) => {
456+
$(
457+
impl<PP: ?Sized, NP: ?Sized, NC: ?Sized, NL: ?Sized + DataProvider<$marker>> DataProvider<$marker> for RuleCollectionProvider<'_, PP, NP, NC, NL> {
458+
fn load(&self, req: DataRequest) -> Result<DataResponse<$marker>, DataError> {
459+
// Pure delegation: the request is passed through unchanged.
self.locale_provider.load(req)
460+
}
461+
}
462+
)*
463+
}
464+
}
465+
466+
redirect!(LocaleParentsV1, LocaleLikelySubtagsLanguageV1);
467+
447468
#[cfg(feature = "datagen")]
448-
impl<PP, NP, NC> IterableDataProvider<TransliteratorRulesV1>
449-
for RuleCollectionProvider<'_, PP, NP, NC>
469+
impl<PP, NP, NC, NL> IterableDataProvider<TransliteratorRulesV1>
470+
for RuleCollectionProvider<'_, PP, NP, NC, NL>
450471
where
451472
PP: ?Sized
452473
+ DataProvider<PropertyBinaryAlphabeticV1>

components/experimental/src/transliterate/provider.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use icu_collections::{
2323
codepointinvlist::{CodePointInversionList, CodePointInversionListULE},
2424
codepointinvliststringlist::CodePointInversionListAndStringListULE,
2525
};
26+
use icu_locale::fallback::{LocaleFallbackConfig, LocaleFallbackPriority};
2627
use icu_provider::prelude::*;
2728
use vecs::Index32;
2829
use zerovec::*;
@@ -32,7 +33,12 @@ use zerovec::*;
3233
icu_provider::data_marker!(
3334
/// `TransliteratorRulesV1`
3435
TransliteratorRulesV1,
35-
RuleBasedTransliterator<'static>
36+
RuleBasedTransliterator<'static>,
37+
// Fallback configuration for this marker: start from the default config and
// override only the priority, setting it to `LocaleFallbackPriority::Script`.
fallback_config = {
38+
let mut config = LocaleFallbackConfig::default();
39+
config.priority = LocaleFallbackPriority::Script;
40+
config
41+
},
3642
);
3743

3844
/// The data struct representing [UTS #35 transform rules](https://unicode.org/reports/tr35/tr35-general.html#Transforms).

0 commit comments

Comments
 (0)