Skip to content

Commit 74ea683

Browse files
committed
Implement new rules GB9/GB11 for zero-width joiner (ZWJ)
1 parent bcfd39c commit 74ea683

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

src/grapheme.rs

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ enum GraphemeState {
5959
HangulLV,
6060
HangulLVT,
6161
Regional,
62+
Zwj,
6263
}
6364

6465
impl<'a> Iterator for Graphemes<'a> {
@@ -93,13 +94,14 @@ impl<'a> Iterator for Graphemes<'a> {
9394
_ => self.cat.take().unwrap()
9495
};
9596

96-
if match cat {
97-
gr::GC_Extend => true,
98-
gr::GC_SpacingMark if self.extended => true,
99-
_ => false
97+
if let Some(new_state) = match cat {
98+
gr::GC_Extend => Some(FindExtend), // rule GB9
99+
gr::GC_SpacingMark if self.extended => Some(FindExtend), // rule GB9a
100+
gr::GC_ZWJ => Some(Zwj), // rule GB9/GB11
101+
_ => None
100102
} {
101-
state = FindExtend; // rule GB9/GB9a
102-
continue;
103+
state = new_state;
104+
continue;
103105
}
104106

105107
state = match state {
@@ -153,7 +155,14 @@ impl<'a> Iterator for Graphemes<'a> {
153155
take_curr = false;
154156
break;
155157
}
156-
}
158+
},
159+
Zwj => match cat { // rule GB11: ZWJ x (GAZ|EBG)
160+
gr::GC_Glue_After_Zwj | gr::GC_E_Base_GAZ => continue,
161+
_ => {
162+
take_curr = false;
163+
break;
164+
}
165+
},
157166
}
158167
}
159168

@@ -215,6 +224,8 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
215224
Start | FindExtend => match cat {
216225
gr::GC_Extend => FindExtend,
217226
gr::GC_SpacingMark if self.extended => FindExtend,
227+
gr::GC_ZWJ => FindExtend,
228+
gr::GC_Glue_After_Zwj | gr::GC_E_Base_GAZ => Zwj,
218229
gr::GC_L | gr::GC_LV | gr::GC_LVT => HangulL,
219230
gr::GC_V => HangulLV,
220231
gr::GC_T => HangulLVT,
@@ -255,6 +266,13 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
255266
take_curr = false;
256267
break;
257268
}
269+
},
270+
Zwj => match cat { // char to right is (GAZ|EBG)
271+
gr::GC_ZWJ => continue, // rule GB11: ZWJ x (GAZ|EBG)
272+
_ => {
273+
take_curr = false;
274+
break;
275+
}
258276
}
259277
}
260278
}

0 commit comments

Comments
 (0)