Skip to content

Commit fa10dd3

Browse files
committed
Fetch and generate sentence tests, property table
1 parent 6fc6815 commit fa10dd3

File tree

4 files changed

+1323
-0
lines changed

4 files changed

+1323
-0
lines changed

scripts/unicode.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -351,3 +351,10 @@ def emit_break_module(f, break_table, break_cats, name):
351351
word_table.extend([(x, y, cat) for (x, y) in word_cats[cat]])
352352
word_table.sort(key=lambda w: w[0])
353353
emit_break_module(rf, word_table, word_cats.keys(), "word")
354+
355+
# Build the sentence break table: one (start, end, category) entry per range
# listed under each category, ordered by range start, then emit it as the
# "sentence" module.
sentence_cats = load_properties("auxiliary/SentenceBreakProperty.txt", [])
sentence_table = sorted(
    ((lo, hi, cat) for cat in sentence_cats for (lo, hi) in sentence_cats[cat]),
    key=lambda entry: entry[0],
)
emit_break_module(rf, sentence_table, sentence_cats.keys(), "sentence")

scripts/unicode_gen_breaktests.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -190,8 +190,23 @@ def create_words_data(f):
190190
f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt\n")
191191
unicode.emit_table(f, "TEST_WORD", test, wtype, True, showfun, True)
192192

193+
def create_sentence_data(f):
    """Write the TEST_SENTENCE table to *f*.

    Loads the cases from auxiliary/SentenceBreakTest.txt and, for each one,
    pairs the flattened items of its first element with that element itself.
    The second element of each loaded pair is ignored.
    """
    cases = load_test_data("auxiliary/SentenceBreakTest.txt")

    table = [
        ([ch for piece in pieces for ch in piece], pieces)
        for (pieces, _ignored) in cases
    ]

    # Rust type of the emitted table.
    rust_type = "&'static [(&'static str, &'static [&'static str])]"
    f.write(" // official Unicode test data\n")
    f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/SentenceBreakTest.txt\n")
    unicode.emit_table(f, "TEST_SENTENCE", table, rust_type, True, showfun, True)
206+
193207
if __name__ == "__main__":
    # Regenerate testdata.rs: the shared preamble first, then the grapheme,
    # word and sentence test tables in that order.
    with open("testdata.rs", "w") as rf:
        rf.write(unicode.preamble)
        for emit in (create_grapheme_data, create_words_data, create_sentence_data):
            emit(rf)

0 commit comments

Comments
 (0)