20
20
# Since this should not require frequent updates, we just store this
21
21
# out-of-line and check the unicode.rs file into git.
22
22
23
- import fileinput , re , os , sys , operator
23
+ import fileinput , re , os , sys
24
24
25
25
preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
26
26
// file at the top-level directory of this distribution and at
@@ -59,7 +59,7 @@ def is_surrogate(n):
59
59
60
60
def fetch (f ):
61
61
if not os .path .exists (os .path .basename (f )):
62
- os .system ("curl -O http://www.unicode.org/Public/UNIDATA /%s"
62
+ os .system ("curl -O http://www.unicode.org/Public/9.0.0/ucd /%s"
63
63
% f )
64
64
65
65
if not os .path .exists (os .path .basename (f )):
@@ -80,7 +80,7 @@ def load_gencats(f):
80
80
if is_surrogate (cp ):
81
81
continue
82
82
if range_start >= 0 :
83
- for i in xrange (range_start , cp ):
83
+ for i in range (range_start , cp ):
84
84
udict [i ] = data ;
85
85
range_start = - 1 ;
86
86
if data [1 ].endswith (", First>" ):
@@ -150,8 +150,8 @@ def format_table_content(f, content, indent):
150
150
def load_properties (f , interestingprops ):
151
151
fetch (f )
152
152
props = {}
153
- re1 = re .compile ("^ *([0-9A-F]+) *; *(\w+)" )
154
- re2 = re .compile ("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
153
+ re1 = re .compile (r "^ *([0-9A-F]+) *; *(\w+)" )
154
+ re2 = re .compile (r "^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
155
155
156
156
for line in fileinput .input (os .path .basename (f )):
157
157
prop = None
@@ -309,7 +309,7 @@ def emit_break_module(f, break_table, break_cats, name):
309
309
# download and parse all the data
310
310
fetch ("ReadMe.txt" )
311
311
with open ("ReadMe.txt" ) as readme :
312
- pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
312
+ pattern = r "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
313
313
unicode_version = re .search (pattern , readme .read ()).groups ()
314
314
rf .write ("""
315
315
/// The version of [Unicode](http://www.unicode.org/)
@@ -342,19 +342,19 @@ def emit_break_module(f, break_table, break_cats, name):
342
342
for cat in grapheme_cats :
343
343
grapheme_table .extend ([(x , y , cat ) for (x , y ) in grapheme_cats [cat ]])
344
344
grapheme_table .sort (key = lambda w : w [0 ])
345
- emit_break_module (rf , grapheme_table , grapheme_cats .keys (), "grapheme" )
345
+ emit_break_module (rf , grapheme_table , list ( grapheme_cats .keys () ), "grapheme" )
346
346
rf .write ("\n " )
347
347
348
348
word_cats = load_properties ("auxiliary/WordBreakProperty.txt" , [])
349
349
word_table = []
350
350
for cat in word_cats :
351
351
word_table .extend ([(x , y , cat ) for (x , y ) in word_cats [cat ]])
352
352
word_table .sort (key = lambda w : w [0 ])
353
- emit_break_module (rf , word_table , word_cats .keys (), "word" )
353
+ emit_break_module (rf , word_table , list ( word_cats .keys () ), "word" )
354
354
355
355
sentence_cats = load_properties ("auxiliary/SentenceBreakProperty.txt" , [])
356
356
sentence_table = []
357
357
for cat in sentence_cats :
358
358
sentence_table .extend ([(x , y , cat ) for (x , y ) in sentence_cats [cat ]])
359
359
sentence_table .sort (key = lambda w : w [0 ])
360
- emit_break_module (rf , sentence_table , sentence_cats .keys (), "sentence" )
360
+ emit_break_module (rf , sentence_table , list ( sentence_cats .keys () ), "sentence" )
0 commit comments