@@ -33,6 +33,7 @@ pub enum BadSequence {
33
33
CharRepeatInSet1 ,
34
34
InvalidRepeatCount ( String ) ,
35
35
EmptySet2WhenNotTruncatingSet1 ,
36
+ ClassExceptLowerUpperInSet2 ,
36
37
}
37
38
38
39
impl Display for BadSequence {
@@ -54,6 +55,9 @@ impl Display for BadSequence {
54
55
Self :: EmptySet2WhenNotTruncatingSet1 => {
55
56
write ! ( f, "when not truncating set1, string2 must be non-empty" )
56
57
}
58
+ Self :: ClassExceptLowerUpperInSet2 => {
59
+ write ! ( f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'" )
60
+ }
57
61
}
58
62
}
59
63
}
@@ -62,11 +66,7 @@ impl Error for BadSequence {}
62
66
// Empty impl: no `UError` methods are overridden here, so `BadSequence`
// uses whatever default behaviour the trait itself provides.
impl UError for BadSequence { }
63
67
64
68
#[ derive( Debug , Clone , Copy ) ]
65
- pub enum Sequence {
66
- Char ( u8 ) ,
67
- CharRange ( u8 , u8 ) ,
68
- CharStar ( u8 ) ,
69
- CharRepeat ( u8 , usize ) ,
69
+ pub enum Class {
70
70
Alnum ,
71
71
Alpha ,
72
72
Blank ,
@@ -81,44 +81,55 @@ pub enum Sequence {
81
81
Xdigit ,
82
82
}
83
83
84
+ #[ derive( Debug , Clone , Copy ) ]
85
+ pub enum Sequence {
86
+ Char ( u8 ) ,
87
+ CharRange ( u8 , u8 ) ,
88
+ CharStar ( u8 ) ,
89
+ CharRepeat ( u8 , usize ) ,
90
+ Class ( Class ) ,
91
+ }
92
+
84
93
impl Sequence {
85
94
pub fn flatten ( & self ) -> Box < dyn Iterator < Item = u8 > > {
86
95
match self {
87
96
Self :: Char ( c) => Box :: new ( std:: iter:: once ( * c) ) ,
88
97
Self :: CharRange ( l, r) => Box :: new ( * l..=* r) ,
89
98
Self :: CharStar ( c) => Box :: new ( std:: iter:: repeat ( * c) ) ,
90
99
Self :: CharRepeat ( c, n) => Box :: new ( std:: iter:: repeat ( * c) . take ( * n) ) ,
91
- Self :: Alnum => Box :: new ( ( b'0' ..=b'9' ) . chain ( b'A' ..=b'Z' ) . chain ( b'a' ..=b'z' ) ) ,
92
- Self :: Alpha => Box :: new ( ( b'A' ..=b'Z' ) . chain ( b'a' ..=b'z' ) ) ,
93
- Self :: Blank => Box :: new ( unicode_table:: BLANK . iter ( ) . cloned ( ) ) ,
94
- Self :: Control => Box :: new ( ( 0 ..=31 ) . chain ( std:: iter:: once ( 127 ) ) ) ,
95
- Self :: Digit => Box :: new ( b'0' ..=b'9' ) ,
96
- Self :: Graph => Box :: new (
97
- ( 48 ..=57 ) // digit
98
- . chain ( 65 ..=90 ) // uppercase
99
- . chain ( 97 ..=122 ) // lowercase
100
- // punctuations
101
- . chain ( 33 ..=47 )
102
- . chain ( 58 ..=64 )
103
- . chain ( 91 ..=96 )
104
- . chain ( 123 ..=126 )
105
- . chain ( std:: iter:: once ( 32 ) ) , // space
106
- ) ,
107
- Self :: Lower => Box :: new ( b'a' ..=b'z' ) ,
108
- Self :: Print => Box :: new (
109
- ( 48 ..=57 ) // digit
110
- . chain ( 65 ..=90 ) // uppercase
111
- . chain ( 97 ..=122 ) // lowercase
112
- // punctuations
113
- . chain ( 33 ..=47 )
114
- . chain ( 58 ..=64 )
115
- . chain ( 91 ..=96 )
116
- . chain ( 123 ..=126 ) ,
117
- ) ,
118
- Self :: Punct => Box :: new ( ( 33 ..=47 ) . chain ( 58 ..=64 ) . chain ( 91 ..=96 ) . chain ( 123 ..=126 ) ) ,
119
- Self :: Space => Box :: new ( unicode_table:: SPACES . iter ( ) . cloned ( ) ) ,
120
- Self :: Upper => Box :: new ( b'A' ..=b'Z' ) ,
121
- Self :: Xdigit => Box :: new ( ( b'0' ..=b'9' ) . chain ( b'A' ..=b'F' ) . chain ( b'a' ..=b'f' ) ) ,
100
+ Self :: Class ( class) => match class {
101
+ Class :: Alnum => Box :: new ( ( b'0' ..=b'9' ) . chain ( b'A' ..=b'Z' ) . chain ( b'a' ..=b'z' ) ) ,
102
+ Class :: Alpha => Box :: new ( ( b'A' ..=b'Z' ) . chain ( b'a' ..=b'z' ) ) ,
103
+ Class :: Blank => Box :: new ( unicode_table:: BLANK . iter ( ) . cloned ( ) ) ,
104
+ Class :: Control => Box :: new ( ( 0 ..=31 ) . chain ( std:: iter:: once ( 127 ) ) ) ,
105
+ Class :: Digit => Box :: new ( b'0' ..=b'9' ) ,
106
+ Class :: Graph => Box :: new (
107
+ ( 48 ..=57 ) // digit
108
+ . chain ( 65 ..=90 ) // uppercase
109
+ . chain ( 97 ..=122 ) // lowercase
110
+ // punctuations
111
+ . chain ( 33 ..=47 )
112
+ . chain ( 58 ..=64 )
113
+ . chain ( 91 ..=96 )
114
+ . chain ( 123 ..=126 )
115
+ . chain ( std:: iter:: once ( 32 ) ) , // space
116
+ ) ,
117
+ Class :: Lower => Box :: new ( b'a' ..=b'z' ) ,
118
+ Class :: Print => Box :: new (
119
+ ( 48 ..=57 ) // digit
120
+ . chain ( 65 ..=90 ) // uppercase
121
+ . chain ( 97 ..=122 ) // lowercase
122
+ // punctuations
123
+ . chain ( 33 ..=47 )
124
+ . chain ( 58 ..=64 )
125
+ . chain ( 91 ..=96 )
126
+ . chain ( 123 ..=126 ) ,
127
+ ) ,
128
+ Class :: Punct => Box :: new ( ( 33 ..=47 ) . chain ( 58 ..=64 ) . chain ( 91 ..=96 ) . chain ( 123 ..=126 ) ) ,
129
+ Class :: Space => Box :: new ( unicode_table:: SPACES . iter ( ) . cloned ( ) ) ,
130
+ Class :: Upper => Box :: new ( b'A' ..=b'Z' ) ,
131
+ Class :: Xdigit => Box :: new ( ( b'0' ..=b'9' ) . chain ( b'A' ..=b'F' ) . chain ( b'a' ..=b'f' ) ) ,
132
+ } ,
122
133
}
123
134
}
124
135
@@ -128,13 +139,23 @@ impl Sequence {
128
139
set2_str : & [ u8 ] ,
129
140
complement_flag : bool ,
130
141
truncate_set1_flag : bool ,
142
+ translating : bool ,
131
143
) -> Result < ( Vec < u8 > , Vec < u8 > ) , BadSequence > {
132
144
let set1 = Self :: from_str ( set1_str) ?;
133
-
134
145
let is_char_star = |s : & & Self | -> bool { matches ! ( s, Self :: CharStar ( _) ) } ;
135
146
let set1_star_count = set1. iter ( ) . filter ( is_char_star) . count ( ) ;
136
147
if set1_star_count == 0 {
137
148
let set2 = Self :: from_str ( set2_str) ?;
149
+
150
+ if translating
151
+ && set2. iter ( ) . any ( |& x| {
152
+ matches ! ( x, Self :: Class ( _) )
153
+ && !matches ! ( x, Self :: Class ( Class :: Upper ) | Self :: Class ( Class :: Lower ) )
154
+ } )
155
+ {
156
+ return Err ( BadSequence :: ClassExceptLowerUpperInSet2 ) ;
157
+ }
158
+
138
159
let set2_star_count = set2. iter ( ) . filter ( is_char_star) . count ( ) ;
139
160
if set2_star_count < 2 {
140
161
let char_star = set2. iter ( ) . find_map ( |s| match s {
@@ -305,18 +326,18 @@ impl Sequence {
305
326
alt ( (
306
327
map (
307
328
alt ( (
308
- value ( Self :: Alnum , tag ( "alnum" ) ) ,
309
- value ( Self :: Alpha , tag ( "alpha" ) ) ,
310
- value ( Self :: Blank , tag ( "blank" ) ) ,
311
- value ( Self :: Control , tag ( "cntrl" ) ) ,
312
- value ( Self :: Digit , tag ( "digit" ) ) ,
313
- value ( Self :: Graph , tag ( "graph" ) ) ,
314
- value ( Self :: Lower , tag ( "lower" ) ) ,
315
- value ( Self :: Print , tag ( "print" ) ) ,
316
- value ( Self :: Punct , tag ( "punct" ) ) ,
317
- value ( Self :: Space , tag ( "space" ) ) ,
318
- value ( Self :: Upper , tag ( "upper" ) ) ,
319
- value ( Self :: Xdigit , tag ( "xdigit" ) ) ,
329
+ value ( Self :: Class ( Class :: Alnum ) , tag ( "alnum" ) ) ,
330
+ value ( Self :: Class ( Class :: Alpha ) , tag ( "alpha" ) ) ,
331
+ value ( Self :: Class ( Class :: Blank ) , tag ( "blank" ) ) ,
332
+ value ( Self :: Class ( Class :: Control ) , tag ( "cntrl" ) ) ,
333
+ value ( Self :: Class ( Class :: Digit ) , tag ( "digit" ) ) ,
334
+ value ( Self :: Class ( Class :: Graph ) , tag ( "graph" ) ) ,
335
+ value ( Self :: Class ( Class :: Lower ) , tag ( "lower" ) ) ,
336
+ value ( Self :: Class ( Class :: Print ) , tag ( "print" ) ) ,
337
+ value ( Self :: Class ( Class :: Punct ) , tag ( "punct" ) ) ,
338
+ value ( Self :: Class ( Class :: Space ) , tag ( "space" ) ) ,
339
+ value ( Self :: Class ( Class :: Upper ) , tag ( "upper" ) ) ,
340
+ value ( Self :: Class ( Class :: Xdigit ) , tag ( "xdigit" ) ) ,
320
341
) ) ,
321
342
Ok ,
322
343
) ,
0 commit comments