@@ -1127,41 +1127,166 @@ impl PyByteInner {
1127
1127
bytes_zfill ( & self . elements , width. to_usize ( ) . unwrap_or ( 0 ) )
1128
1128
}
1129
1129
1130
- pub fn replace (
1130
+ // len(self)>=1, from="", len(to)>=1, maxcount>=1
1131
+ fn replace_interleave ( & self , to : PyByteInner , maxcount : Option < usize > ) -> Vec < u8 > {
1132
+ let place_count = self . elements . len ( ) + 1 ;
1133
+ let count = maxcount. map_or ( place_count, |v| std:: cmp:: min ( v, place_count) ) - 1 ;
1134
+ let capacity = self . elements . len ( ) + count * to. len ( ) ;
1135
+ let mut result = Vec :: with_capacity ( capacity) ;
1136
+ let to_slice = to. elements . as_slice ( ) ;
1137
+ result. extend_from_slice ( to_slice) ;
1138
+ for c in & self . elements [ ..count] {
1139
+ result. push ( * c) ;
1140
+ result. extend_from_slice ( to_slice) ;
1141
+ }
1142
+ result. extend_from_slice ( & self . elements [ count..] ) ;
1143
+ result
1144
+ }
1145
+
1146
+ fn replace_delete ( & self , from : PyByteInner , maxcount : Option < usize > ) -> Vec < u8 > {
1147
+ let count = count_substring ( self . elements . as_slice ( ) , from. elements . as_slice ( ) , maxcount) ;
1148
+ if count == 0 {
1149
+ // no matches
1150
+ return self . elements . clone ( ) ;
1151
+ }
1152
+
1153
+ let result_len = self . len ( ) - ( count * from. len ( ) ) ;
1154
+ debug_assert ! ( self . len( ) >= count * from. len( ) ) ;
1155
+
1156
+ let mut result = Vec :: with_capacity ( result_len) ;
1157
+ let mut last_end = 0 ;
1158
+ let mut count = count;
1159
+ for offset in self . elements . find_iter ( & from. elements ) {
1160
+ result. extend_from_slice ( & self . elements [ last_end..offset] ) ;
1161
+ last_end = offset + from. len ( ) ;
1162
+ count -= 1 ;
1163
+ if count == 0 {
1164
+ break ;
1165
+ }
1166
+ }
1167
+ result. extend_from_slice ( & self . elements [ last_end..] ) ;
1168
+ result
1169
+ }
1170
+
1171
+ pub fn replace_in_place (
1131
1172
& self ,
1132
- old : PyByteInner ,
1133
- new : PyByteInner ,
1134
- count : OptionalArg < PyIntRef > ,
1135
- ) -> PyResult < Vec < u8 > > {
1136
- let count = match count. into_option ( ) {
1137
- Some ( int) => int
1138
- . as_bigint ( )
1139
- . to_u32 ( )
1140
- . unwrap_or ( self . elements . len ( ) as u32 ) ,
1141
- None => self . elements . len ( ) as u32 ,
1173
+ from : PyByteInner ,
1174
+ to : PyByteInner ,
1175
+ maxcount : Option < usize > ,
1176
+ ) -> Vec < u8 > {
1177
+ let len = from. len ( ) ;
1178
+ let mut iter = self . elements . find_iter ( & from. elements ) ;
1179
+
1180
+ let mut new = if let Some ( offset) = iter. next ( ) {
1181
+ let mut new = self . elements . clone ( ) ;
1182
+ new[ offset..offset + len] . clone_from_slice ( to. elements . as_slice ( ) ) ;
1183
+ if maxcount == Some ( 1 ) {
1184
+ return new;
1185
+ } else {
1186
+ new
1187
+ }
1188
+ } else {
1189
+ return self . elements . clone ( ) ;
1142
1190
} ;
1143
1191
1144
- let mut res = vec ! [ ] ;
1145
- let mut index = 0 ;
1146
- let mut done = 0 ;
1192
+ let mut count = maxcount. unwrap_or ( std:: usize:: MAX ) - 1 ;
1193
+ for offset in iter {
1194
+ new[ offset..offset + len] . clone_from_slice ( to. elements . as_slice ( ) ) ;
1195
+ count -= 1 ;
1196
+ if count == 0 {
1197
+ break ;
1198
+ }
1199
+ }
1200
+ new
1201
+ }
1147
1202
1148
- let slice = & self . elements ;
1149
- loop {
1150
- if done == count || index > slice. len ( ) - old. len ( ) {
1151
- res. extend_from_slice ( & slice[ index..] ) ;
1203
+ fn replace_general (
1204
+ & self ,
1205
+ from : PyByteInner ,
1206
+ to : PyByteInner ,
1207
+ maxcount : Option < usize > ,
1208
+ vm : & VirtualMachine ,
1209
+ ) -> PyResult < Vec < u8 > > {
1210
+ let count = count_substring ( self . elements . as_slice ( ) , from. elements . as_slice ( ) , maxcount) ;
1211
+ if count == 0 {
1212
+ // no matches, return unchanged
1213
+ return Ok ( self . elements . clone ( ) ) ;
1214
+ }
1215
+
1216
+ // Check for overflow
1217
+ // result_len = self_len + count * (to_len-from_len)
1218
+ debug_assert ! ( count > 0 ) ;
1219
+ if to. len ( ) as isize - from. len ( ) as isize
1220
+ > ( std:: isize:: MAX - self . elements . len ( ) as isize ) / count as isize
1221
+ {
1222
+ return Err ( vm. new_overflow_error ( "replace bytes is too long" . to_owned ( ) ) ) ;
1223
+ }
1224
+ let result_len = self . elements . len ( ) + count * ( to. len ( ) - from. len ( ) ) ;
1225
+
1226
+ let mut result = Vec :: with_capacity ( result_len) ;
1227
+ let mut last_end = 0 ;
1228
+ let mut count = count;
1229
+ for offset in self . elements . find_iter ( & from. elements ) {
1230
+ result. extend_from_slice ( & self . elements [ last_end..offset] ) ;
1231
+ result. extend_from_slice ( to. elements . as_slice ( ) ) ;
1232
+ last_end = offset + from. len ( ) ;
1233
+ count -= 1 ;
1234
+ if count == 0 {
1152
1235
break ;
1153
1236
}
1154
- if & slice[ index..index + old. len ( ) ] == old. elements . as_slice ( ) {
1155
- res. extend_from_slice ( & new. elements ) ;
1156
- index += old. len ( ) ;
1157
- done += 1 ;
1158
- } else {
1159
- res. push ( slice[ index] ) ;
1160
- index += 1
1237
+ }
1238
+ result. extend_from_slice ( & self . elements [ last_end..] ) ;
1239
+ Ok ( result)
1240
+ }
1241
+
1242
+ pub fn replace (
1243
+ & self ,
1244
+ from : PyByteInner ,
1245
+ to : PyByteInner ,
1246
+ maxcount : OptionalArg < isize > ,
1247
+ vm : & VirtualMachine ,
1248
+ ) -> PyResult < Vec < u8 > > {
1249
+ // stringlib_replace in CPython
1250
+ let maxcount = match maxcount {
1251
+ OptionalArg :: Present ( maxcount) if maxcount >= 0 => {
1252
+ if maxcount == 0 || self . elements . is_empty ( ) {
1253
+ // nothing to do; return the original bytes
1254
+ return Ok ( self . elements . clone ( ) ) ;
1255
+ }
1256
+ Some ( maxcount as usize )
1161
1257
}
1258
+ _ => None ,
1259
+ } ;
1260
+
1261
+ // Handle zero-length special cases
1262
+ if from. elements . is_empty ( ) {
1263
+ if to. elements . is_empty ( ) {
1264
+ // nothing to do; return the original bytes
1265
+ return Ok ( self . elements . clone ( ) ) ;
1266
+ }
1267
+ // insert the 'to' bytes everywhere.
1268
+ // >>> b"Python".replace(b"", b".")
1269
+ // b'.P.y.t.h.o.n.'
1270
+ return Ok ( self . replace_interleave ( to, maxcount) ) ;
1162
1271
}
1163
1272
1164
- Ok ( res)
1273
+ // Except for b"".replace(b"", b"A") == b"A" there is no way beyond this
1274
+ // point for an empty self bytes to generate a non-empty bytes
1275
+ // Special case so the remaining code always gets a non-empty bytes
1276
+ if self . elements . is_empty ( ) {
1277
+ return Ok ( self . elements . clone ( ) ) ;
1278
+ }
1279
+
1280
+ if to. elements . is_empty ( ) {
1281
+ // delete all occurrences of 'from' bytes
1282
+ Ok ( self . replace_delete ( from, maxcount) )
1283
+ } else if from. len ( ) == to. len ( ) {
1284
+ // Handle special case where both bytes have the same length
1285
+ Ok ( self . replace_in_place ( from, to, maxcount) )
1286
+ } else {
1287
+ // Otherwise use the more generic algorithms
1288
+ self . replace_general ( from, to, maxcount, vm)
1289
+ }
1165
1290
}
1166
1291
1167
1292
pub fn title ( & self ) -> Vec < u8 > {
@@ -1233,6 +1358,16 @@ pub fn try_as_byte(obj: &PyObjectRef) -> Option<Vec<u8>> {
1233
1358
} )
1234
1359
}
1235
1360
1361
+ #[ inline]
1362
+ fn count_substring ( haystack : & [ u8 ] , needle : & [ u8 ] , maxcount : Option < usize > ) -> usize {
1363
+ let substrings = haystack. find_iter ( needle) ;
1364
+ if let Some ( maxcount) = maxcount {
1365
+ std:: cmp:: min ( substrings. take ( maxcount) . count ( ) , maxcount)
1366
+ } else {
1367
+ substrings. count ( )
1368
+ }
1369
+ }
1370
+
1236
1371
pub trait ByteOr : ToPrimitive {
1237
1372
fn byte_or ( & self , vm : & VirtualMachine ) -> PyResult < u8 > {
1238
1373
match self . to_u8 ( ) {
0 commit comments