@@ -21,6 +21,7 @@ use super::objsequence::PySliceableSequence;
21
21
use super :: objslice:: PySliceRef ;
22
22
use super :: objtuple;
23
23
use super :: objtype:: { self , PyClassRef } ;
24
+ use super :: pystr:: PyCommonString ;
24
25
use crate :: cformat:: {
25
26
CFormatPart , CFormatPreconversor , CFormatQuantity , CFormatSpec , CFormatString , CFormatType ,
26
27
CNumberType ,
@@ -455,61 +456,27 @@ impl PyString {
455
456
456
457
#[ pymethod]
457
458
fn split ( & self , args : SplitArgs , vm : & VirtualMachine ) -> PyResult {
458
- let value = & self . value ;
459
- let pattern = args. non_empty_sep ( vm) ?;
460
- let num_splits = args. maxsplit ;
461
- let elements: Vec < _ > = match ( pattern, num_splits. is_negative ( ) ) {
462
- ( Some ( pattern) , true ) => value
463
- . split ( pattern)
464
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
465
- . collect ( ) ,
466
- ( Some ( pattern) , false ) => value
467
- . splitn ( num_splits as usize + 1 , pattern)
468
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
469
- . collect ( ) ,
470
- ( None , true ) => value
471
- . trim_start ( )
472
- . split ( |c : char | c. is_ascii_whitespace ( ) )
473
- . filter ( |s| !s. is_empty ( ) )
474
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
475
- . collect ( ) ,
476
- ( None , false ) => value
477
- . trim_start ( )
478
- . splitn ( num_splits as usize + 1 , |c : char | c. is_ascii_whitespace ( ) )
479
- . filter ( |s| !s. is_empty ( ) )
480
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
481
- . collect ( ) ,
482
- } ;
459
+ let elements = self . value . py_split (
460
+ args. non_empty_sep ( vm) ?,
461
+ args. maxsplit ,
462
+ vm,
463
+ |v, s, vm| v. split ( s) . map ( |s| vm. ctx . new_str ( s) ) . collect ( ) ,
464
+ |v, s, n, vm| v. splitn ( n, s) . map ( |s| vm. ctx . new_str ( s) ) . collect ( ) ,
465
+ |v, n, vm| v. py_split_whitespace ( n, |s| vm. ctx . new_str ( s) ) ,
466
+ ) ;
483
467
Ok ( vm. ctx . new_list ( elements) )
484
468
}
485
469
486
470
#[ pymethod]
487
471
fn rsplit ( & self , args : SplitArgs , vm : & VirtualMachine ) -> PyResult {
488
- let value = & self . value ;
489
- let pattern = args. non_empty_sep ( vm) ?;
490
- let num_splits = args. maxsplit ;
491
- let mut elements: Vec < _ > = match ( pattern, num_splits. is_negative ( ) ) {
492
- ( Some ( pattern) , true ) => value
493
- . rsplit ( pattern)
494
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
495
- . collect ( ) ,
496
- ( Some ( pattern) , false ) => value
497
- . rsplitn ( num_splits as usize + 1 , pattern)
498
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
499
- . collect ( ) ,
500
- ( None , true ) => value
501
- . trim_end ( )
502
- . rsplit ( |c : char | c. is_ascii_whitespace ( ) )
503
- . filter ( |s| !s. is_empty ( ) )
504
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
505
- . collect ( ) ,
506
- ( None , false ) => value
507
- . trim_end ( )
508
- . rsplitn ( num_splits as usize + 1 , |c : char | c. is_ascii_whitespace ( ) )
509
- . filter ( |s| !s. is_empty ( ) )
510
- . map ( |o| vm. ctx . new_str ( o. to_owned ( ) ) )
511
- . collect ( ) ,
512
- } ;
472
+ let mut elements = self . value . py_split (
473
+ args. non_empty_sep ( vm) ?,
474
+ args. maxsplit ,
475
+ vm,
476
+ |v, s, vm| v. rsplit ( s) . map ( |s| vm. ctx . new_str ( s) ) . collect ( ) ,
477
+ |v, s, n, vm| v. rsplitn ( n, s) . map ( |s| vm. ctx . new_str ( s) ) . collect ( ) ,
478
+ |v, n, vm| v. py_rsplit_whitespace ( n, |s| vm. ctx . new_str ( s) ) ,
479
+ ) ;
513
480
// Unlike Python rsplit, Rust rsplitn returns an iterator that
514
481
// starts from the end of the string.
515
482
elements. reverse ( ) ;
@@ -1882,3 +1849,57 @@ mod tests {
1882
1849
assert_eq ! ( translated. unwrap_err( ) . class( ) . name, "TypeError" . to_owned( ) ) ;
1883
1850
}
1884
1851
}
1852
+
1853
+ impl PyCommonString < ' _ , char > for str {
1854
+ fn py_split_whitespace < F > ( & self , maxsplit : isize , convert : F ) -> Vec < PyObjectRef >
1855
+ where
1856
+ F : Fn ( & Self ) -> PyObjectRef ,
1857
+ {
1858
+ // CPython split_whitespace
1859
+ let mut splited = Vec :: new ( ) ;
1860
+ let mut last_offset = 0 ;
1861
+ let mut count = maxsplit;
1862
+ for ( offset, _) in self . match_indices ( |c : char | c. is_ascii_whitespace ( ) || c == '\x0b' ) {
1863
+ if last_offset == offset {
1864
+ last_offset += 1 ;
1865
+ continue ;
1866
+ }
1867
+ if count == 0 {
1868
+ break ;
1869
+ }
1870
+ splited. push ( convert ( & self [ last_offset..offset] ) ) ;
1871
+ last_offset = offset + 1 ;
1872
+ count -= 1 ;
1873
+ }
1874
+ if last_offset != self . len ( ) {
1875
+ splited. push ( convert ( & self [ last_offset..] ) ) ;
1876
+ }
1877
+ splited
1878
+ }
1879
+
1880
+ fn py_rsplit_whitespace < F > ( & self , maxsplit : isize , convert : F ) -> Vec < PyObjectRef >
1881
+ where
1882
+ F : Fn ( & Self ) -> PyObjectRef ,
1883
+ {
1884
+ // CPython rsplit_whitespace
1885
+ let mut splited = Vec :: new ( ) ;
1886
+ let mut last_offset = self . len ( ) ;
1887
+ let mut count = maxsplit;
1888
+ for ( offset, _) in self . rmatch_indices ( |c : char | c. is_ascii_whitespace ( ) || c == '\x0b' ) {
1889
+ if last_offset == offset + 1 {
1890
+ last_offset -= 1 ;
1891
+ continue ;
1892
+ }
1893
+ if count == 0 {
1894
+ break ;
1895
+ }
1896
+ splited. push ( convert ( & self [ offset + 1 ..last_offset] ) ) ;
1897
+ last_offset = offset;
1898
+ count -= 1 ;
1899
+ }
1900
+ if last_offset != 0 {
1901
+ splited. push ( convert ( & self [ ..last_offset] ) ) ;
1902
+ }
1903
+ splited
1904
+ }
1905
+ }
0 commit comments