Skip to content

Commit 14b4fdc

Browse files
committed
Fix str.split
1 parent 0e853f7 commit 14b4fdc

File tree

4 files changed

+115
-52
lines changed

4 files changed

+115
-52
lines changed

vm/src/obj/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,4 @@ pub mod objtype;
4444
pub mod objweakproxy;
4545
pub mod objweakref;
4646
pub mod objzip;
47+
mod pystr;

vm/src/obj/objstr.rs

Lines changed: 71 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use super::objsequence::PySliceableSequence;
2121
use super::objslice::PySliceRef;
2222
use super::objtuple;
2323
use super::objtype::{self, PyClassRef};
24+
use super::pystr::PyCommonString;
2425
use crate::cformat::{
2526
CFormatPart, CFormatPreconversor, CFormatQuantity, CFormatSpec, CFormatString, CFormatType,
2627
CNumberType,
@@ -455,61 +456,27 @@ impl PyString {
455456

456457
#[pymethod]
457458
fn split(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult {
458-
let value = &self.value;
459-
let pattern = args.non_empty_sep(vm)?;
460-
let num_splits = args.maxsplit;
461-
let elements: Vec<_> = match (pattern, num_splits.is_negative()) {
462-
(Some(pattern), true) => value
463-
.split(pattern)
464-
.map(|o| vm.ctx.new_str(o.to_owned()))
465-
.collect(),
466-
(Some(pattern), false) => value
467-
.splitn(num_splits as usize + 1, pattern)
468-
.map(|o| vm.ctx.new_str(o.to_owned()))
469-
.collect(),
470-
(None, true) => value
471-
.trim_start()
472-
.split(|c: char| c.is_ascii_whitespace())
473-
.filter(|s| !s.is_empty())
474-
.map(|o| vm.ctx.new_str(o.to_owned()))
475-
.collect(),
476-
(None, false) => value
477-
.trim_start()
478-
.splitn(num_splits as usize + 1, |c: char| c.is_ascii_whitespace())
479-
.filter(|s| !s.is_empty())
480-
.map(|o| vm.ctx.new_str(o.to_owned()))
481-
.collect(),
482-
};
459+
let elements = self.value.py_split(
460+
args.non_empty_sep(vm)?,
461+
args.maxsplit,
462+
vm,
463+
|v, s, vm| v.split(s).map(|s| vm.ctx.new_str(s)).collect(),
464+
|v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s)).collect(),
465+
|v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s)),
466+
);
483467
Ok(vm.ctx.new_list(elements))
484468
}
485469

486470
#[pymethod]
487471
fn rsplit(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult {
488-
let value = &self.value;
489-
let pattern = args.non_empty_sep(vm)?;
490-
let num_splits = args.maxsplit;
491-
let mut elements: Vec<_> = match (pattern, num_splits.is_negative()) {
492-
(Some(pattern), true) => value
493-
.rsplit(pattern)
494-
.map(|o| vm.ctx.new_str(o.to_owned()))
495-
.collect(),
496-
(Some(pattern), false) => value
497-
.rsplitn(num_splits as usize + 1, pattern)
498-
.map(|o| vm.ctx.new_str(o.to_owned()))
499-
.collect(),
500-
(None, true) => value
501-
.trim_end()
502-
.rsplit(|c: char| c.is_ascii_whitespace())
503-
.filter(|s| !s.is_empty())
504-
.map(|o| vm.ctx.new_str(o.to_owned()))
505-
.collect(),
506-
(None, false) => value
507-
.trim_end()
508-
.rsplitn(num_splits as usize + 1, |c: char| c.is_ascii_whitespace())
509-
.filter(|s| !s.is_empty())
510-
.map(|o| vm.ctx.new_str(o.to_owned()))
511-
.collect(),
512-
};
472+
let mut elements = self.value.py_split(
473+
args.non_empty_sep(vm)?,
474+
args.maxsplit,
475+
vm,
476+
|v, s, vm| v.rsplit(s).map(|s| vm.ctx.new_str(s)).collect(),
477+
|v, s, n, vm| v.rsplitn(n, s).map(|s| vm.ctx.new_str(s)).collect(),
478+
|v, n, vm| v.py_rsplit_whitespace(n, |s| vm.ctx.new_str(s)),
479+
);
513480
// Unlike Python rsplit, Rust rsplitn returns an iterator that
514481
// starts from the end of the string.
515482
elements.reverse();
@@ -1882,3 +1849,57 @@ mod tests {
18821849
assert_eq!(translated.unwrap_err().class().name, "TypeError".to_owned());
18831850
}
18841851
}
1852+
1853+
impl PyCommonString<'_, char> for str {
1854+
fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1855+
where
1856+
F: Fn(&Self) -> PyObjectRef,
1857+
{
1858+
// CPython split_whitespace
1859+
let mut splited = Vec::new();
1860+
let mut last_offset = 0;
1861+
let mut count = maxsplit;
1862+
for (offset, _) in self.match_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
1863+
if last_offset == offset {
1864+
last_offset += 1;
1865+
continue;
1866+
}
1867+
if count == 0 {
1868+
break;
1869+
}
1870+
splited.push(convert(&self[last_offset..offset]));
1871+
last_offset = offset + 1;
1872+
count -= 1;
1873+
}
1874+
if last_offset != self.len() {
1875+
splited.push(convert(&self[last_offset..]));
1876+
}
1877+
splited
1878+
}
1879+
1880+
fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1881+
where
1882+
F: Fn(&Self) -> PyObjectRef,
1883+
{
1884+
// CPython rsplit_whitespace
1885+
let mut splited = Vec::new();
1886+
let mut last_offset = self.len();
1887+
let mut count = maxsplit;
1888+
for (offset, _) in self.rmatch_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
1889+
if last_offset == offset + 1 {
1890+
last_offset -= 1;
1891+
continue;
1892+
}
1893+
if count == 0 {
1894+
break;
1895+
}
1896+
splited.push(convert(&self[offset + 1..last_offset]));
1897+
last_offset = offset;
1898+
count -= 1;
1899+
}
1900+
if last_offset != 0 {
1901+
splited.push(convert(&self[..last_offset]));
1902+
}
1903+
splited
1904+
}
1905+
}

vm/src/obj/pystr.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
use crate::pyobject::PyObjectRef;
2+
use crate::vm::VirtualMachine;
3+
4+
pub trait PyCommonString<'a, E>
5+
where
6+
Self: 'a,
7+
{
8+
fn py_split<SP, SN, SW, R>(
9+
&self,
10+
sep: Option<&Self>,
11+
maxsplit: isize,
12+
vm: &VirtualMachine,
13+
split: SP,
14+
splitn: SN,
15+
splitw: SW,
16+
) -> Vec<R>
17+
where
18+
SP: Fn(&Self, &Self, &VirtualMachine) -> Vec<R>,
19+
SN: Fn(&Self, &Self, usize, &VirtualMachine) -> Vec<R>,
20+
SW: Fn(&Self, isize, &VirtualMachine) -> Vec<R>,
21+
{
22+
if let Some(pattern) = sep {
23+
if maxsplit < 0 {
24+
split(self, pattern, vm)
25+
} else {
26+
splitn(self, pattern, (maxsplit + 1) as usize, vm)
27+
}
28+
} else {
29+
splitw(self, maxsplit, vm)
30+
}
31+
}
32+
fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
33+
where
34+
F: Fn(&Self) -> PyObjectRef;
35+
fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
36+
where
37+
F: Fn(&Self) -> PyObjectRef;
38+
}

vm/src/pyobject.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,10 @@ impl PyContext {
420420
PyObject::new(PyComplex::from(value), self.complex_type(), None)
421421
}
422422

423-
pub fn new_str(&self, s: String) -> PyObjectRef {
423+
pub fn new_str<S>(&self, s: S) -> PyObjectRef
424+
where
425+
objstr::PyString: std::convert::From<S>,
426+
{
424427
PyObject::new(objstr::PyString::from(s), self.str_type(), None)
425428
}
426429

@@ -1305,7 +1308,7 @@ pub trait PyClassImpl: PyClassDef {
13051308
class.slots.borrow_mut().flags = Self::TP_FLAGS;
13061309
ctx.add_tp_new_wrapper(&class);
13071310
if let Some(doc) = Self::DOC {
1308-
class.set_str_attr("__doc__", ctx.new_str(doc.into()));
1311+
class.set_str_attr("__doc__", ctx.new_str(doc));
13091312
}
13101313
}
13111314

0 commit comments

Comments
 (0)