Skip to content

Commit 3f3a83c

Browse files
authored
Merge pull request #1883 from youknowone/share-splitlines
Share str.splitlines and bytes.splitlines implementation
2 parents eb25ce6 + 5cfdf1d commit 3f3a83c

File tree

5 files changed

+56
-73
lines changed

5 files changed

+56
-73
lines changed

vm/src/obj/objbytearray.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,13 +451,10 @@ impl PyByteArray {
451451

452452
#[pymethod(name = "splitlines")]
453453
fn splitlines(&self, options: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyResult {
454-
let as_bytes = self
454+
let lines = self
455455
.borrow_value()
456-
.splitlines(options)
457-
.iter()
458-
.map(|x| vm.ctx.new_bytearray(x.to_vec()))
459-
.collect::<Vec<PyObjectRef>>();
460-
Ok(vm.ctx.new_list(as_bytes))
456+
.splitlines(options, |x| vm.ctx.new_bytearray(x.to_vec()));
457+
Ok(vm.ctx.new_list(lines))
461458
}
462459

463460
#[pymethod(name = "zfill")]

vm/src/obj/objbyteinner.rs

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -961,46 +961,11 @@ impl PyByteInner {
961961
res
962962
}
963963

964-
pub fn splitlines(&self, options: pystr::SplitLinesArgs) -> Vec<&[u8]> {
965-
let mut res = vec![];
966-
967-
if self.elements.is_empty() {
968-
return vec![];
969-
}
970-
971-
let mut prev_index = 0;
972-
let mut index = 0;
973-
let keep = if options.keepends { 1 } else { 0 };
974-
let slice = &self.elements;
975-
976-
while index < slice.len() {
977-
match slice[index] {
978-
b'\n' => {
979-
res.push(&slice[prev_index..index + keep]);
980-
index += 1;
981-
prev_index = index;
982-
}
983-
b'\r' => {
984-
if index + 2 <= slice.len() && slice[index + 1] == b'\n' {
985-
res.push(&slice[prev_index..index + keep + keep]);
986-
index += 2;
987-
} else {
988-
res.push(&slice[prev_index..index + keep]);
989-
index += 1;
990-
}
991-
prev_index = index;
992-
}
993-
_x => {
994-
if index == slice.len() - 1 {
995-
res.push(&slice[prev_index..=index]);
996-
break;
997-
}
998-
index += 1
999-
}
1000-
}
1001-
}
1002-
1003-
res
964+
pub fn splitlines<FW, W>(&self, options: pystr::SplitLinesArgs, into_wrapper: FW) -> Vec<W>
965+
where
966+
FW: Fn(&[u8]) -> W,
967+
{
968+
self.elements.py_splitlines(options, into_wrapper)
1004969
}
1005970

1006971
pub fn zfill(&self, width: isize) -> Vec<u8> {
@@ -1329,6 +1294,10 @@ impl PyCommonString<u8> for [u8] {
13291294
Vec::with_capacity(capacity)
13301295
}
13311296

1297+
fn as_bytes(&self) -> &[u8] {
1298+
self
1299+
}
1300+
13321301
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
13331302
&self[range]
13341303
}

vm/src/obj/objbytes.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -418,13 +418,10 @@ impl PyBytes {
418418

419419
#[pymethod(name = "splitlines")]
420420
fn splitlines(&self, options: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyResult {
421-
let as_bytes = self
421+
let lines = self
422422
.inner
423-
.splitlines(options)
424-
.iter()
425-
.map(|x| vm.ctx.new_bytes(x.to_vec()))
426-
.collect::<Vec<PyObjectRef>>();
427-
Ok(vm.ctx.new_list(as_bytes))
423+
.splitlines(options, |x| vm.ctx.new_bytes(x.to_vec()));
424+
Ok(vm.ctx.new_list(lines))
428425
}
429426

430427
#[pymethod(name = "zfill")]

vm/src/obj/objstr.rs

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -803,27 +803,8 @@ impl PyString {
803803

804804
#[pymethod]
805805
fn splitlines(&self, args: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyObjectRef {
806-
let mut elements = vec![];
807-
let mut curr = "".to_owned();
808-
let mut chars = self.value.chars().peekable();
809-
while let Some(ch) = chars.next() {
810-
if ch == '\n' || ch == '\r' {
811-
if args.keepends {
812-
curr.push(ch);
813-
}
814-
if ch == '\r' && chars.peek() == Some(&'\n') {
815-
continue;
816-
}
817-
elements.push(vm.ctx.new_str(curr.clone()));
818-
curr.clear();
819-
} else {
820-
curr.push(ch);
821-
}
822-
}
823-
if !curr.is_empty() {
824-
elements.push(vm.ctx.new_str(curr));
825-
}
826-
vm.ctx.new_list(elements)
806+
vm.ctx
807+
.new_list(self.value.py_splitlines(args, |s| vm.new_str(s.to_owned())))
827808
}
828809

829810
#[pymethod]
@@ -1752,6 +1733,10 @@ impl PyCommonString<char> for str {
17521733
String::with_capacity(capacity)
17531734
}
17541735

1736+
fn as_bytes(&self) -> &[u8] {
1737+
self.as_bytes()
1738+
}
1739+
17551740
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
17561741
&self[range]
17571742
}

vm/src/obj/pystr.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ pub trait PyCommonString<E> {
129129
type Container;
130130

131131
fn with_capacity(capacity: usize) -> Self::Container;
132+
fn as_bytes(&self) -> &[u8];
132133
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self;
133134
// FIXME: get_chars is expensive for str
134135
fn get_chars<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self;
@@ -297,4 +298,38 @@ pub trait PyCommonString<E> {
297298
&self
298299
}
299300
}
301+
302+
fn py_splitlines<FW, W>(&self, options: SplitLinesArgs, into_wrapper: FW) -> Vec<W>
303+
where
304+
FW: Fn(&Self) -> W,
305+
{
306+
let keep = if options.keepends { 1 } else { 0 };
307+
let mut elements = Vec::new();
308+
let mut last_i = 0;
309+
let mut enumerated = self.as_bytes().iter().enumerate().peekable();
310+
while let Some((i, ch)) = enumerated.next() {
311+
let (end_len, i_diff) = match *ch {
312+
b'\n' => (keep, 1),
313+
b'\r' => {
314+
let is_rn = enumerated.peek().map_or(false, |(_, ch)| **ch == b'\n');
315+
if is_rn {
316+
let _ = enumerated.next();
317+
(keep + keep, 2)
318+
} else {
319+
(keep, 1)
320+
}
321+
}
322+
_ => {
323+
continue;
324+
}
325+
};
326+
let range = last_i..i + end_len;
327+
last_i = i + i_diff;
328+
elements.push(into_wrapper(self.get_bytes(range)));
329+
}
330+
if last_i != self.bytes_len() {
331+
elements.push(into_wrapper(self.get_bytes(last_i..self.bytes_len())));
332+
}
333+
elements
334+
}
300335
}

0 commit comments

Comments
 (0)