diff --git a/Cargo.lock b/Cargo.lock index aef44b9c0f..e778792b60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "bstr" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe8a65814ca90dfc9705af76bb6ba3c6e2534489a72270e797e603783bb4990b" +checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41" dependencies = [ "lazy_static 1.4.0", "memchr", @@ -1598,6 +1598,7 @@ dependencies = [ "base64", "bitflags", "blake2", + "bstr", "byteorder 1.3.2", "caseless", "chrono", diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 4a41e58104..3bc05fad58 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -777,7 +777,6 @@ def test_additional_rsplit(self): self.checkequal(['arf', 'barf'], b, 'rsplit', None) self.checkequal(['arf', 'barf'], b, 'rsplit', None, 2) - @unittest.skip("TODO: RUSTPYTHON test_bytes") def test_strip_whitespace(self): self.checkequal('hello', ' hello ', 'strip') self.checkequal('hello ', ' hello ', 'lstrip') @@ -795,7 +794,6 @@ def test_strip_whitespace(self): self.checkequal(' hello', ' hello ', 'rstrip', None) self.checkequal('hello', 'hello', 'strip', None) - @unittest.skip("TODO: RUSTPYTHON test_bytes") def test_strip(self): # strip/lstrip/rstrip with str arg self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', 'xyz') diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 5897b143ce..1d2fdb296c 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -72,6 +72,7 @@ result-like = "^0.2.1" foreign-types = "0.3" num_enum = "0.4" smallbox = "0.8" +bstr = "0.2.12" flame = { version = "0.2", optional = true } flamer = { version = "0.3", optional = true } diff --git a/vm/src/obj/objbytearray.rs b/vm/src/obj/objbytearray.rs index 634124c7f7..b561ce5305 100644 --- a/vm/src/obj/objbytearray.rs +++ b/vm/src/obj/objbytearray.rs @@ -5,8 +5,8 @@ use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use super::objbyteinner::{ ByteInnerExpandtabsOptions, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, - ByteInnerPosition, ByteInnerSplitOptions, ByteInnerSplitlinesOptions, - ByteInnerTranslateOptions, ByteOr, PyByteInner, + ByteInnerSplitOptions, ByteInnerSplitlinesOptions, ByteInnerTranslateOptions, ByteOr, + PyByteInner, }; use super::objint::PyIntRef; use super::objiter; @@ -15,7 +15,7 @@ use super::objstr::{PyString, PyStringRef}; use super::objtuple::PyTupleRef; use super::objtype::PyClassRef; use crate::cformat::CFormatString; -use crate::function::OptionalArg; +use crate::function::{OptionalArg, OptionalOption}; use crate::obj::objstr::do_cformat_string; use crate::pyobject::{ Either, PyClassImpl, PyComparisonValue, PyContext, PyIterable, PyObjectRef, PyRef, PyResult, @@ -373,27 +373,18 @@ impl PyByteArray { } #[pymethod(name = "strip")] - fn strip(&self, chars: OptionalArg) -> PyResult { - Ok(self - .borrow_value() - .strip(chars, ByteInnerPosition::All)? - .into()) + fn strip(&self, chars: OptionalOption) -> PyByteArray { + self.borrow_value().strip(chars).into() } #[pymethod(name = "lstrip")] - fn lstrip(&self, chars: OptionalArg) -> PyResult { - Ok(self - .borrow_value() - .strip(chars, ByteInnerPosition::Left)? - .into()) + fn lstrip(&self, chars: OptionalOption) -> PyByteArray { + self.borrow_value().lstrip(chars).into() } #[pymethod(name = "rstrip")] - fn rstrip(&self, chars: OptionalArg) -> PyResult { - Ok(self - .borrow_value() - .strip(chars, ByteInnerPosition::Right)? - .into()) + fn rstrip(&self, chars: OptionalOption) -> PyByteArray { + self.borrow_value().rstrip(chars).into() } #[pymethod(name = "split")] diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index a24dd7e380..ecca71689e 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -1,9 +1,9 @@ -use std::convert::TryFrom; -use std::ops::Range; - +use bstr::ByteSlice; use num_bigint::{BigInt, ToBigInt}; use num_integer::Integer; use num_traits::{One, Signed, ToPrimitive, Zero}; +use std::convert::TryFrom; +use std::ops::Range; use super::objbytearray::{PyByteArray, PyByteArrayRef}; use super::objbytes::{PyBytes, PyBytesRef}; @@ -15,7 +15,7 @@ use super::objsequence::{is_valid_slice_arg, PySliceableSequence}; use super::objslice::PySliceRef; use super::objstr::{self, adjust_indices, PyString, PyStringRef, StringRange}; use super::objtuple::PyTupleRef; -use crate::function::OptionalArg; +use crate::function::{OptionalArg, OptionalOption}; use crate::pyhash; use crate::pyobject::{ Either, PyComparisonValue, PyIterable, PyObjectRef, PyResult, ThreadSafe, TryFromObject, @@ -938,40 +938,37 @@ impl PyByteInner { Ok(res) } - pub fn strip( - &self, - chars: OptionalArg, - position: ByteInnerPosition, - ) -> PyResult> { - let is_valid_char = |c| { - if let OptionalArg::Present(ref bytes) = chars { - bytes.elements.contains(c) - } else { - c.is_ascii_whitespace() - } + pub fn strip(&self, chars: OptionalOption) -> Vec { + let chars = chars.flat_option(); + let chars = match chars { + Some(ref chars) => &chars.elements, + None => return self.elements.trim().to_owned(), }; + self.elements + .trim_with(|c| chars.contains(&(c as u8))) + .to_owned() + } - let mut start = 0; - let mut end = self.len(); - - if let ByteInnerPosition::Left | ByteInnerPosition::All = position { - for (i, c) in self.elements.iter().enumerate() { - if !is_valid_char(c) { - start = i; - break; - } - } - } + pub fn lstrip(&self, chars: OptionalOption) -> Vec { + let chars = chars.flat_option(); + let chars = match chars { + Some(ref chars) => &chars.elements, + None => return self.elements.trim_start().to_owned(), + }; + self.elements + .trim_start_with(|c| chars.contains(&(c as u8))) + .to_owned() + } - if let ByteInnerPosition::Right | ByteInnerPosition::All = position { - for (i, c) in self.elements.iter().rev().enumerate() { - if !is_valid_char(c) { - end = self.len() - i; - break; - } - } - } - Ok(self.elements[start..end].to_vec()) + pub fn rstrip(&self, chars: OptionalOption) -> Vec { + let chars = chars.flat_option(); + let chars = match chars { + Some(ref chars) => &chars.elements, + None => return self.elements.trim_end().to_owned(), + }; + self.elements + .trim_end_with(|c| chars.contains(&(c as u8))) + .to_owned() } pub fn split(&self, options: ByteInnerSplitOptions, reverse: bool) -> PyResult> { @@ -1213,12 +1210,6 @@ pub trait ByteOr: ToPrimitive { impl ByteOr for BigInt {} -pub enum ByteInnerPosition { - Left, - Right, - All, -} - fn split_slice<'a>(slice: &'a [u8], sep: &[u8], maxsplit: isize) -> Vec<&'a [u8]> { let mut splitted: Vec<&[u8]> = vec![]; let mut prev_index = 0; diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index 7e277e4a4a..4459daa7c2 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -4,8 +4,7 @@ use std::ops::Deref; use super::objbyteinner::{ ByteInnerExpandtabsOptions, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, - ByteInnerPosition, ByteInnerSplitOptions, ByteInnerSplitlinesOptions, - ByteInnerTranslateOptions, PyByteInner, + ByteInnerSplitOptions, ByteInnerSplitlinesOptions, ByteInnerTranslateOptions, PyByteInner, }; use super::objint::PyIntRef; use super::objiter; @@ -14,7 +13,7 @@ use super::objstr::{PyString, PyStringRef}; use super::objtuple::PyTupleRef; use super::objtype::PyClassRef; use crate::cformat::CFormatString; -use crate::function::OptionalArg; +use crate::function::{OptionalArg, OptionalOption}; use crate::obj::objstr::do_cformat_string; use crate::pyhash; use crate::pyobject::{ @@ -329,18 +328,18 @@ impl PyBytes { } #[pymethod(name = "strip")] - fn strip(&self, chars: OptionalArg) -> PyResult { - Ok(self.inner.strip(chars, ByteInnerPosition::All)?.into()) + fn strip(&self, chars: OptionalOption) -> PyBytes { + self.inner.strip(chars).into() } #[pymethod(name = "lstrip")] - fn lstrip(&self, chars: OptionalArg) -> PyResult { - Ok(self.inner.strip(chars, ByteInnerPosition::Left)?.into()) + fn lstrip(&self, chars: OptionalOption) -> PyBytes { + self.inner.lstrip(chars).into() } #[pymethod(name = "rstrip")] - fn rstrip(&self, chars: OptionalArg) -> PyResult { - Ok(self.inner.strip(chars, ByteInnerPosition::Right)?.into()) + fn rstrip(&self, chars: OptionalOption) -> PyBytes { + self.inner.rstrip(chars).into() } #[pymethod(name = "split")]