Skip to content

Fix bytes.isspace #5655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
23 changes: 0 additions & 23 deletions Lib/test/test_bigmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,17 +786,6 @@ def test_title(self, size):
def test_swapcase(self, size):
self._test_swapcase(size)

# TODO: RUSTPYTHON
@unittest.expectedFailure
@bigmemtest(size=_2G, memuse=2)
def test_isspace(self, size):
super().test_isspace(size)

# TODO: RUSTPYTHON
@unittest.expectedFailure
@bigmemtest(size=_2G, memuse=2)
def test_istitle(self, size):
super().test_istitle(size)

class BytearrayTest(unittest.TestCase, BaseStrTest):

Expand All @@ -823,18 +812,6 @@ def test_swapcase(self, size):
test_hash = None
test_split_large = None

# TODO: RUSTPYTHON
@unittest.expectedFailure
@bigmemtest(size=_2G, memuse=2)
def test_isspace(self, size):
super().test_isspace(size)

# TODO: RUSTPYTHON
@unittest.expectedFailure
@bigmemtest(size=_2G, memuse=2)
def test_istitle(self, size):
super().test_istitle(size)

class TupleTest(unittest.TestCase):

# Tuples have a small, fixed-sized head and an array of pointers to
Expand Down
35 changes: 33 additions & 2 deletions extra_tests/snippets/builtin_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
== bytes(range(0, 256))
)
assert b"omkmok\Xaa" == bytes([111, 109, 107, 109, 111, 107, 92, 88, 97, 97])
# assert b"omkmok\Xaa" == bytes([111, 109, 107, 109, 111, 107, 92, 88, 97, 97])


a = b"abcd"
Expand Down Expand Up @@ -668,4 +668,35 @@ def __new__(cls, value):
b = B1.fromhex('a0a1a2')
assert b.foo == 'bar'

skip_if_unsupported(3,11,test__bytes__)
skip_if_unsupported(3,11,test__bytes__)

# bytes.isspace tests: every ASCII whitespace byte, including \v (0x0b), counts.
assert " \f\n\r\t\v".encode("utf-8").isspace()
assert " \f\n\r\t\v".encode("latin-1").isspace()

# bytes.istitle tests
s = b"Aa6A"
assert s.istitle(), f"{s}"
s = b"Aa6aA"
assert not s.istitle(), f"{s}"
s = b"Python Is Fun"
assert s.istitle(), f"{s}"
s = b"Python is fun"
assert not s.istitle(), f"{s}"
s = b"PYTHON IS FUN"
assert not s.istitle(), f"{s}"
s = b"Python 3.9 Is Awesome!"
assert s.istitle(), f"{s}"
s = b""
assert not s.istitle(), f"{s}"
s = b"Hello Is Amazing"
assert s.istitle(), f"{s}"
SUBSTR = b"123456"
s = b"".join([b"A", b"a" * 64, SUBSTR])
assert s.istitle(), f"{s}"
s += b"A"
assert s.istitle(), f"{s}"
s += b"aA"
assert not s.istitle(), f"{s}"

# Uncased bytes (digits, tab) reset the "previous byte was cased" state.
# These use bytes literals so that bytes.istitle, not str.istitle, is exercised.
s = b"123A"
assert s.istitle(), f"{s}"
s = b"123a"
assert not s.istitle(), f"{s}"
s = b"123A\ta"
assert not s.istitle(), f"{s}"

# Any uncased byte may separate words, not only whitespace, ',' or '!'.
s = b"Hello. World"
assert s.istitle(), f"{s}"
# Bytes >= 0x80 are uncased for bytes objects (ASCII-only rules).
s = b"\xe9Abc"
assert s.istitle(), f"{s}"
51 changes: 25 additions & 26 deletions vm/src/bytes_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,43 +364,42 @@ impl PyBytesInner {
}

/// Returns `true` when the buffer is non-empty and every byte is ASCII
/// whitespace in the CPython sense (space, `\t`, `\n`, `\r`, `\x0b`, `\x0c`).
pub fn isspace(&self) -> bool {
    // What CPython considers whitespace differs slightly from what Rust does.
    // In particular, Rust's `is_ascii_whitespace` does not treat vertical
    // tabulation (\x0B) as whitespace, so that byte is accepted explicitly.
    // See https://docs.python.org/3/library/stdtypes.html#bytearray.isspace
    // See https://doc.rust-lang.org/std/primitive.char.html#method.is_ascii_whitespace
    // Note that str.isspace uses a different definition too.
    // See https://docs.python.org/3/library/stdtypes.html#str.isspace
    !self.elements.is_empty()
        && self
            .elements
            .iter()
            .all(|&byte| byte.is_ascii_whitespace() || byte == b'\x0b')
}

pub fn istitle(&self) -> bool {
if self.elements.is_empty() {
return false;
}

let mut iter = self.elements.iter().peekable();
let mut prev_cased = false;

while let Some(c) = iter.next() {
let current = char::from(*c);
let next = if let Some(k) = iter.peek() {
char::from(**k)
} else if current.is_uppercase() {
return !prev_cased;
} else {
return prev_cased;
};

let is_cased = current.to_uppercase().next().unwrap() != current
|| current.to_lowercase().next().unwrap() != current;
if (is_cased && next.is_uppercase() && !prev_cased)
|| (!is_cased && next.is_lowercase())
{
return false;
}

prev_cased = is_cased;
}

true
std::iter::once(&b' ')
.chain(self.elements.iter())
.zip(self.elements.iter())
.map(|(a, b)| (char::from(*a), char::from(*b)))
.all(|(prev, current)| {
if prev.is_alphabetic() && current.is_alphabetic() {
!current.is_ascii_uppercase()
} else if prev.is_alphabetic() {
current.is_ascii_whitespace()
|| current.is_numeric()
|| [',', '!'].contains(&current)
} else if prev.is_ascii_whitespace() {
current.is_ascii_uppercase() || current.is_numeric()
} else {
true
}
})
}

pub fn lower(&self) -> Vec<u8> {
Expand Down
Loading