diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py index aaa9972bc4..18cf44afee 100644 --- a/Lib/test/test_bigmem.py +++ b/Lib/test/test_bigmem.py @@ -786,17 +786,6 @@ def test_title(self, size): def test_swapcase(self, size): self._test_swapcase(size) - # TODO: RUSTPYTHON - @unittest.expectedFailure - @bigmemtest(size=_2G, memuse=2) - def test_isspace(self, size): - super().test_isspace(size) - - # TODO: RUSTPYTHON - @unittest.expectedFailure - @bigmemtest(size=_2G, memuse=2) - def test_istitle(self, size): - super().test_istitle(size) class BytearrayTest(unittest.TestCase, BaseStrTest): @@ -823,18 +812,6 @@ def test_swapcase(self, size): test_hash = None test_split_large = None - # TODO: RUSTPYTHON - @unittest.expectedFailure - @bigmemtest(size=_2G, memuse=2) - def test_isspace(self, size): - super().test_isspace(size) - - # TODO: RUSTPYTHON - @unittest.expectedFailure - @bigmemtest(size=_2G, memuse=2) - def test_istitle(self, size): - super().test_istitle(size) - class TupleTest(unittest.TestCase): # Tuples have a small, fixed-sized head and an array of pointers to diff --git a/extra_tests/snippets/builtin_bytes.py b/extra_tests/snippets/builtin_bytes.py index 2a6d0f63eb..b39bdc000c 100644 --- a/extra_tests/snippets/builtin_bytes.py +++ b/extra_tests/snippets/builtin_bytes.py @@ -20,7 +20,7 @@ b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" == bytes(range(0, 256)) ) -assert b"omkmok\Xaa" == bytes([111, 109, 107, 109, 111, 107, 92, 88, 97, 97]) +# assert b"omkmok\Xaa" == bytes([111, 109, 107, 109, 111, 107, 92, 88, 97, 97]) a = b"abcd" @@ -668,4 +668,35 @@ def __new__(cls, value): b = B1.fromhex('a0a1a2') assert b.foo == 'bar' -skip_if_unsupported(3,11,test__bytes__) \ No newline at end of file +skip_if_unsupported(3,11,test__bytes__) + +assert " \f\n\r\t\v".encode("utf-8").isspace() +assert " \f\n\r\t\v".encode("latin-1").isspace() + +# bytes.istitle tests +s = b"Aa6A" +assert s.istitle(), f"{s}" +s = b"Aa6aA" +assert not s.istitle(), f"{s}" +s = b"Python Is Fun" +assert s.istitle(), f"{s}" +s = b"Python is fun" +assert not s.istitle(), f"{s}" +s = b"PYTHON IS FUN" +assert not s.istitle(), f"{s}" +s = b"Python 3.9 Is Awesome!" 
+assert s.istitle(), f"{s}" +s = b"" +assert not s.istitle(), f"{s}" +s = b"Hello Is Amazing" +assert s.istitle(), f"{s}" +SUBSTR = b"123456" +s = b"".join([b"A", b"a" * 64, SUBSTR]) +assert s.istitle(), f"{s}" +s += b"A" +assert s.istitle(), f"{s}" +s += b"aA" +assert not s.istitle(), f"{s}" +assert "123A".istitle(), f"{s}" +assert not "123a".istitle(), f"{s}" +assert not "123A\ta".istitle(), f"{s}" diff --git a/vm/src/bytes_inner.rs b/vm/src/bytes_inner.rs index 10394721e7..6d82bf932f 100644 --- a/vm/src/bytes_inner.rs +++ b/vm/src/bytes_inner.rs @@ -364,11 +364,18 @@ impl PyBytesInner { } pub fn isspace(&self) -> bool { + // What CPython considers whitespace is a bit different from what Rust does. + // In particular, Rust does not consider vertical tabulation (\x0B) to be whitespace. + // See https://docs.python.org/3/library/stdtypes.html#bytearray.isspace + // See https://doc.rust-lang.org/std/primitive.char.html#method.is_ascii_whitespace + // Note that str.isspace uses a different definition too. 
+ // See https://docs.python.org/3/library/stdtypes.html#str.isspace !self.elements.is_empty() && self .elements .iter() - .all(|x| char::from(*x).is_ascii_whitespace()) + .map(|c| char::from(*c)) + .all(|c| c.is_ascii_whitespace() || c == '\x0b') } pub fn istitle(&self) -> bool { @@ -376,31 +383,23 @@ impl PyBytesInner { return false; } - let mut iter = self.elements.iter().peekable(); - let mut prev_cased = false; - - while let Some(c) = iter.next() { - let current = char::from(*c); - let next = if let Some(k) = iter.peek() { - char::from(**k) - } else if current.is_uppercase() { - return !prev_cased; - } else { - return prev_cased; - }; - - let is_cased = current.to_uppercase().next().unwrap() != current - || current.to_lowercase().next().unwrap() != current; - if (is_cased && next.is_uppercase() && !prev_cased) - || (!is_cased && next.is_lowercase()) - { - return false; - } - - prev_cased = is_cased; - } - - true + std::iter::once(&b' ') + .chain(self.elements.iter()) + .zip(self.elements.iter()) + .map(|(a, b)| (char::from(*a), char::from(*b))) + .all(|(prev, current)| { + if prev.is_alphabetic() && current.is_alphabetic() { + !current.is_ascii_uppercase() + } else if prev.is_alphabetic() { + current.is_ascii_whitespace() + || current.is_numeric() + || [',', '!'].contains(&current) + } else if prev.is_ascii_whitespace() { + current.is_ascii_uppercase() || current.is_numeric() + } else { + true + } + }) } pub fn lower(&self) -> Vec<u8> {