Skip to content

Commit 68011df

Browse files
authored
Merge pull request #1024 from yanganto/str_isprintable
str.isprintable
2 parents 1b968e9 + f76be43 commit 68011df

File tree

3 files changed

+34
-0
lines changed

3 files changed

+34
-0
lines changed

tests/snippets/strings.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,10 @@ def try_mutate_str():
216216
for s, b, e in zip(ss, bs, ['u8', 'U8', 'utf-8', 'UTF-8', 'utf_8']):
217217
assert s.encode(e) == b
218218
# assert s.encode(encoding=e) == b
219+
220+
# str.isprintable
221+
assert "".isprintable()
222+
assert " ".isprintable()
223+
assert "abcdefg".isprintable()
224+
assert not "abcdefg\n".isprintable()
225+
assert "ʹ".isprintable()

vm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ hexf = "0.1.0"
3131
indexmap = "1.0.2"
3232
crc = "^1.0.0"
3333
bincode = "1.1.4"
34+
unicode_categories = "0.1.1"
3435

3536

3637
# TODO: release and publish to crates.io

vm/src/obj/objstr.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
extern crate unicode_categories;
12
extern crate unicode_xid;
23

34
use std::fmt;
@@ -27,6 +28,8 @@ use super::objsequence::PySliceableSequence;
2728
use super::objslice::PySlice;
2829
use super::objtype::{self, PyClassRef};
2930

31+
use unicode_categories::UnicodeCategories;
32+
3033
/// str(object='') -> str
3134
/// str(bytes_or_buffer[, encoding[, errors]]) -> str
3235
///
@@ -519,6 +522,29 @@ impl PyString {
519522
}
520523
}
521524

525+
/// Return true if all characters in the string are printable or the string is empty,
526+
/// false otherwise. Nonprintable characters are those characters defined in the
527+
/// Unicode character database as `Other` or `Separator`,
528+
/// excepting the ASCII space (0x20) which is considered printable.
529+
///
530+
/// All characters except those characters defined in the Unicode character
531+
/// database as the following categories are considered printable.
532+
/// * Cc (Other, Control)
533+
/// * Cf (Other, Format)
534+
/// * Cs (Other, Surrogate)
535+
/// * Co (Other, Private Use)
536+
/// * Cn (Other, Not Assigned)
537+
/// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
538+
/// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
539+
/// * Zs (Separator, Space) other than ASCII space('\x20').
540+
#[pymethod]
541+
fn isprintable(&self, _vm: &VirtualMachine) -> bool {
542+
self.value.chars().all(|c| match c {
543+
'\u{0020}' => true,
544+
_ => !(c.is_other_control() | c.is_separator()),
545+
})
546+
}
547+
522548
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
523549
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
524550
#[pymethod]

0 commit comments

Comments
 (0)