Skip to content

Commit 4f81731

Browse files
author
Antonio Yang
committed
str.isprintable
1 parent 1b7088c commit 4f81731

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

tests/snippets/strings.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,10 @@ def try_mutate_str():
216216
for s, b, e in zip(ss, bs, ['u8', 'U8', 'utf-8', 'UTF-8', 'utf_8']):
217217
assert s.encode(e) == b
218218
# assert s.encode(encoding=e) == b
219+
220+
# str.isprintable
221+
assert "".isprintable()
222+
assert " ".isprintable()
223+
assert "abcdefg".isprintable()
224+
assert not "abcdefg\n".isprintable()
225+
assert "ʹ".isprintable()

vm/src/obj/objstr.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,91 @@ impl PyString {
519519
}
520520
}
521521

522+
// Return true if all characters in the string are printable or the string is empty,
523+
// false otherwise. Nonprintable characters are those characters defined in the
524+
// Unicode character database as `Other` or `Separator`,
525+
// excepting the ASCII space (0x20) which is considered printable.
526+
//
527+
// All characters except those characters defined in the Unicode character
528+
// database as following categories are considered printable.
529+
// * Cc (Other, Control)
530+
// * Cf (Other, Format)
531+
// * Cs (Other, Surrogate)
532+
// * Co (Other, Private Use)
533+
// * Cn (Other, Not Assigned)
534+
// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
535+
// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
536+
// * Zs (Separator, Space) other than ASCII space('\x20').
537+
#[pymethod]
538+
fn isprintable(&self, _vm: &VirtualMachine) -> bool {
539+
self.value.is_empty()
540+
|| self
541+
.value
542+
.chars()
543+
.all(|c| {
544+
match c {
545+
// Other, Control
546+
'\u{0000}'| '\u{0001}'| '\u{0002}'| '\u{0003}'| '\u{0004}'| '\u{0005}'|
547+
'\u{0006}'| '\u{0007}'| '\u{0008}'| '\u{0009}'| '\u{000A}'| '\u{000B}'|
548+
'\u{000C}'| '\u{000D}'| '\u{000E}'| '\u{000F}'| '\u{0010}'| '\u{0011}'|
549+
'\u{0012}'| '\u{0013}'| '\u{0014}'| '\u{0015}'| '\u{0016}'| '\u{0017}'|
550+
'\u{0018}'| '\u{0019}'| '\u{001A}'| '\u{001B}'| '\u{001C}'| '\u{001D}'|
551+
'\u{001E}'| '\u{001F}'| '\u{007F}'| '\u{0080}'| '\u{0081}'| '\u{0082}'|
552+
'\u{0083}'| '\u{0084}'| '\u{0085}'| '\u{0086}'| '\u{0087}'| '\u{0088}'|
553+
'\u{0089}'| '\u{008A}'| '\u{008B}'| '\u{008C}'| '\u{008D}'| '\u{008E}'|
554+
'\u{008F}'| '\u{0090}'| '\u{0091}'| '\u{0092}'| '\u{0093}'| '\u{0094}'|
555+
'\u{0095}'| '\u{0096}'| '\u{0097}'| '\u{0098}'| '\u{0099}'| '\u{009A}'|
556+
'\u{009B}'| '\u{009C}'| '\u{009D}'| '\u{009E}'| '\u{009F}'|
557+
558+
// Other, Format
559+
'\u{00AD}'| '\u{0600}'| '\u{0601}'| '\u{0602}'| '\u{0603}'| '\u{0604}'|
560+
'\u{0605}'| '\u{061C}'| '\u{06DD}'| '\u{070F}'| '\u{08E2}'| '\u{180E}'|
561+
'\u{200B}'| '\u{200C}'| '\u{200D}'| '\u{200E}'| '\u{200F}'| '\u{202A}'|
562+
'\u{202B}'| '\u{202C}'| '\u{202D}'| '\u{202E}'| '\u{2060}'| '\u{2061}'|
563+
'\u{2062}'| '\u{2063}'| '\u{2064}'| '\u{2066}'| '\u{2067}'| '\u{2068}'|
564+
'\u{2069}'| '\u{206A}'| '\u{206B}'| '\u{206C}'| '\u{206D}'| '\u{206E}'|
565+
'\u{206F}'| '\u{FEFF}'| '\u{FFF9}'| '\u{FFFA}'| '\u{FFFB}'| '\u{110BD}'|
566+
'\u{110CD}'| '\u{13430}'| '\u{13431}'| '\u{13432}'| '\u{13433}'|
567+
'\u{13434}'| '\u{13435}'| '\u{13436}'| '\u{13437}'| '\u{13438}'|
568+
'\u{1BCA0}'| '\u{1BCA1}'| '\u{1BCA2}'| '\u{1BCA3}'| '\u{1D173}'|
569+
'\u{1D174}'| '\u{1D175}'| '\u{1D176}'| '\u{1D177}'| '\u{1D178}'|
570+
'\u{1D179}'| '\u{1D17A}'| '\u{E0001}'| '\u{E0020}'| '\u{E0021}'|
571+
'\u{E0022}'| '\u{E0023}'| '\u{E0024}'| '\u{E0025}'| '\u{E0026}'|
572+
'\u{E0027}'| '\u{E0028}'| '\u{E0029}'| '\u{E002A}'| '\u{E002B}'|
573+
'\u{E002C}'| '\u{E002D}'| '\u{E002E}'| '\u{E002F}'| '\u{E0030}'|
574+
'\u{E0031}'| '\u{E0032}'| '\u{E0033}'| '\u{E0034}'| '\u{E0035}'|
575+
'\u{E0036}'| '\u{E0037}'| '\u{E0038}'| '\u{E0039}'| '\u{E003A}'|
576+
'\u{E003B}'| '\u{E003C}'| '\u{E003D}'| '\u{E003E}'| '\u{E003F}'|
577+
'\u{E0040}'| '\u{E0041}'| '\u{E0042}'| '\u{E0043}'| '\u{E0044}'|
578+
'\u{E0045}'| '\u{E0046}'| '\u{E0047}'| '\u{E0048}'| '\u{E0049}'|
579+
'\u{E004A}'| '\u{E004B}'| '\u{E004C}'| '\u{E004D}'| '\u{E004E}'|
580+
'\u{E004F}'| '\u{E0050}'| '\u{E0051}'| '\u{E0052}'| '\u{E0053}'|
581+
'\u{E0054}'| '\u{E0055}'| '\u{E0056}'| '\u{E0057}'| '\u{E0058}'|
582+
'\u{E0059}'| '\u{E005A}'| '\u{E005B}'| '\u{E005C}'| '\u{E005D}'|
583+
'\u{E005E}'| '\u{E005F}'| '\u{E0060}'| '\u{E0061}'| '\u{E0062}'|
584+
'\u{E0063}'| '\u{E0064}'| '\u{E0065}'| '\u{E0066}'| '\u{E0067}'|
585+
'\u{E0068}'| '\u{E0069}'| '\u{E006A}'| '\u{E006B}'| '\u{E006C}'|
586+
'\u{E006D}'| '\u{E006E}'| '\u{E006F}'| '\u{E0070}'| '\u{E0071}'|
587+
'\u{E0072}'| '\u{E0073}'| '\u{E0074}'| '\u{E0075}'| '\u{E0076}'|
588+
'\u{E0077}'| '\u{E0078}'| '\u{E0079}'| '\u{E007A}'| '\u{E007B}'|
589+
'\u{E007C}'| '\u{E007D}'| '\u{E007E}'| '\u{E007F}'|
590+
591+
// Other, Private Use
592+
'\u{E000}'| '\u{F8FF}'| '\u{F0000}'| '\u{FFFFD}'| '\u{100000}'|
593+
'\u{10FFFD}'|
594+
595+
// Separator, Line; Separator, Paragraph
596+
'\u{2028}'| '\u{2029}'|
597+
598+
// Separator, Space
599+
'\u{00A0}'| '\u{1680}'| '\u{2000}'| '\u{2001}'| '\u{2002}'| '\u{2003}'|
600+
'\u{2004}'| '\u{2005}'| '\u{2006}'| '\u{2007}'| '\u{2008}'| '\u{2009}'|
601+
'\u{200A}'| '\u{202F}'| '\u{205F}'| '\u{3000}' => false,
602+
_ => true
603+
}
604+
})
605+
}
606+
522607
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
523608
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
524609
#[pymethod]

0 commit comments

Comments
 (0)