From 65d009382213fbdd112679a97446ed3194a5530a Mon Sep 17 00:00:00 2001 From: Alan Justino Date: Sun, 9 Jun 2019 13:45:37 -0300 Subject: [PATCH 1/4] Dont fail on '{!r}'.format(...) --- tests/snippets/strings.py | 3 +++ vm/src/format.rs | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/tests/snippets/strings.py b/tests/snippets/strings.py index aaeaed5f3e..3aa8e0b010 100644 --- a/tests/snippets/strings.py +++ b/tests/snippets/strings.py @@ -186,6 +186,9 @@ assert "{0} {1}".format(2,3) == "2 3" assert "--{:s>4}--".format(1) == "--sss1--" assert "{keyword} {0}".format(1, keyword=2) == "2 1" +assert "repr() shows quotes: {!r}; str() doesn't: {!s}".format( + 'test1', 'test2' +) == "repr() shows quotes: 'test1'; str() doesn't: test2", 'Output: {!r}, {!s}'.format('test1', 'test2') assert 'a' < 'b' assert 'a' <= 'b' diff --git a/vm/src/format.rs b/vm/src/format.rs index e89e4f731d..86372e7022 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -467,6 +467,17 @@ impl FormatString { String::new() }; + // On parts[0] can still be the preconversor (!r, !s, !a) + let parts: Vec<&str> = arg_part .splitn(2, '!').collect(); + // before the bang is a keyword or arg index, after the comma is maybe a conversor spec. + let arg_part = parts[0]; + + let preconversor_spec = if parts.len() > 1 { + parts[1].to_string() + } else { + String::new() + }; + if arg_part.is_empty() { return Ok(FormatPart::AutoSpec(format_spec)); } From e19b674abfb3f505967e2e16b47c42938449dafd Mon Sep 17 00:00:00 2001 From: Alan Justino Date: Sun, 9 Jun 2019 16:22:22 -0300 Subject: [PATCH 2/4] FormatSpec got a preconversor:FormatPreconversor to handle !r, !s and !a --- vm/src/format.rs | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index 86372e7022..fdeac812e3 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -3,6 +3,24 @@ use num_traits::Signed; use std::cmp; use std::str::FromStr; +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum FormatPreconversor { + Str, + Repr, + Ascii, +} + +impl FormatPreconversor { + fn from_char(c: char) -> Option { + match c { + 's' => Some(FormatPreconversor::Str), + 'r' => Some(FormatPreconversor::Repr), + 'a' => Some(FormatPreconversor::Ascii), + _ => None, + } + } +} + #[derive(Debug, Copy, Clone, PartialEq)] pub enum FormatAlign { Left, @@ -56,6 +74,7 @@ pub enum FormatType { #[derive(Debug, PartialEq)] pub struct FormatSpec { + preconversor: Option, fill: Option, align: Option, sign: Option, @@ -75,6 +94,23 @@ fn get_num_digits(text: &str) -> usize { text.len() } +fn parse_preconversor(text: &str) -> (Option, &str) { + let mut chars = text.chars(); + if chars.next() != Some('!') { + return (None, text); + } + + match chars.next() { + None => (None, text), // Should fail instead? + Some(c) => { + match FormatPreconversor::from_char(c) { + Some(preconversor) => (Some(preconversor), chars.as_str()), + None => (None, text), // Should fail instead? + } + }, + } +} + fn parse_align(text: &str) -> (Option, &str) { let mut chars = text.chars(); let maybe_align = chars.next().and_then(FormatAlign::from_char); @@ -186,7 +222,8 @@ fn parse_format_type(text: &str) -> (Option, &str) { } fn parse_format_spec(text: &str) -> FormatSpec { - let (fill, align, after_align) = parse_fill_and_align(text); + let (preconversor, after_preconversor) = parse_preconversor(text); + let (fill, align, after_align) = parse_fill_and_align(after_preconversor); let (sign, after_sign) = parse_sign(after_align); let (alternate_form, after_alternate_form) = parse_alternate_form(after_sign); let after_zero = parse_zero(after_alternate_form); @@ -196,6 +233,7 @@ fn parse_format_spec(text: &str) -> FormatSpec { let (format_type, _) = parse_format_type(after_precision); FormatSpec { + preconversor, fill, align, sign, @@ -473,10 +511,11 @@ impl FormatString { let arg_part = parts[0]; let preconversor_spec = if parts.len() > 1 { - parts[1].to_string() + "!".to_string() + parts[1] } else { String::new() }; + let format_spec = preconversor_spec + &format_spec; if arg_part.is_empty() { return Ok(FormatPart::AutoSpec(format_spec)); From f0a2b4c50bc0b638d5257f12bae2e9d3f2966934 Mon Sep 17 00:00:00 2001 From: Alan Justino Date: Sun, 9 Jun 2019 18:11:10 -0300 Subject: [PATCH 3/4] Apply the {!r} on str.format calls By the Python docs, the `!` forces a conversion of the argument before applying the normal formating logic described after the `:` marker. See: https://docs.python.org/3.4/library/string.html#format-string-syntax --- tests/snippets/strings.py | 18 ++++++++++++++-- vm/src/format.rs | 44 +++++++++++++++++++++++++-------------- vm/src/obj/objstr.rs | 12 +++++++++-- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/tests/snippets/strings.py b/tests/snippets/strings.py index 3aa8e0b010..ef334b065c 100644 --- a/tests/snippets/strings.py +++ b/tests/snippets/strings.py @@ -182,14 +182,28 @@ assert not '123'.isidentifier() # String Formatting -assert "{} {}".format(1,2) == "1 2" -assert "{0} {1}".format(2,3) == "2 3" +assert "{} {}".format(1, 2) == "1 2" +assert "{0} {1}".format(2, 3) == "2 3" assert "--{:s>4}--".format(1) == "--sss1--" assert "{keyword} {0}".format(1, keyword=2) == "2 1" assert "repr() shows quotes: {!r}; str() doesn't: {!s}".format( 'test1', 'test2' ) == "repr() shows quotes: 'test1'; str() doesn't: test2", 'Output: {!r}, {!s}'.format('test1', 'test2') + +class Foo: + def __str__(self): + return 'str(Foo)' + + def __repr__(self): + return 'repr(Foo)' + + +f = Foo() +assert "{} {!s} {!r} {!a}".format(f, f, f, f) == 'str(Foo) str(Foo) repr(Foo) repr(Foo)' +assert "{foo} {foo!s} {foo!r} {foo!a}".format(foo=f) == 'str(Foo) str(Foo) repr(Foo) repr(Foo)' +# assert '{} {!r} {:10} {!r:10} {foo!r:10} {foo!r} {foo}'.format('txt1', 'txt2', 'txt3', 'txt4', 'txt5', foo='bar') + assert 'a' < 'b' assert 'a' <= 'b' assert 'a' <= 'a' diff --git a/vm/src/format.rs b/vm/src/format.rs index fdeac812e3..dc4645165e 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -11,7 +11,7 @@ pub enum FormatPreconversor { } impl FormatPreconversor { - fn from_char(c: char) -> Option { + pub fn from_char(c: char) -> Option { match c { 's' => Some(FormatPreconversor::Str), 'r' => Some(FormatPreconversor::Repr), @@ -19,6 +19,31 @@ impl FormatPreconversor { _ => None, } } + + pub fn from_str(text: &str) -> Option { + let mut chars = text.chars(); + if chars.next() != Some('!') { + return None; + } + + match chars.next() { + None => None, // Should fail instead? + Some(c) => FormatPreconversor::from_char(c), + } + } + + pub fn parse_and_consume(text: &str) -> (Option, &str) { + let preconversor = FormatPreconversor::from_str(text); + match preconversor { + None => (None, text), + Some(_) => { + let mut chars = text.chars(); + chars.next(); // Consume the bang + chars.next(); // Consume one r,s,a char + (preconversor, chars.as_str()) + } + } + } } #[derive(Debug, Copy, Clone, PartialEq)] @@ -95,20 +120,7 @@ fn get_num_digits(text: &str) -> usize { } fn parse_preconversor(text: &str) -> (Option, &str) { - let mut chars = text.chars(); - if chars.next() != Some('!') { - return (None, text); - } - - match chars.next() { - None => (None, text), // Should fail instead? - Some(c) => { - match FormatPreconversor::from_char(c) { - Some(preconversor) => (Some(preconversor), chars.as_str()), - None => (None, text), // Should fail instead? - } - }, - } + FormatPreconversor::parse_and_consume(text) } fn parse_align(text: &str) -> (Option, &str) { @@ -506,7 +518,7 @@ impl FormatString { }; // On parts[0] can still be the preconversor (!r, !s, !a) - let parts: Vec<&str> = arg_part .splitn(2, '!').collect(); + let parts: Vec<&str> = arg_part.splitn(2, '!').collect(); // before the bang is a keyword or arg index, after the comma is maybe a conversor spec. let arg_part = parts[0]; diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index 40d1912968..ce9625cdf0 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -10,7 +10,7 @@ use unicode_casing::CharExt; use unicode_segmentation::UnicodeSegmentation; use unicode_xid::UnicodeXID; -use crate::format::{FormatParseError, FormatPart, FormatString}; +use crate::format::{FormatParseError, FormatPart, FormatPreconversor, FormatString}; use crate::function::{single_or_tuple_any, OptionalArg, PyFuncArgs}; use crate::pyhash; use crate::pyobject::{ @@ -1026,7 +1026,15 @@ fn count_char(s: &str, c: char) -> usize { } fn call_object_format(vm: &VirtualMachine, argument: PyObjectRef, format_spec: &str) -> PyResult { - let returned_type = vm.ctx.new_str(format_spec.to_string()); + let (preconversor, new_format_spec) = FormatPreconversor::parse_and_consume(format_spec); + let argument = match preconversor { + Some(FormatPreconversor::Str) => vm.call_method(&argument, "__str__", vec![])?, + Some(FormatPreconversor::Repr) => vm.call_method(&argument, "__repr__", vec![])?, + Some(FormatPreconversor::Ascii) => vm.call_method(&argument, "__repr__", vec![])?, + None => argument, + }; + let returned_type = vm.ctx.new_str(new_format_spec.to_string()); + let result = vm.call_method(&argument, "__format__", vec![returned_type])?; if !objtype::isinstance(&result, &vm.ctx.str_type()) { let result_type = result.class(); From 854bacf45293b06e7be4a44d980765294600976a Mon Sep 17 00:00:00 2001 From: Alan Justino Date: Sun, 9 Jun 2019 18:45:08 -0300 Subject: [PATCH 4/4] Fix the Rust tests --- vm/src/format.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vm/src/format.rs b/vm/src/format.rs index dc4645165e..eb59bf84ac 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -613,6 +613,7 @@ mod tests { #[test] fn test_width_only() { let expected = FormatSpec { + preconversor: None, fill: None, align: None, sign: None, @@ -628,6 +629,7 @@ mod tests { #[test] fn test_fill_and_width() { let expected = FormatSpec { + preconversor: None, fill: Some('<'), align: Some(FormatAlign::Right), sign: None, @@ -643,6 +645,7 @@ mod tests { #[test] fn test_all() { let expected = FormatSpec { + preconversor: None, fill: Some('<'), align: Some(FormatAlign::Right), sign: Some(FormatSign::Minus),