From c4ddff84cef3f53e35ba5685f8a9a716940250cc Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Thu, 30 Jul 2020 07:36:32 +1200 Subject: [PATCH 01/10] Implement "{x.attr}".format(...). --- vm/src/format.rs | 309 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 236 insertions(+), 73 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index 7a8945e1a6..f7d3721eef 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -2,8 +2,9 @@ use crate::function::PyFuncArgs; use crate::obj::objint::PyInt; use crate::obj::objstr::PyString; use crate::obj::{objstr, objtype}; -use crate::pyobject::{ItemProtocol, PyObjectRef, PyResult, TypeProtocol}; +use crate::pyobject::{IntoPyObject, ItemProtocol, PyObjectRef, PyResult, TypeProtocol}; use crate::vm::VirtualMachine; +use itertools::{Itertools, PeekingNext}; use num_bigint::{BigInt, Sign}; use num_traits::cast::ToPrimitive; use num_traits::Signed; @@ -557,6 +558,9 @@ pub(crate) enum FormatParseError { MissingStartBracket, UnescapedStartBracketInLiteral, InvalidFormatSpecifier, + EmptyAttribute, + MissingRightBracket, + InvalidCharacterAfterRightBracket, } impl FromStr for FormatSpec { @@ -566,25 +570,116 @@ impl FromStr for FormatSpec { } } +#[derive(Debug, PartialEq)] +pub(crate) enum FieldNamePart { + Attribute(String), + Index(usize), + StringIndex(String), +} + +impl FieldNamePart { + fn parse_part( + chars: &mut impl PeekingNext, + ) -> Result { + let ch = chars.next().unwrap(); + if ch == '.' { + let mut attribute = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { + attribute.push(ch); + } + if attribute.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else { + Ok(FieldNamePart::Attribute(attribute)) + } + } else if ch == '[' { + let mut index = String::new(); + for ch in chars { + if ch == ']' { + return if index.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else if let Ok(index) = index.parse::() { + Ok(FieldNamePart::Index(index)) + } else { + Ok(FieldNamePart::StringIndex(index)) + }; + } + index.push(ch); + } + Err(FormatParseError::MissingRightBracket) + } else { + Err(FormatParseError::InvalidCharacterAfterRightBracket) + } + } +} + +#[derive(Debug, PartialEq)] +pub(crate) enum FieldType { + AutoSpec, + IndexSpec(usize), + KeywordSpec(String), +} + +#[derive(Debug, PartialEq)] +pub(crate) struct FieldName { + field_type: FieldType, + parts: Vec, +} + +impl FieldName { + fn parse(text: &str) -> Result { + let mut chars = text.chars().peekable(); + let mut first = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { + first.push(ch); + } + + let field_type = if first.is_empty() { + FieldType::AutoSpec + } else if let Ok(index) = first.parse::() { + FieldType::IndexSpec(index) + } else { + FieldType::KeywordSpec(first) + }; + + let mut parts = Vec::new(); + while chars.peek().is_some() { + parts.push(FieldNamePart::parse_part(&mut chars)?) + } + + Ok(FieldName { field_type, parts }) + } +} + #[derive(Debug, PartialEq)] enum FormatPart { - AutoSpec(String), - IndexSpec(usize, String), - KeywordSpec(String, String), + Field(FieldName, String), Literal(String), } impl FormatPart { fn is_auto(&self) -> bool { match self { - FormatPart::AutoSpec(_) => true, + FormatPart::Field( + FieldName { + field_type: FieldType::AutoSpec, + .. + }, + _, + ) => true, _ => false, } } fn is_index(&self) -> bool { match self { - FormatPart::IndexSpec(_, _) => true, + FormatPart::Field( + FieldName { + field_type: FieldType::IndexSpec(_), + .. + }, + _, + ) => true, _ => false, } } @@ -655,16 +750,7 @@ impl FormatString { String::new() }; let format_spec = preconversor_spec + &format_spec; - - if arg_part.is_empty() { - return Ok(FormatPart::AutoSpec(format_spec)); - } - - if let Ok(index) = arg_part.parse::() { - Ok(FormatPart::IndexSpec(index, format_spec)) - } else { - Ok(FormatPart::KeywordSpec(arg_part.to_owned(), format_spec)) - } + Ok(FormatPart::Field(FieldName::parse(arg_part)?, format_spec)) } fn parse_spec<'a>( @@ -724,46 +810,34 @@ impl FormatString { } } - pub(crate) fn format(&self, arguments: &PyFuncArgs, vm: &VirtualMachine) -> PyResult { + fn format_internal( + &self, + vm: &VirtualMachine, + mut field_func: impl FnMut(&FieldType) -> PyResult, + ) -> PyResult { let mut final_string = String::new(); - if self.format_parts.iter().any(FormatPart::is_auto) - && self.format_parts.iter().any(FormatPart::is_index) - { - return Err(vm.new_value_error( - "cannot switch from automatic field numbering to manual field specification" - .to_owned(), - )); - } - let mut auto_argument_index: usize = 1; for part in &self.format_parts { let result_string: String = match part { - FormatPart::AutoSpec(format_spec) => { - let result = match arguments.args.get(auto_argument_index) { - Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?, - None => { - return Err(vm.new_index_error("tuple index out of range".to_owned())); - } - }; - auto_argument_index += 1; - objstr::clone_value(&result) - } - FormatPart::IndexSpec(index, format_spec) => { - let result = match arguments.args.get(*index + 1) { - Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?, - None => { - return Err(vm.new_index_error("tuple index out of range".to_owned())); - } - }; - objstr::clone_value(&result) - } - FormatPart::KeywordSpec(keyword, format_spec) => { - let result = match arguments.get_optional_kwarg(&keyword) { - Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?, - None => { - return Err(vm.new_key_error(vm.new_str(keyword.to_owned()))); + FormatPart::Field(FieldName { field_type, parts }, format_spec) => { + let mut argument = field_func(field_type)?; + + for name_part in parts { + match name_part { + FieldNamePart::Attribute(attribute) => { + argument = vm.get_attribute(argument, attribute.as_str())?; + } + FieldNamePart::Index(index) => { + // TODO Implement DictKey for usize so we can pass index directly + argument = argument.get_item(&index.into_pyobject(vm)?, vm)?; + } + FieldNamePart::StringIndex(index) => { + argument = argument.get_item(index, vm)?; + } } - }; - objstr::clone_value(&result) + } + + let value = call_object_format(vm, argument, &format_spec)?; + objstr::clone_value(&value) } FormatPart::Literal(literal) => literal.clone(), }; @@ -772,25 +846,44 @@ impl FormatString { Ok(vm.ctx.new_str(final_string)) } - pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult { - let mut final_string = String::new(); - for part in &self.format_parts { - let result_string: String = match part { - FormatPart::AutoSpec(_) | FormatPart::IndexSpec(_, _) => { - return Err( - vm.new_value_error("Format string contains positional fields".to_owned()) - ); - } - FormatPart::KeywordSpec(keyword, format_spec) => { - let argument = dict.get_item(keyword, &vm)?; - let result = call_object_format(vm, argument.clone(), &format_spec)?; - objstr::clone_value(&result) - } - FormatPart::Literal(literal) => literal.clone(), - }; - final_string.push_str(&result_string); + pub(crate) fn format(&self, arguments: &PyFuncArgs, vm: &VirtualMachine) -> PyResult { + if self.format_parts.iter().any(FormatPart::is_auto) + && self.format_parts.iter().any(FormatPart::is_index) + { + return Err(vm.new_value_error( + "cannot switch from automatic field numbering to manual field specification" + .to_owned(), + )); } - Ok(vm.ctx.new_str(final_string)) + + let mut auto_argument_index: usize = 1; + self.format_internal(vm, |field_type| match field_type { + FieldType::AutoSpec => { + auto_argument_index += 1; + arguments + .args + .get(auto_argument_index - 1) + .cloned() + .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())) + } + FieldType::IndexSpec(index) => arguments + .args + .get(*index + 1) + .cloned() + .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())), + FieldType::KeywordSpec(keyword) => arguments + .get_optional_kwarg(&keyword) + .ok_or_else(|| vm.new_key_error(vm.new_str(keyword.to_owned()))), + }) + } + + pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult { + self.format_internal(vm, |field_type| match field_type { + FieldType::AutoSpec | FieldType::IndexSpec(_) => { + Err(vm.new_value_error("Format string contains positional fields".to_owned())) + } + FieldType::KeywordSpec(keyword) => dict.get_item(keyword, &vm), + }) } } @@ -968,9 +1061,21 @@ mod tests { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("abcd".to_owned()), - FormatPart::IndexSpec(1, String::new()), + FormatPart::Field( + FieldName { + field_type: FieldType::IndexSpec(1), + parts: Vec::new(), + }, + String::new(), + ), FormatPart::Literal(":".to_owned()), - FormatPart::KeywordSpec("key".to_owned(), String::new()), + FormatPart::Field( + FieldName { + field_type: FieldType::KeywordSpec("key".to_owned()), + parts: Vec::new(), + }, + String::new(), + ), ], }); @@ -993,7 +1098,13 @@ mod tests { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("{".to_owned()), - FormatPart::KeywordSpec("key".to_owned(), String::new()), + FormatPart::Field( + FieldName { + field_type: FieldType::KeywordSpec("key".to_owned()), + parts: Vec::new(), + }, + String::new(), + ), FormatPart::Literal("}ddfe".to_owned()), ], }); @@ -1014,4 +1125,56 @@ mod tests { assert_eq!(parse_format_spec("o!"), Err("Invalid format specifier")); assert_eq!(parse_format_spec("d "), Err("Invalid format specifier")); } + + #[test] + fn test_parse_field_name() { + assert_eq!( + FieldName::parse(""), + Ok(FieldName { + field_type: FieldType::AutoSpec, + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("0"), + Ok(FieldName { + field_type: FieldType::IndexSpec(0), + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("key"), + Ok(FieldName { + field_type: FieldType::KeywordSpec("key".to_owned()), + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("key.attr[0][string]"), + Ok(FieldName { + field_type: FieldType::KeywordSpec("key".to_owned()), + parts: vec![ + FieldNamePart::Attribute("attr".to_owned()), + FieldNamePart::Index(0), + FieldNamePart::StringIndex("string".to_owned()) + ], + }) + ); + assert_eq!( + FieldName::parse("key.."), + Err(FormatParseError::EmptyAttribute) + ); + assert_eq!( + FieldName::parse("key[]"), + Err(FormatParseError::EmptyAttribute) + ); + assert_eq!( + FieldName::parse("key["), + Err(FormatParseError::MissingRightBracket) + ); + assert_eq!( + FieldName::parse("key[0]after"), + Err(FormatParseError::InvalidCharacterAfterRightBracket) + ); + } } From e90e2bfc6e1bd4a2b84d3ddabf1499c1e3c13ebf Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 11:30:06 +1200 Subject: [PATCH 02/10] Implement string module. --- vm/src/format.rs | 231 ++++++++++++++++++++-------------------- vm/src/obj/objlist.rs | 7 ++ vm/src/obj/objstr.rs | 51 ++------- vm/src/stdlib/string.rs | 96 +++++++++++++++-- 4 files changed, 221 insertions(+), 164 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index f7d3721eef..c217911ae4 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -1,6 +1,5 @@ +use crate::exceptions::PyBaseExceptionRef; use crate::function::PyFuncArgs; -use crate::obj::objint::PyInt; -use crate::obj::objstr::PyString; use crate::obj::{objstr, objtype}; use crate::pyobject::{IntoPyObject, ItemProtocol, PyObjectRef, PyResult, TypeProtocol}; use crate::vm::VirtualMachine; @@ -558,11 +557,23 @@ pub(crate) enum FormatParseError { MissingStartBracket, UnescapedStartBracketInLiteral, InvalidFormatSpecifier, + UnknownConversion, EmptyAttribute, MissingRightBracket, InvalidCharacterAfterRightBracket, } +impl FormatParseError { + pub fn into_pyobject(self, vm: &VirtualMachine) -> PyBaseExceptionRef { + match self { + FormatParseError::UnmatchedBracket => { + vm.new_value_error("expected '}' before end of string".to_owned()) + } + _ => vm.new_value_error("Unexpected error parsing format string".to_owned()), + } + } +} + impl FromStr for FormatSpec { type Err = &'static str; fn from_str(s: &str) -> Result { @@ -622,12 +633,12 @@ pub(crate) enum FieldType { #[derive(Debug, PartialEq)] pub(crate) struct FieldName { - field_type: FieldType, - parts: Vec, + pub(crate) field_type: FieldType, + pub(crate) parts: Vec, } impl FieldName { - fn parse(text: &str) -> Result { + pub(crate) fn parse(text: &str) -> Result { let mut chars = text.chars().peekable(); let mut first = String::new(); for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { @@ -652,42 +663,18 @@ impl FieldName { } #[derive(Debug, PartialEq)] -enum FormatPart { - Field(FieldName, String), +pub(crate) enum FormatPart { + Field { + field_name: String, + preconversion_spec: Option, + format_spec: String, + }, Literal(String), } -impl FormatPart { - fn is_auto(&self) -> bool { - match self { - FormatPart::Field( - FieldName { - field_type: FieldType::AutoSpec, - .. - }, - _, - ) => true, - _ => false, - } - } - - fn is_index(&self) -> bool { - match self { - FormatPart::Field( - FieldName { - field_type: FieldType::IndexSpec(_), - .. - }, - _, - ) => true, - _ => false, - } - } -} - #[derive(Debug, PartialEq)] pub(crate) struct FormatString { - format_parts: Vec, + pub(crate) format_parts: Vec, } impl FormatString { @@ -744,22 +731,31 @@ impl FormatString { // before the bang is a keyword or arg index, after the comma is maybe a conversor spec. let arg_part = parts[0]; - let preconversor_spec = if parts.len() > 1 { - "!".to_owned() + parts[1] + let preconversion_spec = if let Some(conversion) = parts.get(1) { + let mut chars = conversion.chars(); + if let Some(ch) = chars.next() { + // conversions are only every one character + if chars.next().is_some() { + return Err(FormatParseError::UnknownConversion); + } + Some(ch) + } else { + return Err(FormatParseError::UnknownConversion); + } } else { - String::new() + None }; - let format_spec = preconversor_spec + &format_spec; - Ok(FormatPart::Field(FieldName::parse(arg_part)?, format_spec)) + + Ok(FormatPart::Field { + field_name: arg_part.to_owned(), + preconversion_spec, + format_spec, + }) } - fn parse_spec<'a>( - text: &'a str, - args: &'a PyFuncArgs, - ) -> Result<(FormatPart, &'a str), FormatParseError> { + fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> { let mut nested = false; let mut end_bracket_pos = None; - let mut spec_template = String::new(); let mut left = String::new(); // There may be one layer nesting brackets in spec @@ -773,30 +769,18 @@ impl FormatString { return Err(FormatParseError::InvalidFormatSpecifier); } else { nested = true; + left.push(c); continue; } } else if c == '}' { if nested { nested = false; - if let Some(obj) = args.kwargs.get(&spec_template) { - if let Some(s) = (*obj).clone().payload::() { - left.push_str(s.as_str()); - } else if let Some(i) = (*obj).clone().payload::() { - left.push_str(&PyInt::repr(i)); - } else { - return Err(FormatParseError::InvalidFormatSpecifier); - } - } else { - // CPython return KeyError here - return Err(FormatParseError::InvalidFormatSpecifier); - } + left.push(c); continue; } else { end_bracket_pos = Some(idx); break; } - } else if nested { - spec_template.push(c); } else { left.push(c); } @@ -813,13 +797,20 @@ impl FormatString { fn format_internal( &self, vm: &VirtualMachine, - mut field_func: impl FnMut(&FieldType) -> PyResult, - ) -> PyResult { + field_func: &mut impl FnMut(&FieldType) -> PyResult, + ) -> PyResult { let mut final_string = String::new(); for part in &self.format_parts { let result_string: String = match part { - FormatPart::Field(FieldName { field_type, parts }, format_spec) => { - let mut argument = field_func(field_type)?; + FormatPart::Field { + field_name, + preconversion_spec, + format_spec, + } => { + let FieldName { field_type, parts } = + FieldName::parse(field_name.as_str()).map_err(|e| e.into_pyobject(vm))?; + + let mut argument = field_func(&field_type)?; for name_part in parts { match name_part { @@ -831,54 +822,66 @@ impl FormatString { argument = argument.get_item(&index.into_pyobject(vm)?, vm)?; } FieldNamePart::StringIndex(index) => { - argument = argument.get_item(index, vm)?; + argument = argument.get_item(&index, vm)?; } } } - let value = call_object_format(vm, argument, &format_spec)?; + let nested_format = + FormatString::from_str(&format_spec).map_err(|e| e.into_pyobject(vm))?; + let format_spec = nested_format.format_internal(vm, field_func)?; + + let value = + call_object_format(vm, argument, *preconversion_spec, &format_spec)?; objstr::clone_value(&value) } FormatPart::Literal(literal) => literal.clone(), }; final_string.push_str(&result_string); } - Ok(vm.ctx.new_str(final_string)) + Ok(final_string) } - pub(crate) fn format(&self, arguments: &PyFuncArgs, vm: &VirtualMachine) -> PyResult { - if self.format_parts.iter().any(FormatPart::is_auto) - && self.format_parts.iter().any(FormatPart::is_index) - { - return Err(vm.new_value_error( - "cannot switch from automatic field numbering to manual field specification" - .to_owned(), - )); - } - - let mut auto_argument_index: usize = 1; - self.format_internal(vm, |field_type| match field_type { + pub(crate) fn format(&self, arguments: &PyFuncArgs, vm: &VirtualMachine) -> PyResult { + let mut auto_argument_index: usize = 0; + let mut seen_index = false; + self.format_internal(vm, &mut |field_type| match field_type { FieldType::AutoSpec => { + if seen_index { + return Err(vm.new_value_error( + "cannot switch from manual field specification to automatic field numbering" + .to_owned(), + )); + } auto_argument_index += 1; arguments .args - .get(auto_argument_index - 1) + .get(auto_argument_index) + .cloned() + .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())) + } + FieldType::IndexSpec(index) => { + if auto_argument_index != 0 { + return Err(vm.new_value_error( + "cannot switch from automatic field numbering to manual field specification" + .to_owned(), + )); + } + seen_index = true; + arguments + .args + .get(*index) .cloned() .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())) } - FieldType::IndexSpec(index) => arguments - .args - .get(*index + 1) - .cloned() - .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())), FieldType::KeywordSpec(keyword) => arguments .get_optional_kwarg(&keyword) .ok_or_else(|| vm.new_key_error(vm.new_str(keyword.to_owned()))), }) } - pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult { - self.format_internal(vm, |field_type| match field_type { + pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult { + self.format_internal(vm, &mut |field_type| match field_type { FieldType::AutoSpec | FieldType::IndexSpec(_) => { Err(vm.new_value_error("Format string contains positional fields".to_owned())) } @@ -887,16 +890,20 @@ impl FormatString { } } -fn call_object_format(vm: &VirtualMachine, argument: PyObjectRef, format_spec: &str) -> PyResult { - let (preconversor, new_format_spec) = FormatPreconversor::parse_and_consume(format_spec); - let argument = match preconversor { +fn call_object_format( + vm: &VirtualMachine, + argument: PyObjectRef, + preconversion_spec: Option, + format_spec: &str, +) -> PyResult { + let argument = match preconversion_spec.and_then(|c| FormatPreconversor::from_char(c)) { Some(FormatPreconversor::Str) => vm.call_method(&argument, "__str__", vec![])?, Some(FormatPreconversor::Repr) => vm.call_method(&argument, "__repr__", vec![])?, Some(FormatPreconversor::Ascii) => vm.call_method(&argument, "__repr__", vec![])?, Some(FormatPreconversor::Bytes) => vm.call_method(&argument, "decode", vec![])?, None => argument, }; - let returned_type = vm.ctx.new_str(new_format_spec.to_owned()); + let returned_type = vm.ctx.new_str(format_spec.to_owned()); let result = vm.call_method(&argument, "__format__", vec![returned_type])?; if !objtype::isinstance(&result, &vm.ctx.types.str_type) { @@ -909,20 +916,20 @@ fn call_object_format(vm: &VirtualMachine, argument: PyObjectRef, format_spec: & pub(crate) trait FromTemplate<'a>: Sized { type Err; - fn from_str(s: &'a str, arg: &'a PyFuncArgs) -> Result; + fn from_str(s: &'a str) -> Result; } impl<'a> FromTemplate<'a> for FormatString { type Err = FormatParseError; - fn from_str(text: &'a str, args: &'a PyFuncArgs) -> Result { + fn from_str(text: &'a str) -> Result { let mut cur_text: &str = text; let mut parts: Vec = Vec::new(); while !cur_text.is_empty() { // Try to parse both literals and bracketed format parts util we // run out of text cur_text = FormatString::parse_literal(cur_text) - .or_else(|_| FormatString::parse_spec(cur_text, &args)) + .or_else(|_| FormatString::parse_spec(cur_text)) .map(|(part, new_text)| { parts.push(part); new_text @@ -1061,21 +1068,17 @@ mod tests { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("abcd".to_owned()), - FormatPart::Field( - FieldName { - field_type: FieldType::IndexSpec(1), - parts: Vec::new(), - }, - String::new(), - ), + FormatPart::Field { + field_name: "1".to_owned(), + preconversion_spec: None, + format_spec: String::new(), + }, FormatPart::Literal(":".to_owned()), - FormatPart::Field( - FieldName { - field_type: FieldType::KeywordSpec("key".to_owned()), - parts: Vec::new(), - }, - String::new(), - ), + FormatPart::Field { + field_name: "key".to_owned(), + preconversion_spec: None, + format_spec: String::new(), + }, ], }); @@ -1098,13 +1101,11 @@ mod tests { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("{".to_owned()), - FormatPart::Field( - FieldName { - field_type: FieldType::KeywordSpec("key".to_owned()), - parts: Vec::new(), - }, - String::new(), - ), + FormatPart::Field { + field_name: "key".to_owned(), + preconversion_spec: None, + format_spec: String::new(), + }, FormatPart::Literal("}ddfe".to_owned()), ], }); diff --git a/vm/src/obj/objlist.rs b/vm/src/obj/objlist.rs index 90e9629e6c..a81468f422 100644 --- a/vm/src/obj/objlist.rs +++ b/vm/src/obj/objlist.rs @@ -1,4 +1,5 @@ use std::fmt; +use std::iter::FromIterator; use std::mem::size_of; use std::ops::{DerefMut, Range}; @@ -47,6 +48,12 @@ impl From> for PyList { } } +impl FromIterator for PyList { + fn from_iter>(iter: T) -> Self { + Vec::from_iter(iter).into() + } +} + impl PyValue for PyList { fn class(vm: &VirtualMachine) -> PyClassRef { vm.ctx.list_type() diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index 21b57976eb..e4e1e7d1c4 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -19,7 +19,7 @@ use super::objiter; use super::objnone::PyNone; use super::objsequence::{PySliceableSequence, SequenceIndex}; use super::objtype::{self, PyClassRef}; -use crate::format::{FormatParseError, FormatSpec, FormatString, FromTemplate}; +use crate::format::{FormatSpec, FormatString, FromTemplate}; use crate::function::{OptionalArg, OptionalOption, PyFuncArgs}; use crate::pyobject::{ IdProtocol, IntoPyObject, ItemProtocol, PyClassImpl, PyContext, PyIterable, PyObjectRef, PyRef, @@ -587,31 +587,10 @@ impl PyString { } #[pymethod] - fn format(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { - if args.args.is_empty() { - return Err(vm.new_type_error( - "descriptor 'format' of 'str' object needs an argument".to_owned(), - )); - } - - let zelf = &args.args[0]; - if !objtype::isinstance(&zelf, &vm.ctx.types.str_type) { - let zelf_typ = zelf.class(); - let actual_type = vm.to_pystr(&zelf_typ)?; - return Err(vm.new_type_error(format!( - "descriptor 'format' requires a 'str' object but received a '{}'", - actual_type - ))); - } - let format_string_text = borrow_value(zelf); - match FormatString::from_str(format_string_text, &args) { + fn format(&self, args: PyFuncArgs, vm: &VirtualMachine) -> PyResult { + match FormatString::from_str(&self.value) { Ok(format_string) => format_string.format(&args, vm), - Err(err) => match err { - FormatParseError::UnmatchedBracket => { - Err(vm.new_value_error("expected '}' before end of string".to_owned())) - } - _ => Err(vm.new_value_error("Unexpected error parsing format string".to_owned())), - }, + Err(err) => Err(err.into_pyobject(vm)), } } @@ -620,24 +599,10 @@ impl PyString { /// Return a formatted version of S, using substitutions from mapping. /// The substitutions are identified by braces ('{' and '}'). #[pymethod] - fn format_map(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult { - if args.args.len() != 2 { - return Err(vm.new_type_error(format!( - "format_map() takes exactly one argument ({} given)", - args.args.len() - 1 - ))); - } - - let zelf = &args.args[0]; - let format_string_text = borrow_value(zelf); - match FormatString::from_str(format_string_text, &args) { - Ok(format_string) => format_string.format_map(&args.args[1], vm), - Err(err) => match err { - FormatParseError::UnmatchedBracket => { - Err(vm.new_value_error("expected '}' before end of string".to_owned())) - } - _ => Err(vm.new_value_error("Unexpected error parsing format string".to_owned())), - }, + fn format_map(&self, mapping: PyObjectRef, vm: &VirtualMachine) -> PyResult { + match FormatString::from_str(&self.value) { + Ok(format_string) => format_string.format_map(&mapping, vm), + Err(err) => Err(err.into_pyobject(vm)), } } diff --git a/vm/src/stdlib/string.rs b/vm/src/stdlib/string.rs index 215dce21f7..7ab1484a4d 100644 --- a/vm/src/stdlib/string.rs +++ b/vm/src/stdlib/string.rs @@ -1,12 +1,96 @@ /* String builtin module */ -use crate::pyobject::PyObjectRef; -use crate::vm::VirtualMachine; +pub(crate) use _string::make_module; -pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { - // let ctx = &vm.ctx; +#[pymodule] +mod _string { + use std::mem; - // Constants: - py_module!(vm, "_string", {}) + use crate::format::{ + FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate, + }; + use crate::obj::objlist::PyList; + use crate::obj::objstr::PyStringRef; + use crate::pyobject::{IntoPyObject, PyObjectRef, PyResult}; + use crate::vm::VirtualMachine; + + fn create_format_part( + literal: String, + field_name: Option, + format_spec: Option, + preconversion_spec: Option, + vm: &VirtualMachine, + ) -> PyResult { + let tuple = ( + literal, + field_name, + format_spec, + preconversion_spec.map(|c| c.to_string()), + ); + tuple.into_pyobject(vm) + } + + #[pyfunction] + fn formatter_parser(text: PyStringRef, vm: &VirtualMachine) -> PyResult { + let format_string = + FormatString::from_str(text.as_str()).map_err(|e| e.into_pyobject(vm))?; + + let mut result = Vec::new(); + let mut literal = String::new(); + for part in format_string.format_parts { + match part { + FormatPart::Field { + field_name, + preconversion_spec, + format_spec, + } => { + result.push(create_format_part( + mem::take(&mut literal), + Some(field_name), + Some(format_spec), + preconversion_spec, + vm, + )?); + } + FormatPart::Literal(text) => literal.push_str(&text), + } + } + if !literal.is_empty() { + result.push(create_format_part( + mem::take(&mut literal), + None, + None, + None, + vm, + )?); + } + Ok(result.into()) + } + + #[pyfunction] + fn formatter_field_name_split( + text: PyStringRef, + vm: &VirtualMachine, + ) -> PyResult<(PyObjectRef, PyList)> { + let field_name = FieldName::parse(text.as_str()).map_err(|e| e.into_pyobject(vm))?; + + let first = match field_name.field_type { + FieldType::AutoSpec => vm.new_str("".to_owned()), + FieldType::IndexSpec(index) => index.into_pyobject(vm)?, + FieldType::KeywordSpec(attribute) => attribute.into_pyobject(vm)?, + }; + + let rest = field_name + .parts + .iter() + .map(|p| match p { + FieldNamePart::Attribute(attribute) => (true, attribute).into_pyobject(vm).unwrap(), + FieldNamePart::StringIndex(index) => (false, index).into_pyobject(vm).unwrap(), + FieldNamePart::Index(index) => (false, *index).into_pyobject(vm).unwrap(), + }) + .collect(); + + Ok((first, rest)) + } } From 8ae65dad76374e75ab9411d4f652359529261569 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 11:48:40 +1200 Subject: [PATCH 03/10] Fix bug in format auto spec indexing. --- vm/src/format.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index c217911ae4..c489b67ed9 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -856,7 +856,7 @@ impl FormatString { auto_argument_index += 1; arguments .args - .get(auto_argument_index) + .get(auto_argument_index - 1) .cloned() .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())) } From 815572beb32ef766185713544e6d9bcdb988ae6b Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 11:50:26 +1200 Subject: [PATCH 04/10] Copy test_string.py from CPython 3.8.3. --- Lib/test/test_string.py | 480 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100644 Lib/test/test_string.py diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py new file mode 100644 index 0000000000..0be28fdb60 --- /dev/null +++ b/Lib/test/test_string.py @@ -0,0 +1,480 @@ +import unittest +import string +from string import Template + + +class ModuleTest(unittest.TestCase): + + def test_attrs(self): + # While the exact order of the items in these attributes is not + # technically part of the "language spec", in practice there is almost + # certainly user code that depends on the order, so de-facto it *is* + # part of the spec. + self.assertEqual(string.whitespace, ' \t\n\r\x0b\x0c') + self.assertEqual(string.ascii_lowercase, 'abcdefghijklmnopqrstuvwxyz') + self.assertEqual(string.ascii_uppercase, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') + self.assertEqual(string.ascii_letters, string.ascii_lowercase + string.ascii_uppercase) + self.assertEqual(string.digits, '0123456789') + self.assertEqual(string.hexdigits, string.digits + 'abcdefABCDEF') + self.assertEqual(string.octdigits, '01234567') + self.assertEqual(string.punctuation, '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~') + self.assertEqual(string.printable, string.digits + string.ascii_lowercase + string.ascii_uppercase + string.punctuation + string.whitespace) + + def test_capwords(self): + self.assertEqual(string.capwords('abc def ghi'), 'Abc Def Ghi') + self.assertEqual(string.capwords('abc\tdef\nghi'), 'Abc Def Ghi') + self.assertEqual(string.capwords('abc\t def \nghi'), 'Abc Def Ghi') + self.assertEqual(string.capwords('ABC DEF GHI'), 'Abc Def Ghi') + self.assertEqual(string.capwords('ABC-DEF-GHI', '-'), 'Abc-Def-Ghi') + self.assertEqual(string.capwords('ABC-def DEF-ghi GHI'), 'Abc-def Def-ghi Ghi') + self.assertEqual(string.capwords(' aBc DeF '), 'Abc Def') + self.assertEqual(string.capwords('\taBc\tDeF\t'), 'Abc Def') + self.assertEqual(string.capwords('\taBc\tDeF\t', '\t'), '\tAbc\tDef\t') + + def test_basic_formatter(self): + fmt = string.Formatter() + self.assertEqual(fmt.format("foo"), "foo") + self.assertEqual(fmt.format("foo{0}", "bar"), "foobar") + self.assertEqual(fmt.format("foo{1}{0}-{1}", "bar", 6), "foo6bar-6") + self.assertRaises(TypeError, fmt.format) + self.assertRaises(TypeError, string.Formatter.format) + + def test_format_keyword_arguments(self): + fmt = string.Formatter() + self.assertEqual(fmt.format("-{arg}-", arg='test'), '-test-') + self.assertRaises(KeyError, fmt.format, "-{arg}-") + self.assertEqual(fmt.format("-{self}-", self='test'), '-test-') + self.assertRaises(KeyError, fmt.format, "-{self}-") + self.assertEqual(fmt.format("-{format_string}-", format_string='test'), + '-test-') + self.assertRaises(KeyError, fmt.format, "-{format_string}-") + with self.assertRaisesRegex(TypeError, "format_string"): + fmt.format(format_string="-{arg}-", arg='test') + + def test_auto_numbering(self): + fmt = string.Formatter() + self.assertEqual(fmt.format('foo{}{}', 'bar', 6), + 'foo{}{}'.format('bar', 6)) + self.assertEqual(fmt.format('foo{1}{num}{1}', None, 'bar', num=6), + 'foo{1}{num}{1}'.format(None, 'bar', num=6)) + self.assertEqual(fmt.format('{:^{}}', 'bar', 6), + '{:^{}}'.format('bar', 6)) + self.assertEqual(fmt.format('{:^{}} {}', 'bar', 6, 'X'), + '{:^{}} {}'.format('bar', 6, 'X')) + self.assertEqual(fmt.format('{:^{pad}}{}', 'foo', 'bar', pad=6), + '{:^{pad}}{}'.format('foo', 'bar', pad=6)) + + with self.assertRaises(ValueError): + fmt.format('foo{1}{}', 'bar', 6) + + with self.assertRaises(ValueError): + fmt.format('foo{}{1}', 'bar', 6) + + def test_conversion_specifiers(self): + fmt = string.Formatter() + self.assertEqual(fmt.format("-{arg!r}-", arg='test'), "-'test'-") + self.assertEqual(fmt.format("{0!s}", 'test'), 'test') + self.assertRaises(ValueError, fmt.format, "{0!h}", 'test') + # issue13579 + self.assertEqual(fmt.format("{0!a}", 42), '42') + self.assertEqual(fmt.format("{0!a}", string.ascii_letters), + "'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'") + self.assertEqual(fmt.format("{0!a}", chr(255)), "'\\xff'") + self.assertEqual(fmt.format("{0!a}", chr(256)), "'\\u0100'") + + def test_name_lookup(self): + fmt = string.Formatter() + class AnyAttr: + def __getattr__(self, attr): + return attr + x = AnyAttr() + self.assertEqual(fmt.format("{0.lumber}{0.jack}", x), 'lumberjack') + with self.assertRaises(AttributeError): + fmt.format("{0.lumber}{0.jack}", '') + + def test_index_lookup(self): + fmt = string.Formatter() + lookup = ["eggs", "and", "spam"] + self.assertEqual(fmt.format("{0[2]}{0[0]}", lookup), 'spameggs') + with self.assertRaises(IndexError): + fmt.format("{0[2]}{0[0]}", []) + with self.assertRaises(KeyError): + fmt.format("{0[2]}{0[0]}", {}) + + def test_override_get_value(self): + class NamespaceFormatter(string.Formatter): + def __init__(self, namespace={}): + string.Formatter.__init__(self) + self.namespace = namespace + + def get_value(self, key, args, kwds): + if isinstance(key, str): + try: + # Check explicitly passed arguments first + return kwds[key] + except KeyError: + return self.namespace[key] + else: + string.Formatter.get_value(key, args, kwds) + + fmt = NamespaceFormatter({'greeting':'hello'}) + self.assertEqual(fmt.format("{greeting}, world!"), 'hello, world!') + + + def test_override_format_field(self): + class CallFormatter(string.Formatter): + def format_field(self, value, format_spec): + return format(value(), format_spec) + + fmt = CallFormatter() + self.assertEqual(fmt.format('*{0}*', lambda : 'result'), '*result*') + + + def test_override_convert_field(self): + class XFormatter(string.Formatter): + def convert_field(self, value, conversion): + if conversion == 'x': + return None + return super().convert_field(value, conversion) + + fmt = XFormatter() + self.assertEqual(fmt.format("{0!r}:{0!x}", 'foo', 'foo'), "'foo':None") + + + def test_override_parse(self): + class BarFormatter(string.Formatter): + # returns an iterable that contains tuples of the form: + # (literal_text, field_name, format_spec, conversion) + def parse(self, format_string): + for field in format_string.split('|'): + if field[0] == '+': + # it's markup + field_name, _, format_spec = field[1:].partition(':') + yield '', field_name, format_spec, None + else: + yield field, None, None, None + + fmt = BarFormatter() + self.assertEqual(fmt.format('*|+0:^10s|*', 'foo'), '* foo *') + + def test_check_unused_args(self): + class CheckAllUsedFormatter(string.Formatter): + def check_unused_args(self, used_args, args, kwargs): + # Track which arguments actually got used + unused_args = set(kwargs.keys()) + unused_args.update(range(0, len(args))) + + for arg in used_args: + unused_args.remove(arg) + + if unused_args: + raise ValueError("unused arguments") + + fmt = CheckAllUsedFormatter() + self.assertEqual(fmt.format("{0}", 10), "10") + self.assertEqual(fmt.format("{0}{i}", 10, i=100), "10100") + self.assertEqual(fmt.format("{0}{i}{1}", 10, 20, i=100), "1010020") + self.assertRaises(ValueError, fmt.format, "{0}{i}{1}", 10, 20, i=100, j=0) + self.assertRaises(ValueError, fmt.format, "{0}", 10, 20) + self.assertRaises(ValueError, fmt.format, "{0}", 10, 20, i=100) + self.assertRaises(ValueError, fmt.format, "{i}", 10, 20, i=100) + + def test_vformat_recursion_limit(self): + fmt = string.Formatter() + args = () + kwargs = dict(i=100) + with self.assertRaises(ValueError) as err: + fmt._vformat("{i}", args, kwargs, set(), -1) + self.assertIn("recursion", str(err.exception)) + + +# Template tests (formerly housed in test_pep292.py) + +class Bag: + pass + +class Mapping: + def __getitem__(self, name): + obj = self + for part in name.split('.'): + try: + obj = getattr(obj, part) + except AttributeError: + raise KeyError(name) + return obj + + +class TestTemplate(unittest.TestCase): + def test_regular_templates(self): + s = Template('$who likes to eat a bag of $what worth $$100') + self.assertEqual(s.substitute(dict(who='tim', what='ham')), + 'tim likes to eat a bag of ham worth $100') + self.assertRaises(KeyError, s.substitute, dict(who='tim')) + self.assertRaises(TypeError, Template.substitute) + + def test_regular_templates_with_braces(self): + s = Template('$who likes ${what} for ${meal}') + d = dict(who='tim', what='ham', meal='dinner') + self.assertEqual(s.substitute(d), 'tim likes ham for dinner') + self.assertRaises(KeyError, s.substitute, + dict(who='tim', what='ham')) + + def test_regular_templates_with_upper_case(self): + s = Template('$WHO likes ${WHAT} for ${MEAL}') + d = dict(WHO='tim', WHAT='ham', MEAL='dinner') + self.assertEqual(s.substitute(d), 'tim likes ham for dinner') + + def test_regular_templates_with_non_letters(self): + s = Template('$_wh0_ likes ${_w_h_a_t_} for ${mea1}') + d = dict(_wh0_='tim', _w_h_a_t_='ham', mea1='dinner') + self.assertEqual(s.substitute(d), 'tim likes ham for dinner') + + def test_escapes(self): + eq = self.assertEqual + s = Template('$who likes to eat a bag of $$what worth $$100') + eq(s.substitute(dict(who='tim', what='ham')), + 'tim likes to eat a bag of $what worth $100') + s = Template('$who likes $$') + eq(s.substitute(dict(who='tim', what='ham')), 'tim likes $') + + def test_percents(self): + eq = self.assertEqual + s = Template('%(foo)s $foo ${foo}') + d = dict(foo='baz') + eq(s.substitute(d), '%(foo)s baz baz') + eq(s.safe_substitute(d), '%(foo)s baz baz') + + def test_stringification(self): + eq = self.assertEqual + s = Template('tim has eaten $count bags of ham today') + d = dict(count=7) + eq(s.substitute(d), 'tim has eaten 7 bags of ham today') + eq(s.safe_substitute(d), 'tim has eaten 7 bags of ham today') + s = Template('tim has eaten ${count} bags of ham today') + eq(s.substitute(d), 'tim has eaten 7 bags of ham today') + + def test_tupleargs(self): + eq = self.assertEqual + s = Template('$who ate ${meal}') + d = dict(who=('tim', 'fred'), meal=('ham', 'kung pao')) + eq(s.substitute(d), "('tim', 'fred') ate ('ham', 'kung pao')") + eq(s.safe_substitute(d), "('tim', 'fred') ate ('ham', 'kung pao')") + + def test_SafeTemplate(self): + eq = self.assertEqual + s = Template('$who likes ${what} for ${meal}') + eq(s.safe_substitute(dict(who='tim')), 'tim likes ${what} for ${meal}') + eq(s.safe_substitute(dict(what='ham')), '$who likes ham for ${meal}') + eq(s.safe_substitute(dict(what='ham', meal='dinner')), + '$who likes ham for dinner') + eq(s.safe_substitute(dict(who='tim', what='ham')), + 'tim likes ham for ${meal}') + eq(s.safe_substitute(dict(who='tim', what='ham', meal='dinner')), + 'tim likes ham for dinner') + + def test_invalid_placeholders(self): + raises = self.assertRaises + s = Template('$who likes $') + raises(ValueError, s.substitute, dict(who='tim')) + s = Template('$who likes ${what)') + raises(ValueError, s.substitute, dict(who='tim')) + s = Template('$who likes $100') + raises(ValueError, s.substitute, dict(who='tim')) + # Template.idpattern should match to only ASCII characters. + # https://bugs.python.org/issue31672 + s = Template("$who likes $\u0131") # (DOTLESS I) + raises(ValueError, s.substitute, dict(who='tim')) + s = Template("$who likes $\u0130") # (LATIN CAPITAL LETTER I WITH DOT ABOVE) + raises(ValueError, s.substitute, dict(who='tim')) + + def test_idpattern_override(self): + class PathPattern(Template): + idpattern = r'[_a-z][._a-z0-9]*' + m = Mapping() + m.bag = Bag() + m.bag.foo = Bag() + m.bag.foo.who = 'tim' + m.bag.what = 'ham' + s = PathPattern('$bag.foo.who likes to eat a bag of $bag.what') + self.assertEqual(s.substitute(m), 'tim likes to eat a bag of ham') + + def test_flags_override(self): + class MyPattern(Template): + flags = 0 + s = MyPattern('$wHO likes ${WHAT} for ${meal}') + d = dict(wHO='tim', WHAT='ham', meal='dinner', w='fred') + self.assertRaises(ValueError, s.substitute, d) + self.assertEqual(s.safe_substitute(d), 'fredHO likes ${WHAT} for dinner') + + def test_idpattern_override_inside_outside(self): + # bpo-1198569: Allow the regexp inside and outside braces to be + # different when deriving from Template. + class MyPattern(Template): + idpattern = r'[a-z]+' + braceidpattern = r'[A-Z]+' + flags = 0 + m = dict(foo='foo', BAR='BAR') + s = MyPattern('$foo ${BAR}') + self.assertEqual(s.substitute(m), 'foo BAR') + + def test_idpattern_override_inside_outside_invalid_unbraced(self): + # bpo-1198569: Allow the regexp inside and outside braces to be + # different when deriving from Template. + class MyPattern(Template): + idpattern = r'[a-z]+' + braceidpattern = r'[A-Z]+' + flags = 0 + m = dict(foo='foo', BAR='BAR') + s = MyPattern('$FOO') + self.assertRaises(ValueError, s.substitute, m) + s = MyPattern('${bar}') + self.assertRaises(ValueError, s.substitute, m) + + def test_pattern_override(self): + class MyPattern(Template): + pattern = r""" + (?P@{2}) | + @(?P[_a-z][._a-z0-9]*) | + @{(?P[_a-z][._a-z0-9]*)} | + (?P@) + """ + m = Mapping() + m.bag = Bag() + m.bag.foo = Bag() + m.bag.foo.who = 'tim' + m.bag.what = 'ham' + s = MyPattern('@bag.foo.who likes to eat a bag of @bag.what') + self.assertEqual(s.substitute(m), 'tim likes to eat a bag of ham') + + class BadPattern(Template): + pattern = r""" + (?P.*) | + (?P@{2}) | + @(?P[_a-z][._a-z0-9]*) | + @{(?P[_a-z][._a-z0-9]*)} | + (?P@) | + """ + s = BadPattern('@bag.foo.who likes to eat a bag of @bag.what') + self.assertRaises(ValueError, s.substitute, {}) + self.assertRaises(ValueError, s.safe_substitute, {}) + + def test_braced_override(self): + class MyTemplate(Template): + pattern = r""" + \$(?: + (?P$) | + (?P[_a-z][_a-z0-9]*) | + @@(?P[_a-z][_a-z0-9]*)@@ | + (?P) | + ) + """ + + tmpl = 'PyCon in $@@location@@' + t = MyTemplate(tmpl) + self.assertRaises(KeyError, t.substitute, {}) + val = t.substitute({'location': 'Cleveland'}) + self.assertEqual(val, 'PyCon in Cleveland') + + def test_braced_override_safe(self): + class MyTemplate(Template): + pattern = r""" + \$(?: + (?P$) | + (?P[_a-z][_a-z0-9]*) | + @@(?P[_a-z][_a-z0-9]*)@@ | + (?P) | + ) + """ + + tmpl = 'PyCon in $@@location@@' + t = MyTemplate(tmpl) + self.assertEqual(t.safe_substitute(), tmpl) + val = t.safe_substitute({'location': 'Cleveland'}) + self.assertEqual(val, 'PyCon in Cleveland') + + def test_invalid_with_no_lines(self): + # The error formatting for invalid templates + # has a special case for no data that the default + # pattern can't trigger (always has at least '$') + # So we craft a pattern that is always invalid + # with no leading data. + class MyTemplate(Template): + pattern = r""" + (?P) | + unreachable( + (?P) | + (?P) | + (?P) + ) + """ + s = MyTemplate('') + with self.assertRaises(ValueError) as err: + s.substitute({}) + self.assertIn('line 1, col 1', str(err.exception)) + + def test_unicode_values(self): + s = Template('$who likes $what') + d = dict(who='t\xffm', what='f\xfe\fed') + self.assertEqual(s.substitute(d), 't\xffm likes f\xfe\x0ced') + + def test_keyword_arguments(self): + eq = self.assertEqual + s = Template('$who likes $what') + eq(s.substitute(who='tim', what='ham'), 'tim likes ham') + eq(s.substitute(dict(who='tim'), what='ham'), 'tim likes ham') + eq(s.substitute(dict(who='fred', what='kung pao'), + who='tim', what='ham'), + 'tim likes ham') + s = Template('the mapping is $mapping') + eq(s.substitute(dict(foo='none'), mapping='bozo'), + 'the mapping is bozo') + eq(s.substitute(dict(mapping='one'), mapping='two'), + 'the mapping is two') + + s = Template('the self is $self') + eq(s.substitute(self='bozo'), 'the self is bozo') + + def test_keyword_arguments_safe(self): + eq = self.assertEqual + raises = self.assertRaises + s = Template('$who likes $what') + eq(s.safe_substitute(who='tim', what='ham'), 'tim likes ham') + eq(s.safe_substitute(dict(who='tim'), what='ham'), 'tim likes ham') + eq(s.safe_substitute(dict(who='fred', what='kung pao'), + who='tim', what='ham'), + 'tim likes ham') + s = Template('the mapping is $mapping') + eq(s.safe_substitute(dict(foo='none'), mapping='bozo'), + 'the mapping is bozo') + eq(s.safe_substitute(dict(mapping='one'), mapping='two'), + 'the mapping is two') + d = dict(mapping='one') + raises(TypeError, s.substitute, d, {}) + raises(TypeError, s.safe_substitute, d, {}) + + s = Template('the self is $self') + eq(s.safe_substitute(self='bozo'), 'the self is bozo') + + def test_delimiter_override(self): + eq = self.assertEqual + raises = self.assertRaises + class AmpersandTemplate(Template): + delimiter = '&' + s = AmpersandTemplate('this &gift is for &{who} &&') + eq(s.substitute(gift='bud', who='you'), 'this bud is for you &') + raises(KeyError, s.substitute) + eq(s.safe_substitute(gift='bud', who='you'), 'this bud is for you &') + eq(s.safe_substitute(), 'this &gift is for &{who} &') + s = AmpersandTemplate('this &gift is for &{who} &') + raises(ValueError, s.substitute, dict(gift='bud', who='you')) + eq(s.safe_substitute(), 'this &gift is for &{who} &') + + class PieDelims(Template): + delimiter = '@' + s = PieDelims('@who likes to eat a bag of @{what} worth $100') + self.assertEqual(s.substitute(dict(who='tim', what='ham')), + 'tim likes to eat a bag of ham worth $100') + + +if __name__ == '__main__': + unittest.main() From 0bfe830e7aa557c8782cdaea7277590ded4f1af1 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 12:34:05 +1200 Subject: [PATCH 05/10] Mark failing test_string.py tests. --- Lib/test/test_string.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py index 0be28fdb60..242d211e53 100644 --- a/Lib/test/test_string.py +++ b/Lib/test/test_string.py @@ -272,6 +272,8 @@ def test_SafeTemplate(self): eq(s.safe_substitute(dict(who='tim', what='ham', meal='dinner')), 'tim likes ham for dinner') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_invalid_placeholders(self): raises = self.assertRaises s = Template('$who likes $') @@ -298,6 +300,8 @@ class PathPattern(Template): s = PathPattern('$bag.foo.who likes to eat a bag of $bag.what') self.assertEqual(s.substitute(m), 'tim likes to eat a bag of ham') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_flags_override(self): class MyPattern(Template): flags = 0 @@ -317,6 +321,8 @@ class MyPattern(Template): s = MyPattern('$foo ${BAR}') self.assertEqual(s.substitute(m), 'foo BAR') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_idpattern_override_inside_outside_invalid_unbraced(self): # bpo-1198569: Allow the regexp inside and outside braces to be # different when deriving from Template. @@ -455,6 +461,8 @@ def test_keyword_arguments_safe(self): s = Template('the self is $self') eq(s.safe_substitute(self='bozo'), 'the self is bozo') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_delimiter_override(self): eq = self.assertEqual raises = self.assertRaises From fa3e571889ab2ec8e7812026980ae8a13779e318 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 12:52:35 +1200 Subject: [PATCH 06/10] Unmark tests that are now passing in test_unicode.py. --- Lib/test/test_unicode.py | 6 ------ vm/src/format.rs | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 09782eb963..69aff15181 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1273,8 +1273,6 @@ def __repr__(self): self.assertEqual("{!s}".format(n), 'N(data)') self.assertRaises(TypeError, "{}".format, n) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_map(self): self.assertEqual(''.format_map({}), '') self.assertEqual('a'.format_map({}), 'a') @@ -2958,8 +2956,6 @@ def test_pep393_utf8_caching_bug(self): self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) class StringModuleTest(unittest.TestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_formatter_parser(self): def parse(format): return list(_string.formatter_parser(format)) @@ -2993,8 +2989,6 @@ def parse(format): self.assertRaises(TypeError, _string.formatter_parser, 1) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_formatter_field_name_split(self): def split(name): items = list(_string.formatter_field_name_split(name)) diff --git a/vm/src/format.rs b/vm/src/format.rs index c489b67ed9..391294c42c 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -685,7 +685,7 @@ impl FormatString { if first_char == '{' || first_char == '}' { let maybe_next_char = chars.next(); // if we see a bracket, it has to be escaped by doubling up to be in a literal - if maybe_next_char.is_some() && maybe_next_char.unwrap() != first_char { + if maybe_next_char.is_none() || maybe_next_char.unwrap() != first_char { return Err(FormatParseError::UnescapedStartBracketInLiteral); } else { return Ok((first_char, chars.as_str())); From 058e73a37627847b4043922ef419001b70d10792 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 13:14:34 +1200 Subject: [PATCH 07/10] Fix compilation of rust unit tests. --- vm/src/format.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index 391294c42c..42c7e64d10 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -1083,7 +1083,7 @@ mod tests { }); assert_eq!( - FormatString::from_str("abcd{1}:{key}", &PyFuncArgs::default()), + FormatString::from_str("abcd{1}:{key}"), expected ); } @@ -1091,7 +1091,7 @@ mod tests { #[test] fn test_format_parse_fail() { assert_eq!( - FormatString::from_str("{s", &PyFuncArgs::default()), + FormatString::from_str("{s"), Err(FormatParseError::UnmatchedBracket) ); } @@ -1111,7 +1111,7 @@ mod tests { }); assert_eq!( - FormatString::from_str("{{{key}}}ddfe", &PyFuncArgs::default()), + FormatString::from_str("{{{key}}}ddfe"), expected ); } From 1c18d990447c00beb9f1b4363395d37dea95adc4 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 13:23:59 +1200 Subject: [PATCH 08/10] Run rustfmt --- vm/src/format.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index 42c7e64d10..ce9aad7ee1 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -1082,10 +1082,7 @@ mod tests { ], }); - assert_eq!( - FormatString::from_str("abcd{1}:{key}"), - expected - ); + assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected); } #[test] @@ -1110,10 +1107,7 @@ mod tests { ], }); - assert_eq!( - FormatString::from_str("{{{key}}}ddfe"), - expected - ); + assert_eq!(FormatString::from_str("{{{key}}}ddfe"), expected); } #[test] From e9a2944138c4f4b5641e04b8636eab8ebeaf6761 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sat, 1 Aug 2020 13:48:44 +1200 Subject: [PATCH 09/10] Fix clippy warnings --- vm/src/format.rs | 32 ++++++++++++++++---------------- vm/src/stdlib/string.rs | 6 +++--- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index ce9aad7ee1..547e4bebdb 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -626,9 +626,9 @@ impl FieldNamePart { #[derive(Debug, PartialEq)] pub(crate) enum FieldType { - AutoSpec, - IndexSpec(usize), - KeywordSpec(String), + Auto, + Index(usize), + Keyword(String), } #[derive(Debug, PartialEq)] @@ -646,11 +646,11 @@ impl FieldName { } let field_type = if first.is_empty() { - FieldType::AutoSpec + FieldType::Auto } else if let Ok(index) = first.parse::() { - FieldType::IndexSpec(index) + FieldType::Index(index) } else { - FieldType::KeywordSpec(first) + FieldType::Keyword(first) }; let mut parts = Vec::new(); @@ -846,7 +846,7 @@ impl FormatString { let mut auto_argument_index: usize = 0; let mut seen_index = false; self.format_internal(vm, &mut |field_type| match field_type { - FieldType::AutoSpec => { + FieldType::Auto => { if seen_index { return Err(vm.new_value_error( "cannot switch from manual field specification to automatic field numbering" @@ -860,7 +860,7 @@ impl FormatString { .cloned() .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())) } - FieldType::IndexSpec(index) => { + FieldType::Index(index) => { if auto_argument_index != 0 { return Err(vm.new_value_error( "cannot switch from automatic field numbering to manual field specification" @@ -874,7 +874,7 @@ impl FormatString { .cloned() .ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())) } - FieldType::KeywordSpec(keyword) => arguments + FieldType::Keyword(keyword) => arguments .get_optional_kwarg(&keyword) .ok_or_else(|| vm.new_key_error(vm.new_str(keyword.to_owned()))), }) @@ -882,10 +882,10 @@ impl FormatString { pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult { self.format_internal(vm, &mut |field_type| match field_type { - FieldType::AutoSpec | FieldType::IndexSpec(_) => { + FieldType::Auto | FieldType::Index(_) => { Err(vm.new_value_error("Format string contains positional fields".to_owned())) } - FieldType::KeywordSpec(keyword) => dict.get_item(keyword, &vm), + FieldType::Keyword(keyword) => dict.get_item(keyword, &vm), }) } } @@ -896,7 +896,7 @@ fn call_object_format( preconversion_spec: Option, format_spec: &str, ) -> PyResult { - let argument = match preconversion_spec.and_then(|c| FormatPreconversor::from_char(c)) { + let argument = match preconversion_spec.and_then(FormatPreconversor::from_char) { Some(FormatPreconversor::Str) => vm.call_method(&argument, "__str__", vec![])?, Some(FormatPreconversor::Repr) => vm.call_method(&argument, "__repr__", vec![])?, Some(FormatPreconversor::Ascii) => vm.call_method(&argument, "__repr__", vec![])?, @@ -1126,28 +1126,28 @@ mod tests { assert_eq!( FieldName::parse(""), Ok(FieldName { - field_type: FieldType::AutoSpec, + field_type: FieldType::Auto, parts: Vec::new(), }) ); assert_eq!( FieldName::parse("0"), Ok(FieldName { - field_type: FieldType::IndexSpec(0), + field_type: FieldType::Index(0), parts: Vec::new(), }) ); assert_eq!( FieldName::parse("key"), Ok(FieldName { - field_type: FieldType::KeywordSpec("key".to_owned()), + field_type: FieldType::Keyword("key".to_owned()), parts: Vec::new(), }) ); assert_eq!( FieldName::parse("key.attr[0][string]"), Ok(FieldName { - field_type: FieldType::KeywordSpec("key".to_owned()), + field_type: FieldType::Keyword("key".to_owned()), parts: vec![ FieldNamePart::Attribute("attr".to_owned()), FieldNamePart::Index(0), diff --git a/vm/src/stdlib/string.rs b/vm/src/stdlib/string.rs index 7ab1484a4d..8319f04b8b 100644 --- a/vm/src/stdlib/string.rs +++ b/vm/src/stdlib/string.rs @@ -76,9 +76,9 @@ mod _string { let field_name = FieldName::parse(text.as_str()).map_err(|e| e.into_pyobject(vm))?; let first = match field_name.field_type { - FieldType::AutoSpec => vm.new_str("".to_owned()), - FieldType::IndexSpec(index) => index.into_pyobject(vm)?, - FieldType::KeywordSpec(attribute) => attribute.into_pyobject(vm)?, + FieldType::Auto => vm.new_str("".to_owned()), + FieldType::Index(index) => index.into_pyobject(vm)?, + FieldType::Keyword(attribute) => attribute.into_pyobject(vm)?, }; let rest = field_name From 928bca88c6edf162d0c62e6344f84d16b7ce4f1b Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Sun, 2 Aug 2020 07:37:47 +1200 Subject: [PATCH 10/10] Code review changes. --- vm/src/format.rs | 68 +++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/vm/src/format.rs b/vm/src/format.rs index 547e4bebdb..133cc94097 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -591,36 +591,40 @@ pub(crate) enum FieldNamePart { impl FieldNamePart { fn parse_part( chars: &mut impl PeekingNext, - ) -> Result { - let ch = chars.next().unwrap(); - if ch == '.' { - let mut attribute = String::new(); - for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { - attribute.push(ch); - } - if attribute.is_empty() { - Err(FormatParseError::EmptyAttribute) - } else { - Ok(FieldNamePart::Attribute(attribute)) - } - } else if ch == '[' { - let mut index = String::new(); - for ch in chars { - if ch == ']' { - return if index.is_empty() { + ) -> Result, FormatParseError> { + chars + .next() + .map(|ch| match ch { + '.' => { + let mut attribute = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { + attribute.push(ch); + } + if attribute.is_empty() { Err(FormatParseError::EmptyAttribute) - } else if let Ok(index) = index.parse::() { - Ok(FieldNamePart::Index(index)) } else { - Ok(FieldNamePart::StringIndex(index)) - }; + Ok(FieldNamePart::Attribute(attribute)) + } } - index.push(ch); - } - Err(FormatParseError::MissingRightBracket) - } else { - Err(FormatParseError::InvalidCharacterAfterRightBracket) - } + '[' => { + let mut index = String::new(); + for ch in chars { + if ch == ']' { + return if index.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else if let Ok(index) = index.parse::() { + Ok(FieldNamePart::Index(index)) + } else { + Ok(FieldNamePart::StringIndex(index)) + }; + } + index.push(ch); + } + Err(FormatParseError::MissingRightBracket) + } + _ => Err(FormatParseError::InvalidCharacterAfterRightBracket), + }) + .transpose() } } @@ -633,8 +637,8 @@ pub(crate) enum FieldType { #[derive(Debug, PartialEq)] pub(crate) struct FieldName { - pub(crate) field_type: FieldType, - pub(crate) parts: Vec, + pub field_type: FieldType, + pub parts: Vec, } impl FieldName { @@ -654,8 +658,8 @@ impl FieldName { }; let mut parts = Vec::new(); - while chars.peek().is_some() { - parts.push(FieldNamePart::parse_part(&mut chars)?) + while let Some(part) = FieldNamePart::parse_part(&mut chars)? { + parts.push(part) } Ok(FieldName { field_type, parts }) @@ -674,7 +678,7 @@ pub(crate) enum FormatPart { #[derive(Debug, PartialEq)] pub(crate) struct FormatString { - pub(crate) format_parts: Vec, + pub format_parts: Vec, } impl FormatString {