Skip to content

Commit e90e2bf

Browse files
committed
Implement string module.
1 parent c4ddff8 commit e90e2bf

File tree

4 files changed

+221
-164
lines changed

4 files changed

+221
-164
lines changed

vm/src/format.rs

+116-115
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1+
use crate::exceptions::PyBaseExceptionRef;
12
use crate::function::PyFuncArgs;
2-
use crate::obj::objint::PyInt;
3-
use crate::obj::objstr::PyString;
43
use crate::obj::{objstr, objtype};
54
use crate::pyobject::{IntoPyObject, ItemProtocol, PyObjectRef, PyResult, TypeProtocol};
65
use crate::vm::VirtualMachine;
@@ -558,11 +557,23 @@ pub(crate) enum FormatParseError {
558557
MissingStartBracket,
559558
UnescapedStartBracketInLiteral,
560559
InvalidFormatSpecifier,
560+
UnknownConversion,
561561
EmptyAttribute,
562562
MissingRightBracket,
563563
InvalidCharacterAfterRightBracket,
564564
}
565565

566+
impl FormatParseError {
567+
pub fn into_pyobject(self, vm: &VirtualMachine) -> PyBaseExceptionRef {
568+
match self {
569+
FormatParseError::UnmatchedBracket => {
570+
vm.new_value_error("expected '}' before end of string".to_owned())
571+
}
572+
_ => vm.new_value_error("Unexpected error parsing format string".to_owned()),
573+
}
574+
}
575+
}
576+
566577
impl FromStr for FormatSpec {
567578
type Err = &'static str;
568579
fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -622,12 +633,12 @@ pub(crate) enum FieldType {
622633

623634
#[derive(Debug, PartialEq)]
624635
pub(crate) struct FieldName {
625-
field_type: FieldType,
626-
parts: Vec<FieldNamePart>,
636+
pub(crate) field_type: FieldType,
637+
pub(crate) parts: Vec<FieldNamePart>,
627638
}
628639

629640
impl FieldName {
630-
fn parse(text: &str) -> Result<FieldName, FormatParseError> {
641+
pub(crate) fn parse(text: &str) -> Result<FieldName, FormatParseError> {
631642
let mut chars = text.chars().peekable();
632643
let mut first = String::new();
633644
for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') {
@@ -652,42 +663,18 @@ impl FieldName {
652663
}
653664

654665
#[derive(Debug, PartialEq)]
655-
enum FormatPart {
656-
Field(FieldName, String),
666+
pub(crate) enum FormatPart {
667+
Field {
668+
field_name: String,
669+
preconversion_spec: Option<char>,
670+
format_spec: String,
671+
},
657672
Literal(String),
658673
}
659674

660-
impl FormatPart {
661-
fn is_auto(&self) -> bool {
662-
match self {
663-
FormatPart::Field(
664-
FieldName {
665-
field_type: FieldType::AutoSpec,
666-
..
667-
},
668-
_,
669-
) => true,
670-
_ => false,
671-
}
672-
}
673-
674-
fn is_index(&self) -> bool {
675-
match self {
676-
FormatPart::Field(
677-
FieldName {
678-
field_type: FieldType::IndexSpec(_),
679-
..
680-
},
681-
_,
682-
) => true,
683-
_ => false,
684-
}
685-
}
686-
}
687-
688675
#[derive(Debug, PartialEq)]
689676
pub(crate) struct FormatString {
690-
format_parts: Vec<FormatPart>,
677+
pub(crate) format_parts: Vec<FormatPart>,
691678
}
692679

693680
impl FormatString {
@@ -744,22 +731,31 @@ impl FormatString {
744731
// before the bang is a keyword or arg index; after the bang there may be a conversion spec.
745732
let arg_part = parts[0];
746733

747-
let preconversor_spec = if parts.len() > 1 {
748-
"!".to_owned() + parts[1]
734+
let preconversion_spec = if let Some(conversion) = parts.get(1) {
735+
let mut chars = conversion.chars();
736+
if let Some(ch) = chars.next() {
737+
// conversions are only ever one character
738+
if chars.next().is_some() {
739+
return Err(FormatParseError::UnknownConversion);
740+
}
741+
Some(ch)
742+
} else {
743+
return Err(FormatParseError::UnknownConversion);
744+
}
749745
} else {
750-
String::new()
746+
None
751747
};
752-
let format_spec = preconversor_spec + &format_spec;
753-
Ok(FormatPart::Field(FieldName::parse(arg_part)?, format_spec))
748+
749+
Ok(FormatPart::Field {
750+
field_name: arg_part.to_owned(),
751+
preconversion_spec,
752+
format_spec,
753+
})
754754
}
755755

756-
fn parse_spec<'a>(
757-
text: &'a str,
758-
args: &'a PyFuncArgs,
759-
) -> Result<(FormatPart, &'a str), FormatParseError> {
756+
fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> {
760757
let mut nested = false;
761758
let mut end_bracket_pos = None;
762-
let mut spec_template = String::new();
763759
let mut left = String::new();
764760

765761
// There may be one layer of nested brackets in the spec
@@ -773,30 +769,18 @@ impl FormatString {
773769
return Err(FormatParseError::InvalidFormatSpecifier);
774770
} else {
775771
nested = true;
772+
left.push(c);
776773
continue;
777774
}
778775
} else if c == '}' {
779776
if nested {
780777
nested = false;
781-
if let Some(obj) = args.kwargs.get(&spec_template) {
782-
if let Some(s) = (*obj).clone().payload::<PyString>() {
783-
left.push_str(s.as_str());
784-
} else if let Some(i) = (*obj).clone().payload::<PyInt>() {
785-
left.push_str(&PyInt::repr(i));
786-
} else {
787-
return Err(FormatParseError::InvalidFormatSpecifier);
788-
}
789-
} else {
790-
// CPython return KeyError here
791-
return Err(FormatParseError::InvalidFormatSpecifier);
792-
}
778+
left.push(c);
793779
continue;
794780
} else {
795781
end_bracket_pos = Some(idx);
796782
break;
797783
}
798-
} else if nested {
799-
spec_template.push(c);
800784
} else {
801785
left.push(c);
802786
}
@@ -813,13 +797,20 @@ impl FormatString {
813797
fn format_internal(
814798
&self,
815799
vm: &VirtualMachine,
816-
mut field_func: impl FnMut(&FieldType) -> PyResult,
817-
) -> PyResult {
800+
field_func: &mut impl FnMut(&FieldType) -> PyResult,
801+
) -> PyResult<String> {
818802
let mut final_string = String::new();
819803
for part in &self.format_parts {
820804
let result_string: String = match part {
821-
FormatPart::Field(FieldName { field_type, parts }, format_spec) => {
822-
let mut argument = field_func(field_type)?;
805+
FormatPart::Field {
806+
field_name,
807+
preconversion_spec,
808+
format_spec,
809+
} => {
810+
let FieldName { field_type, parts } =
811+
FieldName::parse(field_name.as_str()).map_err(|e| e.into_pyobject(vm))?;
812+
813+
let mut argument = field_func(&field_type)?;
823814

824815
for name_part in parts {
825816
match name_part {
@@ -831,54 +822,66 @@ impl FormatString {
831822
argument = argument.get_item(&index.into_pyobject(vm)?, vm)?;
832823
}
833824
FieldNamePart::StringIndex(index) => {
834-
argument = argument.get_item(index, vm)?;
825+
argument = argument.get_item(&index, vm)?;
835826
}
836827
}
837828
}
838829

839-
let value = call_object_format(vm, argument, &format_spec)?;
830+
let nested_format =
831+
FormatString::from_str(&format_spec).map_err(|e| e.into_pyobject(vm))?;
832+
let format_spec = nested_format.format_internal(vm, field_func)?;
833+
834+
let value =
835+
call_object_format(vm, argument, *preconversion_spec, &format_spec)?;
840836
objstr::clone_value(&value)
841837
}
842838
FormatPart::Literal(literal) => literal.clone(),
843839
};
844840
final_string.push_str(&result_string);
845841
}
846-
Ok(vm.ctx.new_str(final_string))
842+
Ok(final_string)
847843
}
848844

849-
pub(crate) fn format(&self, arguments: &PyFuncArgs, vm: &VirtualMachine) -> PyResult {
850-
if self.format_parts.iter().any(FormatPart::is_auto)
851-
&& self.format_parts.iter().any(FormatPart::is_index)
852-
{
853-
return Err(vm.new_value_error(
854-
"cannot switch from automatic field numbering to manual field specification"
855-
.to_owned(),
856-
));
857-
}
858-
859-
let mut auto_argument_index: usize = 1;
860-
self.format_internal(vm, |field_type| match field_type {
845+
pub(crate) fn format(&self, arguments: &PyFuncArgs, vm: &VirtualMachine) -> PyResult<String> {
846+
let mut auto_argument_index: usize = 0;
847+
let mut seen_index = false;
848+
self.format_internal(vm, &mut |field_type| match field_type {
861849
FieldType::AutoSpec => {
850+
if seen_index {
851+
return Err(vm.new_value_error(
852+
"cannot switch from manual field specification to automatic field numbering"
853+
.to_owned(),
854+
));
855+
}
862856
auto_argument_index += 1;
863857
arguments
864858
.args
865-
.get(auto_argument_index - 1)
859+
.get(auto_argument_index)
860+
.cloned()
861+
.ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned()))
862+
}
863+
FieldType::IndexSpec(index) => {
864+
if auto_argument_index != 0 {
865+
return Err(vm.new_value_error(
866+
"cannot switch from automatic field numbering to manual field specification"
867+
.to_owned(),
868+
));
869+
}
870+
seen_index = true;
871+
arguments
872+
.args
873+
.get(*index)
866874
.cloned()
867875
.ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned()))
868876
}
869-
FieldType::IndexSpec(index) => arguments
870-
.args
871-
.get(*index + 1)
872-
.cloned()
873-
.ok_or_else(|| vm.new_index_error("tuple index out of range".to_owned())),
874877
FieldType::KeywordSpec(keyword) => arguments
875878
.get_optional_kwarg(&keyword)
876879
.ok_or_else(|| vm.new_key_error(vm.new_str(keyword.to_owned()))),
877880
})
878881
}
879882

880-
pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult {
881-
self.format_internal(vm, |field_type| match field_type {
883+
pub(crate) fn format_map(&self, dict: &PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
884+
self.format_internal(vm, &mut |field_type| match field_type {
882885
FieldType::AutoSpec | FieldType::IndexSpec(_) => {
883886
Err(vm.new_value_error("Format string contains positional fields".to_owned()))
884887
}
@@ -887,16 +890,20 @@ impl FormatString {
887890
}
888891
}
889892

890-
fn call_object_format(vm: &VirtualMachine, argument: PyObjectRef, format_spec: &str) -> PyResult {
891-
let (preconversor, new_format_spec) = FormatPreconversor::parse_and_consume(format_spec);
892-
let argument = match preconversor {
893+
fn call_object_format(
894+
vm: &VirtualMachine,
895+
argument: PyObjectRef,
896+
preconversion_spec: Option<char>,
897+
format_spec: &str,
898+
) -> PyResult {
899+
let argument = match preconversion_spec.and_then(|c| FormatPreconversor::from_char(c)) {
893900
Some(FormatPreconversor::Str) => vm.call_method(&argument, "__str__", vec![])?,
894901
Some(FormatPreconversor::Repr) => vm.call_method(&argument, "__repr__", vec![])?,
895902
Some(FormatPreconversor::Ascii) => vm.call_method(&argument, "__repr__", vec![])?,
896903
Some(FormatPreconversor::Bytes) => vm.call_method(&argument, "decode", vec![])?,
897904
None => argument,
898905
};
899-
let returned_type = vm.ctx.new_str(new_format_spec.to_owned());
906+
let returned_type = vm.ctx.new_str(format_spec.to_owned());
900907

901908
let result = vm.call_method(&argument, "__format__", vec![returned_type])?;
902909
if !objtype::isinstance(&result, &vm.ctx.types.str_type) {
@@ -909,20 +916,20 @@ fn call_object_format(vm: &VirtualMachine, argument: PyObjectRef, format_spec: &
909916

910917
pub(crate) trait FromTemplate<'a>: Sized {
911918
type Err;
912-
fn from_str(s: &'a str, arg: &'a PyFuncArgs) -> Result<Self, Self::Err>;
919+
fn from_str(s: &'a str) -> Result<Self, Self::Err>;
913920
}
914921

915922
impl<'a> FromTemplate<'a> for FormatString {
916923
type Err = FormatParseError;
917924

918-
fn from_str(text: &'a str, args: &'a PyFuncArgs) -> Result<Self, Self::Err> {
925+
fn from_str(text: &'a str) -> Result<Self, Self::Err> {
919926
let mut cur_text: &str = text;
920927
let mut parts: Vec<FormatPart> = Vec::new();
921928
while !cur_text.is_empty() {
922929
// Try to parse both literals and bracketed format parts until we
923930
// run out of text
924931
cur_text = FormatString::parse_literal(cur_text)
925-
.or_else(|_| FormatString::parse_spec(cur_text, &args))
932+
.or_else(|_| FormatString::parse_spec(cur_text))
926933
.map(|(part, new_text)| {
927934
parts.push(part);
928935
new_text
@@ -1061,21 +1068,17 @@ mod tests {
10611068
let expected = Ok(FormatString {
10621069
format_parts: vec![
10631070
FormatPart::Literal("abcd".to_owned()),
1064-
FormatPart::Field(
1065-
FieldName {
1066-
field_type: FieldType::IndexSpec(1),
1067-
parts: Vec::new(),
1068-
},
1069-
String::new(),
1070-
),
1071+
FormatPart::Field {
1072+
field_name: "1".to_owned(),
1073+
preconversion_spec: None,
1074+
format_spec: String::new(),
1075+
},
10711076
FormatPart::Literal(":".to_owned()),
1072-
FormatPart::Field(
1073-
FieldName {
1074-
field_type: FieldType::KeywordSpec("key".to_owned()),
1075-
parts: Vec::new(),
1076-
},
1077-
String::new(),
1078-
),
1077+
FormatPart::Field {
1078+
field_name: "key".to_owned(),
1079+
preconversion_spec: None,
1080+
format_spec: String::new(),
1081+
},
10791082
],
10801083
});
10811084

@@ -1098,13 +1101,11 @@ mod tests {
10981101
let expected = Ok(FormatString {
10991102
format_parts: vec![
11001103
FormatPart::Literal("{".to_owned()),
1101-
FormatPart::Field(
1102-
FieldName {
1103-
field_type: FieldType::KeywordSpec("key".to_owned()),
1104-
parts: Vec::new(),
1105-
},
1106-
String::new(),
1107-
),
1104+
FormatPart::Field {
1105+
field_name: "key".to_owned(),
1106+
preconversion_spec: None,
1107+
format_spec: String::new(),
1108+
},
11081109
FormatPart::Literal("}ddfe".to_owned()),
11091110
],
11101111
});

0 commit comments

Comments
 (0)