From dc6960111060753cfbcf097edbc1b26431764f2a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 19 Apr 2020 04:06:49 +0900 Subject: [PATCH 1/4] Drop lifetime generic argument from PyCommonString --- vm/src/obj/objbyteinner.rs | 2 +- vm/src/obj/objstr.rs | 2 +- vm/src/obj/pystr.rs | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 1a4c1d305c..4a4bb10e4c 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -1299,7 +1299,7 @@ pub fn bytes_zfill(bytes: &[u8], width: usize) -> Vec { const ASCII_WHITESPACES: [u8; 6] = [0x20, 0x09, 0x0a, 0x0c, 0x0d, 0x0b]; -impl PyCommonString<'_, u8> for [u8] { +impl PyCommonString for [u8] { fn get_slice(&self, range: std::ops::Range) -> &Self { &self[range] } diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index a53603cc31..cb170e9732 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -1799,7 +1799,7 @@ mod tests { } } -impl PyCommonString<'_, char> for str { +impl PyCommonString for str { fn get_slice(&self, range: std::ops::Range) -> &Self { &self[range] } diff --git a/vm/src/obj/pystr.rs b/vm/src/obj/pystr.rs index ec2677854b..de5f939afa 100644 --- a/vm/src/obj/pystr.rs +++ b/vm/src/obj/pystr.rs @@ -37,10 +37,7 @@ impl StringRange for std::ops::Range { } } -pub trait PyCommonString<'a, E> -where - Self: 'a, -{ +pub trait PyCommonString { fn get_slice(&self, range: std::ops::Range) -> &Self; fn len(&self) -> usize; From 162e49f007e6b6571f5adfd2e7102e33183cb91b Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 17 Apr 2020 10:51:37 +0900 Subject: [PATCH 2/4] Generic support for FromArgs --- derive/src/from_args.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/derive/src/from_args.rs b/derive/src/from_args.rs index 99d7011886..42559fba25 100644 --- a/derive/src/from_args.rs +++ b/derive/src/from_args.rs @@ -16,14 +16,11 @@ enum ParameterKind { impl ParameterKind { fn from_ident(ident: &Ident) -> Option { - if ident == "positional_only" { - Some(ParameterKind::PositionalOnly) - } else if ident == "positional_or_keyword" { - Some(ParameterKind::PositionalOrKeyword) - } else if ident == "keyword_only" { - Some(ParameterKind::KeywordOnly) - } else { - None + match ident.to_string().as_str() { + "positional_only" => Some(ParameterKind::PositionalOnly), + "positional_or_keyword" => Some(ParameterKind::PositionalOrKeyword), + "keyword_only" => Some(ParameterKind::KeywordOnly), + _ => None, } } } @@ -150,6 +147,13 @@ fn generate_field(field: &Field) -> Result { }; let name = &field.ident; + if let Some(name) = name { + if name.to_string().starts_with("_phantom") { + return Ok(quote! { + #name: std::marker::PhantomData, + }); + } + } let middle = quote! { .map(|x| ::rustpython_vm::pyobject::TryFromObject::try_from_object(vm, x)).transpose()? }; @@ -210,8 +214,9 @@ pub fn impl_from_args(input: DeriveInput) -> Result { }; let name = input.ident; + let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); let output = quote! { - impl ::rustpython_vm::function::FromArgs for #name { + impl #impl_generics ::rustpython_vm::function::FromArgs for #name #ty_generics #where_clause { fn from_args( vm: &::rustpython_vm::VirtualMachine, args: &mut ::rustpython_vm::function::PyFuncArgs From b7a399ec4b990907e6327940eca147f27dcf0997 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 17 Apr 2020 11:49:52 +0900 Subject: [PATCH 3/4] merge SplitArgs from PyByte* and PyString --- vm/src/obj/objbyteinner.rs | 55 +++++++++++------------------------ vm/src/obj/objstr.rs | 45 +++++++++++------------------ vm/src/obj/pystr.rs | 59 ++++++++++++++++++++++++++++++++------ 3 files changed, 83 insertions(+), 76 deletions(-) diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 4a4bb10e4c..8bf0c984a0 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -14,7 +14,7 @@ use super::objnone::PyNoneRef; use super::objsequence::PySliceableSequence; use super::objslice::PySliceRef; use super::objstr::{self, PyString, PyStringRef}; -use super::pystr::{self, PyCommonString, StringRange}; +use super::pystr::{self, PyCommonString, PyCommonStringWrapper, StringRange}; use crate::function::{OptionalArg, OptionalOption}; use crate::pyhash; use crate::pyobject::{ @@ -255,28 +255,7 @@ impl ByteInnerTranslateOptions { } } -#[derive(FromArgs)] -pub struct ByteInnerSplitOptions { - #[pyarg(positional_or_keyword, default = "None")] - sep: Option, - #[pyarg(positional_or_keyword, default = "-1")] - maxsplit: isize, -} - -impl ByteInnerSplitOptions { - pub fn get_value(self, vm: &VirtualMachine) -> PyResult<(Option>, isize)> { - let sep = if let Some(s) = self.sep { - let sep = s.elements; - if sep.is_empty() { - return Err(vm.new_value_error("empty separator".to_owned())); - } - Some(sep) - } else { - None - }; - Ok((sep, self.maxsplit)) - } -} +pub type ByteInnerSplitOptions = pystr::SplitArgs; #[derive(FromArgs)] pub struct ByteInnerExpandtabsOptions { @@ -967,19 +946,13 @@ impl PyByteInner { where F: Fn(&[u8], &VirtualMachine) -> PyObjectRef, { - let (sep, maxsplit) = options.get_value(vm)?; - let sep_ref = match sep { - Some(ref v) => Some(&v[..]), - None => None, - }; let elements = self.elements.py_split( - sep_ref, - maxsplit, + options, vm, |v, s, vm| v.split_str(s).map(|v| convert(v, vm)).collect(), |v, s, n, vm| v.splitn_str(n, s).map(|v| convert(v, vm)).collect(), |v, n, vm| v.py_split_whitespace(n, |v| convert(v, vm)), - ); + )?; Ok(vm.ctx.new_list(elements)) } @@ -992,19 +965,13 @@ impl PyByteInner { where F: Fn(&[u8], &VirtualMachine) -> PyObjectRef, { - let (sep, maxsplit) = options.get_value(vm)?; - let sep_ref = match sep { - Some(ref v) => Some(&v[..]), - None => None, - }; let mut elements = self.elements.py_split( - sep_ref, - maxsplit, + options, vm, |v, s, vm| v.rsplit_str(s).map(|v| convert(v, vm)).collect(), |v, s, n, vm| v.rsplitn_str(n, s).map(|v| convert(v, vm)).collect(), |v, n, vm| v.py_rsplit_whitespace(n, |v| convert(v, vm)), - ); + )?; elements.reverse(); Ok(vm.ctx.new_list(elements)) } @@ -1297,6 +1264,12 @@ pub fn bytes_zfill(bytes: &[u8], width: usize) -> Vec { } } +impl PyCommonStringWrapper<[u8]> for PyByteInner { + fn as_ref(&self) -> &[u8] { + &self.elements + } +} + const ASCII_WHITESPACES: [u8; 6] = [0x20, 0x09, 0x0a, 0x0c, 0x0d, 0x0b]; impl PyCommonString for [u8] { @@ -1304,6 +1277,10 @@ impl PyCommonString for [u8] { &self[range] } + fn is_empty(&self) -> bool { + Self::is_empty(self) + } + fn len(&self) -> usize { Self::len(self) } diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index cb170e9732..b219c12966 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -21,7 +21,7 @@ use super::objsequence::PySliceableSequence; use super::objslice::PySliceRef; use super::objtuple; use super::objtype::{self, PyClassRef}; -use super::pystr::{adjust_indices, PyCommonString, StringRange}; +use super::pystr::{self, adjust_indices, PyCommonString, PyCommonStringWrapper, StringRange}; use crate::cformat::{ CFormatPart, CFormatPreconversor, CFormatQuantity, CFormatSpec, CFormatString, CFormatType, CNumberType, @@ -457,26 +457,24 @@ impl PyString { #[pymethod] fn split(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult { let elements = self.value.py_split( - args.non_empty_sep(vm)?, - args.maxsplit, + args, vm, |v, s, vm| v.split(s).map(|s| vm.ctx.new_str(s)).collect(), |v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s)).collect(), |v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s)), - ); + )?; Ok(vm.ctx.new_list(elements)) } #[pymethod] fn rsplit(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult { let mut elements = self.value.py_split( - args.non_empty_sep(vm)?, - args.maxsplit, + args, vm, |v, s, vm| v.rsplit(s).map(|s| vm.ctx.new_str(s)).collect(), |v, s, n, vm| v.rsplitn(n, s).map(|s| vm.ctx.new_str(s)).collect(), |v, n, vm| v.py_rsplit_whitespace(n, |s| vm.ctx.new_str(s)), - ); + )?; // Unlike Python rsplit, Rust rsplitn returns an iterator that // starts from the end of the string. elements.reverse(); @@ -1298,28 +1296,7 @@ impl TryFromObject for std::ffi::CString { } } -#[derive(FromArgs)] -struct SplitArgs { - #[pyarg(positional_or_keyword, default = "None")] - sep: Option, - #[pyarg(positional_or_keyword, default = "-1")] - maxsplit: isize, -} - -impl SplitArgs { - fn non_empty_sep<'a>(&'a self, vm: &VirtualMachine) -> PyResult> { - let sep = if let Some(s) = self.sep.as_ref() { - let sep = s.as_str(); - if sep.is_empty() { - return Err(vm.new_value_error("empty separator".to_owned())); - } - Some(sep) - } else { - None - }; - Ok(sep) - } -} +type SplitArgs = pystr::SplitArgs; pub fn init(ctx: &PyContext) { PyString::extend_class(ctx, &ctx.types.str_type); @@ -1799,11 +1776,21 @@ mod tests { } } +impl PyCommonStringWrapper for PyStringRef { + fn as_ref(&self) -> &str { + self.value.as_str() + } +} + impl PyCommonString for str { fn get_slice(&self, range: std::ops::Range) -> &Self { &self[range] } + fn is_empty(&self) -> bool { + Self::is_empty(self) + } + fn len(&self) -> usize { Self::len(self) } diff --git a/vm/src/obj/pystr.rs b/vm/src/obj/pystr.rs index de5f939afa..27c2433f32 100644 --- a/vm/src/obj/pystr.rs +++ b/vm/src/obj/pystr.rs @@ -2,6 +2,39 @@ use crate::function::{single_or_tuple_any, OptionalOption}; use crate::pyobject::{PyObjectRef, PyResult, TryFromObject, TypeProtocol}; use crate::vm::VirtualMachine; +#[derive(FromArgs)] +pub struct SplitArgs +where + T: TryFromObject + PyCommonStringWrapper, + S: ?Sized + PyCommonString, +{ + #[pyarg(positional_or_keyword, default = "None")] + sep: Option, + #[pyarg(positional_or_keyword, default = "-1")] + maxsplit: isize, + _phantom1: std::marker::PhantomData, + _phantom2: std::marker::PhantomData, +} + +impl SplitArgs +where + T: TryFromObject + PyCommonStringWrapper, + S: ?Sized + PyCommonString, +{ + pub fn get_value(self, vm: &VirtualMachine) -> PyResult<(Option, isize)> { + let sep = if let Some(s) = self.sep { + let sep = s.as_ref(); + if sep.is_empty() { + return Err(vm.new_value_error("empty separator".to_owned())); + } + Some(s) + } else { + None + }; + Ok((sep, self.maxsplit)) + } +} + // help get optional string indices pub fn adjust_indices( start: OptionalOption, @@ -37,33 +70,43 @@ impl StringRange for std::ops::Range { } } +pub trait PyCommonStringWrapper +where + S: ?Sized, +{ + fn as_ref(&self) -> &S; +} + pub trait PyCommonString { fn get_slice(&self, range: std::ops::Range) -> &Self; fn len(&self) -> usize; + fn is_empty(&self) -> bool; - fn py_split( + fn py_split( &self, - sep: Option<&Self>, - maxsplit: isize, + args: SplitArgs, vm: &VirtualMachine, split: SP, splitn: SN, splitw: SW, - ) -> Vec + ) -> PyResult> where + T: TryFromObject + PyCommonStringWrapper, SP: Fn(&Self, &Self, &VirtualMachine) -> Vec, SN: Fn(&Self, &Self, usize, &VirtualMachine) -> Vec, SW: Fn(&Self, isize, &VirtualMachine) -> Vec, { - if let Some(pattern) = sep { + let (sep, maxsplit) = args.get_value(vm)?; + let splited = if let Some(pattern) = sep { if maxsplit < 0 { - split(self, pattern, vm) + split(self, pattern.as_ref(), vm) } else { - splitn(self, pattern, (maxsplit + 1) as usize, vm) + splitn(self, pattern.as_ref(), (maxsplit + 1) as usize, vm) } } else { splitw(self, maxsplit, vm) - } + }; + Ok(splited) } fn py_split_whitespace(&self, maxsplit: isize, convert: F) -> Vec where From 14f1a4294de3a9454d3483ba88b3ac9939780f44 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 19 Apr 2020 03:53:36 +0900 Subject: [PATCH 4/4] Move SplitLinesArgs and ExpandTabsArg to pystr --- vm/src/obj/objbytearray.rs | 11 +++++------ vm/src/obj/objbyteinner.rs | 25 ++++--------------------- vm/src/obj/objbytes.rs | 10 +++++----- vm/src/obj/objstr.rs | 21 ++++----------------- vm/src/obj/pystr.rs | 19 +++++++++++++++++++ 5 files changed, 37 insertions(+), 49 deletions(-) diff --git a/vm/src/obj/objbytearray.rs b/vm/src/obj/objbytearray.rs index c79aa38f68..113bb57212 100644 --- a/vm/src/obj/objbytearray.rs +++ b/vm/src/obj/objbytearray.rs @@ -4,16 +4,15 @@ use std::convert::TryFrom; use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use super::objbyteinner::{ - ByteInnerExpandtabsOptions, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, - ByteInnerSplitOptions, ByteInnerSplitlinesOptions, ByteInnerTranslateOptions, ByteOr, - PyByteInner, + ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, + ByteInnerTranslateOptions, ByteOr, PyByteInner, }; use super::objint::PyIntRef; use super::objiter; use super::objslice::PySliceRef; use super::objstr::{PyString, PyStringRef}; use super::objtype::PyClassRef; -use super::pystr::PyCommonString; +use super::pystr::{self, PyCommonString}; use crate::cformat::CFormatString; use crate::function::{OptionalArg, OptionalOption}; use crate::obj::objstr::do_cformat_string; @@ -440,12 +439,12 @@ impl PyByteArray { } #[pymethod(name = "expandtabs")] - fn expandtabs(&self, options: ByteInnerExpandtabsOptions) -> PyByteArray { + fn expandtabs(&self, options: pystr::ExpandTabsArgs) -> PyByteArray { self.borrow_value().expandtabs(options).into() } #[pymethod(name = "splitlines")] - fn splitlines(&self, options: ByteInnerSplitlinesOptions, vm: &VirtualMachine) -> PyResult { + fn splitlines(&self, options: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyResult { let as_bytes = self .borrow_value() .splitlines(options) diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 8bf0c984a0..aa6e821098 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -257,21 +257,6 @@ impl ByteInnerTranslateOptions { pub type ByteInnerSplitOptions = pystr::SplitArgs; -#[derive(FromArgs)] -pub struct ByteInnerExpandtabsOptions { - #[pyarg(positional_or_keyword, optional = true)] - tabsize: OptionalArg, -} - -impl ByteInnerExpandtabsOptions { - pub fn get_value(self) -> usize { - match self.tabsize.into_option() { - Some(int) => int.as_bigint().to_usize().unwrap_or(0), - None => 8, - } - } -} - #[derive(FromArgs)] pub struct ByteInnerSplitlinesOptions { #[pyarg(positional_or_keyword, optional = true)] @@ -1014,8 +999,8 @@ impl PyByteInner { Ok((front, has_mid, back)) } - pub fn expandtabs(&self, options: ByteInnerExpandtabsOptions) -> Vec { - let tabsize = options.get_value(); + pub fn expandtabs(&self, options: pystr::ExpandTabsArgs) -> Vec { + let tabsize = options.tabsize(); let mut counter: usize = 0; let mut res = vec![]; @@ -1046,9 +1031,7 @@ impl PyByteInner { res } - pub fn splitlines(&self, options: ByteInnerSplitlinesOptions) -> Vec<&[u8]> { - let keepends = options.get_value(); - + pub fn splitlines(&self, options: pystr::SplitLinesArgs) -> Vec<&[u8]> { let mut res = vec![]; if self.elements.is_empty() { @@ -1057,7 +1040,7 @@ impl PyByteInner { let mut prev_index = 0; let mut index = 0; - let keep = if keepends { 1 } else { 0 }; + let keep = if options.keepends { 1 } else { 0 }; let slice = &self.elements; while index < slice.len() { diff --git a/vm/src/obj/objbytes.rs b/vm/src/obj/objbytes.rs index ffd18f5455..c6a4c02dd6 100644 --- a/vm/src/obj/objbytes.rs +++ b/vm/src/obj/objbytes.rs @@ -4,15 +4,15 @@ use std::ops::Deref; use std::str::FromStr; use super::objbyteinner::{ - ByteInnerExpandtabsOptions, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, - ByteInnerSplitOptions, ByteInnerSplitlinesOptions, ByteInnerTranslateOptions, PyByteInner, + ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, + ByteInnerTranslateOptions, PyByteInner, }; use super::objint::PyIntRef; use super::objiter; use super::objslice::PySliceRef; use super::objstr::{PyString, PyStringRef}; use super::objtype::PyClassRef; -use super::pystr::PyCommonString; +use super::pystr::{self, PyCommonString}; use crate::cformat::CFormatString; use crate::function::{OptionalArg, OptionalOption}; use crate::obj::objstr::do_cformat_string; @@ -401,12 +401,12 @@ impl PyBytes { } #[pymethod(name = "expandtabs")] - fn expandtabs(&self, options: ByteInnerExpandtabsOptions) -> PyBytes { + fn expandtabs(&self, options: pystr::ExpandTabsArgs) -> PyBytes { self.inner.expandtabs(options).into() } #[pymethod(name = "splitlines")] - fn splitlines(&self, options: ByteInnerSplitlinesOptions, vm: &VirtualMachine) -> PyResult { + fn splitlines(&self, options: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyResult { let as_bytes = self .inner .splitlines(options) diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index b219c12966..6b1f57120a 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -168,18 +168,6 @@ struct StrArgs { errors: OptionalArg, } -#[derive(FromArgs)] -struct SplitLineArgs { - #[pyarg(positional_or_keyword, optional = true)] - keepends: OptionalArg, -} - -#[derive(FromArgs)] -struct ExpandtabsArgs { - #[pyarg(positional_or_keyword, optional = true)] - tabsize: OptionalArg, -} - #[pyimpl(flags(BASETYPE))] impl PyString { #[pyslot] @@ -797,14 +785,13 @@ impl PyString { } #[pymethod] - fn splitlines(&self, args: SplitLineArgs, vm: &VirtualMachine) -> PyObjectRef { - let keepends = args.keepends.unwrap_or(false); + fn splitlines(&self, args: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyObjectRef { let mut elements = vec![]; let mut curr = "".to_owned(); let mut chars = self.value.chars().peekable(); while let Some(ch) = chars.next() { if ch == '\n' || ch == '\r' { - if keepends { + if args.keepends { curr.push(ch); } if ch == '\r' && chars.peek() == Some(&'\n') { @@ -1080,8 +1067,8 @@ impl PyString { } #[pymethod] - fn expandtabs(&self, args: ExpandtabsArgs) -> String { - let tab_stop = args.tabsize.unwrap_or(8); + fn expandtabs(&self, args: pystr::ExpandTabsArgs) -> String { + let tab_stop = args.tabsize(); let mut expanded_str = String::with_capacity(self.value.len()); let mut tab_size = tab_stop; let mut col_count = 0 as usize; diff --git a/vm/src/obj/pystr.rs b/vm/src/obj/pystr.rs index 27c2433f32..b78d3dbc5e 100644 --- a/vm/src/obj/pystr.rs +++ b/vm/src/obj/pystr.rs @@ -1,6 +1,7 @@ use crate::function::{single_or_tuple_any, OptionalOption}; use crate::pyobject::{PyObjectRef, PyResult, TryFromObject, TypeProtocol}; use crate::vm::VirtualMachine; +use num_traits::cast::ToPrimitive; #[derive(FromArgs)] pub struct SplitArgs @@ -35,6 +36,24 @@ where } } +#[derive(FromArgs)] +pub struct SplitLinesArgs { + #[pyarg(positional_or_keyword, default = "false")] + pub keepends: bool, +} + +#[derive(FromArgs)] +pub struct ExpandTabsArgs { + #[pyarg(positional_or_keyword, default = "8")] + tabsize: isize, +} + +impl ExpandTabsArgs { + pub fn tabsize(&self) -> usize { + self.tabsize.to_usize().unwrap_or(0) + } +} + // help get optional string indices pub fn adjust_indices( start: OptionalOption,