From 4dccbfb3a634d1e970f59a9b3e20efe43b0e970b Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Mon, 18 Oct 2021 14:53:32 +0200 Subject: [PATCH 01/20] Fix buffer protocol and memoryview --- stdlib/src/array.rs | 13 +- vm/src/builtins/bytearray.rs | 47 +++-- vm/src/builtins/bytes.rs | 29 ++- vm/src/builtins/memory.rs | 386 ++++++++++++++--------------------- vm/src/cformat.rs | 14 +- vm/src/protocol/buffer.rs | 140 +++++++++---- vm/src/protocol/mod.rs | 2 +- vm/src/stdlib/io.rs | 69 ++++--- vm/src/stdlib/os.rs | 6 +- 9 files changed, 345 insertions(+), 361 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 8bda4fd6f9..ee248c1bb3 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -1234,24 +1234,21 @@ mod array { } } - #[derive(Debug)] - struct PyArrayBufferInternal(PyRef); - - impl BufferInternal for PyArrayBufferInternal { + impl BufferInternal for PyArray { fn obj_bytes(&self) -> BorrowedValue<[u8]> { - self.0.get_bytes().into() + self.get_bytes().into() } fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - self.0.get_bytes_mut().into() + self.get_bytes_mut().into() } fn release(&self) { - self.0.exports.fetch_sub(1); + self.exports.fetch_sub(1); } fn retain(&self) { - self.0.exports.fetch_add(1); + self.exports.fetch_add(1); } } diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 5678d6db66..007ecf4f7f 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -9,6 +9,7 @@ use crate::common::{ PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutex, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, }, + static_cell, }; use crate::{ anystr::{self, AnyStr}, @@ -20,7 +21,7 @@ use crate::{ }, function::{ArgBytesLike, ArgIterable, FuncArgs, IntoPyObject, OptionalArg, OptionalOption}, protocol::{ - BufferInternal, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, PyMappingMethods, + BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, PyMappingMethods, }, sliceable::{PySliceableSequence, PySliceableSequenceMut, SequenceIndex}, types::{ @@ -33,6 +34,7 @@ use crate::{ }; use bstr::ByteSlice; use crossbeam_utils::atomic::AtomicCell; +use rustpython_common::atomic::Radium; use std::mem::size_of; /// "bytearray(iterable_of_ints) -> bytearray\n\ @@ -709,38 +711,39 @@ impl Comparable for PyByteArray { } impl AsBuffer for PyByteArray { - fn as_buffer(zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyResult { + fn as_buffer(zelf: &PyRef, _vm: &VirtualMachine) -> PyResult { + static_cell! 
{ + static METHODS: BufferMethods; + } + let methods = METHODS.get_or_init(|| BufferMethods { + obj_bytes: |zelf| zelf.payload::().unwrap().borrow_buf().into(), + obj_bytes_mut: |zelf| { + let zelf = zelf.payload::().unwrap(); + PyRwLockWriteGuard::map(zelf.inner_mut(), |x| &mut *x.elements).into() + }, + release: Some(|zelf| { + zelf.payload::().unwrap().exports.fetch_sub(1); + }), + retain: Some(|zelf| { + zelf.payload::().unwrap().exports.fetch_add(1); + }), + contiguous: None, + contiguous_mut: None, + collect_bytes: None, + }); let buffer = PyBuffer::new( - zelf.as_object().to_owned(), - zelf.to_owned(), + zelf.as_object().clone(), BufferOptions { readonly: false, len: zelf.len(), ..Default::default() }, + methods, ); Ok(buffer) } } -impl BufferInternal for PyRef { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - self.borrow_buf().into() - } - - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - PyRwLockWriteGuard::map(self.inner_mut(), |inner| &mut *inner.elements).into() - } - - fn release(&self) { - self.exports.fetch_sub(1); - } - - fn retain(&self) { - self.exports.fetch_add(1); - } -} - impl<'a> BufferResizeGuard<'a> for PyByteArray { type Resizable = PyRwLockWriteGuard<'a, PyBytesInner>; diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 3f4a45762b..72ce97d7a2 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -10,7 +10,7 @@ use crate::{ function::{ ArgBytesLike, ArgIterable, IntoPyObject, IntoPyResult, OptionalArg, OptionalOption, }, - protocol::{BufferInternal, BufferOptions, PyBuffer, PyIterReturn, PyMappingMethods}, + protocol::{BufferMethods, BufferOptions, PyBuffer, PyIterReturn, PyMappingMethods}, types::{ AsBuffer, AsMapping, Callable, Comparable, Constructor, Hashable, IterNext, IterNextIterable, Iterable, PyComparisonOp, Unconstructible, @@ -541,33 +541,30 @@ impl PyBytes { } } +static BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |zelf| zelf.payload::().unwrap().as_bytes().into(), + obj_bytes_mut: |_| panic!(), + contiguous: None, + contiguous_mut: None, + collect_bytes: None, + release: None, + retain: None, +}; + impl AsBuffer for PyBytes { fn as_buffer(zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyResult { let buf = PyBuffer::new( - zelf.as_object().to_owned(), - zelf.to_owned(), + zelf.as_object().clone(), BufferOptions { len: zelf.len(), ..Default::default() }, + &BUFFER_METHODS, ); Ok(buf) } } -impl BufferInternal for PyRef { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - self.as_bytes().into() - } - - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - unreachable!("bytes is not mutable") - } - - fn release(&self) {} - fn retain(&self) {} -} - impl AsMapping for PyBytes { fn as_mapping(_zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { PyMappingMethods { diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index f72a5ee88e..a7e8018f93 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -3,12 +3,11 @@ use crate::common::{ borrow::{BorrowedValue, BorrowedValueMut}, hash::PyHash, lock::OnceCell, - rc::PyRc, }; use crate::{ bytesinner::bytes_to_hex, function::{FuncArgs, IntoPyObject, OptionalArg}, - protocol::{BufferInternal, BufferOptions, PyBuffer, PyMappingMethods}, + protocol::{BufferMethods, BufferOptions, PyBuffer, PyMappingMethods}, sliceable::{wrap_index, SaturatedSlice, SequenceIndex}, stdlib::pystruct::FormatSpec, types::{AsBuffer, AsMapping, Comparable, Constructor, Hashable, PyComparisonOp}, @@ -20,7 +19,6 @@ use 
crossbeam_utils::atomic::AtomicCell;
 use itertools::Itertools;
 use num_traits::ToPrimitive;
 use std::fmt::Debug;
-use std::ops::Deref;
 
 #[derive(FromArgs)]
 pub struct PyMemoryViewNewArgs {
@@ -31,6 +29,8 @@ pub struct PyMemoryViewNewArgs {
 #[derive(Debug)]
 pub struct PyMemoryView {
     buffer: PyBuffer,
+    // a released memoryview does not mean the buffer is destroyed,
+    // because another memoryview may still be viewing it
     released: AtomicCell<bool>,
     // start should always less or equal to the stop
     // start and stop pointing to the memory index not slice index
@@ -38,9 +38,14 @@ pub struct PyMemoryView {
     start: usize,
     stop: usize,
     // step can be negative, means read the memory from stop-1 to start
+    // the memoryview is not contiguous if the buffer is not contiguous
+    // or the step is not 1 or -1
     step: isize,
     format_spec: FormatSpec,
     hash: OnceCell<PyHash>,
+    // exports
+    // memoryview has no exports count by itself;
+    // instead it relies on the buffer it is viewing to maintain the count
 }
 
 impl Constructor for PyMemoryView {
@@ -58,14 +63,13 @@ impl PyMemoryView {
     #[cfg(debug_assertions)]
     fn validate(self) -> Self {
         let options = &self.buffer.options;
-        let bytes_len = options.len * options.itemsize;
-        let buffer_len = self.buffer.internal.obj_bytes().len();
-        let t1 = self.stop - self.start == bytes_len;
-        let t2 = buffer_len >= self.stop;
-        let t3 = buffer_len >= self.start + bytes_len;
-        assert!(t1);
-        assert!(t2);
-        assert!(t3);
+        let bytes_len = options.len * options.itemsize * self.step.abs() as usize;
+        let buffer_len = self.buffer._obj_bytes().len();
+        assert!(self.start <= self.stop);
+        assert!(self.step != 0);
+        assert!(self.start + bytes_len <= buffer_len);
+        assert!(self.stop <= buffer_len);
+        assert!(self.stop - self.start == bytes_len);
         self
     }
     #[cfg(not(debug_assertions))]
@@ -116,45 +120,20 @@ impl PyMemoryView {
             .validate())
     }
 
-    fn as_contiguous(&self) -> Option<BorrowedValue<[u8]>> {
-        if !self.buffer.options.contiguous {
-            return None;
-        }
-        Some(self.obj_bytes())
-    }
-
-    fn as_contiguous_mut(&self) -> Option<BorrowedValueMut<[u8]>> {
-        if !self.buffer.options.contiguous {
-            return None;
-        }
-        Some(self.obj_bytes_mut())
-    }
-
     pub fn try_bytes<F, R>(&self, vm: &VirtualMachine, f: F) -> PyResult<R>
     where
         F: FnOnce(&[u8]) -> R,
     {
         self.try_not_released(vm)?;
-        self.as_contiguous().map(|x| f(&*x)).ok_or_else(|| {
+        self.buffer.as_contiguous().map(|x| f(&*x)).ok_or_else(|| {
             vm.new_type_error("non-contiguous memoryview is not a bytes-like object".to_owned())
         })
     }
 
     #[pymethod]
     fn release(&self) {
-        self._release();
-    }
-
-    fn _release(&self) {
-        // avoid double release
         if self.released.compare_exchange(false, true).is_ok() {
-            unsafe {
-                // SAFETY: this branch is only once accessible form _release and guarded by AtomicCell released
-                let buffer: &std::cell::UnsafeCell<PyBuffer> = std::mem::transmute(&self.buffer);
-                let buffer = &mut *buffer.get();
-                let internal = std::mem::replace(&mut buffer.internal, PyRc::new(Released));
-                internal.release();
-            }
+            self.buffer._release();
         }
     }
 
@@ -220,27 +199,30 @@ impl PyMemoryView {
     #[pymethod(magic)]
     fn exit(&self, _args: FuncArgs) {
-        self._release();
+        self.release();
     }
 
     // translate the slice index to memory index
     fn get_pos(&self, i: isize) -> Option<usize> {
         let len = self.buffer.options.len;
-        let itemsize = self.buffer.options.itemsize;
         let i = wrap_index(i, len)?;
-        let i = if self.step < 0 {
-            (len - 1 + (self.step as usize) * i) * itemsize
+        Some(self.get_pos_no_wrap(i))
+    }
+
+    fn get_pos_no_wrap(&self, i: usize) -> usize {
+        let itemsize = self.buffer.options.itemsize;
+        if self.step < 0 {
+            self.stop - 
itemsize - (-self.step as usize) * i * itemsize } else { - self.step as usize * i * itemsize - }; - Some(i) + self.start + self.step as usize * i * itemsize + } } fn getitem_by_idx(zelf: PyRef, i: isize, vm: &VirtualMachine) -> PyResult { let i = zelf .get_pos(i) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; - let bytes = &zelf.obj_bytes()[i..i + zelf.buffer.options.itemsize]; + let bytes = &zelf.buffer._obj_bytes()[i..i + zelf.buffer.options.itemsize]; zelf.format_spec.unpack(bytes, vm).map(|x| { if x.len() == 1 { x.fast_getitem(0) @@ -362,7 +344,7 @@ impl PyMemoryView { .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; let itemsize = zelf.buffer.options.itemsize; let data = zelf.format_spec.pack(vec![value], vm)?; - zelf.obj_bytes_mut()[i..i + itemsize].copy_from_slice(&data); + zelf.buffer._obj_bytes_mut()[i..i + itemsize].copy_from_slice(&data); Ok(()) } @@ -397,65 +379,66 @@ impl PyMemoryView { let (range, step, is_negative_step) = SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(zelf.buffer.options.len); - let bytes = items.to_contiguous(); - assert_eq!(bytes.len(), len * itemsize); + items.contiguous_or_collect(|bytes| { + assert_eq!(bytes.len(), len * itemsize); - if !is_negative_step && step == Some(1) { - if range.end - range.start != len { - return diff_err(); - } + if !is_negative_step && step == Some(1) { + if range.end - range.start != len { + return diff_err(); + } - if let Some(mut buffer) = zelf.as_contiguous_mut() { - buffer[range.start * itemsize..range.end * itemsize].copy_from_slice(&bytes); - return Ok(()); + if let Some(mut buffer) = zelf.buffer.as_contiguous_mut() { + buffer[range.start * itemsize..range.end * itemsize].copy_from_slice(&bytes); + return Ok(()); + } } - } - if let Some(step) = step { - let slicelen = if range.end > range.start { - (range.end - range.start - 1) / step + 1 - } else { - 0 - }; + if let Some(step) = step { + let slicelen = if range.end > range.start { + (range.end - range.start - 1) / step + 1 + } else { + 0 + }; - if slicelen != len { - return diff_err(); - } + if slicelen != len { + return diff_err(); + } - let indexes = if is_negative_step { - itertools::Either::Left(range.rev().step_by(step)) - } else { - itertools::Either::Right(range.step_by(step)) - }; + let indexes = if is_negative_step { + itertools::Either::Left(range.rev().step_by(step)) + } else { + itertools::Either::Right(range.step_by(step)) + }; - let item_index = (0..len).step_by(itemsize); + let item_index = (0..len).step_by(itemsize); - let mut buffer = zelf.obj_bytes_mut(); + let mut buffer = zelf.obj_bytes_mut(); - indexes - .map(|i| zelf.get_pos(i as isize).unwrap()) - .zip(item_index) - .for_each(|(i, item_i)| { - buffer[i..i + itemsize].copy_from_slice(&bytes[item_i..item_i + itemsize]); - }); - Ok(()) - } else { - let slicelen = if range.start < range.end { 1 } else { 0 }; - if match len { - 0 => slicelen == 0, - 1 => { - let mut buffer = zelf.obj_bytes_mut(); - let i = zelf.get_pos(range.start as isize).unwrap(); - buffer[i..i + itemsize].copy_from_slice(&bytes); - true - } - _ => false, - } { + indexes + .map(|i| zelf.get_pos(i as isize).unwrap()) + .zip(item_index) + .for_each(|(i, item_i)| { + buffer[i..i + itemsize].copy_from_slice(&bytes[item_i..item_i + itemsize]); + }); Ok(()) } else { - diff_err() + let slicelen = if range.start < range.end { 1 } else { 0 }; + if match len { + 0 => slicelen == 0, + 1 => { + let mut buffer = zelf.obj_bytes_mut(); + let i = zelf.get_pos(range.start as isize).unwrap(); + 
buffer[i..i + itemsize].copy_from_slice(&bytes); + true + } + _ => false, + } { + Ok(()) + } else { + diff_err() + } } - } + }) } #[pymethod(magic)] @@ -480,39 +463,24 @@ impl PyMemoryView { self.try_not_released(vm).map(|_| self.buffer.options.len) } - fn to_bytes_vec(zelf: &crate::PyObjectView) -> Vec { - if let Some(bytes) = zelf.as_contiguous() { - bytes.to_vec() - } else { - let bytes = &*zelf.obj_bytes(); - let len = zelf.buffer.options.len; - let itemsize = zelf.buffer.options.itemsize; - (0..len) - .map(|i| zelf.get_pos(i as isize).unwrap()) - .flat_map(|i| bytes[i..i + itemsize].to_vec()) - .collect() - } - } - #[pymethod] - fn tobytes(zelf: PyRef, vm: &VirtualMachine) -> PyResult { - zelf.try_not_released(vm)?; - Ok(PyBytes::from(Self::to_bytes_vec(&zelf)).into_ref(vm)) + fn tobytes(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm)?; + let mut v = vec![]; + self.buffer.collect_bytes(&mut v); + Ok(PyBytes::from(v).into_ref(vm)) } #[pymethod] - fn tolist(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { - zelf.try_not_released(vm)?; - - let bytes = &*zelf.obj_bytes(); - let elements: Vec = (0..zelf.buffer.options.len) - .map(|i| zelf.get_pos(i as isize).unwrap()) + fn tolist(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm)?; + let len = self.buffer.options.len; + let itemsize = self.buffer.options.itemsize; + let bytes = &*self.buffer._obj_bytes(); + let elements: Vec = (0..len) .map(|i| { - format_unpack( - &zelf.format_spec, - &bytes[i..i + zelf.buffer.options.itemsize], - vm, - ) + let i = self.get_pos_no_wrap(i); + format_unpack(&self.format_spec, &bytes[i..i + itemsize], vm) }) .try_collect()?; @@ -548,33 +516,21 @@ impl PyMemoryView { #[pymethod] fn hex( - zelf: PyRef, + &self, sep: OptionalArg>, bytes_per_sep: OptionalArg, vm: &VirtualMachine, ) -> PyResult { - zelf.try_not_released(vm)?; - let guard; - let vec; - let bytes = match zelf.as_contiguous() { - Some(bytes) => { - guard = bytes; - &*guard - } - None => { - vec = Self::to_bytes_vec(&zelf); - vec.as_slice() - } - }; - - bytes_to_hex(bytes, sep, bytes_per_sep, vm) + self.try_not_released(vm)?; + self.buffer + .contiguous_or_collect(|x| bytes_to_hex(x, sep, bytes_per_sep, vm)) } // TODO: support cast shape #[pymethod] - fn cast(zelf: PyRef, format: PyStrRef, vm: &VirtualMachine) -> PyResult> { - zelf.try_not_released(vm)?; - if !zelf.buffer.options.contiguous { + fn cast(&self, format: PyStrRef, vm: &VirtualMachine) -> PyResult> { + self.try_not_released(vm)?; + if !self.buffer.options.contiguous { return Err(vm.new_type_error( "memoryview: casts are restricted to C-contiguous views".to_owned(), )); @@ -582,7 +538,7 @@ impl PyMemoryView { let format_spec = Self::parse_format(format.as_str(), vm)?; let itemsize = format_spec.size(); - let bytelen = zelf.buffer.options.len * zelf.buffer.options.itemsize; + let bytelen = self.buffer.options.len * self.buffer.options.itemsize; if bytelen % itemsize != 0 { return Err( @@ -590,11 +546,11 @@ impl PyMemoryView { ); } - let buffer = zelf.buffer.clone_with_options(BufferOptions { + let buffer = self.buffer.clone_with_options(BufferOptions { itemsize, len: bytelen / itemsize, format: format.to_string().into(), - ..zelf.buffer.options.clone() + ..self.buffer.options.clone() }); Ok(PyMemoryView { buffer, @@ -634,39 +590,18 @@ impl PyMemoryView { return Ok(false); } - let a_guard; - let a_vec; - let a = match zelf.as_contiguous() { - Some(bytes) => { - a_guard = bytes; - &*a_guard - } - None => { - a_vec = Self::to_bytes_vec(zelf); - 
a_vec.as_slice() - } - }; - let b_guard; - let b_vec; - let b = match other.as_contiguous() { - Some(bytes) => { - b_guard = bytes; - &*b_guard - } - None => { - b_vec = other.to_contiguous(); - b_vec.as_slice() - } - }; - - if a_options.format == b_options.format { - Ok(a == b) - } else { - let a_list = unpack_bytes_seq_to_list(a, &a_options.format, vm)?; - let b_list = unpack_bytes_seq_to_list(b, &b_options.format, vm)?; + zelf.buffer.contiguous_or_collect(|a| { + other.contiguous_or_collect(|b| { + if a_options.format == b_options.format { + Ok(a == b) + } else { + let a_list = unpack_bytes_seq_to_list(a, &a_options.format, vm)?; + let b_list = unpack_bytes_seq_to_list(b, &b_options.format, vm)?; - Ok(vm.bool_eq(a_list.as_object(), b_list.as_object())?) - } + Ok(vm.bool_eq(a_list.as_object(), b_list.as_object())?) + } + }) + }) } #[pymethod(magic)] @@ -678,67 +613,58 @@ impl PyMemoryView { fn reduce(_zelf: PyRef, vm: &VirtualMachine) -> PyResult { Err(vm.new_type_error("cannot pickle 'memoryview' object".to_owned())) } + + fn obj_bytes(&self) -> BorrowedValue<[u8]> { + self.buffer._obj_bytes() + } + fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + self.buffer._obj_bytes_mut() + } + fn contiguous(&self) -> BorrowedValue<[u8]> { + BorrowedValue::map(self.buffer._contiguous(), |x| &x[self.start..self.stop]) + } + fn contiguous_mut(&self) -> BorrowedValueMut<[u8]> { + BorrowedValueMut::map(self.buffer._contiguous_mut(), |x| { + &mut x[self.start..self.stop] + }) + } + fn collect_bytes(&self, buf: &mut Vec) { + let bytes = &*self.buffer._obj_bytes(); + let len = self.buffer.options.len; + let itemsize = self.buffer.options.itemsize; + buf.reserve(len * itemsize); + for i in 0..len { + let i = self.get_pos_no_wrap(i); + buf.extend_from_slice(&bytes[i..i + itemsize]); + } + } } +static BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |zelf| zelf.payload::().unwrap().obj_bytes(), + obj_bytes_mut: |zelf| zelf.payload::().unwrap().obj_bytes_mut(), + contiguous: Some(|zelf| zelf.payload::().unwrap().contiguous()), + contiguous_mut: Some(|zelf| zelf.payload::().unwrap().contiguous_mut()), + collect_bytes: Some(|zelf, buf| zelf.payload::().unwrap().collect_bytes(buf)), + release: Some(|zelf| zelf.payload::().unwrap().buffer._release()), + retain: Some(|zelf| zelf.payload::().unwrap().buffer._retain()), +}; + impl AsBuffer for PyMemoryView { fn as_buffer(zelf: &crate::PyObjectView, vm: &VirtualMachine) -> PyResult { if zelf.released.load() { Err(vm.new_value_error("operation forbidden on released memoryview object".to_owned())) } else { Ok(PyBuffer::new( - zelf.as_object().to_owned(), - zelf.to_owned(), + zelf.as_object().clone(), zelf.buffer.options.clone(), + &BUFFER_METHODS, )) } } } -#[derive(Debug)] struct Released; -impl BufferInternal for Released { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - panic!(); - } - - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - panic!(); - } - - fn release(&self) {} - - fn retain(&self) {} -} - -impl BufferInternal for PyMemoryView { - // NOTE: This impl maybe is anti-pattern. Only used for internal usage. 
- fn obj_bytes(&self) -> BorrowedValue<[u8]> { - BorrowedValue::map(self.buffer.internal.obj_bytes(), |x| { - &x[self.start..self.stop] - }) - } - - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - BorrowedValueMut::map(self.buffer.internal.obj_bytes_mut(), |x| { - &mut x[self.start..self.stop] - }) - } - - fn release(&self) {} - - fn retain(&self) {} -} - -impl BufferInternal for PyRef { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - self.deref().obj_bytes() - } - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - self.deref().obj_bytes_mut() - } - fn release(&self) {} - fn retain(&self) {} -} impl AsMapping for PyMemoryView { fn as_mapping(_zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { @@ -807,19 +733,9 @@ impl Hashable for PyMemoryView { vm.new_value_error("cannot hash writable memoryview object".to_owned()) ); } - let guard; - let vec; - let bytes = match zelf.as_contiguous() { - Some(bytes) => { - guard = bytes; - &*guard - } - None => { - vec = Self::to_bytes_vec(zelf); - vec.as_slice() - } - }; - Ok(vm.state.hash_secret.hash_bytes(bytes)) + Ok(zelf + .buffer + .contiguous_or_collect(|bytes| vm.state.hash_secret.hash_bytes(bytes))) }) .map(|&x| x) } diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index c166b1fd81..01f67bcc69 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -368,19 +368,7 @@ impl CFormatSpec { } CFormatPreconversor::Str | CFormatPreconversor::Bytes => { if let Ok(buffer) = PyBuffer::try_from_borrowed_object(vm, &obj) { - let guard; - let vec; - let bytes = match buffer.as_contiguous() { - Some(bytes) => { - guard = bytes; - &*guard - } - None => { - vec = buffer.to_contiguous(); - vec.as_slice() - } - }; - Ok(self.format_bytes(bytes)) + Ok(buffer.contiguous_or_collect(|bytes| self.format_bytes(bytes))) } else { let bytes = vm .get_special_method(obj, "__bytes__")? diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index 1f970ff096..b73a8fa5e0 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -6,66 +6,132 @@ use crate::PyThreadingConstraint; use crate::{PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, TypeProtocol, VirtualMachine}; use std::{borrow::Cow, fmt::Debug}; -pub trait BufferInternal: Debug + PyThreadingConstraint { - /// Get the full inner buffer of this memory. You probably want `as_contiguous()`, as - /// `obj_bytes` doesn't take into account the range a memoryview might operate on, among other - /// footguns. - fn obj_bytes(&self) -> BorrowedValue<[u8]>; - /// Get the full inner buffer of this memory, mutably. You probably want - /// `as_contiguous_mut()`, as `obj_bytes` doesn't take into account the range a memoryview - /// might operate on, among other footguns. 
-    fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]>;
-    fn release(&self);
-    // not included in PyBuffer protocol itself
-    fn retain(&self);
+pub struct BufferMethods {
+    // always reflects the whole bytes of the topmost object
+    obj_bytes: fn(&PyObjectRef) -> BorrowedValue<[u8]>,
+    // always reflects the whole bytes of the topmost object
+    obj_bytes_mut: fn(&PyObjectRef) -> BorrowedValueMut<[u8]>,
+    // GUARANTEE: called only if the buffer option is contiguous
+    contiguous: Option<fn(&PyObjectRef) -> BorrowedValue<[u8]>>,
+    // GUARANTEE: called only if the buffer option is contiguous
+    contiguous_mut: Option<fn(&PyObjectRef) -> BorrowedValueMut<[u8]>>,
+    // collect bytes into buf when the buffer is not contiguous
+    collect_bytes: Option<fn(&PyObjectRef, &mut Vec<u8>)>,
+    release: Option<fn(&PyObjectRef)>,
+    retain: Option<fn(&PyObjectRef)>,
+}
+
+impl Debug for BufferMethods {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BufferMethods")
+            .field("obj_bytes", &(self.obj_bytes as usize))
+            .field("obj_bytes_mut", &(self.obj_bytes_mut as usize))
+            .field("contiguous", &self.contiguous.map(|x| x as usize))
+            .field("contiguous_mut", &self.contiguous_mut.map(|x| x as usize))
+            .field("collect_bytes", &self.collect_bytes.map(|x| x as usize))
+            .field("release", &self.release.map(|x| x as usize))
+            .field("retain", &self.retain.map(|x| x as usize))
+            .finish()
+    }
 }
 
 #[derive(Debug)]
 pub struct PyBuffer {
     pub obj: PyObjectRef,
     pub options: BufferOptions,
-    pub(crate) internal: PyRc<dyn BufferInternal>,
+    pub(crate) methods: &'static BufferMethods,
 }
 
 impl PyBuffer {
-    pub fn new(
-        obj: PyObjectRef,
-        buffer: impl BufferInternal + 'static,
-        options: BufferOptions,
-    ) -> Self {
-        buffer.retain();
-        Self {
+    pub fn new(obj: PyObjectRef, options: BufferOptions, methods: &'static BufferMethods) -> Self {
+        let zelf = Self {
             obj,
             options,
-            internal: PyRc::new(buffer),
-        }
+            methods,
+        };
+        zelf._retain();
+        zelf
     }
 
     pub fn as_contiguous(&self) -> Option<BorrowedValue<[u8]>> {
-        if !self.options.contiguous {
-            return None;
-        }
-        Some(self.internal.obj_bytes())
+        self.options.contiguous.then(|| self._contiguous())
     }
 
     pub fn as_contiguous_mut(&self) -> Option<BorrowedValueMut<[u8]>> {
-        if !self.options.contiguous {
-            return None;
+        self.options.contiguous.then(|| self._contiguous_mut())
+    }
+
+    pub fn collect_bytes(&self, buf: &mut Vec<u8>) {
+        if self.options.contiguous {
+            buf.extend_from_slice(&self._contiguous());
+        } else {
+            self._collect_bytes(buf);
         }
-        Some(self.internal.obj_bytes_mut())
     }
 
-    pub fn to_contiguous(&self) -> Vec<u8> {
-        self.internal.obj_bytes().to_vec()
+    pub fn contiguous_or_collect<R, F: FnOnce(&[u8]) -> R>(&self, f: F) -> R {
+        let borrowed;
+        let collected;
+        let v = if self.options.contiguous {
+            borrowed = self._contiguous();
+            &*borrowed
+        } else {
+            collected = vec![];
+            self._collect_bytes(&mut collected);
+            &collected
+        };
+        f(v)
     }
 
     pub fn clone_with_options(&self, options: BufferOptions) -> Self {
-        self.internal.retain();
-        Self {
-            obj: self.obj.clone(),
-            options,
-            internal: self.internal.clone(),
+        Self::new(self.obj.clone(), options, self.methods)
+    }
+
+    pub fn move_with_options(self, options: BufferOptions) -> Self {
+        Self { options, ..self }
+    }
+
+    // SAFETY: should only be called if the buffer is contiguous
+    pub(crate) fn _contiguous(&self) -> BorrowedValue<[u8]> {
+        self.methods
+            .contiguous
+            .map(|f| f(&self.obj))
+            .unwrap_or_else(|| (self.methods.obj_bytes)(&self.obj))
+    }
+
+    // SAFETY: should only be called if the buffer is contiguous
+    pub(crate) fn _contiguous_mut(&self) -> BorrowedValueMut<[u8]> {
+        self.methods
+            .contiguous_mut
+            .map(|f| f(&self.obj))
+            .unwrap_or_else(|| 
(self.methods.obj_bytes_mut)(&self.obj)) + } + + pub(crate) fn _obj_bytes(&self) -> BorrowedValue<[u8]> { + (self.methods.obj_bytes)(&self.obj) + } + + pub(crate) fn _obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + (self.methods.obj_bytes_mut)(&self.obj) + } + + pub(crate) fn _release(&self) { + if let Some(f) = self.methods.release { + f(&self.obj) + } + } + + pub(crate) fn _retain(&self) { + if let Some(f) = self.methods.retain { + f(&self.obj) } } + + pub(crate) fn _collect_bytes(&self, buf: &mut Vec) { + self.methods + .collect_bytes + .map(|f| f(&self.obj, buf)) + .unwrap_or_else(|| buf.extend_from_slice(&(self.methods.obj_bytes)(&self.obj))) + } } #[derive(Debug, Clone)] @@ -123,7 +189,7 @@ impl TryFromBorrowedObject for PyBuffer { // but it is not supported by Rust impl Drop for PyBuffer { fn drop(&mut self) { - self.internal.release(); + self._release(); } } diff --git a/vm/src/protocol/mod.rs b/vm/src/protocol/mod.rs index 38c6a547e5..16e25089be 100644 --- a/vm/src/protocol/mod.rs +++ b/vm/src/protocol/mod.rs @@ -3,6 +3,6 @@ mod iter; mod mapping; mod object; -pub use buffer::{BufferInternal, BufferOptions, BufferResizeGuard, PyBuffer}; +pub use buffer::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer}; pub use iter::{PyIter, PyIterIter, PyIterReturn}; pub use mapping::{PyMapping, PyMappingMethods}; diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index a191177078..35fd53cc55 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -85,7 +85,7 @@ mod _io { ArgBytesLike, ArgIterable, ArgMemoryBuffer, FuncArgs, IntoPyObject, OptionalArg, OptionalOption, }, - protocol::{BufferInternal, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn}, + protocol::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn}, types::{Constructor, Destructor, IterNext, Iterable}, utils::Either, vm::{ReprGuard, VirtualMachine}, @@ -1322,25 +1322,38 @@ mod _io { // this is a bit fancier than what CPython does, but in CPython if you store // the memoryobj for the buffer until after the BufferedIO is destroyed, you // can get a use-after-free, so this is a bit safe + #[pyclass(noattr, module = false, name = "_buffered_raw_buffer")] #[derive(Debug)] struct BufferedRawBuffer { data: PyMutex>, range: Range, } - impl BufferInternal for BufferedRawBuffer { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - BorrowedValue::map(self.data.lock().into(), |data| &data[self.range.clone()]) - } + #[pyimpl(flags(BASETYPE))] + impl BufferedRawBuffer {} - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - BorrowedValueMut::map(self.data.lock().into(), |data| { - &mut data[self.range.clone()] - }) - } + static BUFFERED_RAW_BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |zelf| { + zelf.downcast_ref::(zelf) + .unwrap() + .data + .lock() + .into() + }, + }; + // impl BufferInternal for BufferedRawBuffer { + // fn obj_bytes(&self) -> BorrowedValue<[u8]> { + // BorrowedValue::map(self.data.lock().into(), |data| &data[self.range.clone()]) + // } - fn release(&self) {} - fn retain(&self) {} - } + // fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + // BorrowedValueMut::map(self.data.lock().into(), |data| { + // &mut data[self.range.clone()] + // }) + // } + + // fn release(&self) {} + // fn retain(&self) {} + // } pub fn get_offset(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { let int = vm.to_index(&obj)?; @@ -3345,24 +3358,24 @@ mod _io { } } - impl BufferInternal for PyRef { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - 
PyRwLockReadGuard::map(self.buffer.read(), |x| x.cursor.get_ref().as_slice()).into() - } + // impl BufferInternal for PyRef { + // fn obj_bytes(&self) -> BorrowedValue<[u8]> { + // PyRwLockReadGuard::map(self.buffer.read(), |x| x.cursor.get_ref().as_slice()).into() + // } - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - PyRwLockWriteGuard::map(self.buffer.write(), |x| x.cursor.get_mut().as_mut_slice()) - .into() - } + // fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + // PyRwLockWriteGuard::map(self.buffer.write(), |x| x.cursor.get_mut().as_mut_slice()) + // .into() + // } - fn release(&self) { - self.exports.fetch_sub(1); - } + // fn release(&self) { + // self.exports.fetch_sub(1); + // } - fn retain(&self) { - self.exports.fetch_add(1); - } - } + // fn retain(&self) { + // self.exports.fetch_add(1); + // } + // } impl<'a> BufferResizeGuard<'a> for BytesIO { type Resizable = PyRwLockWriteGuard<'a, BufferedIO>; diff --git a/vm/src/stdlib/os.rs b/vm/src/stdlib/os.rs index 63bddbcf01..8db62e8094 100644 --- a/vm/src/stdlib/os.rs +++ b/vm/src/stdlib/os.rs @@ -191,7 +191,11 @@ impl TryFromObject for PyPathLike { fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { // path_converter in CPython let obj = match PyBuffer::try_from_borrowed_object(vm, &obj) { - Ok(buffer) => PyBytes::from(buffer.internal.obj_bytes().to_vec()).into_pyobject(vm), + Ok(buffer) => { + let mut bytes = vec![]; + buffer.collect_bytes(&mut bytes); + PyBytes::from(bytes).into_pyobject(vm) + } Err(_) => obj, }; let fs_path = FsPath::try_from(obj, true, vm)?; From 75d5744f4f55464b7ac09370d7648cbfbae99752 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Tue, 19 Oct 2021 16:24:24 +0200 Subject: [PATCH 02/20] Fix io and array the right way to use buffer protocol --- stdlib/src/array.rs | 37 +++++------ vm/src/builtins/bytearray.rs | 2 - vm/src/builtins/bytes.rs | 5 +- vm/src/builtins/memory.rs | 4 +- vm/src/protocol/buffer.rs | 20 +++--- vm/src/stdlib/io.rs | 116 ++++++++++++++++++++--------------- vm/src/stdlib/sre.rs | 28 ++------- 7 files changed, 98 insertions(+), 114 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index ee248c1bb3..c2193895e1 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -3,7 +3,6 @@ pub(crate) use array::make_module; #[pymodule(name = "array")] mod array { use crate::common::{ - borrow::{BorrowedValue, BorrowedValueMut}, lock::{ PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, @@ -20,7 +19,7 @@ mod array { ArgBytesLike, ArgIntoFloat, ArgIterable, IntoPyObject, IntoPyResult, OptionalArg, }, protocol::{ - BufferInternal, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, + BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, PyMappingMethods, }, sliceable::{PySliceableSequence, PySliceableSequenceMut, SaturatedSlice, SequenceIndex}, @@ -1220,8 +1219,7 @@ mod array { fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { let array = zelf.read(); let buf = PyBuffer::new( - zelf.as_object().to_owned(), - PyArrayBufferInternal(zelf.to_owned()), + zelf.as_object().clone(), BufferOptions { readonly: false, len: array.len(), @@ -1229,28 +1227,25 @@ mod array { format: array.typecode_str().into(), ..Default::default() }, + &BUFFER_METHODS, ); Ok(buf) } } - impl BufferInternal for PyArray { - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - self.get_bytes().into() - } - - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - self.get_bytes_mut().into() - } - - 
fn release(&self) { - self.exports.fetch_sub(1); - } - - fn retain(&self) { - self.exports.fetch_add(1); - } - } + static BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |zelf| zelf.payload::().unwrap().get_bytes().into(), + obj_bytes_mut: |zelf| zelf.payload::().unwrap().get_bytes_mut().into(), + contiguous: None, + contiguous_mut: None, + collect_bytes: None, + release: Some(|zelf| { + zelf.payload::().unwrap().exports.fetch_sub(1); + }), + retain: Some(|zelf| { + zelf.payload::().unwrap().exports.fetch_add(1); + }), + }; impl AsMapping for PyArray { fn as_mapping(_zelf: &PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 007ecf4f7f..29ced6e1a3 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -4,7 +4,6 @@ use super::{ PyTypeRef, }; use crate::common::{ - borrow::{BorrowedValue, BorrowedValueMut}, lock::{ PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutex, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, @@ -34,7 +33,6 @@ use crate::{ }; use bstr::ByteSlice; use crossbeam_utils::atomic::AtomicCell; -use rustpython_common::atomic::Radium; use std::mem::size_of; /// "bytearray(iterable_of_ints) -> bytearray\n\ diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 72ce97d7a2..84ee69d91c 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -20,10 +20,7 @@ use crate::{ PyValue, TryFromBorrowedObject, TypeProtocol, VirtualMachine, }; use bstr::ByteSlice; -use rustpython_common::{ - borrow::{BorrowedValue, BorrowedValueMut}, - lock::PyMutex, -}; +use rustpython_common::lock::PyMutex; use std::mem::size_of; use std::ops::Deref; diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index a7e8018f93..4cd5b6cbbc 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -131,7 +131,7 @@ impl PyMemoryView { } #[pymethod] - fn release(&self) { + pub fn release(&self) { if self.released.compare_exchange(false, true).is_ok() { self.buffer._release(); } @@ -664,8 +664,6 @@ impl AsBuffer for PyMemoryView { } } -struct Released; - impl AsMapping for PyMemoryView { fn as_mapping(_zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { PyMappingMethods { diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index b73a8fa5e0..694e9bf4a2 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -8,17 +8,17 @@ use std::{borrow::Cow, fmt::Debug}; pub struct BufferMethods { // always reflecting the whole bytes of the most top object - obj_bytes: fn(&PyObjectRef) -> BorrowedValue<[u8]>, + pub obj_bytes: fn(&PyObjectRef) -> BorrowedValue<[u8]>, // always reflecting the whole bytes of the most top object - obj_bytes_mut: fn(&PyObjectRef) -> BorrowedValueMut<[u8]>, + pub obj_bytes_mut: fn(&PyObjectRef) -> BorrowedValueMut<[u8]>, // GUARANTEE: called only if the buffer option is contiguous - contiguous: Option BorrowedValue<[u8]>>, + pub contiguous: Option BorrowedValue<[u8]>>, // GUARANTEE: called only if the buffer option is contiguous - contiguous_mut: Option BorrowedValueMut<[u8]>>, + pub contiguous_mut: Option BorrowedValueMut<[u8]>>, // collect bytes to buf when buffer option is not contiguous - collect_bytes: Option)>, - release: Option, - retain: Option, + pub collect_bytes: Option)>, + pub release: Option, + pub retain: Option, } impl Debug for BufferMethods { @@ -70,7 +70,7 @@ impl PyBuffer { pub fn contiguous_or_collect R>(&self, f: F) -> R { let borrowed; - let 
collected; + let mut collected; let v = if self.options.contiguous { borrowed = self._contiguous(); &*borrowed @@ -86,10 +86,6 @@ impl PyBuffer { Self::new(self.obj.clone(), options, self.methods) } - pub fn move_with_options(self, options: BufferOptions) -> Self { - Self { options, ..self } - } - // SAFETY: should only called if option has contiguous pub(crate) fn _contiguous(&self) -> BorrowedValue<[u8]> { self.methods diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 35fd53cc55..c5f740017d 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -74,7 +74,6 @@ mod _io { PyMappedThreadMutexGuard, PyMutex, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, PyThreadMutex, PyThreadMutexGuard, }, - rc::PyRc, }; use crate::{ builtins::{ @@ -86,10 +85,10 @@ mod _io { OptionalOption, }, protocol::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn}, - types::{Constructor, Destructor, IterNext, Iterable}, + types::{AsBuffer, Constructor, Destructor, IterNext, Iterable}, utils::Either, vm::{ReprGuard, VirtualMachine}, - IdProtocol, PyContext, PyObject, PyObjectRef, PyRef, PyResult, PyValue, StaticType, + IdProtocol, PyContext, PyObjectRef, PyObjectWrap, PyRef, PyResult, PyValue, StaticType, TryFromBorrowedObject, TryFromObject, TypeProtocol, }; use bstr::ByteSlice; @@ -882,16 +881,17 @@ mod _io { }; // TODO: see if we can encapsulate this pattern in a function in memory.rs like // fn slice_as_memory(s: &[u8], f: impl FnOnce(PyMemoryViewRef) -> R) -> R - let writebuf = PyRc::new(BufferedRawBuffer { + let writebuf = BufferedRawBuffer { data: std::mem::take(&mut self.buffer).into(), range: buf_range, - }); + } + .into_ref(vm); let raw = self.raw.as_ref().unwrap(); let memobj = PyMemoryView::from_buffer( PyBuffer { - obj: raw.clone(), + obj: writebuf.clone().into_object(), options, - internal: writebuf.clone(), + methods: &BUFFERED_RAW_BUFFER_METHODS, }, vm, )? @@ -1121,15 +1121,16 @@ mod _io { }; // TODO: see if we can encapsulate this pattern in a function in memory.rs like // fn slice_as_memory(s: &[u8], f: impl FnOnce(PyMemoryViewRef) -> R) -> R - let readbuf = PyRc::new(BufferedRawBuffer { + let readbuf = BufferedRawBuffer { data: std::mem::take(v).into(), range: buf_range, - }); + } + .into_ref(vm); let memobj = PyMemoryView::from_buffer( PyBuffer { - obj: vm.ctx.none(), + obj: readbuf.clone().into_object(), options, - internal: readbuf.clone(), + methods: &BUFFERED_RAW_BUFFER_METHODS, }, vm, )? 
@@ -1323,37 +1324,50 @@ mod _io { // the memoryobj for the buffer until after the BufferedIO is destroyed, you // can get a use-after-free, so this is a bit safe #[pyclass(noattr, module = false, name = "_buffered_raw_buffer")] - #[derive(Debug)] + #[derive(Debug, PyValue)] struct BufferedRawBuffer { data: PyMutex>, range: Range, } - #[pyimpl(flags(BASETYPE))] + #[pyimpl(with(AsBuffer))] impl BufferedRawBuffer {} static BUFFERED_RAW_BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes: |zelf| { - zelf.downcast_ref::(zelf) - .unwrap() - .data - .lock() - .into() + let zelf = zelf.downcast_ref::().unwrap(); + BorrowedValue::map(zelf.data.lock().into(), |x| x.as_slice()) }, + obj_bytes_mut: |zelf| { + let zelf = zelf.downcast_ref::().unwrap(); + BorrowedValueMut::map(zelf.data.lock().into(), |x| x.as_mut_slice()) + }, + contiguous: Some(|zelf| { + let zelf = zelf.downcast_ref::().unwrap(); + BorrowedValue::map(zelf.data.lock().into(), |x| &x[zelf.range.clone()]) + }), + contiguous_mut: Some(|zelf| { + let zelf = zelf.downcast_ref::().unwrap(); + BorrowedValueMut::map(zelf.data.lock().into(), |x| &mut x[zelf.range.clone()]) + }), + collect_bytes: None, + // TODO: export counting to make sure no exports before retrive data to buffer + release: None, + retain: None, }; - // impl BufferInternal for BufferedRawBuffer { - // fn obj_bytes(&self) -> BorrowedValue<[u8]> { - // BorrowedValue::map(self.data.lock().into(), |data| &data[self.range.clone()]) - // } - - // fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - // BorrowedValueMut::map(self.data.lock().into(), |data| { - // &mut data[self.range.clone()] - // }) - // } - // fn release(&self) {} - // fn retain(&self) {} - // } + impl AsBuffer for BufferedRawBuffer { + fn as_buffer(zelf: &PyRef, _vm: &VirtualMachine) -> PyResult { + let buf = PyBuffer::new( + zelf.as_object().clone(), + BufferOptions { + len: zelf.range.end - zelf.range.start, + ..Default::default() + }, + &BUFFERED_RAW_BUFFER_METHODS, + ); + Ok(buf) + } + } pub fn get_offset(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { let int = vm.to_index(&obj)?; @@ -3352,30 +3366,32 @@ mod _io { len: self.buffer.read().cursor.get_ref().len(), ..Default::default() }; - let buffer = PyBuffer::new(self.as_object().to_owned(), self, options); + let buffer = PyBuffer::new(self.as_object().clone(), options, &BYTES_IO_BUFFER_METHODS); let view = PyMemoryView::from_buffer(buffer, vm)?; Ok(view) } } - // impl BufferInternal for PyRef { - // fn obj_bytes(&self) -> BorrowedValue<[u8]> { - // PyRwLockReadGuard::map(self.buffer.read(), |x| x.cursor.get_ref().as_slice()).into() - // } - - // fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - // PyRwLockWriteGuard::map(self.buffer.write(), |x| x.cursor.get_mut().as_mut_slice()) - // .into() - // } - - // fn release(&self) { - // self.exports.fetch_sub(1); - // } - - // fn retain(&self) { - // self.exports.fetch_add(1); - // } - // } + static BYTES_IO_BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |zelf| { + let zelf = zelf.downcast_ref::().unwrap(); + PyRwLockReadGuard::map(zelf.buffer.read(), |x| x.cursor.get_ref().as_slice()).into() + }, + obj_bytes_mut: |zelf| { + let zelf = zelf.downcast_ref::().unwrap(); + PyRwLockWriteGuard::map(zelf.buffer.write(), |x| x.cursor.get_mut().as_mut_slice()) + .into() + }, + contiguous: None, + contiguous_mut: None, + collect_bytes: None, + release: Some(|zelf| { + zelf.downcast_ref::().unwrap().exports.fetch_sub(1); + }), + retain: Some(|zelf| { + 
zelf.downcast_ref::().unwrap().exports.fetch_add(1); + }), + }; impl<'a> BufferResizeGuard<'a> for BytesIO { type Resizable = PyRwLockWriteGuard<'a, BufferedIO>; diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index 7c216b2bf5..c9c8ba286e 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -140,31 +140,15 @@ mod _sre { vm: &VirtualMachine, f: F, ) -> PyResult { - let buffer; - let guard; - let vec; - let s; - let str_drive = if self.isbytes { - buffer = PyBuffer::try_from_borrowed_object(vm, &string)?; - let bytes = match buffer.as_contiguous() { - Some(bytes) => { - guard = bytes; - &*guard - } - None => { - vec = buffer.to_contiguous(); - vec.as_slice() - } - }; - StrDrive::Bytes(bytes) + if self.isbytes { + let buffer = PyBuffer::try_from_borrowed_object(vm, &string)?; + buffer.contiguous_or_collect(|bytes| f(StrDrive::Bytes(bytes))) } else { - s = string + let s = string .payload::() .ok_or_else(|| vm.new_type_error("expected string".to_owned()))?; - StrDrive::Str(s.as_str()) - }; - - f(str_drive) + f(StrDrive::Str(s.as_str())) + } } fn with_state PyResult>( From 18c7add8146ab9e0b23b9b142b00c51339001f5c Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sat, 23 Oct 2021 16:35:48 +0200 Subject: [PATCH 03/20] fix rebase --- stdlib/src/array.rs | 6 +++--- vm/src/builtins/bytearray.rs | 6 +++--- vm/src/builtins/bytes.rs | 10 +++++----- vm/src/builtins/memory.rs | 13 +++++++------ vm/src/stdlib/io.rs | 14 +++++++++----- 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index c2193895e1..417b11e925 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -27,8 +27,8 @@ mod array { AsBuffer, AsMapping, Comparable, Constructor, IterNext, IterNextIterable, Iterable, PyComparisonOp, }, - IdProtocol, PyComparisonValue, PyObject, PyObjectRef, PyObjectView, PyRef, PyResult, - PyValue, TryFromObject, TypeProtocol, VirtualMachine, + IdProtocol, PyComparisonValue, PyObject, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, + PyResult, PyValue, TryFromObject, TypeProtocol, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; @@ -1219,7 +1219,7 @@ mod array { fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { let array = zelf.read(); let buf = PyBuffer::new( - zelf.as_object().clone(), + zelf.to_owned().into_object(), BufferOptions { readonly: false, len: array.len(), diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 29ced6e1a3..4e0ae8e629 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -29,7 +29,7 @@ use crate::{ }, utils::Either, IdProtocol, PyClassDef, PyClassImpl, PyComparisonValue, PyContext, PyObject, PyObjectRef, - PyRef, PyResult, PyValue, TypeProtocol, VirtualMachine, + PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TypeProtocol, VirtualMachine, }; use bstr::ByteSlice; use crossbeam_utils::atomic::AtomicCell; @@ -709,7 +709,7 @@ impl Comparable for PyByteArray { } impl AsBuffer for PyByteArray { - fn as_buffer(zelf: &PyRef, _vm: &VirtualMachine) -> PyResult { + fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { static_cell! 
{ static METHODS: BufferMethods; } @@ -730,7 +730,7 @@ impl AsBuffer for PyByteArray { collect_bytes: None, }); let buffer = PyBuffer::new( - zelf.as_object().clone(), + zelf.to_owned().into_object(), BufferOptions { readonly: false, len: zelf.len(), diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 84ee69d91c..10899c4652 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -16,8 +16,8 @@ use crate::{ IterNextIterable, Iterable, PyComparisonOp, Unconstructible, }, utils::Either, - IdProtocol, PyClassImpl, PyComparisonValue, PyContext, PyObject, PyObjectRef, PyRef, PyResult, - PyValue, TryFromBorrowedObject, TypeProtocol, VirtualMachine, + IdProtocol, PyClassImpl, PyComparisonValue, PyContext, PyObject, PyObjectRef, PyObjectView, + PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TypeProtocol, VirtualMachine, }; use bstr::ByteSlice; use rustpython_common::lock::PyMutex; @@ -549,9 +549,9 @@ static BUFFER_METHODS: BufferMethods = BufferMethods { }; impl AsBuffer for PyBytes { - fn as_buffer(zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyResult { + fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { let buf = PyBuffer::new( - zelf.as_object().clone(), + zelf.to_owned().into_object(), BufferOptions { len: zelf.len(), ..Default::default() @@ -563,7 +563,7 @@ impl AsBuffer for PyBytes { } impl AsMapping for PyBytes { - fn as_mapping(_zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { + fn as_mapping(_zelf: &PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { PyMappingMethods { length: Some(Self::length), subscript: Some(Self::subscript), diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 4cd5b6cbbc..6793d266b2 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -13,7 +13,8 @@ use crate::{ types::{AsBuffer, AsMapping, Comparable, Constructor, Hashable, PyComparisonOp}, utils::Either, IdProtocol, PyClassDef, PyClassImpl, PyComparisonValue, PyContext, PyObject, PyObjectRef, - PyRef, PyResult, PyValue, TryFromBorrowedObject, TryFromObject, TypeProtocol, VirtualMachine, + PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TryFromObject, + TypeProtocol, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; @@ -484,7 +485,7 @@ impl PyMemoryView { }) .try_collect()?; - Ok(elements) + Ok(vm.ctx.new_list(elements)) } #[pymethod] @@ -557,7 +558,7 @@ impl PyMemoryView { released: AtomicCell::new(false), format_spec, hash: OnceCell::new(), - ..**zelf + ..*self } .validate() .into_ref(vm)) @@ -651,12 +652,12 @@ static BUFFER_METHODS: BufferMethods = BufferMethods { }; impl AsBuffer for PyMemoryView { - fn as_buffer(zelf: &crate::PyObjectView, vm: &VirtualMachine) -> PyResult { + fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { if zelf.released.load() { Err(vm.new_value_error("operation forbidden on released memoryview object".to_owned())) } else { Ok(PyBuffer::new( - zelf.as_object().clone(), + zelf.to_owned().into_object(), zelf.buffer.options.clone(), &BUFFER_METHODS, )) @@ -665,7 +666,7 @@ impl AsBuffer for PyMemoryView { } impl AsMapping for PyMemoryView { - fn as_mapping(_zelf: &crate::PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { + fn as_mapping(_zelf: &PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { PyMappingMethods { length: Some(Self::length), subscript: Some(Self::subscript), diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 
c5f740017d..a9f9899e25 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -88,8 +88,8 @@ mod _io { types::{AsBuffer, Constructor, Destructor, IterNext, Iterable}, utils::Either, vm::{ReprGuard, VirtualMachine}, - IdProtocol, PyContext, PyObjectRef, PyObjectWrap, PyRef, PyResult, PyValue, StaticType, - TryFromBorrowedObject, TryFromObject, TypeProtocol, + IdProtocol, PyContext, PyObject, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, + PyValue, StaticType, TryFromBorrowedObject, TryFromObject, TypeProtocol, }; use bstr::ByteSlice; use crossbeam_utils::atomic::AtomicCell; @@ -1356,9 +1356,9 @@ mod _io { }; impl AsBuffer for BufferedRawBuffer { - fn as_buffer(zelf: &PyRef, _vm: &VirtualMachine) -> PyResult { + fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { let buf = PyBuffer::new( - zelf.as_object().clone(), + zelf.to_owned().into_object(), BufferOptions { len: zelf.range.end - zelf.range.start, ..Default::default() @@ -3366,7 +3366,11 @@ mod _io { len: self.buffer.read().cursor.get_ref().len(), ..Default::default() }; - let buffer = PyBuffer::new(self.as_object().clone(), options, &BYTES_IO_BUFFER_METHODS); + let buffer = PyBuffer::new( + self.clone().into_object(), + options, + &BYTES_IO_BUFFER_METHODS, + ); let view = PyMemoryView::from_buffer(buffer, vm)?; Ok(view) } From ee2dacd33d4201639e94fd276b941dd4d104aa9b Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sun, 24 Oct 2021 20:52:09 +0200 Subject: [PATCH 04/20] fix memoryview and introduce VecBuffer --- stdlib/src/array.rs | 16 +-- vm/src/builtins/bytearray.rs | 51 ++++----- vm/src/builtins/bytes.rs | 4 +- vm/src/builtins/memory.rs | 215 +++++++++++++++++++---------------- vm/src/function/buffer.rs | 16 +-- vm/src/protocol/buffer.rs | 160 ++++++++++++++++++-------- vm/src/protocol/mod.rs | 2 +- vm/src/stdlib/io.rs | 129 +++++---------------- vm/src/types/zoo.rs | 4 +- 9 files changed, 297 insertions(+), 300 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 417b11e925..6132ed9c2e 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -1234,17 +1234,17 @@ mod array { } static BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |zelf| zelf.payload::().unwrap().get_bytes().into(), - obj_bytes_mut: |zelf| zelf.payload::().unwrap().get_bytes_mut().into(), + obj_bytes: |buffer| buffer.obj_as::().get_bytes().into(), + obj_bytes_mut: |buffer| buffer.obj_as::().get_bytes_mut().into(), + release: Some(|buffer| { + buffer.obj_as::().exports.fetch_sub(1); + }), + retain: Some(|buffer| { + buffer.obj_as::().exports.fetch_add(1); + }), contiguous: None, contiguous_mut: None, collect_bytes: None, - release: Some(|zelf| { - zelf.payload::().unwrap().exports.fetch_sub(1); - }), - retain: Some(|zelf| { - zelf.payload::().unwrap().exports.fetch_add(1); - }), }; impl AsMapping for PyArray { diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 4e0ae8e629..fe6a829ade 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -3,12 +3,9 @@ use super::{ PositionIterInternal, PyBytes, PyBytesRef, PyDictRef, PyIntRef, PyStrRef, PyTuple, PyTupleRef, PyTypeRef, }; -use crate::common::{ - lock::{ - PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutex, PyRwLock, PyRwLockReadGuard, - PyRwLockWriteGuard, - }, - static_cell, +use crate::common::lock::{ + PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutex, PyRwLock, PyRwLockReadGuard, + PyRwLockWriteGuard, }; use crate::{ anystr::{self, AnyStr}, @@ -708,27 +705,27 @@ impl 
Comparable for PyByteArray { } } +static BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |buffer| buffer.obj_as::().borrow_buf().into(), + obj_bytes_mut: |buffer| { + PyMappedRwLockWriteGuard::map(buffer.obj_as::().borrow_buf_mut(), |x| { + x.as_mut_slice() + }) + .into() + }, + release: Some(|buffer| { + buffer.obj_as::().exports.fetch_sub(1); + }), + retain: Some(|buffer| { + buffer.obj_as::().exports.fetch_add(1); + }), + contiguous: None, + contiguous_mut: None, + collect_bytes: None, +}; + impl AsBuffer for PyByteArray { - fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { - static_cell! { - static METHODS: BufferMethods; - } - let methods = METHODS.get_or_init(|| BufferMethods { - obj_bytes: |zelf| zelf.payload::().unwrap().borrow_buf().into(), - obj_bytes_mut: |zelf| { - let zelf = zelf.payload::().unwrap(); - PyRwLockWriteGuard::map(zelf.inner_mut(), |x| &mut *x.elements).into() - }, - release: Some(|zelf| { - zelf.payload::().unwrap().exports.fetch_sub(1); - }), - retain: Some(|zelf| { - zelf.payload::().unwrap().exports.fetch_add(1); - }), - contiguous: None, - contiguous_mut: None, - collect_bytes: None, - }); + fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { let buffer = PyBuffer::new( zelf.to_owned().into_object(), BufferOptions { @@ -736,7 +733,7 @@ impl AsBuffer for PyByteArray { len: zelf.len(), ..Default::default() }, - methods, + &BUFFER_METHODS, ); Ok(buffer) } diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 10899c4652..e1e7e79d5a 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -539,7 +539,7 @@ impl PyBytes { } static BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |zelf| zelf.payload::().unwrap().as_bytes().into(), + obj_bytes: |buffer| buffer.obj_as::().as_bytes().into(), obj_bytes_mut: |_| panic!(), contiguous: None, contiguous_mut: None, @@ -549,7 +549,7 @@ static BUFFER_METHODS: BufferMethods = BufferMethods { }; impl AsBuffer for PyBytes { - fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { + fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { let buf = PyBuffer::new( zelf.to_owned().into_object(), BufferOptions { diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 6793d266b2..e855b482ec 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -43,6 +43,8 @@ pub struct PyMemoryView { // or the step is not 1 or -1 step: isize, format_spec: FormatSpec, + // memoryview's options could be different from buffer's options + options: BufferOptions, hash: OnceCell, // exports // memoryview has no exports count by itself @@ -63,9 +65,10 @@ impl Constructor for PyMemoryView { impl PyMemoryView { #[cfg(debug_assertions)] fn validate(self) -> Self { - let options = &self.buffer.options; - let bytes_len = options.len * options.itemsize * self.step.abs() as usize; - let buffer_len = self.buffer._obj_bytes().len(); + let options = &self.options; + let bytes_len = options.len * options.itemsize + + (self.step.abs() as usize - 1) * options.itemsize * options.len.saturating_sub(1); + let buffer_len = self.buffer.obj_bytes().len(); assert!(self.start <= self.stop); assert!(self.step != 0); assert!(self.start + bytes_len <= buffer_len); @@ -86,7 +89,7 @@ impl PyMemoryView { // when we get a buffer means the buffered object is size locked // so we can assume the buffer's options will never change as long // as memoryview is still alive - let options = &buffer.options; + let options = 
buffer.options.clone(); let stop = options.len * options.itemsize; let format_spec = Self::parse_format(&options.format, vm)?; Ok(PyMemoryView { @@ -95,6 +98,7 @@ impl PyMemoryView { start: 0, stop, step: 1, + options, format_spec, hash: OnceCell::new(), } @@ -106,8 +110,9 @@ impl PyMemoryView { range: std::ops::Range, vm: &VirtualMachine, ) -> PyResult { - let itemsize = buffer.options.itemsize; - let format_spec = Self::parse_format(&buffer.options.format, vm)?; + let options = buffer.options.clone(); + let itemsize = options.itemsize; + let format_spec = Self::parse_format(&options.format, vm)?; buffer.options.len = range.len(); Ok(PyMemoryView { buffer, @@ -115,26 +120,17 @@ impl PyMemoryView { start: range.start * itemsize, stop: range.end * itemsize, step: 1, + options, format_spec, hash: OnceCell::new(), } .validate()) } - pub fn try_bytes(&self, vm: &VirtualMachine, f: F) -> PyResult - where - F: FnOnce(&[u8]) -> R, - { - self.try_not_released(vm)?; - self.buffer.as_contiguous().map(|x| f(&*x)).ok_or_else(|| { - vm.new_type_error("non-contiguous memoryview is not a bytes-like object".to_owned()) - }) - } - #[pymethod] pub fn release(&self) { if self.released.compare_exchange(false, true).is_ok() { - self.buffer._release(); + self.buffer.release(); } } @@ -154,31 +150,29 @@ impl PyMemoryView { #[pyproperty] fn nbytes(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm) - .map(|_| self.buffer.options.len * self.buffer.options.itemsize) + .map(|_| self.options.len * self.options.itemsize) } #[pyproperty] fn readonly(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm) - .map(|_| self.buffer.options.readonly) + self.try_not_released(vm).map(|_| self.options.readonly) } #[pyproperty] fn itemsize(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm) - .map(|_| self.buffer.options.itemsize) + self.try_not_released(vm).map(|_| self.options.itemsize) } #[pyproperty] fn ndim(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| self.buffer.options.ndim) + self.try_not_released(vm).map(|_| self.options.ndim) } // TODO #[pyproperty] fn shape(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm) - .map(|_| (self.buffer.options.len,).into_pyobject(vm)) + .map(|_| (self.options.len,).into_pyobject(vm)) } // TODO @@ -190,7 +184,7 @@ impl PyMemoryView { #[pyproperty] fn format(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm) - .map(|_| PyStr::from(self.buffer.options.format.clone())) + .map(|_| PyStr::from(self.options.format.clone())) } #[pymethod(magic)] @@ -205,13 +199,13 @@ impl PyMemoryView { // translate the slice index to memory index fn get_pos(&self, i: isize) -> Option { - let len = self.buffer.options.len; + let len = self.options.len; let i = wrap_index(i, len)?; Some(self.get_pos_no_wrap(i)) } fn get_pos_no_wrap(&self, i: usize) -> usize { - let itemsize = self.buffer.options.itemsize; + let itemsize = self.options.itemsize; if self.step < 0 { self.stop - itemsize - (-self.step as usize) * i * itemsize } else { @@ -223,7 +217,7 @@ impl PyMemoryView { let i = zelf .get_pos(i) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; - let bytes = &zelf.buffer._obj_bytes()[i..i + zelf.buffer.options.itemsize]; + let bytes = &zelf.buffer.obj_bytes()[i..i + zelf.options.itemsize]; zelf.format_spec.unpack(bytes, vm).map(|x| { if x.len() == 1 { x.fast_getitem(0) @@ -235,7 +229,10 @@ impl PyMemoryView { fn getitem_by_slice(zelf: PyRef, slice: PyRef, vm: &VirtualMachine) -> 
PyResult { // slicing a memoryview return a new memoryview - let len = zelf.buffer.options.len; + let options = zelf.options.clone(); + let buffer = zelf.buffer.clone(); + + let len = options.len; let (range, step, is_negative_step) = SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(len); let abs_step = step.unwrap(); @@ -245,7 +242,7 @@ impl PyMemoryView { abs_step as isize }; let newstep = step * zelf.step; - let itemsize = zelf.buffer.options.itemsize; + let itemsize = options.itemsize; let format_spec = zelf.format_spec.clone(); @@ -254,15 +251,16 @@ impl PyMemoryView { let start = zelf.start + range.start * itemsize; let stop = zelf.start + range.end * itemsize; return Ok(PyMemoryView { - buffer: zelf.buffer.clone_with_options(BufferOptions { - len: newlen, - contiguous: true, - ..zelf.buffer.options.clone() - }), + buffer, released: AtomicCell::new(false), start, stop, step: 1, + options: BufferOptions { + len: newlen, + contiguous: true, + ..options + }, format_spec, hash: OnceCell::new(), } @@ -272,15 +270,16 @@ impl PyMemoryView { if range.start >= range.end { return Ok(PyMemoryView { - buffer: zelf.buffer.clone_with_options(BufferOptions { - len: 0, - contiguous: true, - ..zelf.buffer.options.clone() - }), + buffer, released: AtomicCell::new(false), start: range.end, stop: range.end, step: 1, + options: BufferOptions { + len: 0, + contiguous: true, + ..options + }, format_spec, hash: OnceCell::new(), } @@ -307,17 +306,17 @@ impl PyMemoryView { (start, stop) }; - let options = BufferOptions { - len: newlen, - contiguous: false, - ..zelf.buffer.options.clone() - }; Ok(PyMemoryView { - buffer: zelf.buffer.clone_with_options(options), + buffer, released: AtomicCell::new(false), start, stop, step: newstep, + options: BufferOptions { + len: newlen, + contiguous: false, + ..options + }, format_spec, hash: OnceCell::new(), } @@ -343,9 +342,9 @@ impl PyMemoryView { let i = zelf .get_pos(i) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; - let itemsize = zelf.buffer.options.itemsize; + let itemsize = zelf.options.itemsize; let data = zelf.format_spec.pack(vec![value], vm)?; - zelf.buffer._obj_bytes_mut()[i..i + itemsize].copy_from_slice(&data); + zelf.buffer.obj_bytes_mut()[i..i + itemsize].copy_from_slice(&data); Ok(()) } @@ -360,10 +359,10 @@ impl PyMemoryView { let len = options.len; let itemsize = options.itemsize; - if itemsize != zelf.buffer.options.itemsize { + if itemsize != zelf.options.itemsize { return Err(vm.new_type_error(format!( "memoryview: invalid type for format '{}'", - zelf.buffer.options.format + zelf.options.format ))); } @@ -373,12 +372,12 @@ impl PyMemoryView { )) }; - if options.format != zelf.buffer.options.format { + if options.format != zelf.options.format { return diff_err(); } let (range, step, is_negative_step) = - SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(zelf.buffer.options.len); + SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(zelf.options.len); items.contiguous_or_collect(|bytes| { assert_eq!(bytes.len(), len * itemsize); @@ -388,7 +387,7 @@ impl PyMemoryView { return diff_err(); } - if let Some(mut buffer) = zelf.buffer.as_contiguous_mut() { + if let Some(mut buffer) = zelf.as_contiguous_mut() { buffer[range.start * itemsize..range.end * itemsize].copy_from_slice(&bytes); return Ok(()); } @@ -413,7 +412,7 @@ impl PyMemoryView { let item_index = (0..len).step_by(itemsize); - let mut buffer = zelf.obj_bytes_mut(); + let mut buffer = zelf.buffer.obj_bytes_mut(); indexes .map(|i| zelf.get_pos(i as 
isize).unwrap()) @@ -427,7 +426,7 @@ impl PyMemoryView { if match len { 0 => slicelen == 0, 1 => { - let mut buffer = zelf.obj_bytes_mut(); + let mut buffer = zelf.buffer.obj_bytes_mut(); let i = zelf.get_pos(range.start as isize).unwrap(); buffer[i..i + itemsize].copy_from_slice(&bytes); true @@ -450,7 +449,7 @@ impl PyMemoryView { vm: &VirtualMachine, ) -> PyResult<()> { zelf.try_not_released(vm)?; - if zelf.buffer.options.readonly { + if zelf.options.readonly { return Err(vm.new_type_error("cannot modify read-only memory".to_owned())); } match SequenceIndex::try_from_object_for(vm, needle, Self::NAME)? { @@ -461,23 +460,29 @@ impl PyMemoryView { #[pymethod(magic)] fn len(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| self.buffer.options.len) + self.try_not_released(vm) + .map(|_| (self.stop - self.start) / self.options.itemsize / self.step.abs() as usize) } #[pymethod] fn tobytes(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; - let mut v = vec![]; - self.buffer.collect_bytes(&mut v); + let v = if self.options.contiguous { + self.contiguous().to_vec() + } else { + let mut collected = vec![]; + self.collect_bytes(&mut collected); + collected + }; Ok(PyBytes::from(v).into_ref(vm)) } #[pymethod] fn tolist(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; - let len = self.buffer.options.len; - let itemsize = self.buffer.options.itemsize; - let bytes = &*self.buffer._obj_bytes(); + let len = self.options.len; + let itemsize = self.options.itemsize; + let bytes = &*self.buffer.obj_bytes(); let elements: Vec = (0..len) .map(|i| { let i = self.get_pos_no_wrap(i); @@ -491,15 +496,15 @@ impl PyMemoryView { #[pymethod] fn toreadonly(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { zelf.try_not_released(vm)?; - let buffer = zelf.buffer.clone_with_options(BufferOptions { - readonly: true, - ..zelf.buffer.options.clone() - }); Ok(PyMemoryView { - buffer, + buffer: zelf.buffer.clone(), released: AtomicCell::new(false), format_spec: zelf.format_spec.clone(), hash: OnceCell::new(), + options: BufferOptions { + readonly: true, + ..zelf.options.clone() + }, ..**zelf } .validate() @@ -523,15 +528,14 @@ impl PyMemoryView { vm: &VirtualMachine, ) -> PyResult { self.try_not_released(vm)?; - self.buffer - .contiguous_or_collect(|x| bytes_to_hex(x, sep, bytes_per_sep, vm)) + self.contiguous_or_collect(|x| bytes_to_hex(x, sep, bytes_per_sep, vm)) } // TODO: support cast shape #[pymethod] fn cast(&self, format: PyStrRef, vm: &VirtualMachine) -> PyResult> { self.try_not_released(vm)?; - if !self.buffer.options.contiguous { + if !self.options.contiguous { return Err(vm.new_type_error( "memoryview: casts are restricted to C-contiguous views".to_owned(), )); @@ -539,7 +543,7 @@ impl PyMemoryView { let format_spec = Self::parse_format(format.as_str(), vm)?; let itemsize = format_spec.size(); - let bytelen = self.buffer.options.len * self.buffer.options.itemsize; + let bytelen = self.options.len * self.options.itemsize; if bytelen % itemsize != 0 { return Err( @@ -547,15 +551,15 @@ impl PyMemoryView { ); } - let buffer = self.buffer.clone_with_options(BufferOptions { - itemsize, - len: bytelen / itemsize, - format: format.to_string().into(), - ..self.buffer.options.clone() - }); Ok(PyMemoryView { - buffer, + buffer: self.buffer.clone(), released: AtomicCell::new(false), + options: BufferOptions { + itemsize, + len: bytelen / itemsize, + format: format.to_string().into(), + ..self.buffer.options.clone() + }, format_spec, hash: OnceCell::new(), 
..*self @@ -581,7 +585,7 @@ impl PyMemoryView { Err(_) => return Ok(false), }; - let a_options = &zelf.buffer.options; + let a_options = &zelf.options; let b_options = &other.options; if a_options.len != b_options.len @@ -591,7 +595,7 @@ impl PyMemoryView { return Ok(false); } - zelf.buffer.contiguous_or_collect(|a| { + zelf.contiguous_or_collect(|a| { other.contiguous_or_collect(|b| { if a_options.format == b_options.format { Ok(a == b) @@ -615,12 +619,27 @@ impl PyMemoryView { Err(vm.new_type_error("cannot pickle 'memoryview' object".to_owned())) } - fn obj_bytes(&self) -> BorrowedValue<[u8]> { - self.buffer._obj_bytes() + fn as_contiguous_mut(&self) -> Option> { + self.options.contiguous.then(|| self.contiguous_mut()) } - fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - self.buffer._obj_bytes_mut() + + fn contiguous_or_collect(&self, f: F) -> R + where + F: FnOnce(&[u8]) -> R, + { + let borrowed; + let mut collected; + let v = if self.options.contiguous { + borrowed = self.contiguous(); + &*borrowed + } else { + collected = vec![]; + self.collect_bytes(&mut collected); + &*collected + }; + f(v) } + fn contiguous(&self) -> BorrowedValue<[u8]> { BorrowedValue::map(self.buffer._contiguous(), |x| &x[self.start..self.stop]) } @@ -630,9 +649,9 @@ impl PyMemoryView { }) } fn collect_bytes(&self, buf: &mut Vec) { - let bytes = &*self.buffer._obj_bytes(); - let len = self.buffer.options.len; - let itemsize = self.buffer.options.itemsize; + let bytes = &*self.buffer.obj_bytes(); + let len = self.options.len; + let itemsize = self.options.itemsize; buf.reserve(len * itemsize); for i in 0..len { let i = self.get_pos_no_wrap(i); @@ -642,13 +661,13 @@ impl PyMemoryView { } static BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |zelf| zelf.payload::().unwrap().obj_bytes(), - obj_bytes_mut: |zelf| zelf.payload::().unwrap().obj_bytes_mut(), - contiguous: Some(|zelf| zelf.payload::().unwrap().contiguous()), - contiguous_mut: Some(|zelf| zelf.payload::().unwrap().contiguous_mut()), - collect_bytes: Some(|zelf, buf| zelf.payload::().unwrap().collect_bytes(buf)), - release: Some(|zelf| zelf.payload::().unwrap().buffer._release()), - retain: Some(|zelf| zelf.payload::().unwrap().buffer._retain()), + obj_bytes: |buffer| buffer.obj_as::().buffer.obj_bytes(), + obj_bytes_mut: |buffer| buffer.obj_as::().buffer.obj_bytes_mut(), + contiguous: Some(|buffer| buffer.obj_as::().contiguous()), + contiguous_mut: Some(|buffer| buffer.obj_as::().contiguous_mut()), + collect_bytes: Some(|buffer, buf| buffer.obj_as::().collect_bytes(buf)), + release: Some(|buffer| buffer.obj_as::().buffer.release()), + retain: Some(|buffer| buffer.obj_as::().buffer.retain()), }; impl AsBuffer for PyMemoryView { @@ -658,7 +677,7 @@ impl AsBuffer for PyMemoryView { } else { Ok(PyBuffer::new( zelf.to_owned().into_object(), - zelf.buffer.options.clone(), + zelf.options.clone(), &BUFFER_METHODS, )) } @@ -727,14 +746,12 @@ impl Hashable for PyMemoryView { zelf.hash .get_or_try_init(|| { zelf.try_not_released(vm)?; - if !zelf.buffer.options.readonly { + if !zelf.options.readonly { return Err( vm.new_value_error("cannot hash writable memoryview object".to_owned()) ); } - Ok(zelf - .buffer - .contiguous_or_collect(|bytes| vm.state.hash_secret.hash_bytes(bytes))) + Ok(zelf.contiguous_or_collect(|bytes| vm.state.hash_secret.hash_bytes(bytes))) }) .map(|&x| x) } diff --git a/vm/src/function/buffer.rs b/vm/src/function/buffer.rs index 0700d8a312..8dca7fe998 100644 --- a/vm/src/function/buffer.rs +++ b/vm/src/function/buffer.rs @@ -40,7 
+40,7 @@ impl PyObject { impl ArgBytesLike { pub fn borrow_buf(&self) -> BorrowedValue<'_, [u8]> { - self.0.as_contiguous().unwrap() + self.0._contiguous() } pub fn with_ref(&self, f: F) -> R @@ -51,15 +51,11 @@ impl ArgBytesLike { } pub fn len(&self) -> usize { - self.borrow_buf().len() + self.0.options.len } pub fn is_empty(&self) -> bool { - self.borrow_buf().is_empty() - } - - pub fn to_cow(&self) -> std::borrow::Cow<[u8]> { - self.borrow_buf().to_vec().into() + self.len() == 0 } } @@ -86,7 +82,7 @@ pub struct ArgMemoryBuffer(PyBuffer); impl ArgMemoryBuffer { pub fn borrow_buf_mut(&self) -> BorrowedValueMut<'_, [u8]> { - self.0.as_contiguous_mut().unwrap() + self.0._contiguous_mut() } pub fn with_ref(&self, f: F) -> R @@ -97,11 +93,11 @@ impl ArgMemoryBuffer { } pub fn len(&self) -> usize { - self.borrow_buf_mut().len() + self.0.options.len } pub fn is_empty(&self) -> bool { - self.borrow_buf_mut().is_empty() + self.len() == 0 } } diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index 694e9bf4a2..f3194dbaec 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -1,24 +1,30 @@ //! Buffer protocol -use crate::common::borrow::{BorrowedValue, BorrowedValueMut}; -use crate::common::rc::PyRc; -use crate::PyThreadingConstraint; -use crate::{PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, TypeProtocol, VirtualMachine}; +use crate::{ + builtins::PyTypeRef, + common::{ + borrow::{BorrowedValue, BorrowedValueMut}, + lock::{MapImmutable, PyMutex, PyMutexGuard}, + }, + types::{Constructor, Unconstructible}, + PyObject, PyObjectPayload, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, + TryFromBorrowedObject, TypeProtocol, VirtualMachine, +}; use std::{borrow::Cow, fmt::Debug}; pub struct BufferMethods { // always reflecting the whole bytes of the most top object - pub obj_bytes: fn(&PyObjectRef) -> BorrowedValue<[u8]>, + pub obj_bytes: fn(&PyBuffer) -> BorrowedValue<[u8]>, // always reflecting the whole bytes of the most top object - pub obj_bytes_mut: fn(&PyObjectRef) -> BorrowedValueMut<[u8]>, + pub obj_bytes_mut: fn(&PyBuffer) -> BorrowedValueMut<[u8]>, // GUARANTEE: called only if the buffer option is contiguous - pub contiguous: Option BorrowedValue<[u8]>>, + pub contiguous: Option BorrowedValue<[u8]>>, // GUARANTEE: called only if the buffer option is contiguous - pub contiguous_mut: Option BorrowedValueMut<[u8]>>, + pub contiguous_mut: Option BorrowedValueMut<[u8]>>, // collect bytes to buf when buffer option is not contiguous - pub collect_bytes: Option)>, - pub release: Option, - pub retain: Option, + pub collect_bytes: Option)>, + pub release: Option, + pub retain: Option, } impl Debug for BufferMethods { @@ -35,11 +41,11 @@ impl Debug for BufferMethods { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct PyBuffer { pub obj: PyObjectRef, pub options: BufferOptions, - pub(crate) methods: &'static BufferMethods, + methods: &'static BufferMethods, } impl PyBuffer { @@ -49,9 +55,10 @@ impl PyBuffer { options, methods, }; - zelf._retain(); + zelf.retain(); zelf } + pub fn as_contiguous(&self) -> Option> { self.options.contiguous.then(|| self._contiguous()) } @@ -82,51 +89,51 @@ impl PyBuffer { f(v) } - pub fn clone_with_options(&self, options: BufferOptions) -> Self { - Self::new(self.obj.clone(), options, self.methods) - } - - // SAFETY: should only called if option has contiguous - pub(crate) fn _contiguous(&self) -> BorrowedValue<[u8]> { - self.methods - .contiguous - .map(|f| f(&self.obj)) - .unwrap_or_else(|| 
(self.methods.obj_bytes)(&self.obj)) - } - - // SAFETY: should only called if option has contiguous - pub(crate) fn _contiguous_mut(&self) -> BorrowedValueMut<[u8]> { - self.methods - .contiguous_mut - .map(|f| f(&self.obj)) - .unwrap_or_else(|| (self.methods.obj_bytes_mut)(&self.obj)) + pub fn obj_as(&self) -> &PyObjectView { + self.obj.downcast_ref().unwrap() } - pub(crate) fn _obj_bytes(&self) -> BorrowedValue<[u8]> { - (self.methods.obj_bytes)(&self.obj) + pub fn obj_bytes(&self) -> BorrowedValue<[u8]> { + (self.methods.obj_bytes)(self) } - pub(crate) fn _obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - (self.methods.obj_bytes_mut)(&self.obj) + pub fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + (self.methods.obj_bytes_mut)(self) } - pub(crate) fn _release(&self) { + pub fn release(&self) { if let Some(f) = self.methods.release { - f(&self.obj) + f(self) } } - pub(crate) fn _retain(&self) { + pub fn retain(&self) { if let Some(f) = self.methods.retain { - f(&self.obj) + f(self) } } + // SAFETY: should only called if option has contiguous + pub(crate) fn _contiguous(&self) -> BorrowedValue<[u8]> { + self.methods + .contiguous + .map(|f| f(self)) + .unwrap_or_else(|| self.obj_bytes()) + } + + // SAFETY: should only called if option has contiguous + pub(crate) fn _contiguous_mut(&self) -> BorrowedValueMut<[u8]> { + self.methods + .contiguous_mut + .map(|f| f(self)) + .unwrap_or_else(|| self.obj_bytes_mut()) + } + pub(crate) fn _collect_bytes(&self, buf: &mut Vec) { self.methods .collect_bytes - .map(|f| f(&self.obj, buf)) - .unwrap_or_else(|| buf.extend_from_slice(&(self.methods.obj_bytes)(&self.obj))) + .map(|f| f(self, buf)) + .unwrap_or_else(|| buf.extend_from_slice(&self.obj_bytes())) } } @@ -185,13 +192,7 @@ impl TryFromBorrowedObject for PyBuffer { // but it is not supported by Rust impl Drop for PyBuffer { fn drop(&mut self) { - self._release(); - } -} - -impl Clone for PyBuffer { - fn clone(&self) -> Self { - self.clone_with_options(self.options.clone()) + self.release(); } } @@ -199,3 +200,64 @@ pub trait BufferResizeGuard<'a> { type Resizable: 'a; fn try_resizable(&'a self, vm: &VirtualMachine) -> PyResult; } + +#[pyclass(module = false, name = "vec_buffer")] +#[derive(Debug)] +pub struct VecBuffer(PyMutex>); + +#[pyimpl(flags(BASETYPE), with(Constructor))] +impl VecBuffer { + pub fn new(v: Vec) -> Self { + Self(PyMutex::new(v)) + } + pub fn take(&self) -> Vec { + std::mem::take(&mut *self.0.lock()) + } +} +impl Unconstructible for VecBuffer {} +impl PyValue for VecBuffer { + fn class(vm: &VirtualMachine) -> &PyTypeRef { + &vm.ctx.types.vec_buffer_type + } +} + +impl PyRef { + pub fn into_pybuffer(self) -> PyBuffer { + let len = self.0.lock().len(); + PyBuffer::new( + self.into_object(), + BufferOptions { + len, + readonly: false, + ..Default::default() + }, + &VEC_BUFFER_METHODS, + ) + } + pub fn into_readonly_pybuffer(self) -> PyBuffer { + let len = self.0.lock().len(); + PyBuffer::new( + self.into_object(), + BufferOptions { + len, + readonly: true, + ..Default::default() + }, + &VEC_BUFFER_METHODS, + ) + } +} + +static VEC_BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |buffer| { + PyMutexGuard::map_immutable(buffer.obj_as::().0.lock(), |x| x.as_slice()).into() + }, + obj_bytes_mut: |buffer| { + PyMutexGuard::map(buffer.obj_as::().0.lock(), |x| x.as_mut_slice()).into() + }, + contiguous: None, + contiguous_mut: None, + collect_bytes: None, + release: None, + retain: None, +}; diff --git a/vm/src/protocol/mod.rs b/vm/src/protocol/mod.rs index 
16e25089be..bd3ff19b3f 100644 --- a/vm/src/protocol/mod.rs +++ b/vm/src/protocol/mod.rs @@ -3,6 +3,6 @@ mod iter; mod mapping; mod object; -pub use buffer::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer}; +pub use buffer::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, VecBuffer}; pub use iter::{PyIter, PyIterIter, PyIterReturn}; pub use mapping::{PyMapping, PyMappingMethods}; diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index a9f9899e25..b158a7fc5a 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -69,9 +69,8 @@ mod _io { use super::*; use crate::common::{ - borrow::{BorrowedValue, BorrowedValueMut}, lock::{ - PyMappedThreadMutexGuard, PyMutex, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, + PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, PyThreadMutex, PyThreadMutexGuard, }, }; @@ -84,11 +83,13 @@ mod _io { ArgBytesLike, ArgIterable, ArgMemoryBuffer, FuncArgs, IntoPyObject, OptionalArg, OptionalOption, }, - protocol::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn}, - types::{AsBuffer, Constructor, Destructor, IterNext, Iterable}, + protocol::{ + BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, VecBuffer, + }, + types::{Constructor, Destructor, IterNext, Iterable}, utils::Either, vm::{ReprGuard, VirtualMachine}, - IdProtocol, PyContext, PyObject, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, + IdProtocol, PyContext, PyObject, PyObjectRef, PyObjectWrap, PyRef, PyResult, PyValue, StaticType, TryFromBorrowedObject, TryFromObject, TypeProtocol, }; use bstr::ByteSlice; @@ -875,33 +876,18 @@ mod _io { // TODO: loop if write() raises an interrupt vm.call_method(self.raw.as_ref().unwrap(), "write", (memobj,))? } else { - let options = BufferOptions { - len, - ..Default::default() - }; - // TODO: see if we can encapsulate this pattern in a function in memory.rs like - // fn slice_as_memory(s: &[u8], f: impl FnOnce(PyMemoryViewRef) -> R) -> R - let writebuf = BufferedRawBuffer { - data: std::mem::take(&mut self.buffer).into(), - range: buf_range, - } - .into_ref(vm); - let raw = self.raw.as_ref().unwrap(); - let memobj = PyMemoryView::from_buffer( - PyBuffer { - obj: writebuf.clone().into_object(), - options, - methods: &BUFFERED_RAW_BUFFER_METHODS, - }, - vm, - )? - .into_ref(vm); + let v = std::mem::take(&mut self.buffer); + let writebuf = VecBuffer::new(v).into_ref(vm); + let memobj = + PyMemoryView::from_buffer(writebuf.clone().into_readonly_pybuffer(), vm)? + .into_ref(vm); + let raw = self.raw.as_ref().unwrap(); // TODO: loop if write() raises an interrupt let res = vm.call_method(raw, "write", (memobj.clone(),)); memobj.release(); - self.buffer = std::mem::take(&mut writebuf.data.lock()); + self.buffer = writebuf.take(); res? 
}; @@ -1114,24 +1100,10 @@ mod _io { let res = match v { Either::A(v) => { let v = v.unwrap_or(&mut self.buffer); - let options = BufferOptions { - len, - readonly: false, - ..Default::default() - }; - // TODO: see if we can encapsulate this pattern in a function in memory.rs like - // fn slice_as_memory(s: &[u8], f: impl FnOnce(PyMemoryViewRef) -> R) -> R - let readbuf = BufferedRawBuffer { - data: std::mem::take(v).into(), - range: buf_range, - } - .into_ref(vm); - let memobj = PyMemoryView::from_buffer( - PyBuffer { - obj: readbuf.clone().into_object(), - options, - methods: &BUFFERED_RAW_BUFFER_METHODS, - }, + let readbuf = VecBuffer::new(std::mem::take(v)).into_ref(vm); + let memobj = PyMemoryView::from_buffer_range( + readbuf.clone().into_pybuffer(), + buf_range, vm, )? .into_ref(vm); @@ -1141,7 +1113,7 @@ mod _io { vm.call_method(self.raw.as_ref().unwrap(), "readinto", (memobj.clone(),)); memobj.release(); - std::mem::swap(v, &mut readbuf.data.lock()); + std::mem::swap(v, &mut readbuf.take()); res? } @@ -1320,55 +1292,6 @@ mod _io { } } - // this is a bit fancier than what CPython does, but in CPython if you store - // the memoryobj for the buffer until after the BufferedIO is destroyed, you - // can get a use-after-free, so this is a bit safe - #[pyclass(noattr, module = false, name = "_buffered_raw_buffer")] - #[derive(Debug, PyValue)] - struct BufferedRawBuffer { - data: PyMutex>, - range: Range, - } - #[pyimpl(with(AsBuffer))] - impl BufferedRawBuffer {} - - static BUFFERED_RAW_BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |zelf| { - let zelf = zelf.downcast_ref::().unwrap(); - BorrowedValue::map(zelf.data.lock().into(), |x| x.as_slice()) - }, - obj_bytes_mut: |zelf| { - let zelf = zelf.downcast_ref::().unwrap(); - BorrowedValueMut::map(zelf.data.lock().into(), |x| x.as_mut_slice()) - }, - contiguous: Some(|zelf| { - let zelf = zelf.downcast_ref::().unwrap(); - BorrowedValue::map(zelf.data.lock().into(), |x| &x[zelf.range.clone()]) - }), - contiguous_mut: Some(|zelf| { - let zelf = zelf.downcast_ref::().unwrap(); - BorrowedValueMut::map(zelf.data.lock().into(), |x| &mut x[zelf.range.clone()]) - }), - collect_bytes: None, - // TODO: export counting to make sure no exports before retrive data to buffer - release: None, - retain: None, - }; - - impl AsBuffer for BufferedRawBuffer { - fn as_buffer(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { - let buf = PyBuffer::new( - zelf.to_owned().into_object(), - BufferOptions { - len: zelf.range.end - zelf.range.start, - ..Default::default() - }, - &BUFFERED_RAW_BUFFER_METHODS, - ); - Ok(buf) - } - } - pub fn get_offset(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { let int = vm.to_index(&obj)?; int.as_bigint().try_into().map_err(|_| { @@ -3377,23 +3300,23 @@ mod _io { } static BYTES_IO_BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |zelf| { - let zelf = zelf.downcast_ref::().unwrap(); + obj_bytes: |buffer| { + let zelf = buffer.obj_as::(); PyRwLockReadGuard::map(zelf.buffer.read(), |x| x.cursor.get_ref().as_slice()).into() }, - obj_bytes_mut: |zelf| { - let zelf = zelf.downcast_ref::().unwrap(); + obj_bytes_mut: |buffer| { + let zelf = buffer.obj_as::(); PyRwLockWriteGuard::map(zelf.buffer.write(), |x| x.cursor.get_mut().as_mut_slice()) .into() }, contiguous: None, contiguous_mut: None, collect_bytes: None, - release: Some(|zelf| { - zelf.downcast_ref::().unwrap().exports.fetch_sub(1); + release: Some(|buffer| { + buffer.obj_as::().exports.fetch_sub(1); }), - retain: Some(|zelf| { - 
zelf.downcast_ref::().unwrap().exports.fetch_add(1); + retain: Some(|buffer| { + buffer.obj_as::().exports.fetch_add(1); }), }; diff --git a/vm/src/types/zoo.rs b/vm/src/types/zoo.rs index f780343f4f..d2feffb158 100644 --- a/vm/src/types/zoo.rs +++ b/vm/src/types/zoo.rs @@ -5,7 +5,7 @@ use crate::builtins::{ pytype::{self, PyTypeRef}, range, set, singletons, slice, staticmethod, traceback, tuple, weakproxy, weakref, zip, }; -use crate::{PyContext, StaticType}; +use crate::{PyContext, StaticType, protocol::VecBuffer}; /// Holder of references to builtin types. #[derive(Debug, Clone)] @@ -81,6 +81,7 @@ pub struct TypeZoo { pub none_type: PyTypeRef, pub not_implemented_type: PyTypeRef, pub generic_alias_type: PyTypeRef, + pub vec_buffer_type: PyTypeRef, } impl TypeZoo { @@ -167,6 +168,7 @@ impl TypeZoo { none_type: singletons::PyNone::init_bare_type().clone(), not_implemented_type: singletons::PyNotImplemented::init_bare_type().clone(), generic_alias_type: genericalias::PyGenericAlias::init_bare_type().clone(), + vec_buffer_type: VecBuffer::init_bare_type().clone(), } } From a3c559709de4224fe370cbb98b0a7c9038141ce5 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Mon, 25 Oct 2021 17:10:01 +0200 Subject: [PATCH 05/20] fix deadlock --- extra_tests/snippets/memoryview.py | 6 +- vm/src/builtins/memory.rs | 113 +++++++++++++++-------------- vm/src/protocol/buffer.rs | 17 +++-- vm/src/stdlib/io.rs | 17 ++--- vm/src/types/zoo.rs | 2 +- 5 files changed, 81 insertions(+), 74 deletions(-) diff --git a/extra_tests/snippets/memoryview.py b/extra_tests/snippets/memoryview.py index 121028d418..ca50ece56c 100644 --- a/extra_tests/snippets/memoryview.py +++ b/extra_tests/snippets/memoryview.py @@ -45,6 +45,8 @@ def test_slice(): assert m[::2][1:-1].tobytes() == b'357' assert m[::2][::2].tobytes() == b'159' assert m[::2][1::2].tobytes() == b'37' + assert m[::-1].tobytes() == b'987654321' + assert m[::-2].tobytes() == b'97531' test_slice() @@ -56,9 +58,9 @@ def test_resizable(): m.release() b.append(6) m2 = memoryview(b) - m4 = memoryview(b) + m4 = memoryview(m2) assert_raises(BufferError, lambda: b.append(5)) - m3 = memoryview(b) + m3 = memoryview(m2) assert_raises(BufferError, lambda: b.append(5)) m2.release() assert_raises(BufferError, lambda: b.append(5)) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index e855b482ec..6e07cd2fee 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -106,21 +106,23 @@ impl PyMemoryView { } pub fn from_buffer_range( - mut buffer: PyBuffer, + buffer: PyBuffer, range: std::ops::Range, vm: &VirtualMachine, ) -> PyResult { let options = buffer.options.clone(); let itemsize = options.itemsize; let format_spec = Self::parse_format(&options.format, vm)?; - buffer.options.len = range.len(); Ok(PyMemoryView { buffer, released: AtomicCell::new(false), start: range.start * itemsize, stop: range.end * itemsize, step: 1, - options, + options: BufferOptions { + len: range.len(), + ..options + }, format_spec, hash: OnceCell::new(), } @@ -130,6 +132,7 @@ impl PyMemoryView { #[pymethod] pub fn release(&self) { if self.released.compare_exchange(false, true).is_ok() { + self.buffer.manually_release = true; self.buffer.release(); } } @@ -379,66 +382,69 @@ impl PyMemoryView { let (range, step, is_negative_step) = SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(zelf.options.len); - items.contiguous_or_collect(|bytes| { - assert_eq!(bytes.len(), len * itemsize); + // TODO: try borrow the vec, now cause deadlock + // 
items.contiguous_or_collect(|bytes| { + let mut bytes = vec![]; + items.collect_bytes(&mut bytes); + assert_eq!(bytes.len(), len * itemsize); - if !is_negative_step && step == Some(1) { - if range.end - range.start != len { - return diff_err(); - } + if !is_negative_step && step == Some(1) { + if range.end - range.start != len { + return diff_err(); + } - if let Some(mut buffer) = zelf.as_contiguous_mut() { - buffer[range.start * itemsize..range.end * itemsize].copy_from_slice(&bytes); - return Ok(()); - } + if let Some(mut buffer) = zelf.as_contiguous_mut() { + buffer[range.start * itemsize..range.end * itemsize].copy_from_slice(&bytes); + return Ok(()); } + } - if let Some(step) = step { - let slicelen = if range.end > range.start { - (range.end - range.start - 1) / step + 1 - } else { - 0 - }; + if let Some(step) = step { + let slicelen = if range.end > range.start { + (range.end - range.start - 1) / step + 1 + } else { + 0 + }; - if slicelen != len { - return diff_err(); - } + if slicelen != len { + return diff_err(); + } - let indexes = if is_negative_step { - itertools::Either::Left(range.rev().step_by(step)) - } else { - itertools::Either::Right(range.step_by(step)) - }; + let indexes = if is_negative_step { + itertools::Either::Left(range.rev().step_by(step)) + } else { + itertools::Either::Right(range.step_by(step)) + }; - let item_index = (0..len).step_by(itemsize); + let item_index = (0..len).step_by(itemsize); - let mut buffer = zelf.buffer.obj_bytes_mut(); + let mut buffer = zelf.buffer.obj_bytes_mut(); - indexes - .map(|i| zelf.get_pos(i as isize).unwrap()) - .zip(item_index) - .for_each(|(i, item_i)| { - buffer[i..i + itemsize].copy_from_slice(&bytes[item_i..item_i + itemsize]); - }); + indexes + .map(|i| zelf.get_pos(i as isize).unwrap()) + .zip(item_index) + .for_each(|(i, item_i)| { + buffer[i..i + itemsize].copy_from_slice(&bytes[item_i..item_i + itemsize]); + }); + Ok(()) + } else { + let slicelen = if range.start < range.end { 1 } else { 0 }; + if match len { + 0 => slicelen == 0, + 1 => { + let mut buffer = zelf.buffer.obj_bytes_mut(); + let i = zelf.get_pos(range.start as isize).unwrap(); + buffer[i..i + itemsize].copy_from_slice(&bytes); + true + } + _ => false, + } { Ok(()) } else { - let slicelen = if range.start < range.end { 1 } else { 0 }; - if match len { - 0 => slicelen == 0, - 1 => { - let mut buffer = zelf.buffer.obj_bytes_mut(); - let i = zelf.get_pos(range.start as isize).unwrap(); - buffer[i..i + itemsize].copy_from_slice(&bytes); - true - } - _ => false, - } { - Ok(()) - } else { - diff_err() - } + diff_err() } - }) + } + // }) } #[pymethod(magic)] @@ -460,8 +466,7 @@ impl PyMemoryView { #[pymethod(magic)] fn len(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm) - .map(|_| (self.stop - self.start) / self.options.itemsize / self.step.abs() as usize) + self.try_not_released(vm).map(|_| self.options.len) } #[pymethod] @@ -558,7 +563,7 @@ impl PyMemoryView { itemsize, len: bytelen / itemsize, format: format.to_string().into(), - ..self.buffer.options.clone() + ..self.options.clone() }, format_spec, hash: OnceCell::new(), diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index f3194dbaec..4963c3694c 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -45,6 +45,8 @@ impl Debug for BufferMethods { pub struct PyBuffer { pub obj: PyObjectRef, pub options: BufferOptions, + // if true, don't call release when drop + pub manually_release: bool, methods: &'static BufferMethods, } @@ -53,6 +55,7 @@ impl 
PyBuffer { let zelf = Self { obj, options, + manually_release: false, methods, }; zelf.retain(); @@ -174,15 +177,13 @@ impl Default for BufferOptions { impl TryFromBorrowedObject for PyBuffer { fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { - let obj_cls = obj.class(); - for cls in obj_cls.iter_mro() { - if let Some(f) = cls.slots.as_buffer.as_ref() { - return f(obj, vm); - } + let cls = obj.class(); + if let Some(f) = cls.mro_find_map(|cls| cls.slots.as_buffer) { + return f(obj, vm); } Err(vm.new_type_error(format!( "a bytes-like object is required, not '{}'", - obj_cls.name() + cls.name() ))) } } @@ -192,7 +193,9 @@ impl TryFromBorrowedObject for PyBuffer { // but it is not supported by Rust impl Drop for PyBuffer { fn drop(&mut self) { - self.release(); + if !self.manually_release { + self.release(); + } } } diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index b158a7fc5a..092ff8eafe 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -68,11 +68,9 @@ impl TryFromObject for Fildes { mod _io { use super::*; - use crate::common::{ - lock::{ - PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, - PyThreadMutex, PyThreadMutexGuard, - }, + use crate::common::lock::{ + PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, PyThreadMutex, + PyThreadMutexGuard, }; use crate::{ builtins::{ @@ -89,8 +87,8 @@ mod _io { types::{Constructor, Destructor, IterNext, Iterable}, utils::Either, vm::{ReprGuard, VirtualMachine}, - IdProtocol, PyContext, PyObject, PyObjectRef, PyObjectWrap, PyRef, PyResult, - PyValue, StaticType, TryFromBorrowedObject, TryFromObject, TypeProtocol, + IdProtocol, PyContext, PyObject, PyObjectRef, PyObjectWrap, PyRef, PyResult, PyValue, + StaticType, TryFromBorrowedObject, TryFromObject, TypeProtocol, }; use bstr::ByteSlice; use crossbeam_utils::atomic::AtomicCell; @@ -879,12 +877,11 @@ mod _io { let v = std::mem::take(&mut self.buffer); let writebuf = VecBuffer::new(v).into_ref(vm); let memobj = - PyMemoryView::from_buffer(writebuf.clone().into_readonly_pybuffer(), vm)? + PyMemoryView::from_buffer_range(writebuf.clone().into_readonly_pybuffer(), buf_range, vm)? .into_ref(vm); - let raw = self.raw.as_ref().unwrap(); // TODO: loop if write() raises an interrupt - let res = vm.call_method(raw, "write", (memobj.clone(),)); + let res = vm.call_method(self.raw.as_ref().unwrap(), "write", (memobj.clone(),)); memobj.release(); self.buffer = writebuf.take(); diff --git a/vm/src/types/zoo.rs b/vm/src/types/zoo.rs index d2feffb158..fa5ce0cc06 100644 --- a/vm/src/types/zoo.rs +++ b/vm/src/types/zoo.rs @@ -5,7 +5,7 @@ use crate::builtins::{ pytype::{self, PyTypeRef}, range, set, singletons, slice, staticmethod, traceback, tuple, weakproxy, weakref, zip, }; -use crate::{PyContext, StaticType, protocol::VecBuffer}; +use crate::{protocol::VecBuffer, PyContext, StaticType}; /// Holder of references to builtin types. 
#[derive(Debug, Clone)] From a39b260823f09705028f183cfda01fca59cfe109 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Mon, 25 Oct 2021 17:26:54 +0200 Subject: [PATCH 06/20] fix memoryview avoid double release buffer --- vm/src/builtins/bytearray.rs | 6 ++++++ vm/src/builtins/memory.rs | 20 +++++++++++++++----- vm/src/protocol/buffer.rs | 19 +++++++++++++------ vm/src/stdlib/io.rs | 15 +++++++-------- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index fe6a829ade..8a98e0e2d8 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -115,6 +115,12 @@ impl PyByteArray { Ok(()) } + #[cfg(debug_assertions)] + #[pyproperty] + fn exports(&self) -> usize { + self.exports.load() + } + #[inline] fn inner(&self) -> PyRwLockReadGuard<'_, PyBytesInner> { self.inner.read() diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 6e07cd2fee..eb29d3323d 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -19,7 +19,7 @@ use crate::{ use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; use num_traits::ToPrimitive; -use std::fmt::Debug; +use std::{fmt::Debug, mem::ManuallyDrop}; #[derive(FromArgs)] pub struct PyMemoryViewNewArgs { @@ -29,7 +29,8 @@ pub struct PyMemoryViewNewArgs { #[pyclass(module = false, name = "memoryview")] #[derive(Debug)] pub struct PyMemoryView { - buffer: PyBuffer, + // avoid double release when memoryview had released the buffer before drop + buffer: ManuallyDrop, // the released memoryview does not mean the buffer is destoryed // because the possible another memeoryview is viewing from it released: AtomicCell, @@ -93,7 +94,7 @@ impl PyMemoryView { let stop = options.len * options.itemsize; let format_spec = Self::parse_format(&options.format, vm)?; Ok(PyMemoryView { - buffer, + buffer: ManuallyDrop::new(buffer), released: AtomicCell::new(false), start: 0, stop, @@ -114,7 +115,7 @@ impl PyMemoryView { let itemsize = options.itemsize; let format_spec = Self::parse_format(&options.format, vm)?; Ok(PyMemoryView { - buffer, + buffer: ManuallyDrop::new(buffer), released: AtomicCell::new(false), start: range.start * itemsize, stop: range.end * itemsize, @@ -132,7 +133,6 @@ impl PyMemoryView { #[pymethod] pub fn release(&self) { if self.released.compare_exchange(false, true).is_ok() { - self.buffer.manually_release = true; self.buffer.release(); } } @@ -689,6 +689,16 @@ impl AsBuffer for PyMemoryView { } } +impl Drop for PyMemoryView { + fn drop(&mut self) { + if self.released.load() { + unsafe { self.buffer.drop_without_release() }; + } else { + unsafe { ManuallyDrop::drop(&mut self.buffer) }; + } + } +} + impl AsMapping for PyMemoryView { fn as_mapping(_zelf: &PyObjectView, _vm: &VirtualMachine) -> PyMappingMethods { PyMappingMethods { diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index 4963c3694c..8c9fa960ee 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -12,6 +12,7 @@ use crate::{ }; use std::{borrow::Cow, fmt::Debug}; +#[allow(clippy::type_complexity)] pub struct BufferMethods { // always reflecting the whole bytes of the most top object pub obj_bytes: fn(&PyBuffer) -> BorrowedValue<[u8]>, @@ -45,8 +46,6 @@ impl Debug for BufferMethods { pub struct PyBuffer { pub obj: PyObjectRef, pub options: BufferOptions, - // if true, don't call release when drop - pub manually_release: bool, methods: &'static BufferMethods, } @@ -55,7 +54,6 @@ impl PyBuffer { let zelf = Self { obj, options, - 
manually_release: false, methods, }; zelf.retain(); @@ -132,12 +130,23 @@ impl PyBuffer { .unwrap_or_else(|| self.obj_bytes_mut()) } + // WARNING: should always try to clone from the contiguous first pub(crate) fn _collect_bytes(&self, buf: &mut Vec) { self.methods .collect_bytes .map(|f| f(self, buf)) .unwrap_or_else(|| buf.extend_from_slice(&self.obj_bytes())) } + + // drop PyBuffer without calling release + // after this function, the owner should use forget() + // or wrap PyBuffer in the ManaullyDrop to prevent drop() + pub(crate) unsafe fn drop_without_release(&mut self) { + // self.obj = PyObjectRef::from_raw(0 as *const PyObject); + // self.options = BufferOptions::default(); + std::ptr::drop_in_place(&mut self.obj); + std::ptr::drop_in_place(&mut self.options); + } } #[derive(Debug, Clone)] @@ -193,9 +202,7 @@ impl TryFromBorrowedObject for PyBuffer { // but it is not supported by Rust impl Drop for PyBuffer { fn drop(&mut self) { - if !self.manually_release { - self.release(); - } + self.release(); } } diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 092ff8eafe..e63f0dff2d 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -876,9 +876,12 @@ mod _io { } else { let v = std::mem::take(&mut self.buffer); let writebuf = VecBuffer::new(v).into_ref(vm); - let memobj = - PyMemoryView::from_buffer_range(writebuf.clone().into_readonly_pybuffer(), buf_range, vm)? - .into_ref(vm); + let memobj = PyMemoryView::from_buffer_range( + writebuf.clone().into_readonly_pybuffer(), + buf_range, + vm, + )? + .into_ref(vm); // TODO: loop if write() raises an interrupt let res = vm.call_method(self.raw.as_ref().unwrap(), "write", (memobj.clone(),)); @@ -3286,11 +3289,7 @@ mod _io { len: self.buffer.read().cursor.get_ref().len(), ..Default::default() }; - let buffer = PyBuffer::new( - self.clone().into_object(), - options, - &BYTES_IO_BUFFER_METHODS, - ); + let buffer = PyBuffer::new(self.into_object(), options, &BYTES_IO_BUFFER_METHODS); let view = PyMemoryView::from_buffer(buffer, vm)?; Ok(view) } From 27d67c2568e3a1880a5ebb8392d8b033443ed8ea Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 28 Oct 2021 07:38:04 +0200 Subject: [PATCH 07/20] impl ndim buffer support --- stdlib/src/array.rs | 3 - vm/src/builtins/memory.rs | 24 +--- vm/src/protocol/buffer.rs | 275 +++++++++++++++++++++++++------------- 3 files changed, 182 insertions(+), 120 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 6132ed9c2e..8e503488e4 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -1242,9 +1242,6 @@ mod array { retain: Some(|buffer| { buffer.obj_as::().exports.fetch_add(1); }), - contiguous: None, - contiguous_mut: None, - collect_bytes: None, }; impl AsMapping for PyArray { diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index eb29d3323d..5d318663ad 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -64,24 +64,6 @@ impl Constructor for PyMemoryView { #[pyimpl(with(Hashable, Comparable, AsBuffer, AsMapping, Constructor))] impl PyMemoryView { - #[cfg(debug_assertions)] - fn validate(self) -> Self { - let options = &self.options; - let bytes_len = options.len * options.itemsize - + (self.step.abs() as usize - 1) * options.itemsize * options.len.saturating_sub(1); - let buffer_len = self.buffer.obj_bytes().len(); - assert!(self.start <= self.stop); - assert!(self.step != 0); - assert!(self.start + bytes_len <= buffer_len); - assert!(self.stop <= buffer_len); - assert!(self.stop - self.start == bytes_len); - self - } - 
#[cfg(not(debug_assertions))] - fn validate(self) -> Self { - self - } - fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult { FormatSpec::parse(format.as_bytes(), vm) } @@ -668,9 +650,9 @@ impl PyMemoryView { static BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes: |buffer| buffer.obj_as::().buffer.obj_bytes(), obj_bytes_mut: |buffer| buffer.obj_as::().buffer.obj_bytes_mut(), - contiguous: Some(|buffer| buffer.obj_as::().contiguous()), - contiguous_mut: Some(|buffer| buffer.obj_as::().contiguous_mut()), - collect_bytes: Some(|buffer, buf| buffer.obj_as::().collect_bytes(buf)), + // contiguous: Some(|buffer| buffer.obj_as::().contiguous()), + // contiguous_mut: Some(|buffer| buffer.obj_as::().contiguous_mut()), + // collect_bytes: Some(|buffer, buf| buffer.obj_as::().collect_bytes(buf)), release: Some(|buffer| buffer.obj_as::().buffer.release()), retain: Some(|buffer| buffer.obj_as::().buffer.retain()), }; diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index 8c9fa960ee..e7245e7a21 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -1,5 +1,7 @@ //! Buffer protocol +use itertools::Itertools; + use crate::{ builtins::PyTypeRef, common::{ @@ -10,20 +12,12 @@ use crate::{ PyObject, PyObjectPayload, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TypeProtocol, VirtualMachine, }; -use std::{borrow::Cow, fmt::Debug}; +use std::{borrow::Cow, fmt::Debug, ops::Range}; #[allow(clippy::type_complexity)] pub struct BufferMethods { - // always reflecting the whole bytes of the most top object pub obj_bytes: fn(&PyBuffer) -> BorrowedValue<[u8]>, - // always reflecting the whole bytes of the most top object pub obj_bytes_mut: fn(&PyBuffer) -> BorrowedValueMut<[u8]>, - // GUARANTEE: called only if the buffer option is contiguous - pub contiguous: Option BorrowedValue<[u8]>>, - // GUARANTEE: called only if the buffer option is contiguous - pub contiguous_mut: Option BorrowedValueMut<[u8]>>, - // collect bytes to buf when buffer option is not contiguous - pub collect_bytes: Option)>, pub release: Option, pub retain: Option, } @@ -33,9 +27,6 @@ impl Debug for BufferMethods { f.debug_struct("BufferMethods") .field("obj_bytes", &(self.obj_bytes as usize)) .field("obj_bytes_mut", &(self.obj_bytes_mut as usize)) - .field("contiguous", &self.contiguous.map(|x| x as usize)) - .field("contiguous_mut", &self.contiguous_mut.map(|x| x as usize)) - .field("collect_bytes", &self.collect_bytes.map(|x| x as usize)) .field("release", &self.release.map(|x| x as usize)) .field("retain", &self.retain.map(|x| x as usize)) .finish() @@ -53,7 +44,7 @@ impl PyBuffer { pub fn new(obj: PyObjectRef, options: BufferOptions, methods: &'static BufferMethods) -> Self { let zelf = Self { obj, - options, + options: options.validate(), methods, }; zelf.retain(); @@ -61,30 +52,32 @@ impl PyBuffer { } pub fn as_contiguous(&self) -> Option> { - self.options.contiguous.then(|| self._contiguous()) + self.options.is_contiguous().then(|| self.obj_bytes()) } pub fn as_contiguous_mut(&self) -> Option> { - self.options.contiguous.then(|| self._contiguous_mut()) + self.options.is_contiguous().then(|| self.obj_bytes_mut()) } - pub fn collect_bytes(&self, buf: &mut Vec) { - if self.options.contiguous { - buf.extend_from_slice(&self._contiguous()); + pub fn collect(&self, buf: &mut Vec) { + if self.options.is_contiguous() { + buf.extend_from_slice(&self.obj_bytes()); } else { - self._collect_bytes(buf); + let bytes = &*self.obj_bytes(); + self.options 
+ .for_each_segment(|range| buf.extend_from_slice(&bytes[range])); } } pub fn contiguous_or_collect R>(&self, f: F) -> R { let borrowed; let mut collected; - let v = if self.options.contiguous { - borrowed = self._contiguous(); + let v = if self.options.is_contiguous() { + borrowed = self.obj_bytes(); &*borrowed } else { collected = vec![]; - self._collect_bytes(&mut collected); + self.collect(&mut collected); &collected }; f(v) @@ -114,76 +107,15 @@ impl PyBuffer { } } - // SAFETY: should only called if option has contiguous - pub(crate) fn _contiguous(&self) -> BorrowedValue<[u8]> { - self.methods - .contiguous - .map(|f| f(self)) - .unwrap_or_else(|| self.obj_bytes()) - } - - // SAFETY: should only called if option has contiguous - pub(crate) fn _contiguous_mut(&self) -> BorrowedValueMut<[u8]> { - self.methods - .contiguous_mut - .map(|f| f(self)) - .unwrap_or_else(|| self.obj_bytes_mut()) - } - - // WARNING: should always try to clone from the contiguous first - pub(crate) fn _collect_bytes(&self, buf: &mut Vec) { - self.methods - .collect_bytes - .map(|f| f(self, buf)) - .unwrap_or_else(|| buf.extend_from_slice(&self.obj_bytes())) - } - // drop PyBuffer without calling release // after this function, the owner should use forget() // or wrap PyBuffer in the ManaullyDrop to prevent drop() pub(crate) unsafe fn drop_without_release(&mut self) { - // self.obj = PyObjectRef::from_raw(0 as *const PyObject); - // self.options = BufferOptions::default(); std::ptr::drop_in_place(&mut self.obj); std::ptr::drop_in_place(&mut self.options); } } -#[derive(Debug, Clone)] -pub struct BufferOptions { - // buf - pub len: usize, - pub readonly: bool, - pub itemsize: usize, - pub format: Cow<'static, str>, - pub ndim: usize, // TODO: support multiple dimension array - pub shape: Vec, - pub strides: Vec, - // suboffsets - - // RustPython fields - pub contiguous: bool, -} - -impl BufferOptions { - pub const DEFAULT: Self = BufferOptions { - len: 0, - readonly: true, - itemsize: 1, - format: Cow::Borrowed("B"), - ndim: 1, - shape: Vec::new(), - strides: Vec::new(), - contiguous: true, - }; -} - -impl Default for BufferOptions { - fn default() -> Self { - Self::DEFAULT - } -} - impl TryFromBorrowedObject for PyBuffer { fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { let cls = obj.class(); @@ -206,6 +138,168 @@ impl Drop for PyBuffer { } } +#[derive(Debug, Clone)] +pub struct BufferOptions { + /// product(shape) * itemsize + /// NOT the bytes length if buffer is discontiguous + pub len: usize, + pub readonly: bool, + pub itemsize: usize, + pub format: Cow<'static, str>, + // pub ndim: usize, + /// (shape, stride, suboffset) for each dimension + pub dim_descriptor: Vec<(usize, isize, isize)>, + // pub shape: Vec, + // pub strides: Vec, + // pub suboffsets: Vec, +} + +impl BufferOptions { + pub fn simple(bytes_len: usize, readonly: bool) -> Self { + Self { + len: bytes_len, + readonly, + itemsize: 1, + format: Cow::Borrowed("B"), + dim_descriptor: vec![(bytes_len, 1, 0)], + } + } + + pub fn format( + bytes_len: usize, + readonly: bool, + itemsize: usize, + format: Cow<'static, str>, + ) -> Self { + Self { + len: bytes_len, + readonly, + itemsize, + format, + dim_descriptor: vec![(bytes_len / itemsize, itemsize as isize, 0)], + } + } + + #[cfg(debug_assertions)] + pub fn validate(self) -> Self { + assert!(self.itemsize != 0); + assert!(self.ndim() != 0); + let mut shape_product = 1; + for (shape, stride, suboffset) in self.dim_descriptor { + shape_product *= shape; + 
assert!(suboffset >= 0); + assert!(stride != 0); + if stride.is_negative() { + // if stride is negative, we access memory in reversed order + // so the suboffset should be n*stride shift the index to the tail + assert!(suboffset == -stride * shape as isize); + } else { + assert!(suboffset == 0); + } + } + assert!(shape_product == self.len); + self + } + + #[cfg(not(debug_assertions))] + pub fn validate(self) -> Self { + self + } + + pub fn ndim(&self) -> usize { + self.dim_descriptor.len() + } + + pub fn is_contiguous(&self) -> bool { + if self.len == 0 { + return true; + } + let mut sd = self.itemsize; + for (shape, stride, _) in self.dim_descriptor.into_iter().rev() { + if shape > 1 && stride != sd as isize { + return false; + } + sd *= shape; + } + true + } + + /// this function do not check the bound + pub fn get_position(&self, indices: &[usize]) -> usize { + let pos = 0; + for (&i, (_, stride, suboffset)) in indices.iter().zip_eq(self.dim_descriptor) { + pos += (i as isize * stride + suboffset) as usize; + } + pos + } + + pub fn for_each(&self, f: F) + where + F: FnMut(usize), + { + self._for_each(0, 0, f); + } + + fn _for_each(&self, mut index: isize, dim: usize, f: F) + where + F: FnMut(usize), + { + let (shape, stride, suboffset) = self.dim_descriptor[dim]; + if dim + 1 == self.ndim() { + for i in 0..shape { + f((index + suboffset) as usize); + index += stride; + } + return; + } + for i in 0..shape { + self._for_each(index + suboffset, dim + 1, f); + index += stride; + } + } + + pub fn for_each_segment(&self, f: F) + where + F: FnMut(Range), + { + if self.is_last_dim_contiguous() { + self._for_each_segment::<_, true>(0, 0, f); + } else { + self._for_each_segment::<_, false>(0, 0, f) + } + } + + fn _for_each_segment(&self, mut index: isize, dim: usize, f: F) + where + F: FnMut(Range), + { + let (shape, stride, suboffset) = self.dim_descriptor[dim]; + if dim + 1 == self.ndim() { + if CONTI { + f(index as usize..index as usize + shape * self.itemsize); + } else { + for i in 0..shape { + let pos = (index + suboffset) as usize; + f(pos..pos + self.itemsize); + index += stride; + } + } + return; + } + for i in 0..shape { + self._for_each_segment(index + suboffset, dim + 1, f); + index += stride; + } + } + + fn is_last_dim_contiguous(&self) -> bool { + let (_, stride, suboffset) = self.dim_descriptor[self.ndim() - 1]; + suboffset == 0 && stride == self.itemsize as isize + } + + // TODO: support fortain order +} + pub trait BufferResizeGuard<'a> { type Resizable: 'a; fn try_resizable(&'a self, vm: &VirtualMachine) -> PyResult; @@ -236,11 +330,7 @@ impl PyRef { let len = self.0.lock().len(); PyBuffer::new( self.into_object(), - BufferOptions { - len, - readonly: false, - ..Default::default() - }, + BufferOptions::simple(len, false), &VEC_BUFFER_METHODS, ) } @@ -248,11 +338,7 @@ impl PyRef { let len = self.0.lock().len(); PyBuffer::new( self.into_object(), - BufferOptions { - len, - readonly: true, - ..Default::default() - }, + BufferOptions::simple(len, true), &VEC_BUFFER_METHODS, ) } @@ -265,9 +351,6 @@ static VEC_BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes_mut: |buffer| { PyMutexGuard::map(buffer.obj_as::().0.lock(), |x| x.as_mut_slice()).into() }, - contiguous: None, - contiguous_mut: None, - collect_bytes: None, release: None, retain: None, }; From aef1679beb140f9bb15181772ae821ff3e9819d5 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Mon, 1 Nov 2021 14:48:04 +0200 Subject: [PATCH 08/20] first implement nd array for buffer and memoryview --- stdlib/src/array.rs | 47 +- 
vm/src/builtins/bytearray.rs | 55 +-- vm/src/builtins/bytes.rs | 20 +- vm/src/builtins/memory.rs | 911 ++++++++++++++++++++--------------- vm/src/function/buffer.rs | 14 +- vm/src/protocol/buffer.rs | 252 ++++++---- vm/src/protocol/mod.rs | 2 +- vm/src/stdlib/io.rs | 41 +- vm/src/stdlib/os.rs | 2 +- vm/src/types/zoo.rs | 4 +- 10 files changed, 778 insertions(+), 570 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 8e503488e4..ea8429046d 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -3,6 +3,7 @@ pub(crate) use array::make_module; #[pymodule(name = "array")] mod array { use crate::common::{ + atomic::{self, AtomicUsize}, lock::{ PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, @@ -19,7 +20,7 @@ mod array { ArgBytesLike, ArgIntoFloat, ArgIterable, IntoPyObject, IntoPyResult, OptionalArg, }, protocol::{ - BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, + BufferDescriptor, BufferMethods, BufferResizeGuard, PyBuffer, PyIterReturn, PyMappingMethods, }, sliceable::{PySliceableSequence, PySliceableSequenceMut, SaturatedSlice, SequenceIndex}, @@ -30,7 +31,6 @@ mod array { IdProtocol, PyComparisonValue, PyObject, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol, VirtualMachine, }; - use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; use num_traits::ToPrimitive; use std::cmp::Ordering; @@ -614,7 +614,7 @@ mod array { #[derive(Debug, PyValue)] pub struct PyArray { array: PyRwLock, - exports: AtomicCell, + exports: AtomicUsize, } pub type PyArrayRef = PyRef; @@ -623,7 +623,7 @@ mod array { fn from(array: ArrayContentType) -> Self { PyArray { array: PyRwLock::new(array), - exports: AtomicCell::new(0), + exports: AtomicUsize::new(0), } } } @@ -1220,13 +1220,12 @@ mod array { let array = zelf.read(); let buf = PyBuffer::new( zelf.to_owned().into_object(), - BufferOptions { - readonly: false, - len: array.len(), - itemsize: array.itemsize(), - format: array.typecode_str().into(), - ..Default::default() - }, + BufferDescriptor::format( + array.len(), + false, + array.itemsize(), + array.typecode_str().into(), + ), &BUFFER_METHODS, ); Ok(buf) @@ -1236,12 +1235,18 @@ mod array { static BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes: |buffer| buffer.obj_as::().get_bytes().into(), obj_bytes_mut: |buffer| buffer.obj_as::().get_bytes_mut().into(), - release: Some(|buffer| { - buffer.obj_as::().exports.fetch_sub(1); - }), - retain: Some(|buffer| { - buffer.obj_as::().exports.fetch_add(1); - }), + release: |buffer| { + buffer + .obj_as::() + .exports + .fetch_sub(1, atomic::Ordering::Release); + }, + retain: |buffer| { + buffer + .obj_as::() + .exports + .fetch_add(1, atomic::Ordering::Release); + }, }; impl AsMapping for PyArray { @@ -1279,7 +1284,7 @@ mod array { impl Iterable for PyArray { fn iter(zelf: PyRef, vm: &VirtualMachine) -> PyResult { Ok(PyArrayIter { - position: AtomicCell::new(0), + position: AtomicUsize::new(0), array: zelf, } .into_object(vm)) @@ -1291,7 +1296,7 @@ mod array { fn try_resizable(&'a self, vm: &VirtualMachine) -> PyResult { let w = self.write(); - if self.exports.load() == 0 { + if self.exports.load(atomic::Ordering::SeqCst) == 0 { Ok(w) } else { Err(vm.new_buffer_error( @@ -1305,7 +1310,7 @@ mod array { #[pyclass(name = "array_iterator")] #[derive(Debug, PyValue)] pub struct PyArrayIter { - position: AtomicCell, + position: AtomicUsize, array: PyArrayRef, } @@ -1315,7 +1320,7 @@ mod array { impl 
IterNextIterable for PyArrayIter {} impl IterNext for PyArrayIter { fn next(zelf: &PyObjectView, vm: &VirtualMachine) -> PyResult { - let pos = zelf.position.fetch_add(1); + let pos = zelf.position.fetch_add(1, atomic::Ordering::SeqCst); let r = if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm)? { PyIterReturn::Return(item) } else { diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 8a98e0e2d8..4dc7efe71c 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -3,10 +3,6 @@ use super::{ PositionIterInternal, PyBytes, PyBytesRef, PyDictRef, PyIntRef, PyStrRef, PyTuple, PyTupleRef, PyTypeRef, }; -use crate::common::lock::{ - PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutex, PyRwLock, PyRwLockReadGuard, - PyRwLockWriteGuard, -}; use crate::{ anystr::{self, AnyStr}, builtins::PyType, @@ -15,9 +11,17 @@ use crate::{ ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, }, + common::{ + atomic::{AtomicUsize, Ordering}, + lock::{ + PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutex, PyRwLock, + PyRwLockReadGuard, PyRwLockWriteGuard, + }, + }, function::{ArgBytesLike, ArgIterable, FuncArgs, IntoPyObject, OptionalArg, OptionalOption}, protocol::{ - BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, PyMappingMethods, + BufferDescriptor, BufferMethods, BufferResizeGuard, PyBuffer, PyIterReturn, + PyMappingMethods, }, sliceable::{PySliceableSequence, PySliceableSequenceMut, SequenceIndex}, types::{ @@ -29,7 +33,6 @@ use crate::{ PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TypeProtocol, VirtualMachine, }; use bstr::ByteSlice; -use crossbeam_utils::atomic::AtomicCell; use std::mem::size_of; /// "bytearray(iterable_of_ints) -> bytearray\n\ @@ -47,7 +50,7 @@ use std::mem::size_of; #[derive(Debug, Default)] pub struct PyByteArray { inner: PyRwLock, - exports: AtomicCell, + exports: AtomicUsize, } pub type PyByteArrayRef = PyRef; @@ -60,7 +63,7 @@ impl PyByteArray { fn from_inner(inner: PyBytesInner) -> Self { PyByteArray { inner: PyRwLock::new(inner), - exports: AtomicCell::new(0), + exports: AtomicUsize::new(0), } } @@ -118,7 +121,7 @@ impl PyByteArray { #[cfg(debug_assertions)] #[pyproperty] fn exports(&self) -> usize { - self.exports.load() + self.exports.load(Ordering::Relaxed) } #[inline] @@ -719,29 +722,27 @@ static BUFFER_METHODS: BufferMethods = BufferMethods { }) .into() }, - release: Some(|buffer| { - buffer.obj_as::().exports.fetch_sub(1); - }), - retain: Some(|buffer| { - buffer.obj_as::().exports.fetch_add(1); - }), - contiguous: None, - contiguous_mut: None, - collect_bytes: None, + release: |buffer| { + buffer + .obj_as::() + .exports + .fetch_sub(1, Ordering::Release); + }, + retain: |buffer| { + buffer + .obj_as::() + .exports + .fetch_add(1, Ordering::Release); + }, }; impl AsBuffer for PyByteArray { fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { - let buffer = PyBuffer::new( + Ok(PyBuffer::new( zelf.to_owned().into_object(), - BufferOptions { - readonly: false, - len: zelf.len(), - ..Default::default() - }, + BufferDescriptor::simple(zelf.len(), false), &BUFFER_METHODS, - ); - Ok(buffer) + )) } } @@ -750,7 +751,7 @@ impl<'a> BufferResizeGuard<'a> for PyByteArray { fn try_resizable(&'a self, vm: &VirtualMachine) -> PyResult { let w = self.inner.upgradable_read(); - if self.exports.load() == 0 { + if self.exports.load(Ordering::SeqCst) == 0 { 
Ok(parking_lot::lock_api::RwLockUpgradableReadGuard::upgrade(w)) } else { Err(vm diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index e1e7e79d5a..e24ec0ad61 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -6,11 +6,14 @@ use crate::{ bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, }, - common::hash::PyHash, + common::{ + hash::PyHash, + lock::PyMutex, + }, function::{ ArgBytesLike, ArgIterable, IntoPyObject, IntoPyResult, OptionalArg, OptionalOption, }, - protocol::{BufferMethods, BufferOptions, PyBuffer, PyIterReturn, PyMappingMethods}, + protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyIterReturn, PyMappingMethods}, types::{ AsBuffer, AsMapping, Callable, Comparable, Constructor, Hashable, IterNext, IterNextIterable, Iterable, PyComparisonOp, Unconstructible, @@ -20,7 +23,6 @@ use crate::{ PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TypeProtocol, VirtualMachine, }; use bstr::ByteSlice; -use rustpython_common::lock::PyMutex; use std::mem::size_of; use std::ops::Deref; @@ -541,21 +543,15 @@ impl PyBytes { static BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes: |buffer| buffer.obj_as::().as_bytes().into(), obj_bytes_mut: |_| panic!(), - contiguous: None, - contiguous_mut: None, - collect_bytes: None, - release: None, - retain: None, + release: |_| {}, + retain: |_| {}, }; impl AsBuffer for PyBytes { fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { let buf = PyBuffer::new( zelf.to_owned().into_object(), - BufferOptions { - len: zelf.len(), - ..Default::default() - }, + BufferDescriptor::simple(zelf.len(), true), &BUFFER_METHODS, ); Ok(buf) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 5d318663ad..5625e64dce 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -1,14 +1,12 @@ -use super::{PyBytes, PyBytesRef, PyList, PyListRef, PySlice, PyStr, PyStrRef, PyTypeRef}; -use crate::common::{ - borrow::{BorrowedValue, BorrowedValueMut}, - hash::PyHash, - lock::OnceCell, +use super::{ + PyBytes, PyBytesRef, PyList, PyListRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, + PyTypeRef, }; use crate::{ bytesinner::bytes_to_hex, function::{FuncArgs, IntoPyObject, OptionalArg}, - protocol::{BufferMethods, BufferOptions, PyBuffer, PyMappingMethods}, - sliceable::{wrap_index, SaturatedSlice, SequenceIndex}, + protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyMappingMethods}, + sliceable::{wrap_index, SequenceIndex}, stdlib::pystruct::FormatSpec, types::{AsBuffer, AsMapping, Comparable, Constructor, Hashable, PyComparisonOp}, utils::Either, @@ -16,10 +14,17 @@ use crate::{ PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TryFromObject, TypeProtocol, VirtualMachine, }; +use crate::{ + common::{ + borrow::{BorrowedValue, BorrowedValueMut}, + hash::PyHash, + lock::OnceCell, + }, +}; use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; use num_traits::ToPrimitive; -use std::{fmt::Debug, mem::ManuallyDrop}; +use std::{cmp::Ordering, fmt::Debug, mem::ManuallyDrop, ops::Range}; #[derive(FromArgs)] pub struct PyMemoryViewNewArgs { @@ -34,18 +39,10 @@ pub struct PyMemoryView { // the released memoryview does not mean the buffer is destoryed // because the possible another memeoryview is viewing from it released: AtomicCell, - // start should always less or equal to the stop - // start and stop pointing to the memory 
index not slice index - // if length is not zero than [start, stop) start: usize, - stop: usize, - // step can be negative, means read the memory from stop-1 to start - // the memoryview is not contiguous if the buffer is not contiguous - // or the step is not 1 or -1 - step: isize, format_spec: FormatSpec, // memoryview's options could be different from buffer's options - options: BufferOptions, + desc: BufferDescriptor, hash: OnceCell, // exports // memoryview has no exports count by itself @@ -56,8 +53,12 @@ impl Constructor for PyMemoryView { type Args = PyMemoryViewNewArgs; fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { - let buffer = PyBuffer::try_from_borrowed_object(vm, &args.object)?; - let zelf = PyMemoryView::from_buffer(buffer, vm)?; + let zelf = if let Some(other) = args.object.payload::() { + other.new_view() + } else { + let buffer = PyBuffer::try_from_borrowed_object(vm, &args.object)?; + PyMemoryView::from_buffer(buffer, vm)? + }; zelf.into_pyresult_with_type(vm, cls) } } @@ -72,44 +73,50 @@ impl PyMemoryView { // when we get a buffer means the buffered object is size locked // so we can assume the buffer's options will never change as long // as memoryview is still alive - let options = buffer.options.clone(); - let stop = options.len * options.itemsize; - let format_spec = Self::parse_format(&options.format, vm)?; + let format_spec = Self::parse_format(&buffer.desc.format, vm)?; + let desc = buffer.desc.clone(); + Ok(PyMemoryView { buffer: ManuallyDrop::new(buffer), released: AtomicCell::new(false), start: 0, - stop, - step: 1, - options, format_spec, + desc, hash: OnceCell::new(), - } - .validate()) + }) } pub fn from_buffer_range( buffer: PyBuffer, - range: std::ops::Range, + range: Range, vm: &VirtualMachine, ) -> PyResult { - let options = buffer.options.clone(); - let itemsize = options.itemsize; - let format_spec = Self::parse_format(&options.format, vm)?; - Ok(PyMemoryView { + let format_spec = Self::parse_format(&buffer.desc.format, vm)?; + let desc = buffer.desc.clone(); + + let mut zelf = PyMemoryView { buffer: ManuallyDrop::new(buffer), released: AtomicCell::new(false), - start: range.start * itemsize, - stop: range.end * itemsize, - step: 1, - options: BufferOptions { - len: range.len(), - ..options - }, + start: range.start, format_spec, + desc, + hash: OnceCell::new(), + }; + + zelf.init_range(range, 0); + zelf.init_len(); + Ok(zelf) + } + + fn new_view(&self) -> Self { + PyMemoryView { + buffer: self.buffer.clone(), + released: AtomicCell::new(false), + start: self.start, + format_spec: self.format_spec.clone(), + desc: self.desc.clone(), hash: OnceCell::new(), } - .validate()) } #[pymethod] @@ -134,42 +141,81 @@ impl PyMemoryView { #[pyproperty] fn nbytes(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm) - .map(|_| self.options.len * self.options.itemsize) + self.try_not_released(vm).map(|_| self.desc.len) } #[pyproperty] fn readonly(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| self.options.readonly) + self.try_not_released(vm).map(|_| self.desc.readonly) } #[pyproperty] fn itemsize(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| self.options.itemsize) + self.try_not_released(vm).map(|_| self.desc.itemsize) } #[pyproperty] fn ndim(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| self.options.ndim) + self.try_not_released(vm).map(|_| self.desc.ndim()) } - // TODO #[pyproperty] - fn shape(&self, vm: &VirtualMachine) 
-> PyResult { - self.try_not_released(vm) - .map(|_| (self.options.len,).into_pyobject(vm)) + fn shape(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm)?; + Ok(vm.ctx.new_tuple( + self.desc + .dim_desc + .iter() + .map(|(shape, _, _)| shape.into_pyobject(vm)) + .collect(), + )) } - // TODO #[pyproperty] - fn strides(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| (0,).into_pyobject(vm)) + fn strides(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm)?; + Ok(vm.ctx.new_tuple( + self.desc + .dim_desc + .iter() + .map(|(_, stride, _)| stride.into_pyobject(vm)) + .collect(), + )) + } + + #[pyproperty] + fn suboffsets(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm)?; + Ok(vm.ctx.new_tuple( + self.desc + .dim_desc + .iter() + .map(|(_, _, suboffset)| suboffset.into_pyobject(vm)) + .collect(), + )) } #[pyproperty] fn format(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm) - .map(|_| PyStr::from(self.options.format.clone())) + .map(|_| PyStr::from(self.desc.format.clone())) + } + + #[pyproperty] + fn contiguous(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm).map(|_| self.desc.is_contiguous()) + } + + #[pyproperty] + fn c_contiguous(&self, vm: &VirtualMachine) -> PyResult { + self.try_not_released(vm).map(|_| self.desc.is_contiguous()) + } + + #[pyproperty] + fn f_contiguous(&self, vm: &VirtualMachine) -> PyResult { + // TODO: fortain order + self.try_not_released(vm) + .map(|_| self.desc.ndim() <= 1 && self.desc.is_contiguous()) } #[pymethod(magic)] @@ -182,320 +228,317 @@ impl PyMemoryView { self.release(); } - // translate the slice index to memory index - fn get_pos(&self, i: isize) -> Option { - let len = self.options.len; - let i = wrap_index(i, len)?; - Some(self.get_pos_no_wrap(i)) - } - - fn get_pos_no_wrap(&self, i: usize) -> usize { - let itemsize = self.options.itemsize; - if self.step < 0 { - self.stop - itemsize - (-self.step as usize) * i * itemsize - } else { - self.start + self.step as usize * i * itemsize + fn getitem_by_idx(&self, i: isize, vm: &VirtualMachine) -> PyResult { + if self.desc.ndim() != 1 { + return Err(vm.new_not_implemented_error( + "multi-dimensional sub-views are not implemented".to_owned(), + )); } - } - - fn getitem_by_idx(zelf: PyRef, i: isize, vm: &VirtualMachine) -> PyResult { - let i = zelf - .get_pos(i) + let (shape, stride, suboffset) = self.desc.dim_desc[0]; + let index = wrap_index(i, shape) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; - let bytes = &zelf.buffer.obj_bytes()[i..i + zelf.options.itemsize]; - zelf.format_spec.unpack(bytes, vm).map(|x| { - if x.len() == 1 { - x.fast_getitem(0) - } else { - x.into() - } - }) + let index = index as isize * stride + suboffset; + let index = index as usize; + let bytes = self.obj_bytes(); + self.format_spec + .unpack(&bytes[index..index + self.desc.itemsize], vm) + .map(|x| { + if x.len() == 1 { + x.fast_getitem(0) + } else { + x.into() + } + }) } - fn getitem_by_slice(zelf: PyRef, slice: PyRef, vm: &VirtualMachine) -> PyResult { - // slicing a memoryview return a new memoryview - let options = zelf.options.clone(); - let buffer = zelf.buffer.clone(); + fn getitem_by_slice(&self, slice: &PySlice, vm: &VirtualMachine) -> PyResult { + let mut other = self.new_view(); + other.init_slice(slice, 0, vm)?; + other.init_len(); - let len = options.len; - let (range, step, is_negative_step) = - SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(len); - let 
abs_step = step.unwrap(); - let step = if is_negative_step { - -(abs_step as isize) - } else { - abs_step as isize - }; - let newstep = step * zelf.step; - let itemsize = options.itemsize; - - let format_spec = zelf.format_spec.clone(); - - if newstep == 1 { - let newlen = range.end - range.start; - let start = zelf.start + range.start * itemsize; - let stop = zelf.start + range.end * itemsize; - return Ok(PyMemoryView { - buffer, - released: AtomicCell::new(false), - start, - stop, - step: 1, - options: BufferOptions { - len: newlen, - contiguous: true, - ..options - }, - format_spec, - hash: OnceCell::new(), + Ok(other.into_ref(vm).into_object()) + } + + fn getitem_by_multi_idx(&self, tuple: &PyTuple, vm: &VirtualMachine) -> PyResult { + let tuple = tuple.as_slice(); + match tuple.len().cmp(&self.desc.ndim()) { + Ordering::Less => { + return Err(vm.new_not_implemented_error("sub-views are not implemented".to_owned())) } - .validate() - .into_object(vm)); - } - - if range.start >= range.end { - return Ok(PyMemoryView { - buffer, - released: AtomicCell::new(false), - start: range.end, - stop: range.end, - step: 1, - options: BufferOptions { - len: 0, - contiguous: true, - ..options - }, - format_spec, - hash: OnceCell::new(), + Ordering::Greater => { + return Err(vm.new_type_error(format!( + "cannot index {}-dimension view with {}-element tuple", + self.desc.ndim(), + tuple.len() + ))) } - .validate() - .into_object(vm)); - }; - - // overflow is not possible as dividing a positive integer - let newlen = ((range.end - range.start - 1) / abs_step) - .to_usize() - .unwrap() - + 1; - - // newlen will be 0 if step is overflowed - let newstep = newstep.to_isize().unwrap_or(-1); - - let (start, stop) = if newstep < 0 { - let stop = zelf.stop - range.start * itemsize * zelf.step.abs() as usize; - let start = stop - (newlen - 1) * itemsize * newstep.abs() as usize - itemsize; - (start, stop) - } else { - let start = zelf.start + range.start * itemsize * zelf.step.abs() as usize; - let stop = start + (newlen - 1) * itemsize * newstep.abs() as usize + itemsize; - (start, stop) - }; - - Ok(PyMemoryView { - buffer, - released: AtomicCell::new(false), - start, - stop, - step: newstep, - options: BufferOptions { - len: newlen, - contiguous: false, - ..options - }, - format_spec, - hash: OnceCell::new(), + Ordering::Equal => (), } - .validate() - .into_object(vm)) + + let indices: Vec = tuple + .iter() + .map(|x| isize::try_from_borrowed_object(vm, x)) + .try_collect()?; + let pos = self.desc.get_position(&indices, vm)?; + + let bytes = self.obj_bytes(); + self.format_spec + .unpack(&bytes[pos..pos + self.desc.itemsize], vm) + .map(|x| { + if x.len() == 1 { + x.fast_getitem(0) + } else { + x.into() + } + }) } #[pymethod(magic)] fn getitem(zelf: PyRef, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult { zelf.try_not_released(vm)?; - match SequenceIndex::try_from_object_for(vm, needle, Self::NAME)? { - SequenceIndex::Int(index) => Self::getitem_by_idx(zelf, index, vm), - SequenceIndex::Slice(slice) => Self::getitem_by_slice(zelf, slice, vm), + if zelf.desc.ndim() == 0 { + // 0-d memoryview can be referenced using mv[...] or mv[()] only + if needle.is(&vm.ctx.ellipsis) { + return Ok(zelf.into_object()); + } + if let Some(tuple) = needle.payload::() { + if tuple.len() == 0 { + return Ok(zelf + .format_spec + .unpack(&zelf.obj_bytes()[..zelf.desc.itemsize], vm)? 
+ .fast_getitem(0)); + } + } + return Err(vm.new_type_error("invalid indexing of 0-dim memory".to_owned())); + } + + // TODO: avoid clone + if let Ok(seq_index) = SequenceIndex::try_from_object_for(vm, needle.clone(), Self::NAME) { + match seq_index { + SequenceIndex::Int(index) => zelf.getitem_by_idx(index, vm), + SequenceIndex::Slice(slice) => zelf.getitem_by_slice(&slice, vm), + } + } else if let Some(tuple) = needle.payload::() { + zelf.getitem_by_multi_idx(tuple, vm) + } else { + // TODO: support multi slice + Err(vm.new_type_error("memoryview: invalid slice key".to_owned())) } } - fn setitem_by_idx( - zelf: PyRef, - i: isize, - value: PyObjectRef, - vm: &VirtualMachine, - ) -> PyResult<()> { - let i = zelf - .get_pos(i) + fn setitem_by_idx(&self, i: isize, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + if self.desc.ndim() != 1 { + return Err(vm.new_not_implemented_error("sub-views are not implemented".to_owned())); + } + let (shape, stride, suboffset) = self.desc.dim_desc[0]; + let index = wrap_index(i, shape) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; - let itemsize = zelf.options.itemsize; - let data = zelf.format_spec.pack(vec![value], vm)?; - zelf.buffer.obj_bytes_mut()[i..i + itemsize].copy_from_slice(&data); + let index = index as isize * stride + suboffset; + let index = index as usize; + let bytes = &mut *self.obj_bytes_mut(); + let data = self.format_spec.pack(vec![value], vm)?; + bytes[index..index + self.desc.itemsize].copy_from_slice(&data); Ok(()) } fn setitem_by_slice( - zelf: PyRef, - slice: PyRef, + &self, + slice: &PySlice, items: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { - let items = PyBuffer::try_from_object(vm, items)?; - let options = &items.options; - let len = options.len; - let itemsize = options.itemsize; - - if itemsize != zelf.options.itemsize { - return Err(vm.new_type_error(format!( - "memoryview: invalid type for format '{}'", - zelf.options.format - ))); + if self.desc.ndim() != 1 { + return Err(vm.new_not_implemented_error("sub-view are not implemented".to_owned())); } + let src = PyBuffer::try_from_object(vm, items)?; + let mut dest = self.new_view(); + dest.init_slice(slice, 0, vm)?; + dest.init_len(); - let diff_err = || { - Err(vm.new_value_error( - "memoryview assignment: lvalue and rvalue have different structures".to_owned(), - )) - }; - - if options.format != zelf.options.format { - return diff_err(); + if !is_equiv_structure(&src.desc, &dest.desc) { + return Err(vm.new_type_error( + "memoryview assigment: lvalue and rvalue have different structures".to_owned(), + )); } - let (range, step, is_negative_step) = - SaturatedSlice::with_slice(&slice, vm)?.adjust_indices(zelf.options.len); + let mut bytes_mut = dest.obj_bytes_mut(); + let src_bytes = src.obj_bytes(); + dest.desc.zip_eq(&src.desc, true, |a_pos, b_pos, len| { + bytes_mut[a_pos..a_pos + len].copy_from_slice(&src_bytes[b_pos..b_pos + len]); + }); - // TODO: try borrow the vec, now cause deadlock - // items.contiguous_or_collect(|bytes| { - let mut bytes = vec![]; - items.collect_bytes(&mut bytes); - assert_eq!(bytes.len(), len * itemsize); + Ok(()) + } - if !is_negative_step && step == Some(1) { - if range.end - range.start != len { - return diff_err(); - } + #[pymethod(magic)] + fn setitem( + &self, + needle: PyObjectRef, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + self.try_not_released(vm)?; + if self.desc.readonly { + return Err(vm.new_type_error("cannot modify read-only memory".to_owned())); + } + if 
value.is(&vm.ctx.none) { + return Err(vm.new_type_error("cannot delete memory".to_owned())); + } - if let Some(mut buffer) = zelf.as_contiguous_mut() { - buffer[range.start * itemsize..range.end * itemsize].copy_from_slice(&bytes); - return Ok(()); + if self.desc.ndim() == 0 { + if needle.is(&vm.ctx.ellipsis) { + let bytes = &mut *self.obj_bytes_mut(); + let data = self.format_spec.pack(vec![value], vm)?; + // TODO: fix panic if data no march itemsize + bytes[..self.desc.itemsize].copy_from_slice(&data); + } else if let Some(tuple) = needle.payload::() { + if tuple.len() == 0 { + let bytes = &mut *self.obj_bytes_mut(); + let data = self.format_spec.pack(vec![value], vm)?; + // TODO: fix panic if data no march itemsize + bytes[..self.desc.itemsize].copy_from_slice(&data); + } } + return Err(vm.new_type_error("invalid indexing of 0-dim memory".to_owned())); } - - if let Some(step) = step { - let slicelen = if range.end > range.start { - (range.end - range.start - 1) / step + 1 - } else { - 0 - }; - - if slicelen != len { - return diff_err(); + // TODO: SequenceIndex do not need to take the ownership + if let Ok(seq_index) = SequenceIndex::try_from_object_for(vm, needle.clone(), Self::NAME) { + match seq_index { + SequenceIndex::Int(index) => self.setitem_by_idx(index, value, vm), + SequenceIndex::Slice(slice) => self.setitem_by_slice(&slice, value, vm), } + } else if let Some(_tuple) = needle.payload::() { + Err(vm.new_type_error("TODO".to_owned())) + } else { + // TODO: support multi slice + Err(vm.new_type_error("memoryview: invalid slice key".to_owned())) + } + } - let indexes = if is_negative_step { - itertools::Either::Left(range.rev().step_by(step)) - } else { - itertools::Either::Right(range.step_by(step)) - }; - - let item_index = (0..len).step_by(itemsize); + fn init_len(&mut self) { + let product = self.desc.dim_desc.iter().map(|x| x.0).product(); + self.desc.len = product; + } - let mut buffer = zelf.buffer.obj_bytes_mut(); + fn init_range(&mut self, range: Range, dim: usize) { + let (shape, stride, _) = self.desc.dim_desc[dim]; + debug_assert!(shape >= range.len()); - indexes - .map(|i| zelf.get_pos(i as isize).unwrap()) - .zip(item_index) - .for_each(|(i, item_i)| { - buffer[i..i + itemsize].copy_from_slice(&bytes[item_i..item_i + itemsize]); - }); - Ok(()) - } else { - let slicelen = if range.start < range.end { 1 } else { 0 }; - if match len { - 0 => slicelen == 0, - 1 => { - let mut buffer = zelf.buffer.obj_bytes_mut(); - let i = zelf.get_pos(range.start as isize).unwrap(); - buffer[i..i + itemsize].copy_from_slice(&bytes); - true - } - _ => false, - } { - Ok(()) - } else { - diff_err() + let mut is_adjusted_suboffset = false; + for (_, _, suboffset) in self.desc.dim_desc.iter_mut().rev() { + if *suboffset != 0 { + *suboffset += stride * range.start as isize; + is_adjusted_suboffset = true; + break; } } - // }) + if !is_adjusted_suboffset { + self.start += stride as usize * range.start; + } + let newlen = range.len(); + self.desc.dim_desc[dim].0 = newlen; } - #[pymethod(magic)] - fn setitem( - zelf: PyRef, - needle: PyObjectRef, - value: PyObjectRef, - vm: &VirtualMachine, - ) -> PyResult<()> { - zelf.try_not_released(vm)?; - if zelf.options.readonly { - return Err(vm.new_type_error("cannot modify read-only memory".to_owned())); + fn init_slice(&mut self, slice: &PySlice, dim: usize, vm: &VirtualMachine) -> PyResult<()> { + let (shape, stride, _) = self.desc.dim_desc[dim]; + let slice = slice.to_saturated(vm)?; + let (range, step, is_negative_step) = 
slice.adjust_indices(shape); + let abs_step = step.unwrap(); + let step = if is_negative_step { + -(abs_step as isize) + } else { + abs_step as isize + }; + + let mut is_adjusted_suboffset = false; + for (_, _, suboffset) in self.desc.dim_desc.iter_mut().rev() { + if *suboffset != 0 { + *suboffset += stride * range.start as isize; + is_adjusted_suboffset = true; + break; + } } - match SequenceIndex::try_from_object_for(vm, needle, Self::NAME)? { - SequenceIndex::Int(index) => Self::setitem_by_idx(zelf, index, value, vm), - SequenceIndex::Slice(slice) => Self::setitem_by_slice(zelf, slice, value, vm), + if !is_adjusted_suboffset { + self.start += stride as usize * range.start; } + // overflow is not possible as dividing a positive integer + let newlen = ((range.end - range.start - 1) / abs_step) + .to_usize() + .unwrap() + + 1; + self.desc.dim_desc[dim].0 = newlen; + self.desc.dim_desc[dim].1 *= step; + + Ok(()) } #[pymethod(magic)] fn len(&self, vm: &VirtualMachine) -> PyResult { - self.try_not_released(vm).map(|_| self.options.len) + self.try_not_released(vm)?; + Ok(if self.desc.ndim() == 0 { + 1 + } else { + // shape for dim[0] + self.desc.dim_desc[0].0 + }) } #[pymethod] fn tobytes(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; - let v = if self.options.contiguous { - self.contiguous().to_vec() - } else { - let mut collected = vec![]; - self.collect_bytes(&mut collected); - collected - }; + let mut v = vec![]; + self.collect(&mut v); Ok(PyBytes::from(v).into_ref(vm)) } + fn _to_list( + &self, + bytes: &[u8], + mut index: isize, + dim: usize, + vm: &VirtualMachine, + ) -> PyResult { + let (shape, stride, suboffset) = self.desc.dim_desc[dim]; + if dim + 1 == self.desc.ndim() { + let mut v = Vec::with_capacity(shape); + for _ in 0..shape { + let pos = (index + suboffset) as usize; + let obj = + format_unpack(&self.format_spec, &bytes[pos..pos + self.desc.itemsize], vm)?; + v.push(obj); + index += stride; + } + return Ok(vm.ctx.new_list(v)); + } + + let mut v = Vec::with_capacity(shape); + for _ in 0..shape { + let obj = self + ._to_list(bytes, index + suboffset, dim + 1, vm)? 
+ .into_object(); + v.push(obj); + index += stride; + } + Ok(vm.ctx.new_list(v)) + } + #[pymethod] fn tolist(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; - let len = self.options.len; - let itemsize = self.options.itemsize; - let bytes = &*self.buffer.obj_bytes(); - let elements: Vec = (0..len) - .map(|i| { - let i = self.get_pos_no_wrap(i); - format_unpack(&self.format_spec, &bytes[i..i + itemsize], vm) - }) - .try_collect()?; - - Ok(vm.ctx.new_list(elements)) + if self.desc.ndim() == 0 { + // TODO: unpack_single(view->buf, fmt) + return Ok(vm.ctx.new_list(vec![])); + } + let bytes = self.obj_bytes(); + self._to_list(&bytes, 0, 0, vm) } #[pymethod] - fn toreadonly(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { - zelf.try_not_released(vm)?; - Ok(PyMemoryView { - buffer: zelf.buffer.clone(), - released: AtomicCell::new(false), - format_spec: zelf.format_spec.clone(), - hash: OnceCell::new(), - options: BufferOptions { - readonly: true, - ..zelf.options.clone() - }, - ..**zelf - } - .validate() - .into_ref(vm)) + fn toreadonly(&self, vm: &VirtualMachine) -> PyResult> { + self.try_not_released(vm)?; + let mut other = self.new_view(); + other.desc.readonly = true; + Ok(other.into_ref(vm)) } #[pymethod(magic)] @@ -518,41 +561,100 @@ impl PyMemoryView { self.contiguous_or_collect(|x| bytes_to_hex(x, sep, bytes_per_sep, vm)) } - // TODO: support cast shape - #[pymethod] - fn cast(&self, format: PyStrRef, vm: &VirtualMachine) -> PyResult> { - self.try_not_released(vm)?; - if !self.options.contiguous { - return Err(vm.new_type_error( - "memoryview: casts are restricted to C-contiguous views".to_owned(), - )); - } - + fn cast_to_1d(&self, format: PyStrRef, vm: &VirtualMachine) -> PyResult { let format_spec = Self::parse_format(format.as_str(), vm)?; let itemsize = format_spec.size(); - let bytelen = self.options.len * self.options.itemsize; - - if bytelen % itemsize != 0 { + if self.desc.len % itemsize != 0 { return Err( vm.new_type_error("memoryview: length is not a multiple of itemsize".to_owned()) ); } - Ok(PyMemoryView { + Ok(Self { buffer: self.buffer.clone(), released: AtomicCell::new(false), - options: BufferOptions { + start: self.start, + format_spec, + desc: BufferDescriptor { + len: self.desc.len, + readonly: self.desc.readonly, itemsize, - len: bytelen / itemsize, format: format.to_string().into(), - ..self.options.clone() + dim_desc: vec![(self.desc.len / itemsize, itemsize as isize, 0)], }, - format_spec, hash: OnceCell::new(), - ..*self + }) + } + + #[pymethod] + fn cast(&self, args: CastArgs, vm: &VirtualMachine) -> PyResult> { + self.try_not_released(vm)?; + if !self.desc.is_contiguous() { + return Err(vm.new_type_error( + "memoryview: casts are restricted to C-contiguous views".to_owned(), + )); + } + + let CastArgs { format, shape } = args; + + if let OptionalArg::Present(shape) = shape { + if self.desc.is_zero_in_shape() { + return Err(vm.new_type_error( + "memoryview: cannot cast view with zeros in shape or strides".to_owned(), + )); + } + + let shape_vec = shape.borrow_vec(); + let shape_ndim = shape_vec.len(); + // TODO: MAX_NDIM + if self.desc.ndim() != 1 && shape_ndim != 1 { + return Err( + vm.new_type_error("memoryview: cast must be 1D -> ND or ND -> 1D".to_owned()) + ); + } + + let mut other = self.cast_to_1d(format, vm)?; + let itemsize = other.desc.itemsize; + + // 0 ndim is single item + if shape_ndim == 0 { + other.desc.dim_desc = vec![]; + other.desc.len = itemsize; + return Ok(other.into_ref(vm)); + } + + let mut product_shape = itemsize; + 
let mut dim_descriptor = Vec::with_capacity(shape_ndim); + + for x in shape_vec.iter() { + let x = usize::try_from_borrowed_object(vm, x)?; + + if x > isize::MAX as usize / product_shape { + return Err(vm.new_value_error( + "memoryview.cast(): product(shape) > SSIZE_MAX".to_owned(), + )); + } + product_shape *= x; + dim_descriptor.push((x, 0, 0)); + } + + dim_descriptor.last_mut().unwrap().1 = itemsize as isize; + for i in (0..dim_descriptor.len() - 1).rev() { + dim_descriptor[i].1 = dim_descriptor[i + 1].1 * dim_descriptor[i + 1].0 as isize; + } + + if product_shape != other.desc.len { + return Err(vm.new_type_error( + "memoryview: product(shape) * itemsize != buffer size".to_owned(), + )); + } + + other.desc.dim_desc = dim_descriptor; + + Ok(other.into_ref(vm)) + } else { + Ok(self.cast_to_1d(format, vm)?.into_ref(vm)) } - .validate() - .into_ref(vm)) } fn eq( @@ -567,31 +669,39 @@ impl PyMemoryView { return Ok(false); } + if let Some(other) = other.payload::() { + if other.released.load() { + return Ok(false); + } + } + let other = match PyBuffer::try_from_borrowed_object(vm, other) { Ok(buf) => buf, Err(_) => return Ok(false), }; - let a_options = &zelf.options; - let b_options = &other.options; - - if a_options.len != b_options.len - || a_options.ndim != b_options.ndim - || a_options.shape != b_options.shape - { + if !is_equiv_shape(&zelf.desc, &other.desc) { return Ok(false); } + let a_itemsize = zelf.desc.itemsize; + let b_itemsize = other.desc.itemsize; + let a_format_spec = &zelf.format_spec; + let b_format_spec = &Self::parse_format(&other.desc.format, vm)?; + + if zelf.desc.ndim() == 0 { + let a_val = format_unpack(a_format_spec, &zelf.obj_bytes()[..a_itemsize], vm)?; + let b_val = format_unpack(b_format_spec, &other.obj_bytes()[..b_itemsize], vm)?; + return Ok(vm.bool_eq(&a_val, &b_val)?); + } + zelf.contiguous_or_collect(|a| { other.contiguous_or_collect(|b| { - if a_options.format == b_options.format { - Ok(a == b) - } else { - let a_list = unpack_bytes_seq_to_list(a, &a_options.format, vm)?; - let b_list = unpack_bytes_seq_to_list(b, &b_options.format, vm)?; + // TODO: optimize cmp by format + let a_list = unpack_bytes_seq_to_list(a, a_format_spec, vm)?; + let b_list = unpack_bytes_seq_to_list(b, b_format_spec, vm)?; - Ok(vm.bool_eq(a_list.as_object(), b_list.as_object())?) 
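The cast() path above fills the (shape, stride, suboffset) triples for a C-contiguous layout: the innermost stride is the itemsize, and every outer stride is the inner stride times the inner extent. A small standalone sketch of that arithmetic (illustrative code, not the vm's own):

    // Build (shape, stride, suboffset) triples for a C-contiguous buffer.
    fn c_contiguous_dims(shape: &[usize], itemsize: usize) -> Vec<(usize, isize, isize)> {
        let mut dims: Vec<(usize, isize, isize)> =
            shape.iter().map(|&s| (s, 0, 0)).collect();
        if let Some(last) = dims.last_mut() {
            last.1 = itemsize as isize;
        }
        // walk outward: stride[i] = stride[i + 1] * shape[i + 1]
        for i in (0..dims.len().saturating_sub(1)).rev() {
            dims[i].1 = dims[i + 1].1 * dims[i + 1].0 as isize;
        }
        dims
    }

    fn main() {
        // shape (2, 3, 4) with 8-byte items gives strides (96, 32, 8)
        assert_eq!(
            c_contiguous_dims(&[2, 3, 4], 8),
            vec![(2, 96, 0), (3, 32, 0), (4, 8, 0)]
        );
    }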
- } + vm.bool_eq(a_list.as_object(), b_list.as_object()) }) }) } @@ -606,55 +716,66 @@ impl PyMemoryView { Err(vm.new_type_error("cannot pickle 'memoryview' object".to_owned())) } - fn as_contiguous_mut(&self) -> Option> { - self.options.contiguous.then(|| self.contiguous_mut()) + fn obj_bytes(&self) -> BorrowedValue<[u8]> { + BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..]) + } + + fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + BorrowedValueMut::map(self.buffer.obj_bytes_mut(), |x| &mut x[self.start..]) + } + + fn as_contiguous(&self) -> Option> { + self.desc + .is_contiguous() + .then(|| BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..self.desc.len])) + } + + fn _as_contiguous_mut(&self) -> Option> { + self.desc.is_contiguous().then(|| { + BorrowedValueMut::map(self.buffer.obj_bytes_mut(), |x| { + &mut x[self.start..self.desc.len] + }) + }) + } + + fn collect(&self, buf: &mut Vec) { + if let Some(bytes) = self.as_contiguous() { + buf.extend_from_slice(&bytes); + } else { + let bytes = &*self.obj_bytes(); + self.desc + .for_each_segment(true, |range| buf.extend_from_slice(&bytes[range])); + } } - fn contiguous_or_collect(&self, f: F) -> R - where - F: FnOnce(&[u8]) -> R, - { + fn contiguous_or_collect R>(&self, f: F) -> R { let borrowed; let mut collected; - let v = if self.options.contiguous { - borrowed = self.contiguous(); + let v = if let Some(bytes) = self.as_contiguous() { + borrowed = bytes; &*borrowed } else { collected = vec![]; - self.collect_bytes(&mut collected); - &*collected + self.collect(&mut collected); + &collected }; f(v) } +} - fn contiguous(&self) -> BorrowedValue<[u8]> { - BorrowedValue::map(self.buffer._contiguous(), |x| &x[self.start..self.stop]) - } - fn contiguous_mut(&self) -> BorrowedValueMut<[u8]> { - BorrowedValueMut::map(self.buffer._contiguous_mut(), |x| { - &mut x[self.start..self.stop] - }) - } - fn collect_bytes(&self, buf: &mut Vec) { - let bytes = &*self.buffer.obj_bytes(); - let len = self.options.len; - let itemsize = self.options.itemsize; - buf.reserve(len * itemsize); - for i in 0..len { - let i = self.get_pos_no_wrap(i); - buf.extend_from_slice(&bytes[i..i + itemsize]); - } - } +#[derive(FromArgs)] +struct CastArgs { + #[pyarg(any)] + format: PyStrRef, + #[pyarg(any, optional)] + shape: OptionalArg, } static BUFFER_METHODS: BufferMethods = BufferMethods { - obj_bytes: |buffer| buffer.obj_as::().buffer.obj_bytes(), - obj_bytes_mut: |buffer| buffer.obj_as::().buffer.obj_bytes_mut(), - // contiguous: Some(|buffer| buffer.obj_as::().contiguous()), - // contiguous_mut: Some(|buffer| buffer.obj_as::().contiguous_mut()), - // collect_bytes: Some(|buffer, buf| buffer.obj_as::().collect_bytes(buf)), - release: Some(|buffer| buffer.obj_as::().buffer.release()), - retain: Some(|buffer| buffer.obj_as::().buffer.retain()), + obj_bytes: |buffer| buffer.obj_as::().obj_bytes(), + obj_bytes_mut: |buffer| buffer.obj_as::().obj_bytes_mut(), + release: |buffer| buffer.obj_as::().buffer.release(), + retain: |buffer| buffer.obj_as::().buffer.retain(), }; impl AsBuffer for PyMemoryView { @@ -664,7 +785,7 @@ impl AsBuffer for PyMemoryView { } else { Ok(PyBuffer::new( zelf.to_owned().into_object(), - zelf.options.clone(), + zelf.desc.clone(), &BUFFER_METHODS, )) } @@ -708,9 +829,7 @@ impl AsMapping for PyMemoryView { vm: &VirtualMachine, ) -> PyResult<()> { match value { - Some(value) => { - Self::downcast(zelf, vm).map(|zelf| Self::setitem(zelf, needle, value, vm))? 
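contiguous_or_collect, shown above for both PyMemoryView and PyBuffer, chooses between borrowing the underlying bytes directly and gathering the segments of a strided view into a temporary Vec. The same borrow-or-copy idea can be sketched standalone with Cow and hypothetical segment ranges:

    use std::borrow::Cow;
    use std::ops::Range;

    // Borrow the bytes zero-copy when the view is one contiguous run,
    // otherwise copy each logical segment into a fresh Vec.
    fn contiguous_or_collect<'a>(
        bytes: &'a [u8],
        segments: Option<&[Range<usize>]>,
    ) -> Cow<'a, [u8]> {
        match segments {
            None => Cow::Borrowed(bytes),
            Some(ranges) => {
                let mut v = Vec::new();
                for r in ranges {
                    v.extend_from_slice(&bytes[r.clone()]);
                }
                Cow::Owned(v)
            }
        }
    }

    fn main() {
        let data = [0u8, 1, 2, 3, 4, 5];
        // contiguous: no copy is made
        assert!(matches!(contiguous_or_collect(&data, None), Cow::Borrowed(_)));
        // a step-2 "view" gathers bytes 0, 2 and 4 into a temporary Vec
        let picked = contiguous_or_collect(&data, Some(&[0..1, 2..3, 4..5]));
        assert_eq!(&picked[..], &[0u8, 2, 4][..]);
    }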
- } + Some(value) => Self::downcast(zelf, vm).map(|zelf| zelf.setitem(needle, value, vm))?, None => Err(vm.new_type_error("cannot delete memory".to_owned())), } } @@ -743,7 +862,7 @@ impl Hashable for PyMemoryView { zelf.hash .get_or_try_init(|| { zelf.try_not_released(vm)?; - if !zelf.options.readonly { + if !zelf.desc.readonly { return Err( vm.new_value_error("cannot hash writable memoryview object".to_owned()) ); @@ -780,10 +899,9 @@ fn format_unpack( fn unpack_bytes_seq_to_list( bytes: &[u8], - format: &str, + format_spec: &FormatSpec, vm: &VirtualMachine, ) -> PyResult { - let format_spec = PyMemoryView::parse_format(format, vm)?; let itemsize = format_spec.size(); if bytes.len() % itemsize != 0 { @@ -798,3 +916,30 @@ fn unpack_bytes_seq_to_list( Ok(PyList::from(elements).into_ref(vm)) } + +fn is_equiv_shape(a: &BufferDescriptor, b: &BufferDescriptor) -> bool { + if a.ndim() != b.ndim() { + return false; + } + + let a_iter = a.dim_desc.iter().map(|x| x.0); + let b_iter = b.dim_desc.iter().map(|x| x.0); + for (a_shape, b_shape) in a_iter.zip(b_iter) { + if a_shape != b_shape { + return false; + } + if a_shape == 0 { + break; + } + } + true +} + +fn is_equiv_format(a: &BufferDescriptor, b: &BufferDescriptor) -> bool { + // TODO: skip @ + a.itemsize == b.itemsize && a.format == b.format +} + +fn is_equiv_structure(a: &BufferDescriptor, b: &BufferDescriptor) -> bool { + is_equiv_format(a, b) && is_equiv_shape(a, b) +} diff --git a/vm/src/function/buffer.rs b/vm/src/function/buffer.rs index 8dca7fe998..aa5613398f 100644 --- a/vm/src/function/buffer.rs +++ b/vm/src/function/buffer.rs @@ -40,7 +40,7 @@ impl PyObject { impl ArgBytesLike { pub fn borrow_buf(&self) -> BorrowedValue<'_, [u8]> { - self.0._contiguous() + self.0.obj_bytes() } pub fn with_ref(&self, f: F) -> R @@ -51,7 +51,7 @@ impl ArgBytesLike { } pub fn len(&self) -> usize { - self.0.options.len + self.0.desc.len } pub fn is_empty(&self) -> bool { @@ -68,7 +68,7 @@ impl From for PyBuffer { impl TryFromBorrowedObject for ArgBytesLike { fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { let buffer = PyBuffer::try_from_borrowed_object(vm, obj)?; - if buffer.options.contiguous { + if buffer.desc.is_contiguous() { Ok(Self(buffer)) } else { Err(vm.new_type_error("non-contiguous buffer is not a bytes-like object".to_owned())) @@ -82,7 +82,7 @@ pub struct ArgMemoryBuffer(PyBuffer); impl ArgMemoryBuffer { pub fn borrow_buf_mut(&self) -> BorrowedValueMut<'_, [u8]> { - self.0._contiguous_mut() + self.0.obj_bytes_mut() } pub fn with_ref(&self, f: F) -> R @@ -93,7 +93,7 @@ impl ArgMemoryBuffer { } pub fn len(&self) -> usize { - self.0.options.len + self.0.desc.len } pub fn is_empty(&self) -> bool { @@ -110,9 +110,9 @@ impl From for PyBuffer { impl TryFromBorrowedObject for ArgMemoryBuffer { fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { let buffer = PyBuffer::try_from_borrowed_object(vm, obj)?; - if !buffer.options.contiguous { + if !buffer.desc.is_contiguous() { Err(vm.new_type_error("non-contiguous buffer is not a bytes-like object".to_owned())) - } else if buffer.options.readonly { + } else if buffer.desc.readonly { Err(vm.new_type_error("buffer is not a read-write bytes-like object".to_owned())) } else { Ok(Self(buffer)) diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index e7245e7a21..084f08fd2e 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -1,15 +1,16 @@ //! Buffer protocol +//! 
https://docs.python.org/3/c-api/buffer.html use itertools::Itertools; use crate::{ - builtins::PyTypeRef, common::{ borrow::{BorrowedValue, BorrowedValueMut}, lock::{MapImmutable, PyMutex, PyMutexGuard}, }, + sliceable::wrap_index, types::{Constructor, Unconstructible}, - PyObject, PyObjectPayload, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, + PyObject, PyObjectPayload, PyObjectRef, PyObjectView, PyObjectWrap, PyRef, PyResult, TryFromBorrowedObject, TypeProtocol, VirtualMachine, }; use std::{borrow::Cow, fmt::Debug, ops::Range}; @@ -18,8 +19,8 @@ use std::{borrow::Cow, fmt::Debug, ops::Range}; pub struct BufferMethods { pub obj_bytes: fn(&PyBuffer) -> BorrowedValue<[u8]>, pub obj_bytes_mut: fn(&PyBuffer) -> BorrowedValueMut<[u8]>, - pub release: Option, - pub retain: Option, + pub release: fn(&PyBuffer), + pub retain: fn(&PyBuffer), } impl Debug for BufferMethods { @@ -27,8 +28,8 @@ impl Debug for BufferMethods { f.debug_struct("BufferMethods") .field("obj_bytes", &(self.obj_bytes as usize)) .field("obj_bytes_mut", &(self.obj_bytes_mut as usize)) - .field("release", &self.release.map(|x| x as usize)) - .field("retain", &self.retain.map(|x| x as usize)) + .field("release", &(self.release as usize)) + .field("retain", &(self.retain as usize)) .finish() } } @@ -36,15 +37,15 @@ impl Debug for BufferMethods { #[derive(Debug, Clone)] pub struct PyBuffer { pub obj: PyObjectRef, - pub options: BufferOptions, + pub desc: BufferDescriptor, methods: &'static BufferMethods, } impl PyBuffer { - pub fn new(obj: PyObjectRef, options: BufferOptions, methods: &'static BufferMethods) -> Self { + pub fn new(obj: PyObjectRef, desc: BufferDescriptor, methods: &'static BufferMethods) -> Self { let zelf = Self { obj, - options: options.validate(), + desc: desc.validate(), methods, }; zelf.retain(); @@ -52,27 +53,31 @@ impl PyBuffer { } pub fn as_contiguous(&self) -> Option> { - self.options.is_contiguous().then(|| self.obj_bytes()) + self.desc + .is_contiguous() + .then(|| BorrowedValue::map(self.obj_bytes(), |x| &x[..self.desc.len])) } pub fn as_contiguous_mut(&self) -> Option> { - self.options.is_contiguous().then(|| self.obj_bytes_mut()) + self.desc + .is_contiguous() + .then(|| BorrowedValueMut::map(self.obj_bytes_mut(), |x| &mut x[..self.desc.len])) } pub fn collect(&self, buf: &mut Vec) { - if self.options.is_contiguous() { + if self.desc.is_contiguous() { buf.extend_from_slice(&self.obj_bytes()); } else { let bytes = &*self.obj_bytes(); - self.options - .for_each_segment(|range| buf.extend_from_slice(&bytes[range])); + self.desc + .for_each_segment(true, |range| buf.extend_from_slice(&bytes[range])); } } pub fn contiguous_or_collect R>(&self, f: F) -> R { let borrowed; let mut collected; - let v = if self.options.is_contiguous() { + let v = if self.desc.is_contiguous() { borrowed = self.obj_bytes(); &*borrowed } else { @@ -84,7 +89,7 @@ impl PyBuffer { } pub fn obj_as(&self) -> &PyObjectView { - self.obj.downcast_ref().unwrap() + unsafe { self.obj.downcast_unchecked_ref() } } pub fn obj_bytes(&self) -> BorrowedValue<[u8]> { @@ -96,15 +101,11 @@ impl PyBuffer { } pub fn release(&self) { - if let Some(f) = self.methods.release { - f(self) - } + (self.methods.release)(self) } pub fn retain(&self) { - if let Some(f) = self.methods.retain { - f(self) - } + (self.methods.retain)(self) } // drop PyBuffer without calling release @@ -112,7 +113,7 @@ impl PyBuffer { // or wrap PyBuffer in the ManaullyDrop to prevent drop() pub(crate) unsafe fn drop_without_release(&mut self) { 
std::ptr::drop_in_place(&mut self.obj); - std::ptr::drop_in_place(&mut self.options); + std::ptr::drop_in_place(&mut self.desc); } } @@ -139,29 +140,25 @@ impl Drop for PyBuffer { } #[derive(Debug, Clone)] -pub struct BufferOptions { +pub struct BufferDescriptor { /// product(shape) * itemsize /// NOT the bytes length if buffer is discontiguous pub len: usize, pub readonly: bool, pub itemsize: usize, pub format: Cow<'static, str>, - // pub ndim: usize, /// (shape, stride, suboffset) for each dimension - pub dim_descriptor: Vec<(usize, isize, isize)>, - // pub shape: Vec, - // pub strides: Vec, - // pub suboffsets: Vec, + pub dim_desc: Vec<(usize, isize, isize)>, } -impl BufferOptions { +impl BufferDescriptor { pub fn simple(bytes_len: usize, readonly: bool) -> Self { Self { len: bytes_len, readonly, itemsize: 1, format: Cow::Borrowed("B"), - dim_descriptor: vec![(bytes_len, 1, 0)], + dim_desc: vec![(bytes_len, 1, 0)], } } @@ -176,7 +173,7 @@ impl BufferOptions { readonly, itemsize, format, - dim_descriptor: vec![(bytes_len / itemsize, itemsize as isize, 0)], + dim_desc: vec![(bytes_len / itemsize, itemsize as isize, 0)], } } @@ -185,16 +182,14 @@ impl BufferOptions { assert!(self.itemsize != 0); assert!(self.ndim() != 0); let mut shape_product = 1; - for (shape, stride, suboffset) in self.dim_descriptor { + for (shape, stride, suboffset) in self.dim_desc.iter().cloned() { shape_product *= shape; assert!(suboffset >= 0); assert!(stride != 0); if stride.is_negative() { // if stride is negative, we access memory in reversed order // so the suboffset should be n*stride shift the index to the tail - assert!(suboffset == -stride * shape as isize); - } else { - assert!(suboffset == 0); + assert!(suboffset >= -stride * shape as isize); } } assert!(shape_product == self.len); @@ -207,7 +202,7 @@ impl BufferOptions { } pub fn ndim(&self) -> usize { - self.dim_descriptor.len() + self.dim_desc.len() } pub fn is_contiguous(&self) -> bool { @@ -215,7 +210,7 @@ impl BufferOptions { return true; } let mut sd = self.itemsize; - for (shape, stride, _) in self.dim_descriptor.into_iter().rev() { + for (shape, stride, _) in self.dim_desc.iter().cloned().rev() { if shape > 1 && stride != sd as isize { return false; } @@ -225,78 +220,145 @@ impl BufferOptions { } /// this function do not check the bound - pub fn get_position(&self, indices: &[usize]) -> usize { - let pos = 0; - for (&i, (_, stride, suboffset)) in indices.iter().zip_eq(self.dim_descriptor) { + /// panic if indices.len() != ndim + pub fn get_position_fast(&self, indices: &[usize]) -> usize { + let mut pos = 0; + for (i, (_, stride, suboffset)) in indices + .iter() + .cloned() + .zip_eq(self.dim_desc.iter().cloned()) + { pos += (i as isize * stride + suboffset) as usize; } pos } - pub fn for_each(&self, f: F) + /// panic if indices.len() != ndim + pub fn get_position(&self, indices: &[isize], vm: &VirtualMachine) -> PyResult { + let mut pos = 0; + for (i, (shape, stride, suboffset)) in indices + .iter() + .cloned() + .zip_eq(self.dim_desc.iter().cloned()) + { + let i = wrap_index(i, shape).ok_or_else(|| { + vm.new_index_error(format!("index out of bounds on dimension {}", i)) + })?; + pos += (i as isize * stride + suboffset) as usize; + } + Ok(pos) + } + + pub fn for_each_segment(&self, try_conti: bool, mut f: F) where - F: FnMut(usize), + F: FnMut(Range), { - self._for_each(0, 0, f); + if self.ndim() == 0 { + f(0..self.itemsize); + return; + } + if try_conti && self.is_last_dim_contiguous() { + self._for_each_segment::<_, true>(0, 0, &mut f); + } 
else { + self._for_each_segment::<_, false>(0, 0, &mut f); + } } - fn _for_each(&self, mut index: isize, dim: usize, f: F) + fn _for_each_segment(&self, mut index: isize, dim: usize, f: &mut F) where - F: FnMut(usize), + F: FnMut(Range), { - let (shape, stride, suboffset) = self.dim_descriptor[dim]; + let (shape, stride, suboffset) = self.dim_desc[dim]; if dim + 1 == self.ndim() { - for i in 0..shape { - f((index + suboffset) as usize); - index += stride; + if CONTI { + f(index as usize..index as usize + shape * self.itemsize); + } else { + for _ in 0..shape { + let pos = (index + suboffset) as usize; + f(pos..pos + self.itemsize); + index += stride; + } } return; } - for i in 0..shape { - self._for_each(index + suboffset, dim + 1, f); + for _ in 0..shape { + self._for_each_segment::(index + suboffset, dim + 1, f); index += stride; } } - pub fn for_each_segment(&self, f: F) + pub fn zip_eq(&self, other: &Self, try_conti: bool, mut f: F) where - F: FnMut(Range), + F: FnMut(usize, usize, usize), { - if self.is_last_dim_contiguous() { - self._for_each_segment::<_, true>(0, 0, f); + debug_assert_eq!(self.itemsize, other.itemsize); + debug_assert_eq!(self.len, other.len); + + if self.ndim() == 0 { + f(0, 0, self.itemsize); + return; + } + if try_conti && self.is_last_dim_contiguous() { + self._zip_eq::<_, true>(other, 0, 0, 0, &mut f); } else { - self._for_each_segment::<_, false>(0, 0, f) + self._zip_eq::<_, false>(other, 0, 0, 0, &mut f); } } - fn _for_each_segment(&self, mut index: isize, dim: usize, f: F) - where - F: FnMut(Range), + fn _zip_eq( + &self, + other: &Self, + mut a_index: isize, + mut b_index: isize, + dim: usize, + f: &mut F, + ) where + F: FnMut(usize, usize, usize), { - let (shape, stride, suboffset) = self.dim_descriptor[dim]; + let (shape, a_stride, a_suboffset) = self.dim_desc[dim]; + let (_b_shape, b_stride, b_suboffset) = other.dim_desc[dim]; + debug_assert_eq!(shape, _b_shape); if dim + 1 == self.ndim() { if CONTI { - f(index as usize..index as usize + shape * self.itemsize); + f(a_index as usize, b_index as usize, shape * self.itemsize); } else { - for i in 0..shape { - let pos = (index + suboffset) as usize; - f(pos..pos + self.itemsize); - index += stride; + for _ in 0..shape { + let a_pos = (a_index + a_suboffset) as usize; + let b_pos = (b_index + b_suboffset) as usize; + f(a_pos, b_pos, self.itemsize); + a_index += a_stride; + b_index += b_stride; } } - return; } - for i in 0..shape { - self._for_each_segment(index + suboffset, dim + 1, f); - index += stride; + + for _ in 0..shape { + self._zip_eq::( + other, + a_index + a_suboffset, + b_index + b_suboffset, + dim + 1, + f, + ); + a_index += a_stride; + b_index += b_stride; } } fn is_last_dim_contiguous(&self) -> bool { - let (_, stride, suboffset) = self.dim_descriptor[self.ndim() - 1]; + let (_, stride, suboffset) = self.dim_desc[self.ndim() - 1]; suboffset == 0 && stride == self.itemsize as isize } + pub fn is_zero_in_shape(&self) -> bool { + for (shape, _, _) in self.dim_desc.iter().cloned() { + if shape == 0 { + return true; + } + } + return false; + } + // TODO: support fortain order } @@ -306,39 +368,33 @@ pub trait BufferResizeGuard<'a> { } #[pyclass(module = false, name = "vec_buffer")] -#[derive(Debug)] -pub struct VecBuffer(PyMutex>); +#[derive(Debug, PyValue)] +pub struct VecBuffer { + data: PyMutex>, + len: usize, +} #[pyimpl(flags(BASETYPE), with(Constructor))] impl VecBuffer { - pub fn new(v: Vec) -> Self { - Self(PyMutex::new(v)) + pub fn new(data: Vec) -> Self { + let len = data.len(); + Self { + 
data: PyMutex::new(data), + len, + } } pub fn take(&self) -> Vec { - std::mem::take(&mut *self.0.lock()) + std::mem::take(&mut self.data.lock()) } } impl Unconstructible for VecBuffer {} -impl PyValue for VecBuffer { - fn class(vm: &VirtualMachine) -> &PyTypeRef { - &vm.ctx.types.vec_buffer_type - } -} impl PyRef { - pub fn into_pybuffer(self) -> PyBuffer { - let len = self.0.lock().len(); - PyBuffer::new( - self.into_object(), - BufferOptions::simple(len, false), - &VEC_BUFFER_METHODS, - ) - } - pub fn into_readonly_pybuffer(self) -> PyBuffer { - let len = self.0.lock().len(); + pub fn into_pybuffer(self, readonly: bool) -> PyBuffer { + let len = self.len; PyBuffer::new( self.into_object(), - BufferOptions::simple(len, true), + BufferDescriptor::simple(len, readonly), &VEC_BUFFER_METHODS, ) } @@ -346,11 +402,15 @@ impl PyRef { static VEC_BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes: |buffer| { - PyMutexGuard::map_immutable(buffer.obj_as::().0.lock(), |x| x.as_slice()).into() + PyMutexGuard::map_immutable(buffer.obj_as::().data.lock(), |x| x.as_slice()) + .into() }, obj_bytes_mut: |buffer| { - PyMutexGuard::map(buffer.obj_as::().0.lock(), |x| x.as_mut_slice()).into() + PyMutexGuard::map(buffer.obj_as::().data.lock(), |x| { + x.as_mut_slice() + }) + .into() }, - release: None, - retain: None, + release: |_| {}, + retain: |_| {}, }; diff --git a/vm/src/protocol/mod.rs b/vm/src/protocol/mod.rs index bd3ff19b3f..24c88c31da 100644 --- a/vm/src/protocol/mod.rs +++ b/vm/src/protocol/mod.rs @@ -3,6 +3,6 @@ mod iter; mod mapping; mod object; -pub use buffer::{BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, VecBuffer}; +pub use buffer::{BufferDescriptor, BufferMethods, BufferResizeGuard, PyBuffer, VecBuffer}; pub use iter::{PyIter, PyIterIter, PyIterReturn}; pub use mapping::{PyMapping, PyMappingMethods}; diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index e63f0dff2d..639dd464e6 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -68,21 +68,23 @@ impl TryFromObject for Fildes { mod _io { use super::*; - use crate::common::lock::{ - PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, PyThreadMutex, - PyThreadMutexGuard, - }; use crate::{ builtins::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyIntRef, PyMemoryView, PyStr, PyStrRef, PyType, PyTypeRef, }, + common::{ + lock::{ + PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, + PyThreadMutex, PyThreadMutexGuard, + }, + }, function::{ ArgBytesLike, ArgIterable, ArgMemoryBuffer, FuncArgs, IntoPyObject, OptionalArg, OptionalOption, }, protocol::{ - BufferMethods, BufferOptions, BufferResizeGuard, PyBuffer, PyIterReturn, VecBuffer, + BufferDescriptor, BufferMethods, BufferResizeGuard, PyBuffer, PyIterReturn, VecBuffer, }, types::{Constructor, Destructor, IterNext, Iterable}, utils::Either, @@ -877,7 +879,7 @@ mod _io { let v = std::mem::take(&mut self.buffer); let writebuf = VecBuffer::new(v).into_ref(vm); let memobj = PyMemoryView::from_buffer_range( - writebuf.clone().into_readonly_pybuffer(), + writebuf.clone().into_pybuffer(true), buf_range, vm, )? @@ -1102,7 +1104,7 @@ mod _io { let v = v.unwrap_or(&mut self.buffer); let readbuf = VecBuffer::new(std::mem::take(v)).into_ref(vm); let memobj = PyMemoryView::from_buffer_range( - readbuf.clone().into_pybuffer(), + readbuf.clone().into_pybuffer(false), buf_range, vm, )? 
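The BufferedIO paths above lend the internal scratch Vec to a short-lived memoryview and reclaim it with mem::take once the callee has written into it. A rough standalone sketch of that lend-and-reclaim idea, with Rc<RefCell<...>> standing in for the VM's shared, reference-counted VecBuffer (illustrative only):

    use std::cell::RefCell;
    use std::rc::Rc;

    fn main() {
        // scratch buffer shared with a short-lived "view"
        let shared = Rc::new(RefCell::new(vec![0u8; 8]));
        let view = Rc::clone(&shared);

        // the callee writes through the view...
        view.borrow_mut()[..4].copy_from_slice(b"data");
        drop(view);

        // ...and the owner reclaims the storage without copying
        let buf: Vec<u8> = std::mem::take(&mut *shared.borrow_mut());
        assert_eq!(&buf[..4], b"data");
        assert!(shared.borrow().is_empty());
    }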
@@ -3284,12 +3286,12 @@ mod _io { #[pymethod] fn getbuffer(self, vm: &VirtualMachine) -> PyResult { - let options = BufferOptions { - readonly: false, - len: self.buffer.read().cursor.get_ref().len(), - ..Default::default() - }; - let buffer = PyBuffer::new(self.into_object(), options, &BYTES_IO_BUFFER_METHODS); + let len = self.buffer.read().cursor.get_ref().len(); + let buffer = PyBuffer::new( + self.as_object().to_owned(), + BufferDescriptor::simple(len, false), + &BYTES_IO_BUFFER_METHODS, + ); let view = PyMemoryView::from_buffer(buffer, vm)?; Ok(view) } @@ -3305,15 +3307,14 @@ mod _io { PyRwLockWriteGuard::map(zelf.buffer.write(), |x| x.cursor.get_mut().as_mut_slice()) .into() }, - contiguous: None, - contiguous_mut: None, - collect_bytes: None, - release: Some(|buffer| { + + release: |buffer| { buffer.obj_as::().exports.fetch_sub(1); - }), - retain: Some(|buffer| { + }, + + retain: |buffer| { buffer.obj_as::().exports.fetch_add(1); - }), + }, }; impl<'a> BufferResizeGuard<'a> for BytesIO { diff --git a/vm/src/stdlib/os.rs b/vm/src/stdlib/os.rs index 8db62e8094..4f47ed93e6 100644 --- a/vm/src/stdlib/os.rs +++ b/vm/src/stdlib/os.rs @@ -193,7 +193,7 @@ impl TryFromObject for PyPathLike { let obj = match PyBuffer::try_from_borrowed_object(vm, &obj) { Ok(buffer) => { let mut bytes = vec![]; - buffer.collect_bytes(&mut bytes); + buffer.collect(&mut bytes); PyBytes::from(bytes).into_pyobject(vm) } Err(_) => obj, diff --git a/vm/src/types/zoo.rs b/vm/src/types/zoo.rs index fa5ce0cc06..0ec65de700 100644 --- a/vm/src/types/zoo.rs +++ b/vm/src/types/zoo.rs @@ -5,7 +5,7 @@ use crate::builtins::{ pytype::{self, PyTypeRef}, range, set, singletons, slice, staticmethod, traceback, tuple, weakproxy, weakref, zip, }; -use crate::{protocol::VecBuffer, PyContext, StaticType}; +use crate::{PyContext, StaticType}; /// Holder of references to builtin types. 
#[derive(Debug, Clone)] @@ -168,7 +168,7 @@ impl TypeZoo { none_type: singletons::PyNone::init_bare_type().clone(), not_implemented_type: singletons::PyNotImplemented::init_bare_type().clone(), generic_alias_type: genericalias::PyGenericAlias::init_bare_type().clone(), - vec_buffer_type: VecBuffer::init_bare_type().clone(), + vec_buffer_type: crate::protocol::VecBuffer::init_bare_type().clone(), } } From ee41499835f412c6b0b9885271b4e82a2ec3ac90 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Mon, 1 Nov 2021 17:45:32 +0200 Subject: [PATCH 09/20] fix slice adjust_indices --- stdlib/src/array.rs | 2 +- vm/src/builtins/bytes.rs | 5 +- vm/src/builtins/memory.rs | 87 ++++++++++----------- vm/src/builtins/slice.rs | 28 ++----- vm/src/function/buffer.rs | 4 +- vm/src/protocol/buffer.rs | 50 +++++++----- vm/src/sliceable.rs | 158 +++++++++++--------------------------- vm/src/stdlib/io.rs | 8 +- 8 files changed, 132 insertions(+), 210 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index ea8429046d..479c97dcff 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -1221,7 +1221,7 @@ mod array { let buf = PyBuffer::new( zelf.to_owned().into_object(), BufferDescriptor::format( - array.len(), + array.len() * array.itemsize(), false, array.itemsize(), array.typecode_str().into(), diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index e24ec0ad61..0b0ee52289 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -6,10 +6,7 @@ use crate::{ bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, }, - common::{ - hash::PyHash, - lock::PyMutex, - }, + common::{hash::PyHash, lock::PyMutex}, function::{ ArgBytesLike, ArgIterable, IntoPyObject, IntoPyResult, OptionalArg, OptionalOption, }, diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 5625e64dce..0c15090fef 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -2,6 +2,11 @@ use super::{ PyBytes, PyBytesRef, PyList, PyListRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyTypeRef, }; +use crate::common::{ + borrow::{BorrowedValue, BorrowedValueMut}, + hash::PyHash, + lock::OnceCell, +}; use crate::{ bytesinner::bytes_to_hex, function::{FuncArgs, IntoPyObject, OptionalArg}, @@ -14,16 +19,8 @@ use crate::{ PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TryFromObject, TypeProtocol, VirtualMachine, }; -use crate::{ - common::{ - borrow::{BorrowedValue, BorrowedValueMut}, - hash::PyHash, - lock::OnceCell, - }, -}; use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; -use num_traits::ToPrimitive; use std::{cmp::Ordering, fmt::Debug, mem::ManuallyDrop, ops::Range}; #[derive(FromArgs)] @@ -109,14 +106,16 @@ impl PyMemoryView { } fn new_view(&self) -> Self { - PyMemoryView { + let zelf = PyMemoryView { buffer: self.buffer.clone(), released: AtomicCell::new(false), start: self.start, format_spec: self.format_spec.clone(), desc: self.desc.clone(), hash: OnceCell::new(), - } + }; + zelf.buffer.retain(); + zelf } #[pymethod] @@ -238,10 +237,10 @@ impl PyMemoryView { let index = wrap_index(i, shape) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; let index = index as isize * stride + suboffset; - let index = index as usize; - let bytes = self.obj_bytes(); + let pos = (index + self.start as isize) as usize; + let bytes = self.buffer.obj_bytes(); self.format_spec - .unpack(&bytes[index..index + 
self.desc.itemsize], vm) + .unpack(&bytes[pos..pos + self.desc.itemsize], vm) .map(|x| { if x.len() == 1 { x.fast_getitem(0) @@ -280,8 +279,9 @@ impl PyMemoryView { .map(|x| isize::try_from_borrowed_object(vm, x)) .try_collect()?; let pos = self.desc.get_position(&indices, vm)?; + let pos = (pos + self.start as isize) as usize; - let bytes = self.obj_bytes(); + let bytes = self.buffer.obj_bytes(); self.format_spec .unpack(&bytes[pos..pos + self.desc.itemsize], vm) .map(|x| { @@ -305,7 +305,7 @@ impl PyMemoryView { if tuple.len() == 0 { return Ok(zelf .format_spec - .unpack(&zelf.obj_bytes()[..zelf.desc.itemsize], vm)? + .unpack(&zelf.buffer.obj_bytes()[..zelf.desc.itemsize], vm)? .fast_getitem(0)); } } @@ -334,10 +334,10 @@ impl PyMemoryView { let index = wrap_index(i, shape) .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; let index = index as isize * stride + suboffset; - let index = index as usize; - let bytes = &mut *self.obj_bytes_mut(); + let pos = (index + self.start as isize) as usize; + let bytes = &mut *self.buffer.obj_bytes_mut(); let data = self.format_spec.pack(vec![value], vm)?; - bytes[index..index + self.desc.itemsize].copy_from_slice(&data); + bytes[pos..pos + self.desc.itemsize].copy_from_slice(&data); Ok(()) } @@ -361,9 +361,11 @@ impl PyMemoryView { )); } - let mut bytes_mut = dest.obj_bytes_mut(); + let mut bytes_mut = dest.buffer.obj_bytes_mut(); let src_bytes = src.obj_bytes(); dest.desc.zip_eq(&src.desc, true, |a_pos, b_pos, len| { + let a_pos = (a_pos + self.start as isize) as usize; + let b_pos = b_pos as usize; bytes_mut[a_pos..a_pos + len].copy_from_slice(&src_bytes[b_pos..b_pos + len]); }); @@ -387,13 +389,13 @@ impl PyMemoryView { if self.desc.ndim() == 0 { if needle.is(&vm.ctx.ellipsis) { - let bytes = &mut *self.obj_bytes_mut(); + let bytes = &mut *self.buffer.obj_bytes_mut(); let data = self.format_spec.pack(vec![value], vm)?; // TODO: fix panic if data no march itemsize bytes[..self.desc.itemsize].copy_from_slice(&data); } else if let Some(tuple) = needle.payload::() { if tuple.len() == 0 { - let bytes = &mut *self.obj_bytes_mut(); + let bytes = &mut *self.buffer.obj_bytes_mut(); let data = self.format_spec.pack(vec![value], vm)?; // TODO: fix panic if data no march itemsize bytes[..self.desc.itemsize].copy_from_slice(&data); @@ -442,13 +444,7 @@ impl PyMemoryView { fn init_slice(&mut self, slice: &PySlice, dim: usize, vm: &VirtualMachine) -> PyResult<()> { let (shape, stride, _) = self.desc.dim_desc[dim]; let slice = slice.to_saturated(vm)?; - let (range, step, is_negative_step) = slice.adjust_indices(shape); - let abs_step = step.unwrap(); - let step = if is_negative_step { - -(abs_step as isize) - } else { - abs_step as isize - }; + let (range, step, slicelen) = slice.adjust_indices(shape); let mut is_adjusted_suboffset = false; for (_, _, suboffset) in self.desc.dim_desc.iter_mut().rev() { @@ -459,14 +455,15 @@ impl PyMemoryView { } } if !is_adjusted_suboffset { - self.start += stride as usize * range.start; + // TODO: AdjustIndices + self.start += stride as usize + * if step.is_negative() { + range.end - 1 + } else { + range.start + }; } - // overflow is not possible as dividing a positive integer - let newlen = ((range.end - range.start - 1) / abs_step) - .to_usize() - .unwrap() - + 1; - self.desc.dim_desc[dim].0 = newlen; + self.desc.dim_desc[dim].0 = slicelen; self.desc.dim_desc[dim].1 *= step; Ok(()) @@ -502,7 +499,8 @@ impl PyMemoryView { if dim + 1 == self.desc.ndim() { let mut v = Vec::with_capacity(shape); for _ in 
0..shape { - let pos = (index + suboffset) as usize; + let pos = index + suboffset; + let pos = (pos + self.start as isize) as usize; let obj = format_unpack(&self.format_spec, &bytes[pos..pos + self.desc.itemsize], vm)?; v.push(obj); @@ -529,7 +527,7 @@ impl PyMemoryView { // TODO: unpack_single(view->buf, fmt) return Ok(vm.ctx.new_list(vec![])); } - let bytes = self.obj_bytes(); + let bytes = self.buffer.obj_bytes(); self._to_list(&bytes, 0, 0, vm) } @@ -690,7 +688,7 @@ impl PyMemoryView { let b_format_spec = &Self::parse_format(&other.desc.format, vm)?; if zelf.desc.ndim() == 0 { - let a_val = format_unpack(a_format_spec, &zelf.obj_bytes()[..a_itemsize], vm)?; + let a_val = format_unpack(a_format_spec, &zelf.buffer.obj_bytes()[..a_itemsize], vm)?; let b_val = format_unpack(b_format_spec, &other.obj_bytes()[..b_itemsize], vm)?; return Ok(vm.bool_eq(&a_val, &b_val)?); } @@ -727,13 +725,13 @@ impl PyMemoryView { fn as_contiguous(&self) -> Option> { self.desc .is_contiguous() - .then(|| BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..self.desc.len])) + .then(|| BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..self.start + self.desc.len])) } fn _as_contiguous_mut(&self) -> Option> { self.desc.is_contiguous().then(|| { BorrowedValueMut::map(self.buffer.obj_bytes_mut(), |x| { - &mut x[self.start..self.desc.len] + &mut x[self.start..self.start + self.desc.len] }) }) } @@ -742,9 +740,12 @@ impl PyMemoryView { if let Some(bytes) = self.as_contiguous() { buf.extend_from_slice(&bytes); } else { - let bytes = &*self.obj_bytes(); - self.desc - .for_each_segment(true, |range| buf.extend_from_slice(&bytes[range])); + let bytes = &*self.buffer.obj_bytes(); + self.desc.for_each_segment(true, |range| { + let start = (range.start + self.start as isize) as usize; + let end = (range.end + self.start as isize) as usize; + buf.extend_from_slice(&bytes[start..end]); + }) } } diff --git a/vm/src/builtins/slice.rs b/vm/src/builtins/slice.rs index e1b1350353..8201cc2b08 100644 --- a/vm/src/builtins/slice.rs +++ b/vm/src/builtins/slice.rs @@ -287,11 +287,11 @@ impl SaturatedSlice { // Equivalent to PySlice_AdjustIndices /// Convert for usage in indexing the underlying rust collections. Called *after* /// __index__ has been called on the Slice which might mutate the collection. 
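The change below drops adjust_indices' old (range, Option<usize> step, is_negative_step) result in favour of (range, signed step, slice length). A standalone sketch of the length arithmetic the new callers rely on (slice_len is illustrative only, not part of the codebase; it mirrors the formula in this hunk):

    // Number of indices selected by a saturated slice over `range` with a
    // non-zero signed `step`; an empty range selects nothing.
    fn slice_len(range: std::ops::Range<usize>, step: isize) -> usize {
        if range.start >= range.end {
            0
        } else {
            (range.end - range.start - 1) / step.unsigned_abs() + 1
        }
    }

For example, slice_len(0..5, 2) and slice_len(0..5, -2) both return 3, matching seq[::2] and seq[::-2] on a five-element sequence.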
- pub fn adjust_indices(&self, len: usize) -> (Range, Option, bool) { + pub fn adjust_indices(&self, len: usize) -> (Range, isize, usize) { // len should always be <= isize::MAX let ilen = len.to_isize().unwrap_or(isize::MAX); let (start, stop, step) = (self.start, self.stop, self.step); - let (start, stop, step, is_negative_step) = if step.is_negative() { + let (start, stop) = if step.is_negative() { ( if stop == -1 { ilen.saturating_add(1) @@ -303,31 +303,19 @@ impl SaturatedSlice { } else { start.saturating_add(1) }, - step.saturating_abs(), - true, ) } else { - (start, stop, step, false) + (start, stop) }; - let step = step.to_usize(); - let range = saturate_index(start, len)..saturate_index(stop, len); - let range = if range.start >= range.end { - range.start..range.start + let (range, slicelen) = if range.start >= range.end { + (range.start..range.start, 0) } else { - // step overflow - if step.is_none() { - if is_negative_step { - (range.end - 1)..range.end - } else { - range.start..(range.start + 1) - } - } else { - range - } + let slicelen = (range.end - range.start - 1) / step.unsigned_abs() + 1; + (range, slicelen) }; - (range, step, is_negative_step) + (range, step, slicelen) } } diff --git a/vm/src/function/buffer.rs b/vm/src/function/buffer.rs index aa5613398f..028a34a85b 100644 --- a/vm/src/function/buffer.rs +++ b/vm/src/function/buffer.rs @@ -40,7 +40,7 @@ impl PyObject { impl ArgBytesLike { pub fn borrow_buf(&self) -> BorrowedValue<'_, [u8]> { - self.0.obj_bytes() + unsafe { self.0.contiguous() } } pub fn with_ref(&self, f: F) -> R @@ -82,7 +82,7 @@ pub struct ArgMemoryBuffer(PyBuffer); impl ArgMemoryBuffer { pub fn borrow_buf_mut(&self) -> BorrowedValueMut<'_, [u8]> { - self.0.obj_bytes_mut() + unsafe { self.0.contiguous_mut() } } pub fn with_ref(&self, f: F) -> R diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index 084f08fd2e..b9c2f26923 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -55,13 +55,21 @@ impl PyBuffer { pub fn as_contiguous(&self) -> Option> { self.desc .is_contiguous() - .then(|| BorrowedValue::map(self.obj_bytes(), |x| &x[..self.desc.len])) + .then(|| unsafe { self.contiguous() }) } pub fn as_contiguous_mut(&self) -> Option> { self.desc .is_contiguous() - .then(|| BorrowedValueMut::map(self.obj_bytes_mut(), |x| &mut x[..self.desc.len])) + .then(|| unsafe { self.contiguous_mut() }) + } + + pub unsafe fn contiguous(&self) -> BorrowedValue<[u8]> { + BorrowedValue::map(self.obj_bytes(), |x| &x[..self.desc.len]) + } + + pub unsafe fn contiguous_mut(&self) -> BorrowedValueMut<[u8]> { + BorrowedValueMut::map(self.obj_bytes_mut(), |x| &mut x[..self.desc.len]) } pub fn collect(&self, buf: &mut Vec) { @@ -69,8 +77,9 @@ impl PyBuffer { buf.extend_from_slice(&self.obj_bytes()); } else { let bytes = &*self.obj_bytes(); - self.desc - .for_each_segment(true, |range| buf.extend_from_slice(&bytes[range])); + self.desc.for_each_segment(true, |range| { + buf.extend_from_slice(&bytes[range.start as usize..range.end as usize]) + }); } } @@ -192,7 +201,7 @@ impl BufferDescriptor { assert!(suboffset >= -stride * shape as isize); } } - assert!(shape_product == self.len); + assert!(shape_product * self.itemsize == self.len); self } @@ -221,20 +230,20 @@ impl BufferDescriptor { /// this function do not check the bound /// panic if indices.len() != ndim - pub fn get_position_fast(&self, indices: &[usize]) -> usize { + pub fn get_position_fast(&self, indices: &[usize]) -> isize { let mut pos = 0; for (i, (_, stride, suboffset)) in 
indices .iter() .cloned() .zip_eq(self.dim_desc.iter().cloned()) { - pos += (i as isize * stride + suboffset) as usize; + pos += i as isize * stride + suboffset; } pos } /// panic if indices.len() != ndim - pub fn get_position(&self, indices: &[isize], vm: &VirtualMachine) -> PyResult { + pub fn get_position(&self, indices: &[isize], vm: &VirtualMachine) -> PyResult { let mut pos = 0; for (i, (shape, stride, suboffset)) in indices .iter() @@ -244,17 +253,17 @@ impl BufferDescriptor { let i = wrap_index(i, shape).ok_or_else(|| { vm.new_index_error(format!("index out of bounds on dimension {}", i)) })?; - pos += (i as isize * stride + suboffset) as usize; + pos += i as isize * stride + suboffset; } Ok(pos) } pub fn for_each_segment(&self, try_conti: bool, mut f: F) where - F: FnMut(Range), + F: FnMut(Range), { if self.ndim() == 0 { - f(0..self.itemsize); + f(0..self.itemsize as isize); return; } if try_conti && self.is_last_dim_contiguous() { @@ -266,16 +275,16 @@ impl BufferDescriptor { fn _for_each_segment(&self, mut index: isize, dim: usize, f: &mut F) where - F: FnMut(Range), + F: FnMut(Range), { let (shape, stride, suboffset) = self.dim_desc[dim]; if dim + 1 == self.ndim() { if CONTI { - f(index as usize..index as usize + shape * self.itemsize); + f(index..index + (shape * self.itemsize) as isize); } else { for _ in 0..shape { - let pos = (index + suboffset) as usize; - f(pos..pos + self.itemsize); + let pos = index + suboffset; + f(pos..pos + self.itemsize as isize); index += stride; } } @@ -289,7 +298,7 @@ impl BufferDescriptor { pub fn zip_eq(&self, other: &Self, try_conti: bool, mut f: F) where - F: FnMut(usize, usize, usize), + F: FnMut(isize, isize, usize), { debug_assert_eq!(self.itemsize, other.itemsize); debug_assert_eq!(self.len, other.len); @@ -313,23 +322,24 @@ impl BufferDescriptor { dim: usize, f: &mut F, ) where - F: FnMut(usize, usize, usize), + F: FnMut(isize, isize, usize), { let (shape, a_stride, a_suboffset) = self.dim_desc[dim]; let (_b_shape, b_stride, b_suboffset) = other.dim_desc[dim]; debug_assert_eq!(shape, _b_shape); if dim + 1 == self.ndim() { if CONTI { - f(a_index as usize, b_index as usize, shape * self.itemsize); + f(a_index, b_index, shape * self.itemsize); } else { for _ in 0..shape { - let a_pos = (a_index + a_suboffset) as usize; - let b_pos = (b_index + b_suboffset) as usize; + let a_pos = a_index + a_suboffset; + let b_pos = b_index + b_suboffset; f(a_pos, b_pos, self.itemsize); a_index += a_stride; b_index += b_stride; } } + return; } for _ in 0..shape { diff --git a/vm/src/sliceable.rs b/vm/src/sliceable.rs index 6eab0e5c64..b268fe1f18 100644 --- a/vm/src/sliceable.rs +++ b/vm/src/sliceable.rs @@ -28,56 +28,22 @@ pub trait PySliceableSequenceMut { slice: SaturatedSlice, items: &[Self::Item], ) -> PyResult<()> { - let (range, step, is_negative_step) = slice.adjust_indices(self.as_slice().len()); - if !is_negative_step && step == Some(1) { - return if range.end - range.start == items.len() { - self.do_set_range(range, items); - Ok(()) - } else { - Err(vm.new_buffer_error( - "Existing exports of data: object cannot be re-sized".to_owned(), - )) - }; - } - if let Some(step) = step { - let slicelen = if range.end > range.start { - (range.end - range.start - 1) / step + 1 - } else { - 0 - }; - + let (range, step, slicelen) = slice.adjust_indices(self.as_slice().len()); + if step == 1 { if slicelen == items.len() { - let indexes = if is_negative_step { - itertools::Either::Left(range.rev().step_by(step)) - } else { - 
itertools::Either::Right(range.step_by(step)) - }; - self.do_replace_indexes(indexes, items); - Ok(()) - } else { - Err(vm.new_buffer_error( - "Existing exports of data: object cannot be re-sized".to_owned(), - )) + self.do_set_range(range, items); + return Ok(()); } - } else { - // edge case, step is too big for usize - // same behaviour as CPython - let slicelen = if range.start < range.end { 1 } else { 0 }; - if match items.len() { - 0 => slicelen == 0, - 1 => { - self.do_set_range(range, items); - true - } - _ => false, - } { - Ok(()) + } else if slicelen == items.len() { + let indexes = if step.is_negative() { + itertools::Either::Left(range.rev().step_by(step.unsigned_abs())) } else { - Err(vm.new_buffer_error( - "Existing exports of data: object cannot be re-sized".to_owned(), - )) - } + itertools::Either::Right(range.step_by(step.unsigned_abs())) + }; + self.do_replace_indexes(indexes, items); + return Ok(()); } + Err(vm.new_buffer_error("Existing exports of data: object cannot be re-sized".to_owned())) } fn set_slice_items( @@ -86,81 +52,47 @@ pub trait PySliceableSequenceMut { slice: SaturatedSlice, items: &[Self::Item], ) -> PyResult<()> { - let (range, step, is_negative_step) = slice.adjust_indices(self.as_slice().len()); - if !is_negative_step && step == Some(1) { + let (range, step, slicelen) = slice.adjust_indices(self.as_slice().len()); + if step == 1 { self.do_set_range(range, items); return Ok(()); } - if let Some(step) = step { - let slicelen = if range.end > range.start { - (range.end - range.start - 1) / step + 1 - } else { - 0 - }; - if slicelen == items.len() { - let indexes = if is_negative_step { - itertools::Either::Left(range.rev().step_by(step)) - } else { - itertools::Either::Right(range.step_by(step)) - }; - self.do_replace_indexes(indexes, items); - Ok(()) + if slicelen == items.len() { + let indexes = if step.is_negative() { + itertools::Either::Left(range.rev().step_by(step.unsigned_abs())) } else { - Err(vm.new_value_error(format!( - "attempt to assign sequence of size {} to extended slice of size {}", - items.len(), - slicelen - ))) - } + itertools::Either::Right(range.step_by(step.unsigned_abs())) + }; + self.do_replace_indexes(indexes, items); + Ok(()) } else { - // edge case, step is too big for usize - // same behaviour as CPython - let slicelen = if range.start < range.end { 1 } else { 0 }; - if match items.len() { - 0 => slicelen == 0, - 1 => { - self.do_set_range(range, items); - true - } - _ => false, - } { - Ok(()) - } else { - Err(vm.new_value_error(format!( - "attempt to assign sequence of size {} to extended slice of size {}", - items.len(), - slicelen - ))) - } + Err(vm.new_value_error(format!( + "attempt to assign sequence of size {} to extended slice of size {}", + items.len(), + slicelen + ))) } } fn delete_slice(&mut self, _vm: &VirtualMachine, slice: SaturatedSlice) -> PyResult<()> { - let (range, step, is_negative_step) = slice.adjust_indices(self.as_slice().len()); - if range.start >= range.end { + let (range, step, slicelen) = slice.adjust_indices(self.as_slice().len()); + if slicelen == 0 { return Ok(()); } - if !is_negative_step && step == Some(1) { + if step == 1 { self.do_delete_range(range); return Ok(()); } - // step is not negative here - if let Some(step) = step { - let indexes = if is_negative_step { - itertools::Either::Left(range.clone().rev().step_by(step).rev()) - } else { - itertools::Either::Right(range.clone().step_by(step)) - }; - - self.do_delete_indexes(range, indexes); + let indexes = if step.is_negative() { + 
itertools::Either::Left(range.clone().rev().step_by(step.unsigned_abs()).rev()) } else { - // edge case, step is too big for usize - // same behaviour as CPython - self.do_delete_range(range); - } + itertools::Either::Right(range.clone().step_by(step.unsigned_abs())) + }; + + self.do_delete_indexes(range, indexes); Ok(()) } } @@ -237,28 +169,24 @@ pub trait PySliceableSequence { _vm: &VirtualMachine, slice: SaturatedSlice, ) -> PyResult { - let (range, step, is_negative_step) = slice.adjust_indices(self.len()); - if range.start >= range.end { + let (range, step, slicelen) = slice.adjust_indices(self.len()); + if slicelen == 0 { return Ok(Self::empty()); } - if step == Some(1) { - return Ok(if is_negative_step { + if step == 1 { + return Ok(if step.is_negative() { self.do_slice_reverse(range) } else { self.do_slice(range) }); } - if let Some(step) = step { - Ok(if is_negative_step { - self.do_stepped_slice_reverse(range, step) - } else { - self.do_stepped_slice(range, step) - }) + Ok(if step.is_negative() { + self.do_stepped_slice_reverse(range, step.unsigned_abs()) } else { - Ok(self.do_slice(range)) - } + self.do_stepped_slice(range, step.unsigned_abs()) + }) } fn get_item( diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 639dd464e6..2c42b25aaf 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -73,11 +73,9 @@ mod _io { PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyIntRef, PyMemoryView, PyStr, PyStrRef, PyType, PyTypeRef, }, - common::{ - lock::{ - PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, - PyThreadMutex, PyThreadMutexGuard, - }, + common::lock::{ + PyMappedThreadMutexGuard, PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard, + PyThreadMutex, PyThreadMutexGuard, }, function::{ ArgBytesLike, ArgIterable, ArgMemoryBuffer, FuncArgs, IntoPyObject, OptionalArg, From 41c3e13038ce944df648ea958cca55523f361e62 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Tue, 2 Nov 2021 21:43:14 +0200 Subject: [PATCH 10/20] fix adjust_indices introduct SaturatedSliceIterator --- @test_113974_tmp | 1 + vm/src/builtins/memory.rs | 8 ++-- vm/src/builtins/slice.rs | 76 +++++++++++++++++++++++--------- vm/src/sliceable.rs | 92 +++++++++++++++++---------------------- 4 files changed, 101 insertions(+), 76 deletions(-) create mode 100755 @test_113974_tmp diff --git a/@test_113974_tmp b/@test_113974_tmp new file mode 100755 index 0000000000..3b18e512db --- /dev/null +++ b/@test_113974_tmp @@ -0,0 +1 @@ +hello world diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 0c15090fef..42bc1ede18 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -723,9 +723,11 @@ impl PyMemoryView { } fn as_contiguous(&self) -> Option> { - self.desc - .is_contiguous() - .then(|| BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..self.start + self.desc.len])) + self.desc.is_contiguous().then(|| { + BorrowedValue::map(self.buffer.obj_bytes(), |x| { + &x[self.start..self.start + self.desc.len] + }) + }) } fn _as_contiguous_mut(&self) -> Option> { diff --git a/vm/src/builtins/slice.rs b/vm/src/builtins/slice.rs index 8201cc2b08..345863ca20 100644 --- a/vm/src/builtins/slice.rs +++ b/vm/src/builtins/slice.rs @@ -9,6 +9,7 @@ use crate::{ use num_bigint::{BigInt, ToBigInt}; use num_traits::{One, Signed, ToPrimitive, Zero}; use std::ops::Range; +use std::option::Option; #[pyclass(module = false, name = "slice")] #[derive(Debug)] @@ -288,40 +289,73 @@ impl SaturatedSlice { /// Convert for usage in indexing the underlying rust 
collections. Called *after* /// __index__ has been called on the Slice which might mutate the collection. pub fn adjust_indices(&self, len: usize) -> (Range, isize, usize) { - // len should always be <= isize::MAX - let ilen = len.to_isize().unwrap_or(isize::MAX); - let (start, stop, step) = (self.start, self.stop, self.step); - let (start, stop) = if step.is_negative() { - ( - if stop == -1 { - ilen.saturating_add(1) - } else { - stop.saturating_add(1) - }, - if start == -1 { - ilen - } else { - start.saturating_add(1) - }, - ) + let range = if self.step.is_negative() { + saturate_index(self.stop.saturating_add(1), len) + ..saturate_index(self.start.saturating_add(1), len) } else { - (start, stop) + saturate_index(self.start, len)..saturate_index(self.stop, len) }; - let range = saturate_index(start, len)..saturate_index(stop, len); let (range, slicelen) = if range.start >= range.end { (range.start..range.start, 0) } else { - let slicelen = (range.end - range.start - 1) / step.unsigned_abs() + 1; + let slicelen = (range.end - range.start - 1) / self.step.unsigned_abs() + 1; (range, slicelen) }; - (range, step, slicelen) + (range, self.step, slicelen) + } + + pub fn iter(&self, len: usize) -> SaturatedSliceIterator { + SaturatedSliceIterator::new(self, len) + } +} + +pub struct SaturatedSliceIterator { + pub index: isize, + pub step: isize, + pub len: usize, +} + +impl SaturatedSliceIterator { + pub fn new(slice: &SaturatedSlice, seq_len: usize) -> Self { + let (range, step, len) = slice.adjust_indices(seq_len); + Self::from_adjust_indices(range, step, len) + } + + pub fn from_adjust_indices(range: Range, step: isize, len: usize) -> Self { + let index = if step.is_negative() { + range.end as isize - 1 + } else { + range.start as isize + }; + Self { index, step, len } + } + + pub fn positive_order(mut self) -> Self { + if self.step.is_negative() { + self.index += self.step * self.len as isize + 1; + self.step = self.step.saturating_abs() + } + self + } +} + +impl Iterator for SaturatedSliceIterator { + type Item = usize; + fn next(&mut self) -> Option { + if self.len == 0 { + return None; + } + self.len -= 1; + let ret = self.index as usize; + self.index += self.step; + Some(ret) } } // Go from PyObjectRef to isize w/o overflow error, out of range values are substituted by // isize::MIN or isize::MAX depending on type and value of step. -// Equivalent to PyNumber_AsSsize_t with err equal to None. +// Equivalent to PyEval_SliceIndex. fn to_isize_index(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { if vm.is_none(obj) { return Ok(None); diff --git a/vm/src/sliceable.rs b/vm/src/sliceable.rs index b268fe1f18..5262b004ff 100644 --- a/vm/src/sliceable.rs +++ b/vm/src/sliceable.rs @@ -3,8 +3,8 @@ use std::ops::Range; use crate::builtins::int::PyInt; // export through slicable module, not slice. 
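The SaturatedSliceIterator introduced above walks the adjusted range starting from whichever end the sign of the step dictates. A self-contained sketch of that traversal (slice_indices is illustrative, not a VM API):

    // Indices visited for an adjusted (range, step, len) triple: start at the
    // high end of the range for a negative step, otherwise at the low end,
    // and advance by the signed step exactly `len` times.
    fn slice_indices(range: std::ops::Range<usize>, step: isize, len: usize) -> Vec<usize> {
        let mut index = if step.is_negative() {
            range.end as isize - 1
        } else {
            range.start as isize
        };
        let mut out = Vec::with_capacity(len);
        for _ in 0..len {
            out.push(index as usize);
            index += step;
        }
        out
    }

For instance, slice_indices(0..5, -2, 3) yields [4, 2, 0], the indices touched by seq[::-2] on a five-element sequence.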
-use crate::builtins::slice::PySlice; pub use crate::builtins::slice::{saturate_index, SaturatedSlice}; +use crate::builtins::slice::{PySlice, SaturatedSliceIterator}; use crate::utils::Either; use crate::VirtualMachine; use crate::{PyObjectRef, PyRef, PyResult, TypeProtocol}; @@ -29,21 +29,19 @@ pub trait PySliceableSequenceMut { items: &[Self::Item], ) -> PyResult<()> { let (range, step, slicelen) = slice.adjust_indices(self.as_slice().len()); - if step == 1 { - if slicelen == items.len() { - self.do_set_range(range, items); - return Ok(()); - } - } else if slicelen == items.len() { - let indexes = if step.is_negative() { - itertools::Either::Left(range.rev().step_by(step.unsigned_abs())) - } else { - itertools::Either::Right(range.step_by(step.unsigned_abs())) - }; - self.do_replace_indexes(indexes, items); - return Ok(()); + if slicelen != items.len() { + Err(vm + .new_buffer_error("Existing exports of data: object cannot be re-sized".to_owned())) + } else if step == 1 { + self.do_set_range(range, items); + Ok(()) + } else { + self.do_replace_indexes( + SaturatedSliceIterator::from_adjust_indices(range, step, slicelen), + items, + ); + Ok(()) } - Err(vm.new_buffer_error("Existing exports of data: object cannot be re-sized".to_owned())) } fn set_slice_items( @@ -55,16 +53,12 @@ pub trait PySliceableSequenceMut { let (range, step, slicelen) = slice.adjust_indices(self.as_slice().len()); if step == 1 { self.do_set_range(range, items); - return Ok(()); - } - - if slicelen == items.len() { - let indexes = if step.is_negative() { - itertools::Either::Left(range.rev().step_by(step.unsigned_abs())) - } else { - itertools::Either::Right(range.step_by(step.unsigned_abs())) - }; - self.do_replace_indexes(indexes, items); + Ok(()) + } else if slicelen == items.len() { + self.do_replace_indexes( + SaturatedSliceIterator::from_adjust_indices(range, step, slicelen), + items, + ); Ok(()) } else { Err(vm.new_value_error(format!( @@ -78,22 +72,17 @@ pub trait PySliceableSequenceMut { fn delete_slice(&mut self, _vm: &VirtualMachine, slice: SaturatedSlice) -> PyResult<()> { let (range, step, slicelen) = slice.adjust_indices(self.as_slice().len()); if slicelen == 0 { - return Ok(()); - } - - if step == 1 { + Ok(()) + } else if step == 1 { self.do_delete_range(range); - return Ok(()); - } - - let indexes = if step.is_negative() { - itertools::Either::Left(range.clone().rev().step_by(step.unsigned_abs()).rev()) + Ok(()) } else { - itertools::Either::Right(range.clone().step_by(step.unsigned_abs())) - }; - - self.do_delete_indexes(range, indexes); - Ok(()) + self.do_delete_indexes( + range.clone(), + SaturatedSliceIterator::from_adjust_indices(range, step, slicelen).positive_order(), + ); + Ok(()) + } } } @@ -170,23 +159,22 @@ pub trait PySliceableSequence { slice: SaturatedSlice, ) -> PyResult { let (range, step, slicelen) = slice.adjust_indices(self.len()); - if slicelen == 0 { - return Ok(Self::empty()); - } - - if step == 1 { - return Ok(if step.is_negative() { + let sliced = if slicelen == 0 { + Self::empty() + } else if step == 1 { + if step.is_negative() { self.do_slice_reverse(range) } else { self.do_slice(range) - }); - } - - Ok(if step.is_negative() { - self.do_stepped_slice_reverse(range, step.unsigned_abs()) + } } else { - self.do_stepped_slice(range, step.unsigned_abs()) - }) + if step.is_negative() { + self.do_stepped_slice_reverse(range, step.unsigned_abs()) + } else { + self.do_stepped_slice(range, step.unsigned_abs()) + } + }; + Ok(sliced) } fn get_item( From 
111cd3beedaf6a04f13a6342126f6d7c900b8200 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Wed, 3 Nov 2021 20:16:10 +0200 Subject: [PATCH 11/20] fix memoryview cmp --- vm/src/builtins/memory.rs | 87 +++++++++++++++++++++------------------ vm/src/protocol/buffer.rs | 34 +++++++++------ 2 files changed, 67 insertions(+), 54 deletions(-) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 42bc1ede18..d716b89b87 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -1,6 +1,5 @@ use super::{ - PyBytes, PyBytesRef, PyList, PyListRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, - PyTypeRef, + PyBytes, PyBytesRef, PyListRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyTypeRef, }; use crate::common::{ borrow::{BorrowedValue, BorrowedValueMut}, @@ -94,7 +93,7 @@ impl PyMemoryView { let mut zelf = PyMemoryView { buffer: ManuallyDrop::new(buffer), released: AtomicCell::new(false), - start: range.start, + start: 0, format_spec, desc, hash: OnceCell::new(), @@ -363,10 +362,12 @@ impl PyMemoryView { let mut bytes_mut = dest.buffer.obj_bytes_mut(); let src_bytes = src.obj_bytes(); - dest.desc.zip_eq(&src.desc, true, |a_pos, b_pos, len| { - let a_pos = (a_pos + self.start as isize) as usize; - let b_pos = b_pos as usize; - bytes_mut[a_pos..a_pos + len].copy_from_slice(&src_bytes[b_pos..b_pos + len]); + dest.desc.zip_eq(&src.desc, true, |a_range, b_range| { + let a_range = (a_range.start + self.start as isize) as usize + ..(a_range.end + self.start as isize) as usize; + let b_range = b_range.start as usize..b_range.end as usize; + bytes_mut[a_range].copy_from_slice(&src_bytes[b_range]); + false }); Ok(()) @@ -418,8 +419,8 @@ impl PyMemoryView { } fn init_len(&mut self) { - let product = self.desc.dim_desc.iter().map(|x| x.0).product(); - self.desc.len = product; + let product: usize = self.desc.dim_desc.iter().map(|x| x.0).product(); + self.desc.len = product * self.desc.itemsize; } fn init_range(&mut self, range: Range, dim: usize) { @@ -523,11 +524,14 @@ impl PyMemoryView { #[pymethod] fn tolist(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; + let bytes = self.buffer.obj_bytes(); if self.desc.ndim() == 0 { - // TODO: unpack_single(view->buf, fmt) - return Ok(vm.ctx.new_list(vec![])); + return Ok(vm.ctx.new_list(vec![format_unpack( + &self.format_spec, + &bytes[..self.desc.itemsize], + vm, + )?])); } - let bytes = self.buffer.obj_bytes(); self._to_list(&bytes, 0, 0, vm) } @@ -693,15 +697,36 @@ impl PyMemoryView { return Ok(vm.bool_eq(&a_val, &b_val)?); } - zelf.contiguous_or_collect(|a| { - other.contiguous_or_collect(|b| { - // TODO: optimize cmp by format - let a_list = unpack_bytes_seq_to_list(a, a_format_spec, vm)?; - let b_list = unpack_bytes_seq_to_list(b, b_format_spec, vm)?; - - vm.bool_eq(a_list.as_object(), b_list.as_object()) - }) - }) + // TODO: optimize cmp by format + let mut ret = Ok(true); + let a_bytes = zelf.buffer.obj_bytes(); + let b_bytes = other.obj_bytes(); + zelf.desc.zip_eq(&other.desc, false, |a_range, b_range| { + let a_range = (a_range.start + zelf.start as isize) as usize + ..(a_range.end + zelf.start as isize) as usize; + let b_range = b_range.start as usize..b_range.end as usize; + let a_val = match format_unpack(&a_format_spec, &a_bytes[a_range], vm) { + Ok(val) => val, + Err(e) => { + ret = Err(e); + return true; + } + }; + let b_val = match format_unpack(&b_format_spec, &b_bytes[b_range], vm) { + Ok(val) => val, + Err(e) => { + ret = Err(e); + return true; + } + }; + ret = vm.bool_eq(&a_val, &b_val); + 
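// Note: the boolean returned from this closure appears to be zip_eq's
// early-exit signal (see the updated zip_eq further below, whose callback now
// returns bool): returning true stops the traversal at the first error or
// mismatch.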
if let Ok(b) = ret { + !b + } else { + true + } + }); + ret } #[pymethod(magic)] @@ -900,26 +925,6 @@ fn format_unpack( }) } -fn unpack_bytes_seq_to_list( - bytes: &[u8], - format_spec: &FormatSpec, - vm: &VirtualMachine, -) -> PyResult { - let itemsize = format_spec.size(); - - if bytes.len() % itemsize != 0 { - return Err(vm.new_value_error("bytes length not a multiple of item size".to_owned())); - } - - let len = bytes.len() / itemsize; - - let elements: Vec = (0..len) - .map(|i| format_unpack(&format_spec, &bytes[i..i + itemsize], vm)) - .try_collect()?; - - Ok(PyList::from(elements).into_ref(vm)) -} - fn is_equiv_shape(a: &BufferDescriptor, b: &BufferDescriptor) -> bool { if a.ndim() != b.ndim() { return false; diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index b9c2f26923..d939a5092e 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -73,8 +73,8 @@ impl PyBuffer { } pub fn collect(&self, buf: &mut Vec) { - if self.desc.is_contiguous() { - buf.extend_from_slice(&self.obj_bytes()); + if let Some(bytes) = self.as_contiguous() { + buf.extend_from_slice(&bytes); } else { let bytes = &*self.obj_bytes(); self.desc.for_each_segment(true, |range| { @@ -86,8 +86,8 @@ impl PyBuffer { pub fn contiguous_or_collect R>(&self, f: F) -> R { let borrowed; let mut collected; - let v = if self.desc.is_contiguous() { - borrowed = self.obj_bytes(); + let v = if let Some(bytes) = self.as_contiguous() { + borrowed = bytes; &*borrowed } else { collected = vec![]; @@ -151,7 +151,7 @@ impl Drop for PyBuffer { #[derive(Debug, Clone)] pub struct BufferDescriptor { /// product(shape) * itemsize - /// NOT the bytes length if buffer is discontiguous + /// bytes length, but not the length for obj_bytes() even is contiguous pub len: usize, pub readonly: bool, pub itemsize: usize, @@ -296,15 +296,13 @@ impl BufferDescriptor { } } + /// zip two BufferDescriptor with the same shape pub fn zip_eq(&self, other: &Self, try_conti: bool, mut f: F) where - F: FnMut(isize, isize, usize), + F: FnMut(Range, Range) -> bool, { - debug_assert_eq!(self.itemsize, other.itemsize); - debug_assert_eq!(self.len, other.len); - if self.ndim() == 0 { - f(0, 0, self.itemsize); + f(0..self.itemsize as isize, 0..other.itemsize as isize); return; } if try_conti && self.is_last_dim_contiguous() { @@ -322,19 +320,29 @@ impl BufferDescriptor { dim: usize, f: &mut F, ) where - F: FnMut(isize, isize, usize), + F: FnMut(Range, Range) -> bool, { let (shape, a_stride, a_suboffset) = self.dim_desc[dim]; let (_b_shape, b_stride, b_suboffset) = other.dim_desc[dim]; debug_assert_eq!(shape, _b_shape); if dim + 1 == self.ndim() { if CONTI { - f(a_index, b_index, shape * self.itemsize); + if f( + a_index..a_index + (shape * self.itemsize) as isize, + b_index..b_index + (shape * other.itemsize) as isize, + ) { + return; + } } else { for _ in 0..shape { let a_pos = a_index + a_suboffset; let b_pos = b_index + b_suboffset; - f(a_pos, b_pos, self.itemsize); + if f( + a_pos..a_pos + self.itemsize as isize, + b_pos..b_pos + other.itemsize as isize, + ) { + return; + } a_index += a_stride; b_index += b_stride; } From 970c40876a02d2822d7529cb19f204b491cf8546 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 4 Nov 2021 19:28:51 +0200 Subject: [PATCH 12/20] fix adjust_indices positive_order --- vm/src/builtins/slice.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/vm/src/builtins/slice.rs b/vm/src/builtins/slice.rs index 345863ca20..77fd741ad3 100644 --- a/vm/src/builtins/slice.rs 
+++ b/vm/src/builtins/slice.rs @@ -289,9 +289,21 @@ impl SaturatedSlice { /// Convert for usage in indexing the underlying rust collections. Called *after* /// __index__ has been called on the Slice which might mutate the collection. pub fn adjust_indices(&self, len: usize) -> (Range, isize, usize) { + if len == 0 { + return (0..0, self.step, 0); + } let range = if self.step.is_negative() { - saturate_index(self.stop.saturating_add(1), len) - ..saturate_index(self.start.saturating_add(1), len) + let stop = if self.stop == -1 { + len + } else { + saturate_index(self.stop.saturating_add(1), len) + }; + let start = if self.start == -1 { + len + } else { + saturate_index(self.start.saturating_add(1), len) + }; + stop..start } else { saturate_index(self.start, len)..saturate_index(self.stop, len) }; @@ -333,7 +345,7 @@ impl SaturatedSliceIterator { pub fn positive_order(mut self) -> Self { if self.step.is_negative() { - self.index += self.step * self.len as isize + 1; + self.index += self.step * self.len.saturating_sub(1) as isize; self.step = self.step.saturating_abs() } self From 4c9a8bc808c473f8d43cfb10fcd789773ba5e537 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 4 Nov 2021 20:48:04 +0200 Subject: [PATCH 13/20] fix bug mark passed test --- Lib/test/test_gzip.py | 2 -- vm/src/builtins/memory.rs | 22 ++++++++++++++-------- vm/src/stdlib/io.rs | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 5ac37fa452..0b5113c20b 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -88,8 +88,6 @@ def test_write_read_with_pathlike_file(self): self.assertEqual(d, data1 * 51) self.assertIsInstance(f.name, str) - # TODO: RUSTPYTHON - @unittest.expectedFailure # The following test_write_xy methods test that write accepts # the corresponding bytes-like object type as input # and that the data written equals bytes(xy) in all cases. diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index d716b89b87..ca66c09436 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -49,12 +49,7 @@ impl Constructor for PyMemoryView { type Args = PyMemoryViewNewArgs; fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { - let zelf = if let Some(other) = args.object.payload::() { - other.new_view() - } else { - let buffer = PyBuffer::try_from_borrowed_object(vm, &args.object)?; - PyMemoryView::from_buffer(buffer, vm)? 
- }; + let zelf = Self::from_object(&args.object, vm)?; zelf.into_pyresult_with_type(vm, cls) } } @@ -65,6 +60,17 @@ impl PyMemoryView { FormatSpec::parse(format.as_bytes(), vm) } + /// this should be the main entrence to create the memoryview + /// to avoid the chained memoryview + pub fn from_object(obj: &PyObject, vm: &VirtualMachine) -> PyResult { + if let Some(other) = obj.payload::() { + Ok(other.new_view()) + } else { + let buffer = PyBuffer::try_from_borrowed_object(vm, obj)?; + PyMemoryView::from_buffer(buffer, vm) + } + } + pub fn from_buffer(buffer: PyBuffer, vm: &VirtualMachine) -> PyResult { // when we get a buffer means the buffered object is size locked // so we can assume the buffer's options will never change as long @@ -363,8 +369,8 @@ impl PyMemoryView { let mut bytes_mut = dest.buffer.obj_bytes_mut(); let src_bytes = src.obj_bytes(); dest.desc.zip_eq(&src.desc, true, |a_range, b_range| { - let a_range = (a_range.start + self.start as isize) as usize - ..(a_range.end + self.start as isize) as usize; + let a_range = (a_range.start + dest.start as isize) as usize + ..(a_range.end + dest.start as isize) as usize; let b_range = b_range.start as usize..b_range.end as usize; bytes_mut[a_range].copy_from_slice(&src_bytes[b_range]); false diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 2c42b25aaf..0d63d98e39 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -3286,7 +3286,7 @@ mod _io { fn getbuffer(self, vm: &VirtualMachine) -> PyResult { let len = self.buffer.read().cursor.get_ref().len(); let buffer = PyBuffer::new( - self.as_object().to_owned(), + self.into_object(), BufferDescriptor::simple(len, false), &BYTES_IO_BUFFER_METHODS, ); From 072605af6812bb25fb9d8627ca78a30c19057191 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 4 Nov 2021 21:29:43 +0200 Subject: [PATCH 14/20] fix clippy --- vm/src/builtins/memory.rs | 10 +++++----- vm/src/protocol/buffer.rs | 10 ++++++---- vm/src/sliceable.rs | 8 +++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index ca66c09436..07870f6fde 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -307,7 +307,7 @@ impl PyMemoryView { return Ok(zelf.into_object()); } if let Some(tuple) = needle.payload::() { - if tuple.len() == 0 { + if tuple.is_empty() { return Ok(zelf .format_spec .unpack(&zelf.buffer.obj_bytes()[..zelf.desc.itemsize], vm)? 
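// The from_object constructor added above is meant to be the single entry
// point for building a memoryview from an arbitrary object: if the object is
// already a memoryview, it clones that view's buffer, start offset and
// descriptor via new_view() (retaining the export) instead of re-exporting
// the buffer, so nesting memoryview over memoryview does not chain views.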
@@ -401,7 +401,7 @@ impl PyMemoryView { // TODO: fix panic if data no march itemsize bytes[..self.desc.itemsize].copy_from_slice(&data); } else if let Some(tuple) = needle.payload::() { - if tuple.len() == 0 { + if tuple.is_empty() { let bytes = &mut *self.buffer.obj_bytes_mut(); let data = self.format_spec.pack(vec![value], vm)?; // TODO: fix panic if data no march itemsize @@ -700,7 +700,7 @@ impl PyMemoryView { if zelf.desc.ndim() == 0 { let a_val = format_unpack(a_format_spec, &zelf.buffer.obj_bytes()[..a_itemsize], vm)?; let b_val = format_unpack(b_format_spec, &other.obj_bytes()[..b_itemsize], vm)?; - return Ok(vm.bool_eq(&a_val, &b_val)?); + return vm.bool_eq(&a_val, &b_val); } // TODO: optimize cmp by format @@ -711,14 +711,14 @@ impl PyMemoryView { let a_range = (a_range.start + zelf.start as isize) as usize ..(a_range.end + zelf.start as isize) as usize; let b_range = b_range.start as usize..b_range.end as usize; - let a_val = match format_unpack(&a_format_spec, &a_bytes[a_range], vm) { + let a_val = match format_unpack(a_format_spec, &a_bytes[a_range], vm) { Ok(val) => val, Err(e) => { ret = Err(e); return true; } }; - let b_val = match format_unpack(&b_format_spec, &b_bytes[b_range], vm) { + let b_val = match format_unpack(b_format_spec, &b_bytes[b_range], vm) { Ok(val) => val, Err(e) => { ret = Err(e); diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index d939a5092e..c4ffb7f858 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -59,15 +59,17 @@ impl PyBuffer { } pub fn as_contiguous_mut(&self) -> Option> { - self.desc - .is_contiguous() - .then(|| unsafe { self.contiguous_mut() }) + (!self.desc.readonly && self.desc.is_contiguous()).then(|| unsafe { self.contiguous_mut() }) } + /// # Safety + /// assume the buffer is contiguous pub unsafe fn contiguous(&self) -> BorrowedValue<[u8]> { BorrowedValue::map(self.obj_bytes(), |x| &x[..self.desc.len]) } + /// # Safety + /// assume the buffer is contiguous and writable pub unsafe fn contiguous_mut(&self) -> BorrowedValueMut<[u8]> { BorrowedValueMut::map(self.obj_bytes_mut(), |x| &mut x[..self.desc.len]) } @@ -374,7 +376,7 @@ impl BufferDescriptor { return true; } } - return false; + false } // TODO: support fortain order diff --git a/vm/src/sliceable.rs b/vm/src/sliceable.rs index 5262b004ff..9bac818dd0 100644 --- a/vm/src/sliceable.rs +++ b/vm/src/sliceable.rs @@ -167,12 +167,10 @@ pub trait PySliceableSequence { } else { self.do_slice(range) } + } else if step.is_negative() { + self.do_stepped_slice_reverse(range, step.unsigned_abs()) } else { - if step.is_negative() { - self.do_stepped_slice_reverse(range, step.unsigned_abs()) - } else { - self.do_stepped_slice(range, step.unsigned_abs()) - } + self.do_stepped_slice(range, step.unsigned_abs()) }; Ok(sliced) } From 7033a7dc336d3e290e649c9930edc99a9a01b216 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Fri, 5 Nov 2021 07:58:17 +0200 Subject: [PATCH 15/20] fix set_item_slice deadlock, optimize buffer --- vm/src/builtins/memory.rs | 117 +++++++++++++++++++++++++++++++------- vm/src/protocol/buffer.rs | 14 +++-- 2 files changed, 104 insertions(+), 27 deletions(-) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 07870f6fde..8d33ebfbcd 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -9,7 +9,7 @@ use crate::common::{ use crate::{ bytesinner::bytes_to_hex, function::{FuncArgs, IntoPyObject, OptionalArg}, - protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyMappingMethods}, + 
protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyMappingMethods, VecBuffer}, sliceable::{wrap_index, SequenceIndex}, stdlib::pystruct::FormatSpec, types::{AsBuffer, AsMapping, Comparable, Constructor, Hashable, PyComparisonOp}, @@ -347,19 +347,46 @@ impl PyMemoryView { } fn setitem_by_slice( - &self, + zelf: PyRef, slice: &PySlice, - items: PyObjectRef, + src: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { - if self.desc.ndim() != 1 { + if zelf.desc.ndim() != 1 { return Err(vm.new_not_implemented_error("sub-view are not implemented".to_owned())); } - let src = PyBuffer::try_from_object(vm, items)?; - let mut dest = self.new_view(); + + let mut dest = zelf.new_view(); dest.init_slice(slice, 0, vm)?; dest.init_len(); + if zelf.is(&src) { + return if !is_equiv_structure(&zelf.desc, &dest.desc) { + Err(vm.new_type_error( + "memoryview assigment: lvalue and rvalue have different structures".to_owned(), + )) + } else { + // assign self[:] to self + Ok(()) + }; + }; + + let src = if let Some(src) = src.downcast_ref::() { + if zelf.buffer.obj.is(&src.buffer.obj) { + if !is_equiv_structure(&src.desc, &dest.desc) { + return Err(vm.new_type_error( + "memoryview assigment: lvalue and rvalue have different structures" + .to_owned(), + )); + } + src.to_contiguous(vm) + } else { + AsBuffer::as_buffer(src, vm)? + } + } else { + PyBuffer::try_from_object(vm, src)? + }; + if !is_equiv_structure(&src.desc, &dest.desc) { return Err(vm.new_type_error( "memoryview assigment: lvalue and rvalue have different structures".to_owned(), @@ -381,31 +408,31 @@ impl PyMemoryView { #[pymethod(magic)] fn setitem( - &self, + zelf: PyRef, needle: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine, ) -> PyResult<()> { - self.try_not_released(vm)?; - if self.desc.readonly { + zelf.try_not_released(vm)?; + if zelf.desc.readonly { return Err(vm.new_type_error("cannot modify read-only memory".to_owned())); } if value.is(&vm.ctx.none) { return Err(vm.new_type_error("cannot delete memory".to_owned())); } - if self.desc.ndim() == 0 { + if zelf.desc.ndim() == 0 { if needle.is(&vm.ctx.ellipsis) { - let bytes = &mut *self.buffer.obj_bytes_mut(); - let data = self.format_spec.pack(vec![value], vm)?; + let bytes = &mut *zelf.buffer.obj_bytes_mut(); + let data = zelf.format_spec.pack(vec![value], vm)?; // TODO: fix panic if data no march itemsize - bytes[..self.desc.itemsize].copy_from_slice(&data); + bytes[..zelf.desc.itemsize].copy_from_slice(&data); } else if let Some(tuple) = needle.payload::() { if tuple.is_empty() { - let bytes = &mut *self.buffer.obj_bytes_mut(); - let data = self.format_spec.pack(vec![value], vm)?; + let bytes = &mut *zelf.buffer.obj_bytes_mut(); + let data = zelf.format_spec.pack(vec![value], vm)?; // TODO: fix panic if data no march itemsize - bytes[..self.desc.itemsize].copy_from_slice(&data); + bytes[..zelf.desc.itemsize].copy_from_slice(&data); } } return Err(vm.new_type_error("invalid indexing of 0-dim memory".to_owned())); @@ -413,8 +440,8 @@ impl PyMemoryView { // TODO: SequenceIndex do not need to take the ownership if let Ok(seq_index) = SequenceIndex::try_from_object_for(vm, needle.clone(), Self::NAME) { match seq_index { - SequenceIndex::Int(index) => self.setitem_by_idx(index, value, vm), - SequenceIndex::Slice(slice) => self.setitem_by_slice(&slice, value, vm), + SequenceIndex::Int(index) => zelf.setitem_by_idx(index, value, vm), + SequenceIndex::Slice(slice) => Self::setitem_by_slice(zelf, &slice, value, vm), } } else if let Some(_tuple) = needle.payload::() { 
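// The same-object checks in setitem_by_slice above are the deadlock fix from
// this patch: when source and destination share the underlying buffer object,
// the source is first copied out via to_contiguous() (or the assignment is a
// no-op for self[:] = self), presumably so the code never holds the
// destination's mutable borrow and the source's shared borrow of the same
// buffer at the same time.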
Err(vm.new_type_error("TODO".to_owned())) @@ -746,11 +773,23 @@ impl PyMemoryView { } fn obj_bytes(&self) -> BorrowedValue<[u8]> { - BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..]) + if self.desc.is_contiguous() { + BorrowedValue::map(self.buffer.obj_bytes(), |x| { + &x[self.start..self.start + self.desc.len] + }) + } else { + BorrowedValue::map(self.buffer.obj_bytes(), |x| &x[self.start..]) + } } fn obj_bytes_mut(&self) -> BorrowedValueMut<[u8]> { - BorrowedValueMut::map(self.buffer.obj_bytes_mut(), |x| &mut x[self.start..]) + if self.desc.is_contiguous() { + BorrowedValueMut::map(self.buffer.obj_bytes_mut(), |x| { + &mut x[self.start..self.start + self.desc.len] + }) + } else { + BorrowedValueMut::map(self.buffer.obj_bytes_mut(), |x| &mut x[self.start..]) + } } fn as_contiguous(&self) -> Option> { @@ -773,6 +812,7 @@ impl PyMemoryView { if let Some(bytes) = self.as_contiguous() { buf.extend_from_slice(&bytes); } else { + buf.reserve(self.desc.len); let bytes = &*self.buffer.obj_bytes(); self.desc.for_each_segment(true, |range| { let start = (range.start + self.start as isize) as usize; @@ -795,6 +835,39 @@ impl PyMemoryView { }; f(v) } + + /// clone data from memoryview + /// keep the shape, convert to contiguous + pub fn to_contiguous(&self, vm: &VirtualMachine) -> PyBuffer { + let mut data = vec![]; + self.collect(&mut data); + + if self.desc.ndim() == 0 { + return VecBuffer::new(data) + .into_ref(vm) + .into_pybuffer_with_descriptor(self.desc.clone()); + } + + let mut dim_descriptor = self.desc.dim_desc.clone(); + dim_descriptor.last_mut().unwrap().1 = self.desc.itemsize as isize; + dim_descriptor.last_mut().unwrap().2 = 0; + for i in (0..dim_descriptor.len() - 1).rev() { + dim_descriptor[i].1 = dim_descriptor[i + 1].1 * dim_descriptor[i + 1].0 as isize; + dim_descriptor[i].2 = 0; + } + + let desc = BufferDescriptor { + len: self.desc.len, + readonly: self.desc.readonly, + itemsize: self.desc.itemsize, + format: self.desc.format.clone(), + dim_desc: dim_descriptor, + }; + + VecBuffer::new(data) + .into_ref(vm) + .into_pybuffer_with_descriptor(desc) + } } #[derive(FromArgs)] @@ -863,7 +936,9 @@ impl AsMapping for PyMemoryView { vm: &VirtualMachine, ) -> PyResult<()> { match value { - Some(value) => Self::downcast(zelf, vm).map(|zelf| zelf.setitem(needle, value, vm))?, + Some(value) => { + Self::downcast(zelf, vm).map(|zelf| Self::setitem(zelf, needle, value, vm))? 
+ } None => Err(vm.new_type_error("cannot delete memory".to_owned())), } } diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index c4ffb7f858..ebf89be90a 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -65,13 +65,13 @@ impl PyBuffer { /// # Safety /// assume the buffer is contiguous pub unsafe fn contiguous(&self) -> BorrowedValue<[u8]> { - BorrowedValue::map(self.obj_bytes(), |x| &x[..self.desc.len]) + self.obj_bytes() } /// # Safety /// assume the buffer is contiguous and writable pub unsafe fn contiguous_mut(&self) -> BorrowedValueMut<[u8]> { - BorrowedValueMut::map(self.obj_bytes_mut(), |x| &mut x[..self.desc.len]) + self.obj_bytes_mut() } pub fn collect(&self, buf: &mut Vec) { @@ -391,18 +391,16 @@ pub trait BufferResizeGuard<'a> { #[derive(Debug, PyValue)] pub struct VecBuffer { data: PyMutex>, - len: usize, } #[pyimpl(flags(BASETYPE), with(Constructor))] impl VecBuffer { pub fn new(data: Vec) -> Self { - let len = data.len(); Self { data: PyMutex::new(data), - len, } } + pub fn take(&self) -> Vec { std::mem::take(&mut self.data.lock()) } @@ -411,13 +409,17 @@ impl Unconstructible for VecBuffer {} impl PyRef { pub fn into_pybuffer(self, readonly: bool) -> PyBuffer { - let len = self.len; + let len = self.data.lock().len(); PyBuffer::new( self.into_object(), BufferDescriptor::simple(len, readonly), &VEC_BUFFER_METHODS, ) } + + pub fn into_pybuffer_with_descriptor(self, desc: BufferDescriptor) -> PyBuffer { + PyBuffer::new(self.into_object(), desc, &VEC_BUFFER_METHODS) + } } static VEC_BUFFER_METHODS: BufferMethods = BufferMethods { From 29c9d0e4314f0745ea903f80f525366aafcfbdc4 Mon Sep 17 00:00:00 2001 From: Steve Shi Date: Fri, 5 Nov 2021 09:09:20 +0200 Subject: [PATCH 16/20] Delete @test_113974_tmp --- @test_113974_tmp | 1 - 1 file changed, 1 deletion(-) delete mode 100755 @test_113974_tmp diff --git a/@test_113974_tmp b/@test_113974_tmp deleted file mode 100755 index 3b18e512db..0000000000 --- a/@test_113974_tmp +++ /dev/null @@ -1 +0,0 @@ -hello world From ffeb63868b4824226c17c2a825b87d483519a295 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sat, 6 Nov 2021 15:39:36 +0900 Subject: [PATCH 17/20] Remove vec_buffer_type --- stdlib/src/array.rs | 2 +- vm/src/stdlib/builtins.rs | 6 ++++-- vm/src/types/zoo.rs | 2 -- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 479c97dcff..1ce1c9a187 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -1219,7 +1219,7 @@ mod array { fn as_buffer(zelf: &PyObjectView, _vm: &VirtualMachine) -> PyResult { let array = zelf.read(); let buf = PyBuffer::new( - zelf.to_owned().into_object(), + zelf.to_owned().into(), BufferDescriptor::format( array.len() * array.itemsize(), false, diff --git a/vm/src/stdlib/builtins.rs b/vm/src/stdlib/builtins.rs index 1c428b7099..4dace44c43 100644 --- a/vm/src/stdlib/builtins.rs +++ b/vm/src/stdlib/builtins.rs @@ -1,7 +1,7 @@ //! Builtin function definitions. //! //! Implements the list of [builtin Python functions](https://docs.python.org/3/library/builtins.html). -use crate::{PyObjectRef, VirtualMachine}; +use crate::{PyClassImpl, PyObjectRef, VirtualMachine}; /// Built-in functions, exceptions, and other objects. 
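The to_contiguous() helper added to the memoryview changes above rebuilds the dimension descriptors in C order: the last dimension gets a stride of itemsize, every earlier stride is the following stride times the following shape, and all suboffsets are cleared. A standalone sketch of that stride computation (c_strides is illustrative, not part of the codebase):

    // C-contiguous strides for a given shape and item size,
    // e.g. c_strides(&[2, 3], 4) == [12, 4].
    fn c_strides(shape: &[usize], itemsize: usize) -> Vec<isize> {
        let mut strides = vec![0isize; shape.len()];
        let mut acc = itemsize as isize;
        for (stride, &dim) in strides.iter_mut().zip(shape.iter()).rev() {
            *stride = acc;
            acc *= dim as isize;
        }
        strides
    }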
/// @@ -917,7 +917,9 @@ pub use builtins::{ascii, print}; pub fn make_module(vm: &VirtualMachine, module: PyObjectRef) { let ctx = &vm.ctx; - builtins::extend_module(vm, &module); + crate::protocol::VecBuffer::make_class(&vm.ctx); + + let _ = builtins::extend_module(vm, &module); let debug_mode: bool = vm.state.settings.optimize == 0; extend_module!(vm, module, { diff --git a/vm/src/types/zoo.rs b/vm/src/types/zoo.rs index 0ec65de700..f780343f4f 100644 --- a/vm/src/types/zoo.rs +++ b/vm/src/types/zoo.rs @@ -81,7 +81,6 @@ pub struct TypeZoo { pub none_type: PyTypeRef, pub not_implemented_type: PyTypeRef, pub generic_alias_type: PyTypeRef, - pub vec_buffer_type: PyTypeRef, } impl TypeZoo { @@ -168,7 +167,6 @@ impl TypeZoo { none_type: singletons::PyNone::init_bare_type().clone(), not_implemented_type: singletons::PyNotImplemented::init_bare_type().clone(), generic_alias_type: genericalias::PyGenericAlias::init_bare_type().clone(), - vec_buffer_type: crate::protocol::VecBuffer::init_bare_type().clone(), } } From 3d72bdea7c9e71791ec98d9a3fb8b39b9bb706e0 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sat, 6 Nov 2021 18:25:18 +0200 Subject: [PATCH 18/20] rusty names --- vm/src/builtins/memory.rs | 2 +- vm/src/builtins/slice.rs | 16 ++++++++-------- vm/src/function/buffer.rs | 4 ++-- vm/src/protocol/buffer.rs | 17 +++++++++-------- vm/src/sliceable.rs | 8 ++++---- vm/src/stdlib/os.rs | 2 +- 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 8d33ebfbcd..43e6fc1a4b 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -283,7 +283,7 @@ impl PyMemoryView { .iter() .map(|x| isize::try_from_borrowed_object(vm, x)) .try_collect()?; - let pos = self.desc.get_position(&indices, vm)?; + let pos = self.desc.position(&indices, vm)?; let pos = (pos + self.start as isize) as usize; let bytes = self.buffer.obj_bytes(); diff --git a/vm/src/builtins/slice.rs b/vm/src/builtins/slice.rs index 77fd741ad3..99bcec7328 100644 --- a/vm/src/builtins/slice.rs +++ b/vm/src/builtins/slice.rs @@ -317,18 +317,18 @@ impl SaturatedSlice { (range, self.step, slicelen) } - pub fn iter(&self, len: usize) -> SaturatedSliceIterator { - SaturatedSliceIterator::new(self, len) + pub fn iter(&self, len: usize) -> SaturatedSliceIter { + SaturatedSliceIter::new(self, len) } } -pub struct SaturatedSliceIterator { - pub index: isize, - pub step: isize, - pub len: usize, +pub struct SaturatedSliceIter { + index: isize, + step: isize, + len: usize, } -impl SaturatedSliceIterator { +impl SaturatedSliceIter { pub fn new(slice: &SaturatedSlice, seq_len: usize) -> Self { let (range, step, len) = slice.adjust_indices(seq_len); Self::from_adjust_indices(range, step, len) @@ -352,7 +352,7 @@ impl SaturatedSliceIterator { } } -impl Iterator for SaturatedSliceIterator { +impl Iterator for SaturatedSliceIter { type Item = usize; fn next(&mut self) -> Option { if self.len == 0 { diff --git a/vm/src/function/buffer.rs b/vm/src/function/buffer.rs index 028a34a85b..fd7c20c9dd 100644 --- a/vm/src/function/buffer.rs +++ b/vm/src/function/buffer.rs @@ -40,7 +40,7 @@ impl PyObject { impl ArgBytesLike { pub fn borrow_buf(&self) -> BorrowedValue<'_, [u8]> { - unsafe { self.0.contiguous() } + unsafe { self.0.contiguous_unchecked() } } pub fn with_ref(&self, f: F) -> R @@ -82,7 +82,7 @@ pub struct ArgMemoryBuffer(PyBuffer); impl ArgMemoryBuffer { pub fn borrow_buf_mut(&self) -> BorrowedValueMut<'_, [u8]> { - unsafe { self.0.contiguous_mut() } + unsafe { 
self.0.contiguous_mut_unchecked() } } pub fn with_ref(&self, f: F) -> R diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index ebf89be90a..fa735f5f07 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -55,26 +55,27 @@ impl PyBuffer { pub fn as_contiguous(&self) -> Option> { self.desc .is_contiguous() - .then(|| unsafe { self.contiguous() }) + .then(|| unsafe { self.contiguous_unchecked() }) } pub fn as_contiguous_mut(&self) -> Option> { - (!self.desc.readonly && self.desc.is_contiguous()).then(|| unsafe { self.contiguous_mut() }) + (!self.desc.readonly && self.desc.is_contiguous()) + .then(|| unsafe { self.contiguous_mut_unchecked() }) } /// # Safety /// assume the buffer is contiguous - pub unsafe fn contiguous(&self) -> BorrowedValue<[u8]> { + pub unsafe fn contiguous_unchecked(&self) -> BorrowedValue<[u8]> { self.obj_bytes() } /// # Safety /// assume the buffer is contiguous and writable - pub unsafe fn contiguous_mut(&self) -> BorrowedValueMut<[u8]> { + pub unsafe fn contiguous_mut_unchecked(&self) -> BorrowedValueMut<[u8]> { self.obj_bytes_mut() } - pub fn collect(&self, buf: &mut Vec) { + pub fn append_to(&self, buf: &mut Vec) { if let Some(bytes) = self.as_contiguous() { buf.extend_from_slice(&bytes); } else { @@ -93,7 +94,7 @@ impl PyBuffer { &*borrowed } else { collected = vec![]; - self.collect(&mut collected); + self.append_to(&mut collected); &collected }; f(v) @@ -232,7 +233,7 @@ impl BufferDescriptor { /// this function do not check the bound /// panic if indices.len() != ndim - pub fn get_position_fast(&self, indices: &[usize]) -> isize { + pub fn fast_position(&self, indices: &[usize]) -> isize { let mut pos = 0; for (i, (_, stride, suboffset)) in indices .iter() @@ -245,7 +246,7 @@ impl BufferDescriptor { } /// panic if indices.len() != ndim - pub fn get_position(&self, indices: &[isize], vm: &VirtualMachine) -> PyResult { + pub fn position(&self, indices: &[isize], vm: &VirtualMachine) -> PyResult { let mut pos = 0; for (i, (shape, stride, suboffset)) in indices .iter() diff --git a/vm/src/sliceable.rs b/vm/src/sliceable.rs index 9bac818dd0..5f7e388cec 100644 --- a/vm/src/sliceable.rs +++ b/vm/src/sliceable.rs @@ -4,7 +4,7 @@ use std::ops::Range; use crate::builtins::int::PyInt; // export through slicable module, not slice. 
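The renames above follow the usual Rust convention: the _unchecked suffix marks the unsafe variants whose contiguity precondition is not verified, while the safe as_contiguous()/as_contiguous_mut() wrappers keep checking desc.is_contiguous() (and writability for the mutable case) before delegating to them. Likewise, collect() becomes append_to(), reflecting that it appends into the caller's Vec rather than producing a new one.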
pub use crate::builtins::slice::{saturate_index, SaturatedSlice}; -use crate::builtins::slice::{PySlice, SaturatedSliceIterator}; +use crate::builtins::slice::{PySlice, SaturatedSliceIter}; use crate::utils::Either; use crate::VirtualMachine; use crate::{PyObjectRef, PyRef, PyResult, TypeProtocol}; @@ -37,7 +37,7 @@ pub trait PySliceableSequenceMut { Ok(()) } else { self.do_replace_indexes( - SaturatedSliceIterator::from_adjust_indices(range, step, slicelen), + SaturatedSliceIter::from_adjust_indices(range, step, slicelen), items, ); Ok(()) @@ -56,7 +56,7 @@ pub trait PySliceableSequenceMut { Ok(()) } else if slicelen == items.len() { self.do_replace_indexes( - SaturatedSliceIterator::from_adjust_indices(range, step, slicelen), + SaturatedSliceIter::from_adjust_indices(range, step, slicelen), items, ); Ok(()) @@ -79,7 +79,7 @@ pub trait PySliceableSequenceMut { } else { self.do_delete_indexes( range.clone(), - SaturatedSliceIterator::from_adjust_indices(range, step, slicelen).positive_order(), + SaturatedSliceIter::from_adjust_indices(range, step, slicelen).positive_order(), ); Ok(()) } diff --git a/vm/src/stdlib/os.rs b/vm/src/stdlib/os.rs index 4f47ed93e6..e83647f5f9 100644 --- a/vm/src/stdlib/os.rs +++ b/vm/src/stdlib/os.rs @@ -193,7 +193,7 @@ impl TryFromObject for PyPathLike { let obj = match PyBuffer::try_from_borrowed_object(vm, &obj) { Ok(buffer) => { let mut bytes = vec![]; - buffer.collect(&mut bytes); + buffer.append_to(&mut bytes); PyBytes::from(bytes).into_pyobject(vm) } Err(_) => obj, From 43cf696816fe1bfc2c90be753bdd0828733f6035 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sun, 7 Nov 2021 21:07:18 +0200 Subject: [PATCH 19/20] impl memoryview multi index --- Lib/test/test_memoryview.py | 2 - vm/src/builtins/memory.rs | 195 +++++++++++++++++++++--------------- 2 files changed, 116 insertions(+), 81 deletions(-) diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py index 3fe85c5035..68d0bfa0ff 100644 --- a/Lib/test/test_memoryview.py +++ b/Lib/test/test_memoryview.py @@ -71,8 +71,6 @@ def setitem(value): m = None self.assertEqual(sys.getrefcount(b), oldrefcount) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_setitem_writable(self): if not self.rw_type: self.skipTest("no writable type to test") diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 43e6fc1a4b..a1ce1866bd 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -1,5 +1,5 @@ use super::{ - PyBytes, PyBytesRef, PyListRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyTypeRef, + PyBytes, PyBytesRef, PyInt, PyListRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyTypeRef, }; use crate::common::{ borrow::{BorrowedValue, BorrowedValueMut}, @@ -10,13 +10,13 @@ use crate::{ bytesinner::bytes_to_hex, function::{FuncArgs, IntoPyObject, OptionalArg}, protocol::{BufferDescriptor, BufferMethods, PyBuffer, PyMappingMethods, VecBuffer}, - sliceable::{wrap_index, SequenceIndex}, + sliceable::wrap_index, stdlib::pystruct::FormatSpec, types::{AsBuffer, AsMapping, Comparable, Constructor, Hashable, PyComparisonOp}, utils::Either, - IdProtocol, PyClassDef, PyClassImpl, PyComparisonValue, PyContext, PyObject, PyObjectRef, - PyObjectView, PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TryFromObject, - TypeProtocol, VirtualMachine, + IdProtocol, PyClassImpl, PyComparisonValue, PyContext, PyObject, PyObjectRef, PyObjectView, + PyObjectWrap, PyRef, PyResult, PyValue, TryFromBorrowedObject, TryFromObject, TypeProtocol, + VirtualMachine, }; use 
crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; @@ -243,16 +243,7 @@ impl PyMemoryView { .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; let index = index as isize * stride + suboffset; let pos = (index + self.start as isize) as usize; - let bytes = self.buffer.obj_bytes(); - self.format_spec - .unpack(&bytes[pos..pos + self.desc.itemsize], vm) - .map(|x| { - if x.len() == 1 { - x.fast_getitem(0) - } else { - x.into() - } - }) + self.unpack_single(pos, vm) } fn getitem_by_slice(&self, slice: &PySlice, vm: &VirtualMachine) -> PyResult { @@ -263,39 +254,10 @@ impl PyMemoryView { Ok(other.into_ref(vm).into_object()) } - fn getitem_by_multi_idx(&self, tuple: &PyTuple, vm: &VirtualMachine) -> PyResult { - let tuple = tuple.as_slice(); - match tuple.len().cmp(&self.desc.ndim()) { - Ordering::Less => { - return Err(vm.new_not_implemented_error("sub-views are not implemented".to_owned())) - } - Ordering::Greater => { - return Err(vm.new_type_error(format!( - "cannot index {}-dimension view with {}-element tuple", - self.desc.ndim(), - tuple.len() - ))) - } - Ordering::Equal => (), - } - - let indices: Vec = tuple - .iter() - .map(|x| isize::try_from_borrowed_object(vm, x)) - .try_collect()?; - let pos = self.desc.position(&indices, vm)?; - let pos = (pos + self.start as isize) as usize; - + fn getitem_by_multi_idx(&self, indexes: &[isize], vm: &VirtualMachine) -> PyResult { + let pos = self.pos_from_multi_index(indexes, vm)?; let bytes = self.buffer.obj_bytes(); - self.format_spec - .unpack(&bytes[pos..pos + self.desc.itemsize], vm) - .map(|x| { - if x.len() == 1 { - x.fast_getitem(0) - } else { - x.into() - } - }) + format_unpack(&self.format_spec, &bytes[pos..pos + self.desc.itemsize], vm) } #[pymethod(magic)] @@ -308,26 +270,16 @@ impl PyMemoryView { } if let Some(tuple) = needle.payload::() { if tuple.is_empty() { - return Ok(zelf - .format_spec - .unpack(&zelf.buffer.obj_bytes()[..zelf.desc.itemsize], vm)? - .fast_getitem(0)); + return zelf.unpack_single(0, vm); } } return Err(vm.new_type_error("invalid indexing of 0-dim memory".to_owned())); } - // TODO: avoid clone - if let Ok(seq_index) = SequenceIndex::try_from_object_for(vm, needle.clone(), Self::NAME) { - match seq_index { - SequenceIndex::Int(index) => zelf.getitem_by_idx(index, vm), - SequenceIndex::Slice(slice) => zelf.getitem_by_slice(&slice, vm), - } - } else if let Some(tuple) = needle.payload::() { - zelf.getitem_by_multi_idx(tuple, vm) - } else { - // TODO: support multi slice - Err(vm.new_type_error("memoryview: invalid slice key".to_owned())) + match SubscriptNeedle::try_from_object(vm, needle)? 
{ + SubscriptNeedle::Index(i) => zelf.getitem_by_idx(i, vm), + SubscriptNeedle::Slice(slice) => zelf.getitem_by_slice(&slice, vm), + SubscriptNeedle::MultiIndex(indices) => zelf.getitem_by_multi_idx(&indices, vm), } } @@ -340,10 +292,7 @@ impl PyMemoryView { .ok_or_else(|| vm.new_index_error("index out of range".to_owned()))?; let index = index as isize * stride + suboffset; let pos = (index + self.start as isize) as usize; - let bytes = &mut *self.buffer.obj_bytes_mut(); - let data = self.format_spec.pack(vec![value], vm)?; - bytes[pos..pos + self.desc.itemsize].copy_from_slice(&data); - Ok(()) + self.pack_single(pos, value, vm) } fn setitem_by_slice( @@ -362,7 +311,7 @@ impl PyMemoryView { if zelf.is(&src) { return if !is_equiv_structure(&zelf.desc, &dest.desc) { - Err(vm.new_type_error( + Err(vm.new_value_error( "memoryview assigment: lvalue and rvalue have different structures".to_owned(), )) } else { @@ -374,7 +323,7 @@ impl PyMemoryView { let src = if let Some(src) = src.downcast_ref::() { if zelf.buffer.obj.is(&src.buffer.obj) { if !is_equiv_structure(&src.desc, &dest.desc) { - return Err(vm.new_type_error( + return Err(vm.new_value_error( "memoryview assigment: lvalue and rvalue have different structures" .to_owned(), )); @@ -388,7 +337,7 @@ impl PyMemoryView { }; if !is_equiv_structure(&src.desc, &dest.desc) { - return Err(vm.new_type_error( + return Err(vm.new_value_error( "memoryview assigment: lvalue and rvalue have different structures".to_owned(), )); } @@ -406,6 +355,16 @@ impl PyMemoryView { Ok(()) } + fn setitem_by_multi_idx( + &self, + indexes: &[isize], + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + let pos = self.pos_from_multi_index(indexes, vm)?; + self.pack_single(pos, value, vm) + } + #[pymethod(magic)] fn setitem( zelf: PyRef, @@ -437,18 +396,58 @@ impl PyMemoryView { } return Err(vm.new_type_error("invalid indexing of 0-dim memory".to_owned())); } - // TODO: SequenceIndex do not need to take the ownership - if let Ok(seq_index) = SequenceIndex::try_from_object_for(vm, needle.clone(), Self::NAME) { - match seq_index { - SequenceIndex::Int(index) => zelf.setitem_by_idx(index, value, vm), - SequenceIndex::Slice(slice) => Self::setitem_by_slice(zelf, &slice, value, vm), + match SubscriptNeedle::try_from_object(vm, needle)? 
{ + SubscriptNeedle::Index(i) => zelf.setitem_by_idx(i, value, vm), + SubscriptNeedle::Slice(slice) => Self::setitem_by_slice(zelf, &slice, value, vm), + SubscriptNeedle::MultiIndex(indices) => zelf.setitem_by_multi_idx(&indices, value, vm), + } + } + + fn pack_single(&self, pos: usize, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + let mut bytes = self.buffer.obj_bytes_mut(); + // TODO: Optimize + let data = self.format_spec.pack(vec![value], vm).map_err(|_| { + vm.new_type_error(format!( + "memoryview: invalid type for format '{}'", + &self.desc.format + )) + })?; + bytes[pos..pos + self.desc.itemsize].copy_from_slice(&data); + Ok(()) + } + + fn unpack_single(&self, pos: usize, vm: &VirtualMachine) -> PyResult { + let bytes = self.buffer.obj_bytes(); + // TODO: Optimize + self.format_spec + .unpack(&bytes[pos..pos + self.desc.itemsize], vm) + .map(|x| { + if x.len() == 1 { + x.fast_getitem(0) + } else { + x.into() + } + }) + } + + fn pos_from_multi_index(&self, indexes: &[isize], vm: &VirtualMachine) -> PyResult { + match indexes.len().cmp(&self.desc.ndim()) { + Ordering::Less => { + return Err(vm.new_not_implemented_error("sub-views are not implemented".to_owned())) } - } else if let Some(_tuple) = needle.payload::() { - Err(vm.new_type_error("TODO".to_owned())) - } else { - // TODO: support multi slice - Err(vm.new_type_error("memoryview: invalid slice key".to_owned())) + Ordering::Greater => { + return Err(vm.new_type_error(format!( + "cannot index {}-dimension view with {}-element tuple", + self.desc.ndim(), + indexes.len() + ))) + } + Ordering::Equal => (), } + + let pos = self.desc.position(indexes, vm)?; + let pos = (pos + self.start as isize) as usize; + Ok(pos) } fn init_len(&mut self) { @@ -878,6 +877,44 @@ struct CastArgs { shape: OptionalArg, } +enum SubscriptNeedle { + Index(isize), + Slice(PyRef), + MultiIndex(Vec), + // MultiSlice(Vec), +} + +impl TryFromObject for SubscriptNeedle { + fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { + if let Some(i) = obj.payload::() { + Ok(Self::Index(i.try_to_primitive(vm)?)) + } else if obj.payload_is::() { + Ok(Self::Slice(unsafe { obj.downcast_unchecked::() })) + } else if let Ok(i) = vm.to_index(&obj) { + Ok(Self::Index(i.try_to_primitive(vm)?)) + } else { + if let Some(tuple) = obj.payload::() { + let tuple = tuple.as_slice(); + if tuple.iter().all(|x| x.payload_is::()) { + let v = tuple + .iter() + .map(|x| { + unsafe { x.downcast_unchecked_ref::() } + .try_to_primitive::(vm) + }) + .try_collect()?; + return Ok(Self::MultiIndex(v)); + } else if tuple.iter().all(|x| x.payload_is::()) { + return Err(vm.new_not_implemented_error( + "multi-dimensional slicing is not implemented".to_owned(), + )); + } + } + Err(vm.new_type_error("memoryview: invalid slice key".to_owned())) + } + } +} + static BUFFER_METHODS: BufferMethods = BufferMethods { obj_bytes: |buffer| buffer.obj_as::().obj_bytes(), obj_bytes_mut: |buffer| buffer.obj_as::().obj_bytes_mut(), From ceafd4cfe42a1f7928c21b57431f3f6d0bc29eb4 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Tue, 9 Nov 2021 21:59:44 +0200 Subject: [PATCH 20/20] add comments --- vm/src/builtins/memory.rs | 79 ++++++++++++++++++--------------------- vm/src/protocol/buffer.rs | 22 +++++------ vm/src/stdlib/io.rs | 4 +- 3 files changed, 47 insertions(+), 58 deletions(-) diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index a1ce1866bd..df52bd76af 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -35,6 +35,9 @@ pub struct 
PyMemoryView { // the released memoryview does not mean the buffer is destoryed // because the possible another memeoryview is viewing from it released: AtomicCell, + // start does NOT mean the bytes before start will not be visited, + // it means the point we starting to get the absolute position via + // the needle start: usize, format_spec: FormatSpec, // memoryview's options could be different from buffer's options @@ -71,6 +74,9 @@ impl PyMemoryView { } } + /// don't use this function to create the memeoryview if the buffer is exporting + /// via another memoryview, use PyMemoryView::new_view() or PyMemoryView::from_object + /// to reduce the chain pub fn from_buffer(buffer: PyBuffer, vm: &VirtualMachine) -> PyResult { // when we get a buffer means the buffered object is size locked // so we can assume the buffer's options will never change as long @@ -88,29 +94,23 @@ impl PyMemoryView { }) } + /// don't use this function to create the memeoryview if the buffer is exporting + /// via another memoryview, use PyMemoryView::new_view() or PyMemoryView::from_object + /// to reduce the chain pub fn from_buffer_range( buffer: PyBuffer, range: Range, vm: &VirtualMachine, ) -> PyResult { - let format_spec = Self::parse_format(&buffer.desc.format, vm)?; - let desc = buffer.desc.clone(); - - let mut zelf = PyMemoryView { - buffer: ManuallyDrop::new(buffer), - released: AtomicCell::new(false), - start: 0, - format_spec, - desc, - hash: OnceCell::new(), - }; + let mut zelf = Self::from_buffer(buffer, vm)?; zelf.init_range(range, 0); zelf.init_len(); Ok(zelf) } - fn new_view(&self) -> Self { + /// this should be the only way to create a memroyview from another memoryview + pub fn new_view(&self) -> Self { let zelf = PyMemoryView { buffer: self.buffer.clone(), released: AtomicCell::new(false), @@ -322,12 +322,6 @@ impl PyMemoryView { let src = if let Some(src) = src.downcast_ref::() { if zelf.buffer.obj.is(&src.buffer.obj) { - if !is_equiv_structure(&src.desc, &dest.desc) { - return Err(vm.new_value_error( - "memoryview assigment: lvalue and rvalue have different structures" - .to_owned(), - )); - } src.to_contiguous(vm) } else { AsBuffer::as_buffer(src, vm)? 
@@ -381,17 +375,12 @@ impl PyMemoryView { } if zelf.desc.ndim() == 0 { + // TODO: merge branches when we got conditional if let if needle.is(&vm.ctx.ellipsis) { - let bytes = &mut *zelf.buffer.obj_bytes_mut(); - let data = zelf.format_spec.pack(vec![value], vm)?; - // TODO: fix panic if data no march itemsize - bytes[..zelf.desc.itemsize].copy_from_slice(&data); + return zelf.pack_single(0, value, vm); } else if let Some(tuple) = needle.payload::() { if tuple.is_empty() { - let bytes = &mut *zelf.buffer.obj_bytes_mut(); - let data = zelf.format_spec.pack(vec![value], vm)?; - // TODO: fix panic if data no march itemsize - bytes[..zelf.desc.itemsize].copy_from_slice(&data); + return zelf.pack_single(0, value, vm); } } return Err(vm.new_type_error("invalid indexing of 0-dim memory".to_owned())); @@ -459,15 +448,16 @@ impl PyMemoryView { let (shape, stride, _) = self.desc.dim_desc[dim]; debug_assert!(shape >= range.len()); - let mut is_adjusted_suboffset = false; + let mut is_adjusted = false; for (_, _, suboffset) in self.desc.dim_desc.iter_mut().rev() { if *suboffset != 0 { *suboffset += stride * range.start as isize; - is_adjusted_suboffset = true; + is_adjusted = true; break; } } - if !is_adjusted_suboffset { + if !is_adjusted { + // no suboffset setted, stride must be positive self.start += stride as usize * range.start; } let newlen = range.len(); @@ -488,7 +478,7 @@ impl PyMemoryView { } } if !is_adjusted_suboffset { - // TODO: AdjustIndices + // no suboffset setted, stride must be positive self.start += stride as usize * if step.is_negative() { range.end - 1 @@ -502,6 +492,7 @@ impl PyMemoryView { Ok(()) } + /// return the length of the first dimention #[pymethod(magic)] fn len(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; @@ -517,7 +508,7 @@ impl PyMemoryView { fn tobytes(&self, vm: &VirtualMachine) -> PyResult { self.try_not_released(vm)?; let mut v = vec![]; - self.collect(&mut v); + self.append_to(&mut v); Ok(PyBytes::from(v).into_ref(vm)) } @@ -807,7 +798,7 @@ impl PyMemoryView { }) } - fn collect(&self, buf: &mut Vec) { + fn append_to(&self, buf: &mut Vec) { if let Some(bytes) = self.as_contiguous() { buf.extend_from_slice(&bytes); } else { @@ -829,7 +820,7 @@ impl PyMemoryView { &*borrowed } else { collected = vec![]; - self.collect(&mut collected); + self.append_to(&mut collected); &collected }; f(v) @@ -839,20 +830,20 @@ impl PyMemoryView { /// keep the shape, convert to contiguous pub fn to_contiguous(&self, vm: &VirtualMachine) -> PyBuffer { let mut data = vec![]; - self.collect(&mut data); + self.append_to(&mut data); if self.desc.ndim() == 0 { - return VecBuffer::new(data) + return VecBuffer::from(data) .into_ref(vm) .into_pybuffer_with_descriptor(self.desc.clone()); } - let mut dim_descriptor = self.desc.dim_desc.clone(); - dim_descriptor.last_mut().unwrap().1 = self.desc.itemsize as isize; - dim_descriptor.last_mut().unwrap().2 = 0; - for i in (0..dim_descriptor.len() - 1).rev() { - dim_descriptor[i].1 = dim_descriptor[i + 1].1 * dim_descriptor[i + 1].0 as isize; - dim_descriptor[i].2 = 0; + let mut dim_desc = self.desc.dim_desc.clone(); + dim_desc.last_mut().unwrap().1 = self.desc.itemsize as isize; + dim_desc.last_mut().unwrap().2 = 0; + for i in (0..dim_desc.len() - 1).rev() { + dim_desc[i].1 = dim_desc[i + 1].1 * dim_desc[i + 1].0 as isize; + dim_desc[i].2 = 0; } let desc = BufferDescriptor { @@ -860,10 +851,10 @@ impl PyMemoryView { readonly: self.desc.readonly, itemsize: self.desc.itemsize, format: self.desc.format.clone(), - dim_desc: 
dim_descriptor, + dim_desc, }; - VecBuffer::new(data) + VecBuffer::from(data) .into_ref(vm) .into_pybuffer_with_descriptor(desc) } @@ -886,6 +877,7 @@ enum SubscriptNeedle { impl TryFromObject for SubscriptNeedle { fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { + // TODO: number protocol if let Some(i) = obj.payload::() { Ok(Self::Index(i.try_to_primitive(vm)?)) } else if obj.payload_is::() { @@ -1054,6 +1046,7 @@ fn is_equiv_shape(a: &BufferDescriptor, b: &BufferDescriptor) -> bool { if a_shape != b_shape { return false; } + // if both shape is 0, ignore the rest if a_shape == 0 { break; } diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index fa735f5f07..c69b4efc2f 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -142,9 +142,6 @@ impl TryFromBorrowedObject for PyBuffer { } } -// What we actually want to implement is: -// impl Drop for T where T: BufferInternal -// but it is not supported by Rust impl Drop for PyBuffer { fn drop(&mut self) { self.release(); @@ -161,6 +158,7 @@ pub struct BufferDescriptor { pub format: Cow<'static, str>, /// (shape, stride, suboffset) for each dimension pub dim_desc: Vec<(usize, isize, isize)>, + // TODO: flags } impl BufferDescriptor { @@ -198,11 +196,6 @@ impl BufferDescriptor { shape_product *= shape; assert!(suboffset >= 0); assert!(stride != 0); - if stride.is_negative() { - // if stride is negative, we access memory in reversed order - // so the suboffset should be n*stride shift the index to the tail - assert!(suboffset >= -stride * shape as isize); - } } assert!(shape_product * self.itemsize == self.len); self @@ -396,16 +389,19 @@ pub struct VecBuffer { #[pyimpl(flags(BASETYPE), with(Constructor))] impl VecBuffer { - pub fn new(data: Vec) -> Self { + pub fn take(&self) -> Vec { + std::mem::take(&mut self.data.lock()) + } +} + +impl From> for VecBuffer { + fn from(data: Vec) -> Self { Self { data: PyMutex::new(data), } } - - pub fn take(&self) -> Vec { - std::mem::take(&mut self.data.lock()) - } } + impl Unconstructible for VecBuffer {} impl PyRef { diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 0d63d98e39..a2174f114e 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -875,7 +875,7 @@ mod _io { vm.call_method(self.raw.as_ref().unwrap(), "write", (memobj,))? } else { let v = std::mem::take(&mut self.buffer); - let writebuf = VecBuffer::new(v).into_ref(vm); + let writebuf = VecBuffer::from(v).into_ref(vm); let memobj = PyMemoryView::from_buffer_range( writebuf.clone().into_pybuffer(true), buf_range, @@ -1100,7 +1100,7 @@ mod _io { let res = match v { Either::A(v) => { let v = v.unwrap_or(&mut self.buffer); - let readbuf = VecBuffer::new(std::mem::take(v)).into_ref(vm); + let readbuf = VecBuffer::from(std::mem::take(v)).into_ref(vm); let memobj = PyMemoryView::from_buffer_range( readbuf.clone().into_pybuffer(false), buf_range,
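// --- illustrative sketch, not part of the patch -----------------------------
// The byte-offset arithmetic that the multi-index path above
// (pos_from_multi_index / BufferDescriptor::position) boils down to: every
// dimension contributes `index * stride` plus that dimension's suboffset, and
// the memoryview finally adds its own `start` before reading or writing
// `itemsize` bytes at the result. Bounds checking and negative-index wrapping
// (wrap_index) are omitted; the names here are local to this sketch, and
// `dim_desc` follows the patch's (shape, stride, suboffset) layout.
fn multi_index_offset(
    indices: &[isize],
    dim_desc: &[(usize, isize, isize)],
    start: usize,
) -> isize {
    let mut pos = start as isize;
    for (&index, &(_shape, stride, suboffset)) in indices.iter().zip(dim_desc) {
        pos += index * stride + suboffset;
    }
    pos
}
// e.g. a C-contiguous (3, 4) view with itemsize 2 and start 0 has strides
// (8, 2) and zero suboffsets, so index (1, 2) lands at 1*8 + 2*2 = byte 12.
// -----------------------------------------------------------------------------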